11 年之前 · 352da7186b
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -98,11 +98,14 @@ struct starpu_perfmodel_history_table;
 
																 #define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED
															
 
																/* Signature of a per-architecture cost callback: it receives the task, the
																 * architecture description and the implementation number, and returns a
																 * double. Used as the type of starpu_perfmodel_per_arch::cost_function. */
																+typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
															
 
																/* Signature of a per-architecture size callback: same arguments as the cost
																 * callback, but returns a size_t. Used as the type of
																 * starpu_perfmodel_per_arch::size_base. */
																+typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
															
 
																+
															
 
																 struct starpu_perfmodel_per_arch
															
 
																 {
															
 
																 	double (*cost_model)(struct starpu_data_descr *t) STARPU_DEPRECATED;
															
 
																-	double (*cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
															
 
																-	size_t (*size_base)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl);
															
 
																+	starpu_perfmodel_per_arch_cost_function cost_function;
															
 
																+	starpu_perfmodel_per_arch_size_base size_base;
															
 
																 	struct starpu_perfmodel_history_table *history;
															
 
																 	struct starpu_perfmodel_history_list *list;
															
@@ -132,13 +135,13 @@ struct starpu_perfmodel
 
																 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
															
 
																 	uint32_t (*footprint)(struct starpu_task *);
															
 
																-	struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
															
 
																-
															
 
																 	const char *symbol;
															
 
																-#ifdef STARPU_DEVEL
															
 
																-#warning move all the fields in a private structure
															
 
																-#endif
															
 
																+//#ifdef STARPU_DEVEL
															
 
																+//#warning move all the fields in a private structure. may be difficult as it is not mandatory to call starpu_perfmodel_init when using a perfmodel
															
 
																+//#endif
															
 
																+	struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
															
 
																+
															
 
																 	unsigned is_init;
															
 
																 	unsigned is_loaded;
															
 
																 	unsigned benchmarking;
															
@@ -162,6 +165,10 @@ int starpu_get_narch_combs();
 
																 int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
															
 
																 int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
															
 
																/* Return the per-arch entry of `model` for implementation `impl` and the
																 * device combination given in the variadic part. The variadic arguments are
																 * int triples (worker type, device id, number of cores) and the list ends
																 * with a single -1. */
																+struct starpu_perfmodel_per_arch *starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, ...);
															
 
																/* Store `func` as the cost_function of the per-arch entry selected by
																 * `impl` and the trailing (type, devid, ncores) triples ended by -1.
																 * Returns 0. */
																+int starpu_perfmodel_set_per_arch_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...);
															
 
																/* Store `func` as the size_base of the per-arch entry selected by `impl`
																 * and the trailing (type, devid, ncores) triples ended by -1.
																 * Returns 0. */
																+int starpu_perfmodel_set_per_arch_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...);
															
 
																+
															
 
																 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
															
 
																 char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
															
 
																 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -812,7 +812,6 @@ void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 
																 		free(model->nimpls);
															
 
																 		model->nimpls = NULL;
															
 
																-		model->nimpls = 0;
															
 
																 		free(model->combs);
															
 
																 		model->combs = NULL;
															
@@ -1439,3 +1438,102 @@ int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model)
 
																 	}
															
 
																 	return 0;
															
 
																 }
															
 
																+
															
 
																+struct starpu_perfmodel_per_arch *_starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, va_list varg_list)
															
 
																+{
															
 
																+	struct starpu_perfmodel_arch arch;
															
 
																+	va_list varg_list_copy;
															
 
																+	int i, arg_type;
															
 
																+	int is_cpu_set = 0;
															
 
																+
															
 
																+	// We first count the number of devices
															
 
																+	arch.ndevices = 0;
															
 
																+	va_copy(varg_list_copy, varg_list);
															
 
																+	while ((arg_type = va_arg(varg_list_copy, int)) != -1)
															
 
																+	{
															
 
																+		int devid = va_arg(varg_list_copy, int);
															
 
																+		int ncores = va_arg(varg_list_copy, int);
															
 
																+
															
 
																+		arch.ndevices ++;
															
 
																+		if (arg_type == STARPU_CPU_WORKER)
															
 
																+		{
															
 
																+			STARPU_ASSERT_MSG(is_cpu_set == 0, "STARPU_CPU_WORKER can only be specified once\n");
															
 
																+			STARPU_ASSERT_MSG(devid==0, "STARPU_CPU_WORKER must be followed by a value 0 for the device id");
															
 
																+			is_cpu_set = 1;
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			STARPU_ASSERT_MSG(ncores==1, "%s must be followed by a value 1 for ncores", starpu_worker_get_type_as_string(arg_type));
															
 
																+		}
															
 
																+	}
															
 
																+	va_end(varg_list_copy);
															
 
																+
															
 
																+	// We set the devices
															
 
																+	arch.devices = (struct starpu_perfmodel_device*)malloc(arch.ndevices * sizeof(struct starpu_perfmodel_device));
															
 
																+	va_copy(varg_list_copy, varg_list);
															
 
																+	for(i=0 ; i<arch.ndevices ; i++)
															
 
																+	{
															
 
																+		arch.devices[i].type = va_arg(varg_list_copy, int);
															
 
																+		arch.devices[i].devid = va_arg(varg_list_copy, int);
															
 
																+		arch.devices[i].ncores = va_arg(varg_list_copy, int);
															
 
																+	}
															
 
																+	va_end(varg_list_copy);
															
 
																+
															
 
																+	// Get the combination for this set of devices
															
 
																+	int comb = starpu_perfmodel_arch_comb_get(arch.ndevices, arch.devices);
															
 
																+	if (comb == -1)
															
 
																+		comb = starpu_perfmodel_arch_comb_add(arch.ndevices, arch.devices);
															
 
																+
															
 
																+	// Realloc if necessary
															
 
																+	if (comb >= model->ncombs_set)
															
 
																+		_starpu_perfmodel_realloc(model, comb+1);
															
 
																+
															
 
																+	// Get the per_arch object
															
 
																+	if (model->per_arch[comb] == NULL)
															
 
																+	{
															
 
																+		model->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc((impl+1) * sizeof(struct starpu_perfmodel_per_arch));
															
 
																+		model->nimpls[comb] = 0;
															
 
																+	}
															
 
																+	memset(&model->per_arch[comb][impl], 0, sizeof(struct starpu_perfmodel_per_arch));
															
 
																+	model->nimpls[comb] ++;
															
 
																+
															
 
																+	return &model->per_arch[comb][impl];
															
 
																+}
															
 
																+
															
 
																/**
																 * Variadic front-end of _starpu_perfmodel_get_per_arch().
																 *
																 * The arguments following IMPL are forwarded as a va_list; they are read as
																 * (int type, int devid, int ncores) triples ended by a single -1.
																 *
																 * Returns whatever _starpu_perfmodel_get_per_arch() returns for MODEL and
																 * IMPL.
																 */
																struct starpu_perfmodel_per_arch *starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, ...)
																{
																	va_list devices;

																	va_start(devices, impl);
																	struct starpu_perfmodel_per_arch *entry = _starpu_perfmodel_get_per_arch(model, impl, devices);
																	va_end(devices);

																	return entry;
																}
															
 
																+
															
 
																+int starpu_perfmodel_set_per_arch_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...)
															
 
																+{
															
 
																+	va_list varg_list;
															
 
																+	struct starpu_perfmodel_per_arch *per_arch;
															
 
																+
															
 
																+	va_start(varg_list, func);
															
 
																+	per_arch = _starpu_perfmodel_get_per_arch(model, impl, varg_list);
															
 
																+	per_arch->cost_function = func;
															
 
																+	va_end(varg_list);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+int starpu_perfmodel_set_per_arch_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...)
															
 
																+{
															
 
																+	va_list varg_list;
															
 
																+	struct starpu_perfmodel_per_arch *per_arch;
															
 
																+
															
 
																+	va_start(varg_list, func);
															
 
																+	per_arch = _starpu_perfmodel_get_per_arch(model, impl, varg_list);
															
 
																+	per_arch->size_base = func;
															
 
																+	va_end(varg_list);
															
 
																+
															
 
																+	return 0;
															
 
																+}