浏览代码

New functions so that users no longer have to directly access the per_arch fields of struct starpu_perfmodel

	struct starpu_perfmodel_per_arch *starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, ...);
	int starpu_perfmodel_set_per_arch_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...);
	int starpu_perfmodel_set_per_arch_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...);
Nathalie Furmento 10 年之前
父节点
当前提交
352da7186b
共有 2 个文件被更改,包括 113 次插入8 次删除
  1. 14 7
      include/starpu_perfmodel.h
  2. 99 1
      src/core/perfmodel/perfmodel_history.c

+ 14 - 7
include/starpu_perfmodel.h

@@ -98,11 +98,14 @@ struct starpu_perfmodel_history_table;
 
 
 #define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED
 #define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED
 
 
+typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); /* Type of the cost_function callback stored in struct starpu_perfmodel_per_arch: receives the task, the architecture and the implementation number, and returns a double (presumably the estimated cost; unit to be confirmed). */
+typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); /* Type of the size_base callback stored in struct starpu_perfmodel_per_arch: receives the task, the architecture and the implementation number, and returns a size_t. */
+
 struct starpu_perfmodel_per_arch
 struct starpu_perfmodel_per_arch
 {
 {
 	double (*cost_model)(struct starpu_data_descr *t) STARPU_DEPRECATED;
 	double (*cost_model)(struct starpu_data_descr *t) STARPU_DEPRECATED;
-	double (*cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
-	size_t (*size_base)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl);
+	starpu_perfmodel_per_arch_cost_function cost_function;
+	starpu_perfmodel_per_arch_size_base size_base;
 
 
 	struct starpu_perfmodel_history_table *history;
 	struct starpu_perfmodel_history_table *history;
 	struct starpu_perfmodel_history_list *list;
 	struct starpu_perfmodel_history_list *list;
@@ -132,13 +135,13 @@ struct starpu_perfmodel
 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
 	uint32_t (*footprint)(struct starpu_task *);
 	uint32_t (*footprint)(struct starpu_task *);
 
 
-	struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
-
 	const char *symbol;
 	const char *symbol;
 
 
-#ifdef STARPU_DEVEL
-#warning move all the fields in a private structure
-#endif
+//#ifdef STARPU_DEVEL
+//#warning move all the fields in a private structure. may be difficult as it is not mandatory to call starpu_perfmodel_init when using a perfmodel
+//#endif
+	struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
+
 	unsigned is_init;
 	unsigned is_init;
 	unsigned is_loaded;
 	unsigned is_loaded;
 	unsigned benchmarking;
 	unsigned benchmarking;
@@ -162,6 +165,10 @@ int starpu_get_narch_combs();
 int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
 int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
 int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
 int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
 
 
+struct starpu_perfmodel_per_arch *starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, ...); /* Returns the per_arch entry of model for implementation impl; the variadic part is a list of (worker type, device id, ncores) int triples terminated by -1. */
+int starpu_perfmodel_set_per_arch_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...); /* Stores func as the cost_function of the per_arch entry selected by impl and the variadic (worker type, device id, ncores) triples terminated by -1; returns 0. */
+int starpu_perfmodel_set_per_arch_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...); /* Stores func as the size_base of the per_arch entry selected by impl and the variadic (worker type, device id, ncores) triples terminated by -1; returns 0. */
+
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
 char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
 char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);

+ 99 - 1
src/core/perfmodel/perfmodel_history.c

@@ -812,7 +812,6 @@ void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 
 
 		free(model->nimpls);
 		free(model->nimpls);
 		model->nimpls = NULL;
 		model->nimpls = NULL;
-		model->nimpls = 0;
 
 
 		free(model->combs);
 		free(model->combs);
 		model->combs = NULL;
 		model->combs = NULL;
@@ -1439,3 +1438,102 @@ int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model)
 	}
 	}
 	return 0;
 	return 0;
 }
 }
+
+/*
+ * Return the per-architecture performance model entry of MODEL for
+ * implementation IMPL on the device combination described by VARG_LIST.
+ *
+ * VARG_LIST holds a sequence of (worker type, device id, number of cores)
+ * int triples, terminated by a single -1 in the worker-type position.
+ * STARPU_CPU_WORKER may appear at most once and must use device id 0; every
+ * other worker type must use ncores == 1. IMPL must not be negative.
+ *
+ * The combination is looked up with starpu_perfmodel_arch_comb_get() and
+ * registered with starpu_perfmodel_arch_comb_add() when it is not known yet.
+ * The per_arch array of that combination is created or grown on demand so
+ * that slot IMPL exists. New slots are zero-filled; slots that already exist
+ * are left untouched, so that setting the cost function and then the size
+ * base of the same entry does not erase the first one.
+ *
+ * VARG_LIST itself is not consumed: only copies of it are traversed.
+ */
+struct starpu_perfmodel_per_arch *_starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, va_list varg_list)
+{
+	struct starpu_perfmodel_arch arch;
+	va_list varg_list_copy;
+	int i, arg_type;
+	int is_cpu_set = 0;
+
+	STARPU_ASSERT_MSG(impl >= 0, "The implementation number must not be negative");
+
+	// We first count the number of devices
+	arch.ndevices = 0;
+	va_copy(varg_list_copy, varg_list);
+	while ((arg_type = va_arg(varg_list_copy, int)) != -1)
+	{
+		int devid = va_arg(varg_list_copy, int);
+		int ncores = va_arg(varg_list_copy, int);
+
+		arch.ndevices ++;
+		if (arg_type == STARPU_CPU_WORKER)
+		{
+			STARPU_ASSERT_MSG(is_cpu_set == 0, "STARPU_CPU_WORKER can only be specified once\n");
+			STARPU_ASSERT_MSG(devid==0, "STARPU_CPU_WORKER must be followed by a value 0 for the device id");
+			is_cpu_set = 1;
+		}
+		else
+		{
+			STARPU_ASSERT_MSG(ncores==1, "%s must be followed by a value 1 for ncores", starpu_worker_get_type_as_string(arg_type));
+		}
+	}
+	va_end(varg_list_copy);
+
+	// We set the devices
+	arch.devices = (struct starpu_perfmodel_device*)malloc(arch.ndevices * sizeof(struct starpu_perfmodel_device));
+	// malloc(0) is allowed to return NULL, hence the ndevices test
+	STARPU_ASSERT_MSG(arch.ndevices == 0 || arch.devices != NULL, "Cannot allocate the list of devices");
+	va_copy(varg_list_copy, varg_list);
+	for(i=0 ; i<arch.ndevices ; i++)
+	{
+		arch.devices[i].type = va_arg(varg_list_copy, int);
+		arch.devices[i].devid = va_arg(varg_list_copy, int);
+		arch.devices[i].ncores = va_arg(varg_list_copy, int);
+	}
+	va_end(varg_list_copy);
+
+	// Get the combination for this set of devices
+	int comb = starpu_perfmodel_arch_comb_get(arch.ndevices, arch.devices);
+	if (comb == -1)
+		comb = starpu_perfmodel_arch_comb_add(arch.ndevices, arch.devices);
+	// NOTE(review): arch.devices is never released in this function. If
+	// starpu_perfmodel_arch_comb_add() keeps its own copy of the array, a
+	// free(arch.devices) belongs here -- confirm before adding it.
+
+	// Realloc if necessary
+	if (comb >= model->ncombs_set)
+		_starpu_perfmodel_realloc(model, comb+1);
+
+	// Get the per_arch object, growing the array when slot impl does not exist yet.
+	// NOTE(review): nimpls[comb] is used here as the number of slots of
+	// per_arch[comb] -- confirm this matches the other users of nimpls.
+	if (model->per_arch[comb] == NULL)
+		model->nimpls[comb] = 0;
+	if (impl >= model->nimpls[comb])
+	{
+		int old_nimpls = model->nimpls[comb];
+		struct starpu_perfmodel_per_arch *grown;
+
+		// Keep the old pointer until realloc() is known to have succeeded
+		grown = (struct starpu_perfmodel_per_arch*)realloc(model->per_arch[comb], ((size_t)impl+1) * sizeof(struct starpu_perfmodel_per_arch));
+		STARPU_ASSERT_MSG(grown != NULL, "Cannot allocate the per_arch entries");
+
+		// Only the freshly added slots are cleared, existing ones keep their content
+		memset(&grown[old_nimpls], 0, ((size_t)impl+1-(size_t)old_nimpls) * sizeof(struct starpu_perfmodel_per_arch));
+		model->per_arch[comb] = grown;
+		model->nimpls[comb] = impl+1;
+	}
+
+	return &model->per_arch[comb][impl];
+}
+
+/*
+ * Public variadic front-end of _starpu_perfmodel_get_per_arch().
+ *
+ * The arguments following IMPL are (worker type, device id, ncores) int
+ * triples terminated by -1. Returns the address of the per_arch entry of
+ * MODEL selected by IMPL and that list of devices.
+ */
+struct starpu_perfmodel_per_arch *starpu_perfmodel_get_per_arch(struct starpu_perfmodel *model, int impl, ...)
+{
+	va_list devices;
+
+	va_start(devices, impl);
+	struct starpu_perfmodel_per_arch *entry = _starpu_perfmodel_get_per_arch(model, impl, devices);
+	va_end(devices);
+
+	return entry;
+}
+
+/*
+ * Store FUNC as the cost_function of a per_arch entry of MODEL.
+ *
+ * The entry is obtained from _starpu_perfmodel_get_per_arch() with IMPL and
+ * the arguments following FUNC, which are (worker type, device id, ncores)
+ * int triples terminated by -1. Always returns 0.
+ */
+int starpu_perfmodel_set_per_arch_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...)
+{
+	va_list devices;
+
+	va_start(devices, func);
+	struct starpu_perfmodel_per_arch *entry = _starpu_perfmodel_get_per_arch(model, impl, devices);
+	va_end(devices);
+
+	entry->cost_function = func;
+	return 0;
+}
+
+/*
+ * Store FUNC as the size_base of a per_arch entry of MODEL.
+ *
+ * The entry is obtained from _starpu_perfmodel_get_per_arch() with IMPL and
+ * the arguments following FUNC, which are (worker type, device id, ncores)
+ * int triples terminated by -1. Always returns 0.
+ */
+int starpu_perfmodel_set_per_arch_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...)
+{
+	va_list devices;
+
+	va_start(devices, func);
+	struct starpu_perfmodel_per_arch *entry = _starpu_perfmodel_get_per_arch(model, impl, devices);
+	va_end(devices);
+
+	entry->size_base = func;
+	return 0;
+}