9 anni fa · 3fdb99a725
--- a/ChangeLog
+++ b/ChangeLog
@@ -197,6 +197,7 @@ Small features:
 
				   * Add STARPU_CL_ARGS flag to starpu_task_insert() and
			
 
				     starpu_mpi_task_insert() functions call
			
 
				   * Add starpu_fxt_autostart_profiling to be able to avoid autostart.
			
 
				+  * Add arch_cost_function perfmodel function field.
			
 
				 
			
 
				 Changes:
			
 
				   * Data interfaces (variable, vector, matrix and block) now define
			
--- a/doc/doxygen/chapters/12online_performance_tools.doxy
+++ b/doc/doxygen/chapters/12online_performance_tools.doxy
@@ -405,9 +405,11 @@ see for instance
 
				 
			
 
				 <li>
			
 
				 Provided explicitly by the application (model type ::STARPU_PER_ARCH):
			
 
				+either field starpu_perfmodel::arch_cost_function, or
			
 
				 the fields <c>.per_arch[arch][nimpl].cost_function</c> have to be
			
 
				 filled with pointers to functions which return the expected duration
			
 
				-of the task in micro-seconds, one per architecture.
			
 
				+of the task in micro-seconds, one per architecture, see for instance
			
 
				+<c>tests/datawizard/locality.c</c>.
			
 
				 </li>
			
 
				 </ul>
			
 
				 
			
--- a/doc/doxygen/chapters/api/performance_model.doxy
+++ b/doc/doxygen/chapters/api/performance_model.doxy
@@ -81,7 +81,9 @@ is the type of performance model
 
				 ::STARPU_NL_REGRESSION_BASED: No other field needs to be provided,
			
 
				 this is purely history-based.
			
 
				 </li>
			
 
				-<li> ::STARPU_PER_ARCH: field starpu_perfmodel::per_arch has to be
			
 
				+<li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
			
 
				+filled with a function that returns the cost in micro-seconds on the arch given
			
 
				+as parameter, or field starpu_perfmodel::per_arch has to be
			
 
				 filled with functions which return the cost in micro-seconds.
			
 
				 </li>
			
 
				 <li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
			
@@ -97,6 +99,9 @@ be ignored.
 
				 \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
			
 
				 Used by ::STARPU_COMMON: takes a task and implementation number, and
			
 
				 must return a task duration estimation in micro-seconds.
			
 
				+\var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
			
 
				+Used by ::STARPU_PER_ARCH: takes a task, an arch and an implementation number, and
			
 
				+must return a task duration estimation in micro-seconds on that arch.
			
 
				 \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
			
 
				 Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
 
				 ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2014, 2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
@@ -127,6 +127,7 @@ struct starpu_perfmodel
 
				 	enum starpu_perfmodel_type type;
			
 
				 
			
 
				 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
			
 
				+	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
			
 
				 
			
 
				 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
			
 
				 	uint32_t (*footprint)(struct starpu_task *);
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -84,6 +84,9 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct
 
				 	int comb;
			
 
				 	double (*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				 
			
 
				+	if (model->arch_cost_function)
			
 
				+		return model->arch_cost_function(task, arch, nimpl);
			
 
				+
			
 
				 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	STARPU_ASSERT_MSG(comb != -1, "Didn't find the proper arch combination\n");
			
 
				 	STARPU_ASSERT_MSG(model->state->per_arch[comb] != NULL, "STARPU_PER_ARCH needs per-arch cost_function to be defined");
			
--- a/tests/datawizard/locality.c
+++ b/tests/datawizard/locality.c
@@ -40,16 +40,42 @@ unsigned worker_ntask[STARPU_NMAXWORKERS];
 
				 void cpu_f(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *_args)
			
 
				 {
			
 
				 	unsigned i, loop, worker = starpu_worker_get_id();
			
 
				+	enum starpu_worker_archtype worker_type = starpu_worker_get_type(worker);
			
 
				+
			
 
				 	starpu_codelet_unpack_args(_args, &loop, &i);
			
 
				 	task_worker[i][loop] = worker;
			
 
				 	worker_task[worker][worker_ntask[worker]++] = i;
			
 
				-	starpu_sleep(0.001);
			
 
				+	if (worker_type == STARPU_CPU_WORKER)
			
 
				+		starpu_sleep(0.001);
			
 
				+	else
			
 
				+		starpu_sleep(0.0001);
			
 
				 }
			
 
				 
			
 
				+double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i)
			
 
				+{
			
 
				+	(void) t; (void) i;
			
 
				+	STARPU_ASSERT(a->ndevices == 1);
			
 
				+	if (a->devices[0].type == STARPU_CPU_WORKER)
			
 
				+	{
			
 
				+		STARPU_ASSERT(a->devices[0].ncores == 1);
			
 
				+		return 0.001;
			
 
				+	}
			
 
				+	else
			
 
				+		return 0.0001;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_perfmodel perf_model =
			
 
				+{
			
 
				+	.type = STARPU_PER_ARCH,
			
 
				+	.arch_cost_function = cost_function,
			
 
				+};
			
 
				+
			
 
				 static struct starpu_codelet cl =
			
 
				 {
			
 
				 	.cpu_funcs = { cpu_f },
			
 
				 	.cpu_funcs_name = { "cpu_f" },
			
 
				+	.cuda_funcs = { cpu_f },
			
 
				+	.opencl_funcs = { cpu_f },
			
 
				 	.nbuffers = 4,
			
 
				 	.modes =
			
 
				 	{
			
@@ -59,6 +85,7 @@ static struct starpu_codelet cl =
 
				 		STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY,
			
 
				 	},
			
 
				 	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
			
 
				+	.model = &perf_model,
			
 
				 };
			
 
				 
			
 
				 int main(int argc, char *argv[])