Browse Source

Add support for arch-specific but easy-to-use cost function

Samuel Thibault 9 years ago
parent
commit
3fdb99a725

+ 1 - 0
ChangeLog

@@ -197,6 +197,7 @@ Small features:
   * Add STARPU_CL_ARGS flag to starpu_task_insert() and
     starpu_mpi_task_insert() functions call
   * Add starpu_fxt_autostart_profiling to be able to avoid autostart.
+  * Add arch_cost_function perfmodel function field.
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define

+ 3 - 1
doc/doxygen/chapters/12online_performance_tools.doxy

@@ -405,9 +405,11 @@ see for instance
 
 <li>
 Provided explicitly by the application (model type ::STARPU_PER_ARCH):
+either field starpu_perfmodel::arch_cost_function, or
 the fields <c>.per_arch[arch][nimpl].cost_function</c> have to be
 filled with pointers to functions which return the expected duration
-of the task in micro-seconds, one per architecture.
+of the task in micro-seconds, one per architecture; see for instance
+<c>tests/datawizard/locality.c</c>.
 </li>
 </ul>
 

+ 6 - 1
doc/doxygen/chapters/api/performance_model.doxy

@@ -81,7 +81,9 @@ is the type of performance model
 ::STARPU_NL_REGRESSION_BASED: No other fields need to be provided,
 this is purely history-based.
 </li>
-<li> ::STARPU_PER_ARCH: field starpu_perfmodel::per_arch has to be
+<li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
+filled with a function that returns the cost in micro-seconds on the arch given
+as parameter, or field starpu_perfmodel::per_arch has to be
 filled with functions which return the cost in micro-seconds.
 </li>
 <li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
@@ -97,6 +99,9 @@ be ignored.
 \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
 Used by ::STARPU_COMMON: takes a task and implementation number, and
 must return a task duration estimation in micro-seconds.
+\var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
+Used by ::STARPU_PER_ARCH: takes a task, an arch and an implementation number, and
+must return a task duration estimation in micro-seconds on that arch.
 \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
 Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
 ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and

+ 2 - 1
include/starpu_perfmodel.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2014  Université de Bordeaux
+ * Copyright (C) 2010-2014, 2016  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  *
@@ -127,6 +127,7 @@ struct starpu_perfmodel
 	enum starpu_perfmodel_type type;
 
 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
+	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
 
 	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
 	uint32_t (*footprint)(struct starpu_task *);

+ 3 - 0
src/core/perfmodel/perfmodel.c

@@ -84,6 +84,9 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct
 	int comb;
 	double (*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
 
+	if (model->arch_cost_function)
+		return model->arch_cost_function(task, arch, nimpl);
+
 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	STARPU_ASSERT_MSG(comb != -1, "Didn't find the proper arch combination\n");
 	STARPU_ASSERT_MSG(model->state->per_arch[comb] != NULL, "STARPU_PER_ARCH needs per-arch cost_function to be defined");

+ 28 - 1
tests/datawizard/locality.c

@@ -40,16 +40,42 @@ unsigned worker_ntask[STARPU_NMAXWORKERS];
 void cpu_f(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *_args)
 {
 	unsigned i, loop, worker = starpu_worker_get_id();
+	enum starpu_worker_archtype worker_type = starpu_worker_get_type(worker);
+
 	starpu_codelet_unpack_args(_args, &loop, &i);
 	task_worker[i][loop] = worker;
 	worker_task[worker][worker_ntask[worker]++] = i;
-	starpu_sleep(0.001);
+	if (worker_type == STARPU_CPU_WORKER)
+		starpu_sleep(0.001);
+	else
+		starpu_sleep(0.0001);
 }
 
+double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i)
+{
+	(void) t; (void) i;
+	STARPU_ASSERT(a->ndevices == 1);
+	if (a->devices[0].type == STARPU_CPU_WORKER)
+	{
+		STARPU_ASSERT(a->devices[0].ncores == 1);
+		return 0.001;
+	}
+	else
+		return 0.0001;
+}
+
+static struct starpu_perfmodel perf_model =
+{
+	.type = STARPU_PER_ARCH,
+	.arch_cost_function = cost_function,
+};
+
 static struct starpu_codelet cl =
 {
 	.cpu_funcs = { cpu_f },
 	.cpu_funcs_name = { "cpu_f" },
+	.cuda_funcs = { cpu_f },
+	.opencl_funcs = { cpu_f },
 	.nbuffers = 4,
 	.modes =
 	{
@@ -59,6 +85,7 @@ static struct starpu_codelet cl =
 		STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY,
 	},
 	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
+	.model = &perf_model,
 };
 
 int main(int argc, char *argv[])