6 years ago · ba99542d83
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,7 @@ New features:
 
																   * New number_events.data trace file which monitors number of events in trace
															
 
																     files. This file can be parsed by the new script
															
 
																     starpu_fxt_number_events_to_names.py to convert event keys to event names.
															
 
																+  * New STARPU_PER_WORKER perfmodel.
															
 
																 Small changes:
															
 
																   * Use the S4U interface of Simgrid instead of xbt and MSG.
															
--- a/doc/doxygen/chapters/320_scheduling.doxy
+++ b/doc/doxygen/chapters/320_scheduling.doxy
@@ -190,9 +190,10 @@ single task gives the consumption of the task in Joules, which can be given to
 
																 starpu_perfmodel_update_history().
															
 
																 Another way to provide the energy performance is to define a
															
 
																-perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH, and set the
															
 
																-starpu_perfmodel::arch_cost_function field to a function which shall return the
															
 
																-estimated consumption of the task in Joules. Such a function can for instance
															
 
																+perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH or
															
 
																+::STARPU_PER_WORKER, and set the starpu_perfmodel::arch_cost_function or
															
 
																+starpu_perfmodel::worker_cost_function field to a function which shall return
															
 
																+the estimated consumption of the task in Joules. Such a function can for instance
															
 
																 use starpu_task_expected_length() on the task (in µs), multiplied by the
															
 
																 typical power consumption of the device, e.g. in W, and divided by 1000000. to
															
 
																 get Joules.
															
--- a/doc/doxygen/chapters/350_scheduling_policy_definition.doxy
+++ b/doc/doxygen/chapters/350_scheduling_policy_definition.doxy
@@ -45,7 +45,8 @@ provides a complete list of the functions available for writing advanced schedul
 
																 This includes getting an estimation for a task computation completion with
															
 
																 starpu_task_expected_length(), for the required data transfers with
															
 
																 starpu_task_expected_data_transfer_time_for(), for the required energy with
															
 
																-starpu_task_expected_energy(), etc. Other
															
 
																+starpu_task_expected_energy(), etc. Per-worker variants are also available with
															
 
																+starpu_task_worker_expected_length(), etc. Other
															
 
																 useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(),
															
 
																 starpu_transfer_predict(), ...
															
 
																 One can also directly test the presence of a data handle with starpu_data_is_on_node().
															
--- a/doc/doxygen/chapters/370_online_performance_tools.doxy
+++ b/doc/doxygen/chapters/370_online_performance_tools.doxy
@@ -401,6 +401,11 @@ filled with pointers to functions which return the expected duration
 
																 of the task in micro-seconds, one per architecture, see for instance
															
 
																 <c>tests/datawizard/locality.c</c>
															
 
																 </li>
															
 
																+
															
 
																+<li>
															
 
																+Provided explicitly by the application (model type ::STARPU_PER_WORKER)
															
 
																+similarly, through the starpu_perfmodel::worker_cost_function field.
															
 
																+</li>
															
 
																 </ul>
															
 
																 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and
															
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -165,6 +165,7 @@ struct starpu_perfmodel_per_arch
 
																 enum starpu_perfmodel_type
															
 
																 {
															
 
																         STARPU_PERFMODEL_INVALID=0,
															
 
																+	STARPU_PER_WORKER,                /**< Application-provided per-worker cost model function */
															
 
																 	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
															
 
																 	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
															
 
																 	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
															
@@ -226,11 +227,17 @@ struct starpu_perfmodel
 
																 	*/
															
 
																 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
															
 
																 	/**
															
 
																-	   Used by ::STARPU_COMMON. Take a task, an arch and implementation
															
 
																+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
															
 
																 	   number, and must return a task duration estimation in
															
 
																 	   micro-seconds on that arch.
															
 
																 	*/
															
 
																 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
															
 
																+	/**
															
 
																+	   Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation
															
 
																+	   number, and must return a task duration estimation in
															
 
																+	   micro-seconds on that worker.
															
 
																+	*/
															
 
																+	double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl);
															
 
																 	/**
															
 
																 	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
															
--- a/include/starpu_scheduler.h
+++ b/include/starpu_scheduler.h
@@ -110,6 +110,10 @@ struct starpu_sched_policy
 
																 	   to be executed by the worker. This method therefore permits
															
 
																 	   to keep the state of the scheduler coherent even when
															
 
																 	   StarPU bypasses the scheduling strategy.
															
 
																+
															
 
																+	   Note: to get an estimation of the task duration, \p perf_workerid
															
 
																+	   needs to be used rather than \p workerid, for the case of parallel
															
 
																+	   tasks.
															
 
																 	*/
															
 
																 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
															
@@ -366,6 +370,11 @@ uint32_t starpu_task_data_footprint(struct starpu_task *task);
 
																 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
															
 
																 /**
															
 
																+   Same as starpu_task_expected_length() but for a specific worker.
															
 
																+*/
															
 
																+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
															
 
																+
															
 
																+/**
															
 
																    Return an estimated speedup factor relative to CPU speed
															
 
																 */
															
 
																 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
															
@@ -395,6 +404,11 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 
																 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
															
 
																 /**
															
 
																+   Same as starpu_task_expected_energy() but for a specific worker.
															
 
																+*/
															
 
																+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
															
 
																+
															
 
																+/**
															
 
																    Return expected conversion time in ms (multiformat interface only)
															
 
																 */
															
 
																 double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
															
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -81,6 +81,20 @@ struct starpu_perfmodel_arch* starpu_worker_get_perf_archtype(int workerid, unsi
 
																 }
															
 
																 /*
															
 
																+ * PER WORKER model
															
 
																+ */
															
 
																+
															
 
																+static double per_worker_task_expected_perf(struct starpu_perfmodel *model, unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																+{
															
 
																+	double (*worker_cost_function)(struct starpu_task *task, unsigned workerid, unsigned nimpl);
															
 
																+	/* Evaluate the application-provided per-worker cost callback of the model for this task; the callback is mandatory. */
															
 
																+	worker_cost_function = model->worker_cost_function;
															
 
																+	STARPU_ASSERT_MSG(worker_cost_function, "STARPU_PER_WORKER needs worker_cost_function to be defined");
															
 
																+
															
 
																+	return worker_cost_function(task, workerid, nimpl); /* the callback's estimate is returned unchanged */
															
 
																+}
															
 
																+
															
 
																+/*
															
 
																  * PER ARCH model
															
 
																  */
															
@@ -156,6 +170,7 @@ void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model)
 
																 	switch (model->type)
															
 
																 	{
															
 
																+		case STARPU_PER_WORKER:
															
 
																 		case STARPU_PER_ARCH:
															
 
																 		case STARPU_COMMON:
															
 
																 			/* Nothing more to do than init */
															
@@ -220,6 +235,20 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
																 	return exp_perf;
															
 
																 }
															
 
																+static double starpu_model_worker_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl)
															
 
																+{
															
 
																+	if (!model)
															
 
																+		return 0.0; /* no model attached: report a zero estimate */
															
 
																+	/* Per-worker models are queried with the worker id; every other model type goes through the worker's perf arch. */
															
 
																+	if (model->type == STARPU_PER_WORKER)
															
 
																+		return per_worker_task_expected_perf(model, workerid, task, nimpl);
															
 
																+	else
															
 
																+	{
															
 
																+		struct starpu_perfmodel_arch *per_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id);
															
 
																+		return starpu_model_expected_perf(task, model, per_arch, nimpl); /* fall back to the arch-based estimation */
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
															
 
																 {
															
 
																 	if (!task->cl)
															
@@ -228,6 +257,14 @@ double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfm
 
																 	return starpu_model_expected_perf(task, task->cl->model, arch, nimpl);
															
 
																 }
															
 
																+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl)
															
 
																+{
															
 
																+	if (!task->cl)
															
 
																+		/* Tasks without codelet don't actually take time */
															
 
																+		return 0.0;
															
 
																+	return starpu_model_worker_expected_perf(task, task->cl->model, workerid, sched_ctx_id, nimpl); /* estimate from the codelet's duration model */
															
 
																+}
															
 
																+
															
 
																 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
															
 
																 {
															
 
																 	if (!task->cl)
															
@@ -236,6 +273,15 @@ double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfm
 
																 	return starpu_model_expected_perf(task, task->cl->energy_model, arch, nimpl);
															
 
																 }
															
 
																+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl)
															
 
																+{
															
 
																+	if (!task->cl)
															
 
																+		/* Tasks without codelet don't actually consume energy */
															
 
																+		return 0.0;
															
 
																+	return starpu_model_worker_expected_perf(task, task->cl->energy_model, workerid, sched_ctx_id, nimpl); /* estimate from the codelet's energy model */
															
 
																+
															
 
																+}
															
 
																+
															
 
																 double starpu_task_expected_conversion_time(struct starpu_task *task,
															
 
																 					    struct starpu_perfmodel_arch* arch,
															
 
																 					    unsigned nimpl)
															
--- a/src/sched_policies/component_sched.c
+++ b/src/sched_policies/component_sched.c
@@ -49,7 +49,6 @@ int starpu_sched_component_execute_preds(struct starpu_sched_component * compone
 
																 	    workerid != -1;
															
 
																 	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
															
 
																 	{
															
 
																-		struct starpu_perfmodel_arch* archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
															
 
																 		int nimpl;
															
 
																 		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																 		{
															
@@ -59,9 +58,13 @@ int starpu_sched_component_execute_preds(struct starpu_sched_component * compone
 
																 				double d;
															
 
																 				can_execute = 1;
															
 
																 				if(bundle)
															
 
																+				{
															
 
																+					struct starpu_perfmodel_arch* archtype =
															
 
																+						starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
															
 
																 					d = starpu_task_bundle_expected_length(bundle, archtype, nimpl);
															
 
																+				}
															
 
																 				else
															
 
																-					d = starpu_task_expected_length(task, archtype, nimpl);
															
 
																+					d = starpu_task_worker_expected_length(task, workerid, component->tree->sched_ctx_id, nimpl);
															
 
																 				if(isnan(d))
															
 
																 				{
															
 
																 					*length = d;
															
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -487,7 +487,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
																 			}
															
 
																 			double exp_end;
															
 
																-			double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
															
 
																+			double local_length = starpu_task_worker_expected_length(task, worker, sched_ctx_id, nimpl);
															
 
																 			double local_penalty = starpu_task_expected_data_transfer_time_for(task, worker);
															
 
																 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
															
@@ -679,9 +679,9 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 			}
															
 
																 			else
															
 
																 			{
															
 
																-				local_task_length[worker_ctx][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
															
 
																+				local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl);
															
 
																 				local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid);
															
 
																-				local_energy[worker_ctx][nimpl] = starpu_task_expected_energy(task, perf_arch,nimpl);
															
 
																+				local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl);
															
 
																 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
															
 
																 				if (conversion_time > 0.0)
															
 
																 					local_task_length[worker_ctx][nimpl] += conversion_time;
															
@@ -1100,10 +1100,9 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 
																 {
															
 
																 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
 
																 	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
															
 
																-	/* Compute the expected penality */
															
 
																-	struct starpu_perfmodel_arch *perf_arch = starpu_worker_get_perf_archtype(perf_workerid, sched_ctx_id);
															
 
																-	double predicted = starpu_task_expected_length(task, perf_arch,
															
 
																+	/* Compute the expected penalty */
															
 
																+	double predicted = starpu_task_worker_expected_length(task, perf_workerid, STARPU_NMAX_SCHED_CTXS,
															
 
																 						       starpu_task_get_implementation(task));
															
 
																 	double predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);