лет назад: 5 · ba99542d83
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,7 @@ New features:
 
				   * New number_events.data trace file which monitors number of events in trace
			
 
				     files. This file can be parsed by the new script
			
 
				     starpu_fxt_number_events_to_names.py to convert event keys to event names.
			
 
				+  * New STARPU_PER_WORKER perfmodel.
			
 
				 
			
 
				 Small changes:
			
 
				   * Use the S4U interface of Simgrid instead of xbt and MSG.
			
--- a/doc/doxygen/chapters/320_scheduling.doxy
+++ b/doc/doxygen/chapters/320_scheduling.doxy
@@ -190,9 +190,10 @@ single task gives the consumption of the task in Joules, which can be given to
 
				 starpu_perfmodel_update_history().
			
 
				 
			
 
				 Another way to provide the energy performance is to define a
			
 
				-perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH, and set the
			
 
				-starpu_perfmodel::arch_cost_function field to a function which shall return the
			
 
				-estimated consumption of the task in Joules. Such a function can for instance
			
 
				+perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH or
			
 
				+::STARPU_PER_WORKER , and set the starpu_perfmodel::arch_cost_function or
			
 
				+starpu_perfmodel::worker_cost_function field to a function which shall return
			
 
				+the estimated consumption of the task in Joules. Such a function can for instance
			
 
				 use starpu_task_expected_length() on the task (in µs), multiplied by the
			
 
				 typical power consumption of the device, e.g. in W, and divided by 1000000. to
			
 
				 get Joules.
			
--- a/doc/doxygen/chapters/350_scheduling_policy_definition.doxy
+++ b/doc/doxygen/chapters/350_scheduling_policy_definition.doxy
@@ -45,7 +45,8 @@ provides a complete list of the functions available for writing advanced schedul
 
				 This includes getting an estimation for a task computation completion with
			
 
				 starpu_task_expected_length(), for the required data transfers with
			
 
				 starpu_task_expected_data_transfer_time_for(), for the required energy with
			
 
				-starpu_task_expected_energy(), etc. Other
			
 
				+starpu_task_expected_energy(), etc. Per-worker variants are also available with
			
 
				+starpu_task_worker_expected_length(), etc. Other
			
 
				 useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(),
			
 
				 starpu_transfer_predict(), ...
			
 
				 One can also directly test the presence of a data handle with starpu_data_is_on_node().
			
--- a/doc/doxygen/chapters/370_online_performance_tools.doxy
+++ b/doc/doxygen/chapters/370_online_performance_tools.doxy
@@ -401,6 +401,11 @@ filled with pointers to functions which return the expected duration
 
				 of the task in micro-seconds, one per architecture, see for instance
			
 
				 <c>tests/datawizard/locality.c</c>
			
 
				 </li>
			
 
				+
			
 
				+<li>
			
 
				+Provided explicitly by the application (model type ::STARPU_PER_WORKER)
			
 
				+similarly with the starpu_perfmodel::worker_cost_function field.
			
 
				+</li>
			
 
				 </ul>
			
 
				 
			
 
				 For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -165,6 +165,7 @@ struct starpu_perfmodel_per_arch
 
				 enum starpu_perfmodel_type
			
 
				 {
			
 
				         STARPU_PERFMODEL_INVALID=0,
			
 
				+	STARPU_PER_WORKER,                /**< Application-provided per-worker cost model function */
			
 
				 	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
			
 
				 	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
			
 
				 	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
			
@@ -226,11 +227,17 @@ struct starpu_perfmodel
 
				 	*/
			
 
				 	double (*cost_function)(struct starpu_task *, unsigned nimpl);
			
 
				 	/**
			
 
				-	   Used by ::STARPU_COMMON. Take a task, an arch and implementation
			
 
				+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
			
 
				 	   number, and must return a task duration estimation in
			
 
				 	   micro-seconds on that arch.
			
 
				 	*/
			
 
				 	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
			
 
				+	/**
			
 
				+	   Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation
			
 
				+	   number, and must return a task duration estimation in
			
 
				+	   micro-seconds on that worker.
			
 
				+	*/
			
 
				+	double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl);
			
 
				 
			
 
				 	/**
			
 
				 	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
--- a/include/starpu_scheduler.h
+++ b/include/starpu_scheduler.h
@@ -110,6 +110,10 @@ struct starpu_sched_policy
 
				 	   to be executed by the worker. This method therefore permits
			
 
				 	   to keep the state of the scheduler coherent even when
			
 
				 	   StarPU bypasses the scheduling strategy.
			
 
				+
			
 
				+	   Note: to get an estimation of the task duration, \p perf_workerid
			
 
				+	   needs to be used rather than \p workerid, for the case of parallel
			
 
				+	   tasks.
			
 
				 	*/
			
 
				 	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
			
 
				 
			
@@ -366,6 +370,11 @@ uint32_t starpu_task_data_footprint(struct starpu_task *task);
 
				 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				 
			
 
				 /**
			
 
				+   Same as starpu_task_expected_length() but for a precise worker.
			
 
				+*/
			
 
				+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				    Return an estimated speedup factor relative to CPU speed
			
 
				 */
			
 
				 double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
			
@@ -395,6 +404,11 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 
				 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				 
			
 
				 /**
			
 
				+   Same as starpu_task_expected_energy but for a precise worker
			
 
				+*/
			
 
				+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				    Return expected conversion time in ms (multiformat interface only)
			
 
				 */
			
 
				 double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -81,6 +81,20 @@ struct starpu_perfmodel_arch* starpu_worker_get_perf_archtype(int workerid, unsi
 
				 }
			
 
				 
			
 
				 /*
			
 
				+ * PER WORKER model
			
 
				+ */
			
 
				+
			
 
				+static double per_worker_task_expected_perf(struct starpu_perfmodel *model, unsigned workerid, struct starpu_task *task, unsigned nimpl)
			
 
				+{
			
 
				+	double (*worker_cost_function)(struct starpu_task *task, unsigned workerid, unsigned nimpl);
			
 
				+
			
 
				+	worker_cost_function = model->worker_cost_function;
			
 
				+	STARPU_ASSERT_MSG(worker_cost_function, "STARPU_PER_WORKER needs worker_cost_function to be defined");
			
 
				+
			
 
				+	return worker_cost_function(task, workerid, nimpl);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				  * PER ARCH model
			
 
				  */
			
 
				 
			
@@ -156,6 +170,7 @@ void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model)
 
				 
			
 
				 	switch (model->type)
			
 
				 	{
			
 
				+		case STARPU_PER_WORKER:
			
 
				 		case STARPU_PER_ARCH:
			
 
				 		case STARPU_COMMON:
			
 
				 			/* Nothing more to do than init */
			
@@ -220,6 +235,20 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
				 	return exp_perf;
			
 
				 }
			
 
				 
			
 
				+static double starpu_model_worker_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl)
			
 
				+{
			
 
				+	if (!model)
			
 
				+		return 0.0;
			
 
				+
			
 
				+	if (model->type == STARPU_PER_WORKER)
			
 
				+		return per_worker_task_expected_perf(model, workerid, task, nimpl);
			
 
				+	else
			
 
				+	{
			
 
				+		struct starpu_perfmodel_arch *per_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id);
			
 
				+		return starpu_model_expected_perf(task, model, per_arch, nimpl);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
			
 
				 {
			
 
				 	if (!task->cl)
			
@@ -228,6 +257,14 @@ double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfm
 
				 	return starpu_model_expected_perf(task, task->cl->model, arch, nimpl);
			
 
				 }
			
 
				 
			
 
				+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl)
			
 
				+{
			
 
				+	if (!task->cl)
			
 
				+		/* Tasks without codelet don't actually take time */
			
 
				+		return 0.0;
			
 
				+	return starpu_model_worker_expected_perf(task, task->cl->model, workerid, sched_ctx_id, nimpl);
			
 
				+}
			
 
				+
			
 
				 double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl)
			
 
				 {
			
 
				 	if (!task->cl)
			
@@ -236,6 +273,15 @@ double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfm
 
				 	return starpu_model_expected_perf(task, task->cl->energy_model, arch, nimpl);
			
 
				 }
			
 
				 
			
 
				+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl)
			
 
				+{
			
 
				+	if (!task->cl)
			
 
				+		/* Tasks without codelet don't actually take time */
			
 
				+		return 0.0;
			
 
				+	return starpu_model_worker_expected_perf(task, task->cl->energy_model, workerid, sched_ctx_id, nimpl);
			
 
				+
			
 
				+}
			
 
				+
			
 
				 double starpu_task_expected_conversion_time(struct starpu_task *task,
			
 
				 					    struct starpu_perfmodel_arch* arch,
			
 
				 					    unsigned nimpl)
			
--- a/src/sched_policies/component_sched.c
+++ b/src/sched_policies/component_sched.c
@@ -49,7 +49,6 @@ int starpu_sched_component_execute_preds(struct starpu_sched_component * compone
 
				 	    workerid != -1;
			
 
				 	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
			
 
				 	{
			
 
				-		struct starpu_perfmodel_arch* archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
			
 
				 		int nimpl;
			
 
				 		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				 		{
			
@@ -59,9 +58,13 @@ int starpu_sched_component_execute_preds(struct starpu_sched_component * compone
 
				 				double d;
			
 
				 				can_execute = 1;
			
 
				 				if(bundle)
			
 
				+				{
			
 
				+					struct starpu_perfmodel_arch* archtype =
			
 
				+						starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
			
 
				 					d = starpu_task_bundle_expected_length(bundle, archtype, nimpl);
			
 
				+				}
			
 
				 				else
			
 
				-					d = starpu_task_expected_length(task, archtype, nimpl);
			
 
				+					d = starpu_task_worker_expected_length(task, workerid, component->tree->sched_ctx_id, nimpl);
			
 
				 				if(isnan(d))
			
 
				 				{
			
 
				 					*length = d;
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -487,7 +487,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
				 			}
			
 
				 
			
 
				 			double exp_end;
			
 
				-			double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
			
 
				+			double local_length = starpu_task_worker_expected_length(task, worker, sched_ctx_id, nimpl);
			
 
				 			double local_penalty = starpu_task_expected_data_transfer_time_for(task, worker);
			
 
				 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
			
 
				 
			
@@ -679,9 +679,9 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				-				local_task_length[worker_ctx][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
			
 
				+				local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl);
			
 
				 				local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid);
			
 
				-				local_energy[worker_ctx][nimpl] = starpu_task_expected_energy(task, perf_arch,nimpl);
			
 
				+				local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl);
			
 
				 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
			
 
				 				if (conversion_time > 0.0)
			
 
				 					local_task_length[worker_ctx][nimpl] += conversion_time;
			
@@ -1100,10 +1100,9 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, int pe
 
				 {
			
 
				 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
			
 
				 	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
			
 
				-	/* Compute the expected penality */
			
 
				-	struct starpu_perfmodel_arch *perf_arch = starpu_worker_get_perf_archtype(perf_workerid, sched_ctx_id);
			
 
				 
			
 
				-	double predicted = starpu_task_expected_length(task, perf_arch,
			
 
				+	/* Compute the expected penality */
			
 
				+	double predicted = starpu_task_worker_expected_length(task, perf_workerid, STARPU_NMAX_SCHED_CTXS,
			
 
				 						       starpu_task_get_implementation(task));
			
 
				 
			
 
				 	double predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid);