vor 14 Jahren · 9b79aeb3a8
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -1184,6 +1184,11 @@ type). This still assumes performance regularity, but can work with various data
 
				 input sizes, by applying a*n^b+c regression over observed execution times.
			
 
				 @end itemize
			
 
				 
			
 
				+The same can be done for task power consumption estimation, by setting the
			
 
				+@code{power_model} field the same way as the @code{model} field. Note: for
			
 
				+now, the application has to give the power consumption performance model
			
 
				+a different name.
			
 
				+
			
 
				 @node Theoretical lower bound on execution time
			
 
				 @section Theoretical lower bound on execution time
			
 
				 
			
@@ -1283,8 +1288,9 @@ yet. To force continuing calibration, use @code{export STARPU_CALIBRATE=1}
 
				 start, use @code{export STARPU_CALIBRATE=2}.
			
 
				 
			
 
				 Distributing tasks to balance the load induces data transfer penalty. StarPU
			
 
				-thus needs to find a balance between both. The target function that StarPU
			
 
				-tries to optimise is @code{alpha * T_execution + beta * T_data_transfer}, where
			
 
				+thus needs to find a balance between both. The target function that the
			
 
				+@code{dmda} scheduler of StarPU
			
 
				+tries to minimize is @code{alpha * T_execution + beta * T_data_transfer}, where
			
 
				 @code{T_execution} is the estimated execution time of the codelet (usually
			
 
				 accurate), and @code{T_data_transfer} is the estimated data transfer time. The
			
 
				 latter is however estimated based on bus calibration before execution start,
			
@@ -1298,6 +1304,16 @@ results that a precise estimation would give.
 
				 Measuring the actual data transfer time is however on our TODO-list to
			
 
				 accurately estimate data transfer penalty without the need of a hand-tuned beta parameter.
			
 
				 
			
 
				+If the application can provide some power performance model (through
			
 
				+the @code{power_model} field of the codelet structure), StarPU will
			
 
				+take it into account when distributing tasks. The target function that
			
 
				+the @code{dmda} scheduler minimizes becomes @code{alpha * T_execution +
			
 
				+beta * T_data_transfer + gamma * Consumption}, where @code{Consumption}
			
 
				+is the estimated task consumption in Joules. To tune the gamma parameter, use
			
 
				+@code{export STARPU_SCHED_GAMMA=3000} for instance, to express that each Joule
			
 
				+(i.e. 1 kW during 1000µs) is worth a 3000µs execution time penalty. Setting
			
 
				+alpha and beta to zero makes the scheduler take only power consumption into account.
			
 
				+
			
 
				 Profiling can be enabled by using @code{export STARPU_PROFILING=1} or by
			
 
				 calling @code{starpu_profiling_status_set} from the source code.
			
 
				 Statistics on the execution can then be obtained by using @code{export
			
@@ -3023,8 +3039,12 @@ array. The constant argument passed with the @code{cl_arg} field of the
 
				 not be above @code{STARPU_NMAXBUFS}.
			
 
				 
			
 
				 @item @code{model} (optional):
			
 
				-This is a pointer to the performance model associated to this codelet. This
			
 
				-optional field is ignored when set to @code{NULL}. TODO
			
 
				+This is a pointer to the task duration performance model associated to this
			
 
				+codelet. This optional field is ignored when set to @code{NULL}. TODO
			
 
				+
			
 
				+@item @code{power_model} (optional):
			
 
				+This is a pointer to the task power consumption performance model associated
			
 
				+to this codelet. This optional field is ignored when set to @code{NULL}. TODO
			
 
				 
			
 
				 @end table
			
 
				 @end table
			
--- a/examples/basic_examples/vector_scal.c
+++ b/examples/basic_examples/vector_scal.c
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -34,7 +34,12 @@ extern void scal_opencl_func(void *buffers[], void *_args);
 
				 
			
 
				 static struct starpu_perfmodel_t vector_scal_model = {
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				-	.symbol = "vector_scale_model"
			
 
				+	.symbol = "vector_scale"
			
 
				+};
			
 
				+
			
 
				+static struct starpu_perfmodel_t vector_scal_power_model = {
			
 
				+	.type = STARPU_HISTORY_BASED,
			
 
				+	.symbol = "vector_scale_power"
			
 
				 };
			
 
				 
			
 
				 static starpu_codelet cl = {
			
@@ -50,7 +55,8 @@ static starpu_codelet cl = {
 
				 	.opencl_func = scal_opencl_func,
			
 
				 #endif
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &vector_scal_model
			
 
				+	.model = &vector_scal_model,
			
 
				+	.power_model = &vector_scal_power_model
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -117,6 +117,7 @@ int starpu_list_models(void);
 
				 double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch);
			
 
				 double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtype);
			
 
				 double starpu_data_expected_penalty(uint32_t memory_node, struct starpu_task *task);
			
 
				+double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_archtype arch);
			
 
				 
			
 
				 void starpu_force_bus_sampling(void);
			
 
				 
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -75,6 +75,7 @@ typedef struct starpu_codelet_t {
 
				 	unsigned nbuffers;
			
 
				 
			
 
				 	struct starpu_perfmodel_t *model;
			
 
				+	struct starpu_perfmodel_t *power_model;
			
 
				 
			
 
				 	/* statistics collected at runtime: this is filled by StarPU and should
			
 
				 	 * not be accessed directly (use the starpu_display_codelet_stats
			
@@ -136,7 +137,7 @@ struct starpu_task {
 
				 	 * starpu_profiling_status_set */
			
 
				 	struct starpu_task_profiling_info *profiling_info;
			
 
				 
			
 
				-	/* Predicted duration of the task. This field is only valid if the
			
 
				+	/* Predicted duration of the task in µs. This field is only valid if the
			
 
				 	 * scheduling strategy uses performance models. */
			
 
				 	double predicted;
			
 
				 
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -66,7 +66,7 @@ enum starpu_perf_archtype starpu_worker_get_perf_archtype(int workerid)
 
				  * PER ARCH model
			
 
				  */
			
 
				 
			
 
				-static double per_arch_task_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task)
			
 
				+static double per_arch_task_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task)
			
 
				 {
			
 
				 	double exp = -1.0;
			
 
				 	double (*per_arch_cost_model)(struct starpu_buffer_descr_t *);
			
@@ -116,7 +116,7 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 
				 	return -1.0;
			
 
				 }
			
 
				 
			
 
				-static double common_task_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task)
			
 
				+static double common_task_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task)
			
 
				 {
			
 
				 	double exp;
			
 
				 	double alpha;
			
@@ -133,27 +133,25 @@ static double common_task_expected_length(struct starpu_perfmodel_t *model, enum
 
				 	return -1.0;
			
 
				 }
			
 
				 
			
 
				-double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch)
			
 
				+static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch)
			
 
				 {
			
 
				-	starpu_job_t j = _starpu_get_job_associated_to_task(task);
			
 
				-	struct starpu_perfmodel_t *model = task->cl->model;
			
 
				-
			
 
				 	if (model) {
			
 
				+		starpu_job_t j = _starpu_get_job_associated_to_task(task);
			
 
				 		switch (model->type) {
			
 
				 			case STARPU_PER_ARCH:
			
 
				-				return per_arch_task_expected_length(model, arch, task);
			
 
				+				return per_arch_task_expected_perf(model, arch, task);
			
 
				 
			
 
				 			case STARPU_COMMON:
			
 
				-				return common_task_expected_length(model, arch, task);
			
 
				+				return common_task_expected_perf(model, arch, task);
			
 
				 
			
 
				 			case STARPU_HISTORY_BASED:
			
 
				-				return _starpu_history_based_job_expected_length(model, arch, j);
			
 
				+				return _starpu_history_based_job_expected_perf(model, arch, j);
			
 
				 
			
 
				 			case STARPU_REGRESSION_BASED:
			
 
				-				return _starpu_regression_based_job_expected_length(model, arch, j);
			
 
				+				return _starpu_regression_based_job_expected_perf(model, arch, j);
			
 
				 
			
 
				 			case STARPU_NL_REGRESSION_BASED:
			
 
				-				return _starpu_non_linear_regression_based_job_expected_length(model, arch, j);
			
 
				+				return _starpu_non_linear_regression_based_job_expected_perf(model, arch, j);
			
 
				 
			
 
				 			default:
			
 
				 				STARPU_ABORT();
			
@@ -164,6 +162,16 @@ double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_ar
 
				 	return 0.0;
			
 
				 }
			
 
				 
			
 
				+double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch)
			
 
				+{
			
 
				+	return starpu_model_expected_perf(task, task->cl->model, arch);
			
 
				+}
			
 
				+
			
 
				+double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_archtype arch)
			
 
				+{
			
 
				+	return starpu_model_expected_perf(task, task->cl->power_model, arch);
			
 
				+}
			
 
				+
			
 
				 /* Data transfer performance modeling */
			
 
				 double starpu_data_expected_penalty(uint32_t memory_node, struct starpu_task *task)
			
 
				 {
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -90,16 +90,16 @@ void _starpu_get_perf_model_dir_codelets(char *path, size_t maxlen);
 
				 void _starpu_get_perf_model_dir_bus(char *path, size_t maxlen);
			
 
				 void _starpu_get_perf_model_dir_debug(char *path, size_t maxlen);
			
 
				 
			
 
				-double _starpu_history_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j);
			
 
				+double _starpu_history_based_job_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j);
			
 
				 void _starpu_register_model(struct starpu_perfmodel_t *model);
			
 
				 void _starpu_initialize_registered_performance_models(void);
			
 
				 void _starpu_deinitialize_registered_performance_models(void);
			
 
				 
			
 
				-double _starpu_regression_based_job_expected_length(struct starpu_perfmodel_t *model,
			
 
				+double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel_t *model,
			
 
				 					enum starpu_perf_archtype arch, struct starpu_job_s *j);
			
 
				-double _starpu_non_linear_regression_based_job_expected_length(struct starpu_perfmodel_t *model,
			
 
				+double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel_t *model,
			
 
				 					enum starpu_perf_archtype arch, struct starpu_job_s *j);
			
 
				-void _starpu_update_perfmodel_history(struct starpu_job_s *j, enum starpu_perf_archtype arch,
			
 
				+void _starpu_update_perfmodel_history(struct starpu_job_s *j, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch,
			
 
				 				unsigned cpuid, double measured);
			
 
				 
			
 
				 void _starpu_create_sampling_directory_if_needed(void);
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -590,7 +590,7 @@ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
 
				 	get_model_debug_path(model, archname, path, maxlen);
			
 
				 }
			
 
				 
			
 
				-double _starpu_regression_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j)
			
 
				+double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j)
			
 
				 {
			
 
				 	double exp = -1.0;
			
 
				 	size_t size = _starpu_job_get_data_size(j);
			
@@ -606,7 +606,7 @@ double _starpu_regression_based_job_expected_length(struct starpu_perfmodel_t *m
 
				 	return exp;
			
 
				 }
			
 
				 
			
 
				-double _starpu_non_linear_regression_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j)
			
 
				+double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j)
			
 
				 {
			
 
				 	double exp = -1.0;
			
 
				 	size_t size = _starpu_job_get_data_size(j);
			
@@ -622,7 +622,7 @@ double _starpu_non_linear_regression_based_job_expected_length(struct starpu_per
 
				 	return exp;
			
 
				 }
			
 
				 
			
 
				-double _starpu_history_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j)
			
 
				+double _starpu_history_based_job_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j)
			
 
				 {
			
 
				 	double exp;
			
 
				 	struct starpu_per_arch_perfmodel_t *per_arch_model;
			
@@ -651,10 +651,8 @@ double _starpu_history_based_job_expected_length(struct starpu_perfmodel_t *mode
 
				 	return exp;
			
 
				 }
			
 
				 
			
 
				-void _starpu_update_perfmodel_history(starpu_job_t j, enum starpu_perf_archtype arch, unsigned cpuid __attribute__((unused)), double measured)
			
 
				+void _starpu_update_perfmodel_history(starpu_job_t j, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, unsigned cpuid __attribute__((unused)), double measured)
			
 
				 {
			
 
				-	struct starpu_perfmodel_t *model = j->task->cl->model;
			
 
				-
			
 
				 	if (model)
			
 
				 	{
			
 
				 		PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -100,8 +100,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args,
 
				 		_starpu_push_task_output(task, 0);
			
 
				 
			
 
				 		_starpu_driver_update_job_feedback(j, cpu_args, profiling_info,
			
 
				-				calibrate_model, perf_arch,
			
 
				-				&codelet_start, &codelet_end);
			
 
				+				perf_arch, &codelet_start, &codelet_end);
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -202,7 +202,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
				 
			
 
				 	_starpu_push_task_output(task, mask);
			
 
				 
			
 
				-	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model, args->perf_arch,
			
 
				+	_starpu_driver_update_job_feedback(j, args, profiling_info, args->perf_arch,
			
 
				 			&codelet_start, &codelet_end);
			
 
				 
			
 
				 	return 0;
			
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -24,45 +24,49 @@
 
				 
			
 
				 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
			
 
				 					struct starpu_task_profiling_info *profiling_info,
			
 
				-					unsigned calibrate_model, enum starpu_perf_archtype perf_arch,
			
 
				+					enum starpu_perf_archtype perf_arch,
			
 
				 					struct timespec *codelet_start, struct timespec *codelet_end)
			
 
				 {
			
 
				 	struct timespec measured_ts;
			
 
				 	double measured;
			
 
				 	int workerid = worker_args->workerid;
			
 
				+	struct starpu_codelet_t *cl = j->task->cl;
			
 
				+	int calibrate_model;
			
 
				+	int profiling = starpu_profiling_status_get();
			
 
				+	int updated = 0;
			
 
				+
			
 
				+	if (cl->model && cl->model->benchmarking)
			
 
				+		calibrate_model = 1;
			
 
				 
			
 
				 	if (profiling_info || calibrate_model)
			
 
				 	{
			
 
				 		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
			
 
				 		measured = starpu_timing_timespec_to_us(&measured_ts);
			
 
				 
			
 
				-		if (starpu_profiling_status_get())
			
 
				+		if (profiling && profiling_info)
			
 
				 		{
			
 
				-			if (profiling_info)
			
 
				-			{
			
 
				-				memcpy(&profiling_info->start_time, codelet_start, sizeof(struct timespec));
			
 
				-				memcpy(&profiling_info->end_time, codelet_end, sizeof(struct timespec));
			
 
				+			memcpy(&profiling_info->start_time, codelet_start, sizeof(struct timespec));
			
 
				+			memcpy(&profiling_info->end_time, codelet_end, sizeof(struct timespec));
			
 
				 
			
 
				-				profiling_info->workerid = workerid;
			
 
				-				
			
 
				-				_starpu_worker_update_profiling_info_executing(workerid, &measured_ts, 1,
			
 
				-					profiling_info->used_cycles,
			
 
				-					profiling_info->stall_cycles,
			
 
				-					profiling_info->power_consumed);
			
 
				-			}
			
 
				-		} else {
			
 
				-			_starpu_worker_update_profiling_info_executing(workerid, 0, 1, 0, 0, 0);
			
 
				+			profiling_info->workerid = workerid;
			
 
				+			
			
 
				+			_starpu_worker_update_profiling_info_executing(workerid, &measured_ts, 1,
			
 
				+				profiling_info->used_cycles,
			
 
				+				profiling_info->stall_cycles,
			
 
				+				profiling_info->power_consumed);
			
 
				+			updated =  1;
			
 
				 		}
			
 
				 
			
 
				-		if (calibrate_model) {
			
 
				-			if (profiling_info && profiling_info->power_consumed) {
			
 
				-				/* TODO: update power model history */
			
 
				-			}
			
 
				-			_starpu_update_perfmodel_history(j, perf_arch, worker_args->devid, measured);
			
 
				-		}
			
 
				-	} else {
			
 
				-		_starpu_worker_update_profiling_info_executing(workerid, 0, 1, 0, 0, 0);
			
 
				+		if (calibrate_model)
			
 
				+			_starpu_update_perfmodel_history(j, j->task->cl->model,  perf_arch, worker_args->devid, measured);
			
 
				 	}
			
 
				+	if (!updated)
			
 
				+		_starpu_worker_update_profiling_info_executing(workerid, 0, 1, 0, 0, 0);
			
 
				+
			
 
				+printf("ici %p %lf %p %d\n", profiling_info, profiling_info->power_consumed, cl->power_model, cl->power_model->benchmarking);
			
 
				+	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking) {
			
 
				+		_starpu_update_perfmodel_history(j, j->task->cl->power_model,  perf_arch, worker_args->devid, profiling_info->power_consumed);
			
 
				+		}
			
 
				 }
			
 
				 
			
 
				 /* Workers may block when there is no work to do at all. We assume that the
			
--- a/src/drivers/driver_common/driver_common.h
+++ b/src/drivers/driver_common/driver_common.h
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -26,7 +26,7 @@
 
				 
			
 
				 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
			
 
				 		struct starpu_task_profiling_info *profiling_info,
			
 
				-		unsigned calibrate_model, enum starpu_perf_archtype perf_arch,
			
 
				+		enum starpu_perf_archtype perf_arch,
			
 
				 		struct timespec *codelet_start, struct timespec *codelet_end);
			
 
				 
			
 
				 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
			
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -206,7 +206,7 @@ static void gordon_callback_list_func(void *arg)
 
				 			double measured = (double)gordon_task->measured;
			
 
				 			unsigned cpuid = 0; /* XXX */
			
 
				 
			
 
				-			_starpu_update_perfmodel_history(j, STARPU_GORDON_DEFAULT, cpuid, measured);
			
 
				+			_starpu_update_perfmodel_history(j, j->task->cl->model, STARPU_GORDON_DEFAULT, cpuid, measured);
			
 
				 		}
			
 
				 
			
 
				 		_starpu_push_task_output(j->task, 0);
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -540,7 +540,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
				 
			
 
				 	_starpu_push_task_output(task, mask);
			
 
				 
			
 
				-	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model, args->perf_arch,
			
 
				+	_starpu_driver_update_job_feedback(j, args, profiling_info, args->perf_arch,
			
 
				 							&codelet_start, &codelet_end);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
--- a/src/drivers/opencl/driver_opencl_utils.c
+++ b/src/drivers/opencl/driver_opencl_utils.c
@@ -197,7 +197,7 @@ cl_int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs
 
				         return CL_SUCCESS;
			
 
				 }
			
 
				 
			
 
				-int starpu_opencl_collect_stats(cl_event event)
			
 
				+int starpu_opencl_collect_stats(cl_event event __attribute__((unused)))
			
 
				 {
			
 
				 	struct starpu_task *task = starpu_get_current_task();
			
 
				 	struct starpu_task_profiling_info *info = task->profiling_info;
			
@@ -226,7 +226,7 @@ int starpu_opencl_collect_stats(cl_event event)
 
				 	}
			
 
				 #endif
			
 
				 #ifdef CL_PROFILING_POWER_CONSUMED
			
 
				-	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->model && task->cl->model->benchmarking))) {
			
 
				+	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking))) {
			
 
				 		cl_int err;
			
 
				 		double power_consumed;
			
 
				 		size_t size;
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2010-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -338,7 +338,7 @@ static void _starpu_get_tasks_times(int nw, int nt, double *times) {
 
				 				.footprint_is_computed = 1,
			
 
				 			};
			
 
				 			enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
			
 
				-			double length = _starpu_history_based_job_expected_length(tp->cl->model, arch, &j);
			
 
				+			double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j);
			
 
				 			if (length == -1.0)
			
 
				 				times[w*nt+t] = -1.0;
			
 
				 			else
			
@@ -404,7 +404,7 @@ void starpu_bound_print_lp(FILE *output)
 
				 			for (w = 0; w < nw; w++) {
			
 
				 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
			
 
				 				if (t1->duration[arch] == 0.) {
			
 
				-					double length = _starpu_history_based_job_expected_length(t1->cl->model, arch, &j);
			
 
				+					double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j);
			
 
				 					if (length == -1.0)
			
 
				 						/* Avoid problems with binary coding of doubles */
			
 
				 						t1->duration[arch] = -1.0;
			
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -118,7 +118,7 @@ struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(str
 
				 	struct starpu_task_profiling_info *info = NULL;
			
 
				 
			
 
				 	/* If we are benchmarking, we need room for the power consumption */
			
 
				-	if (profiling || (task->cl && task->cl->model && task->cl->model->benchmarking))
			
 
				+	if (profiling || (task->cl && task->cl->power_model && (task->cl->power_model->benchmarking || _starpu_get_calibrate_flag())))
			
 
				 	{
			
 
				 		info = calloc(1, sizeof(struct starpu_task_profiling_info));
			
 
				 		STARPU_ASSERT(info);
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * StarPU
			
 
				- * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
			
 
				+ * Copyright (C) Université Bordeaux 1, CNRS 2008-2011 (see AUTHORS file)
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -30,6 +30,7 @@ static pthread_mutex_t sched_mutex[STARPU_NMAXWORKERS];
 
				 
			
 
				 static double alpha = 1.0;
			
 
				 static double beta = 1.0;
			
 
				+static double _gamma = 1000.0;
			
 
				 
			
 
				 #ifdef STARPU_VERBOSE
			
 
				 static long int total_task_cnt = 0;
			
@@ -363,6 +364,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 
			
 
				 	double local_task_length[nworkers];
			
 
				 	double local_data_penalty[nworkers];
			
 
				+	double local_power[nworkers];
			
 
				 	double exp_end[nworkers];
			
 
				 
			
 
				 	double fitness[nworkers];
			
@@ -403,6 +405,10 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 			/* a better solution was found */
			
 
				 			best_exp_end = exp_end[worker];
			
 
				 		}
			
 
				+
			
 
				+		local_power[worker] = starpu_task_expected_power(task, perf_arch);
			
 
				+		if (local_power[worker] == -1.0)
			
 
				+			local_power[worker] = 0.;
			
 
				 	}
			
 
				 
			
 
				 	double best_fitness = -1;
			
@@ -420,7 +426,8 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 			}
			
 
				 	
			
 
				 			fitness[worker] = alpha*(exp_end[worker] - best_exp_end) 
			
 
				-					+ beta*(local_data_penalty[worker]);
			
 
				+					+ beta*(local_data_penalty[worker])
			
 
				+					+ _gamma*(local_power[worker]);
			
 
				 
			
 
				 			if (best == -1 || fitness[worker] < best_fitness)
			
 
				 			{
			
@@ -428,7 +435,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 				best_fitness = fitness[worker];
			
 
				 				best = worker;
			
 
				 
			
 
				-	//			_STARPU_DEBUG("best fitness (worker %d) %le = alpha*(%le) + beta(%le) \n", worker, best_fitness, exp_end[worker] - best_exp_end, local_data_penalty[worker]);
			
 
				+	//			_STARPU_DEBUG("best fitness (worker %d) %le = alpha*(%le) + beta(%le) +gamma(%le)\n", worker, best_fitness, exp_end[worker] - best_exp_end, local_data_penalty[worker], local_power[worker]);
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
@@ -492,12 +499,16 @@ static void initialize_dmda_policy(struct starpu_machine_topology_s *topology,
 
				 
			
 
				 	const char *strval_alpha = getenv("STARPU_SCHED_ALPHA");
			
 
				 	if (strval_alpha)
			
 
				-		beta = atof(strval_alpha);
			
 
				+		alpha = atof(strval_alpha);
			
 
				 
			
 
				 	const char *strval_beta = getenv("STARPU_SCHED_BETA");
			
 
				 	if (strval_beta)
			
 
				 		beta = atof(strval_beta);
			
 
				 
			
 
				+	const char *strval_gamma = getenv("STARPU_SCHED_GAMMA");
			
 
				+	if (strval_gamma)
			
 
				+		_gamma = atof(strval_gamma);
			
 
				+
			
 
				 	unsigned workerid;
			
 
				 	for (workerid = 0; workerid < nworkers; workerid++)
			
 
				 	{