14 years ago · 86a13c15d2
--- a/TODO
+++ b/TODO
@@ -12,12 +12,6 @@ TODO list
 
				 
			
 
				 - Update all tests and examples
			
 
				 
			
 
				-- Depreciate cost_model field in struct starpu_per_arch_perfmodel
			
 
				-                                 struct starpu_perfmodel
			
 
				-
			
 
				-- Add a field cost_function
			
 
				-  double (*cost_function)(starpu_data_handle_t *, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
			
 
				-
			
 
				 - Update all cost functions in examples
			
 
				 
			
 
				 - When cost_model is provided, but not cost_function, need to rebuild a struct starpu_buffer_descr
			
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -362,13 +362,13 @@ means that the first execution uses history-based performance model to perform
 
				 scheduling.
			
 
				 
			
 
				 @item
			
 
				-Provided as an estimation from the application itself (@code{STARPU_COMMON} model type and @code{cost_model} field),
			
 
				+Provided as an estimation from the application itself (@code{STARPU_COMMON} model type and @code{cost_function} field),
			
 
				 see for instance
			
 
				 @code{examples/common/blas_model.h} and @code{examples/common/blas_model.c}.
			
 
				 
			
 
				 @item
			
 
				 Provided explicitly by the application (@code{STARPU_PER_ARCH} model type): the
			
 
				-@code{.per_arch[i].cost_model} fields have to be filled with pointers to
			
 
				+@code{.per_arch[arch][nimpl].cost_function} fields have to be filled with pointers to
			
 
				 functions which return the expected duration of the task in micro-seconds, one
			
 
				 per architecture.
			
 
				 
			
--- a/examples/cholesky/cholesky_models.c
+++ b/examples/cholesky/cholesky_models.c
@@ -17,8 +17,8 @@
 
				  */
			
 
				 
			
 
				 /*
			
 
				- * As a convention, in that file, descr[0] is represented by A,
			
 
				- * 				  descr[1] is B ...
			
 
				+ * As a convention, in that file, buffers[0] is represented by A,
			
 
				+ * 				  buffers[1] is B ...
			
 
				  */
			
 
				 
			
 
				 /*
			
@@ -36,11 +36,11 @@
 
				 #define PERTURBATE(a)	(a)
			
 
				 #endif
			
 
				 
			
 
				-static double cpu_chol_task_11_cost(struct starpu_buffer_descr *descr)
			
 
				+static double cpu_chol_task_11_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176);
			
 
				 
			
@@ -51,11 +51,11 @@ static double cpu_chol_task_11_cost(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-static double cuda_chol_task_11_cost(struct starpu_buffer_descr *descr)
			
 
				+static double cuda_chol_task_11_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883);
			
 
				 
			
@@ -66,11 +66,11 @@ static double cuda_chol_task_11_cost(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-static double cpu_chol_task_21_cost(struct starpu_buffer_descr *descr)
			
 
				+static double cpu_chol_task_21_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965);
			
 
				 
			
@@ -81,11 +81,11 @@ static double cpu_chol_task_21_cost(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-static double cuda_chol_task_21_cost(struct starpu_buffer_descr *descr)
			
 
				+static double cuda_chol_task_21_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520);
			
 
				 
			
@@ -96,11 +96,11 @@ static double cuda_chol_task_21_cost(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-static double cpu_chol_task_22_cost(struct starpu_buffer_descr *descr)
			
 
				+static double cpu_chol_task_22_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760);
			
 
				 
			
@@ -111,11 +111,11 @@ static double cpu_chol_task_22_cost(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-static double cuda_chol_task_22_cost(struct starpu_buffer_descr *descr)
			
 
				+static double cuda_chol_task_22_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666);
			
 
				 
			
@@ -130,8 +130,8 @@ struct starpu_perfmodel chol_model_11 =
 
				 {
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_11_cost },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_11_cost }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = cpu_chol_task_11_cost },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = cuda_chol_task_11_cost }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "chol_model_11"
			
@@ -141,8 +141,8 @@ struct starpu_perfmodel chol_model_21 =
 
				 {
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_21_cost },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_21_cost }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = cpu_chol_task_21_cost },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = cuda_chol_task_21_cost }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "chol_model_21"
			
@@ -152,8 +152,8 @@ struct starpu_perfmodel chol_model_22 =
 
				 {
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_22_cost },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_22_cost }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = cpu_chol_task_22_cost },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = cuda_chol_task_22_cost }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "chol_model_22"
			
--- a/examples/common/blas_model.c
+++ b/examples/common/blas_model.c
@@ -27,15 +27,15 @@
 
				  *	Number of flops of Gemm 
			
 
				  */
			
 
				 
			
 
				-double gemm_cost(struct starpu_buffer_descr *descr)
			
 
				+double gemm_cost(struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	/* C = A * B */
			
 
				 	uint32_t nxC, nyC, nxA;
			
 
				 
			
 
				 
			
 
				-	nxC = starpu_matrix_get_nx(descr[2].handle);
			
 
				-	nyC = starpu_matrix_get_ny(descr[2].handle);
			
 
				-	nxA = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	nxC = starpu_matrix_get_nx(task->buffers[2].handle);
			
 
				+	nyC = starpu_matrix_get_ny(task->buffers[2].handle);
			
 
				+	nxA = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 /*	printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA); */
			
 
				 
			
--- a/examples/common/blas_model.h
+++ b/examples/common/blas_model.h
@@ -36,7 +36,7 @@ static struct starpu_perfmodel starpu_sgemm_model =
 
				 
			
 
				 static struct starpu_perfmodel starpu_sgemm_model_common =
			
 
				 {
			
 
				-	.cost_model = gemm_cost,
			
 
				+	.cost_function = gemm_cost,
			
 
				 	.type = STARPU_COMMON,
			
 
				 };
			
 
				 
			
@@ -54,7 +54,7 @@ static struct starpu_perfmodel starpu_dgemm_model =
 
				 
			
 
				 static struct starpu_perfmodel starpu_dgemm_model_common =
			
 
				 {
			
 
				-	.cost_model = gemm_cost,
			
 
				+	.cost_function = gemm_cost,
			
 
				 	.type = STARPU_COMMON,
			
 
				 };
			
 
				 
			
--- a/examples/heat/lu_kernels_model.c
+++ b/examples/heat/lu_kernels_model.c
@@ -19,8 +19,8 @@
 
				 #include "lu_kernels_model.h"
			
 
				 
			
 
				 /*
			
 
				- * As a convention, in that file, descr[0] is represented by A,
			
 
				- * 				  descr[1] is B ...
			
 
				+ * As a convention, in that file, buffers[0] is represented by A,
			
 
				+ * 				  buffers[1] is B ...
			
 
				  */
			
 
				 
			
 
				 /*
			
@@ -42,22 +42,22 @@
 
				  *
			
 
				  */
			
 
				 
			
 
				-double task_11_cost(struct starpu_buffer_descr *descr)
			
 
				+double task_11_cost(struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/537.5);
			
 
				 
			
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-double task_12_cost(struct starpu_buffer_descr *descr)
			
 
				+double task_12_cost(struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 /*	double cost = ((n*n*n)/1744.695); */
			
 
				 	double cost = ((n*n*n)/3210.80);
			
@@ -67,11 +67,11 @@ double task_12_cost(struct starpu_buffer_descr *descr)
 
				 }
			
 
				 
			
 
				 
			
 
				-double task_21_cost(struct starpu_buffer_descr *descr)
			
 
				+double task_21_cost(struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 /*	double cost = ((n*n*n)/1744.695); */
			
 
				 	double cost = ((n*n*n)/3691.53);
			
@@ -82,13 +82,13 @@ double task_21_cost(struct starpu_buffer_descr *descr)
 
				 
			
 
				 
			
 
				 
			
 
				-double task_22_cost(struct starpu_buffer_descr *descr)
			
 
				+double task_22_cost(struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t nx, ny, nz;
			
 
				 
			
 
				-	nx = starpu_matrix_get_nx(descr[2].handle);
			
 
				-	ny = starpu_matrix_get_ny(descr[2].handle);
			
 
				-	nz = starpu_matrix_get_ny(descr[0].handle);
			
 
				+	nx = starpu_matrix_get_nx(task->buffers[2].handle);
			
 
				+	ny = starpu_matrix_get_ny(task->buffers[2].handle);
			
 
				+	nz = starpu_matrix_get_ny(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((nx*ny*nz)/4110.0);
			
 
				 
			
@@ -102,11 +102,11 @@ double task_22_cost(struct starpu_buffer_descr *descr)
 
				  */
			
 
				 
			
 
				 
			
 
				-double task_11_cost_cuda(struct starpu_buffer_descr *descr)
			
 
				+double task_11_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/1853.7806);
			
 
				 
			
@@ -114,11 +114,11 @@ double task_11_cost_cuda(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-double task_12_cost_cuda(struct starpu_buffer_descr *descr)
			
 
				+double task_12_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/42838.5718);
			
 
				 
			
@@ -127,11 +127,11 @@ double task_12_cost_cuda(struct starpu_buffer_descr *descr)
 
				 }
			
 
				 
			
 
				 
			
 
				-double task_21_cost_cuda(struct starpu_buffer_descr *descr)
			
 
				+double task_21_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/49208.667);
			
 
				 
			
@@ -141,13 +141,13 @@ double task_21_cost_cuda(struct starpu_buffer_descr *descr)
 
				 
			
 
				 
			
 
				 
			
 
				-double task_22_cost_cuda(struct starpu_buffer_descr *descr)
			
 
				+double task_22_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t nx, ny, nz;
			
 
				 
			
 
				-	nx = starpu_matrix_get_nx(descr[2].handle);
			
 
				-	ny = starpu_matrix_get_ny(descr[2].handle);
			
 
				-	nz = starpu_matrix_get_ny(descr[0].handle);
			
 
				+	nx = starpu_matrix_get_nx(task->buffers[2].handle);
			
 
				+	ny = starpu_matrix_get_ny(task->buffers[2].handle);
			
 
				+	nz = starpu_matrix_get_ny(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((nx*ny*nz)/57523.560);
			
 
				 
			
@@ -161,11 +161,11 @@ double task_22_cost_cuda(struct starpu_buffer_descr *descr)
 
				  *
			
 
				  */
			
 
				 
			
 
				-double task_11_cost_cpu(struct starpu_buffer_descr *descr)
			
 
				+double task_11_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/537.5);
			
 
				 
			
@@ -173,11 +173,11 @@ double task_11_cost_cpu(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-double task_12_cost_cpu(struct starpu_buffer_descr *descr)
			
 
				+double task_12_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/6668.224);
			
 
				 
			
@@ -186,11 +186,11 @@ double task_12_cost_cpu(struct starpu_buffer_descr *descr)
 
				 }
			
 
				 
			
 
				 
			
 
				-double task_21_cost_cpu(struct starpu_buffer_descr *descr)
			
 
				+double task_21_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t n;
			
 
				 
			
 
				-	n = starpu_matrix_get_nx(descr[0].handle);
			
 
				+	n = starpu_matrix_get_nx(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((n*n*n)/6793.8423);
			
 
				 
			
@@ -200,13 +200,13 @@ double task_21_cost_cpu(struct starpu_buffer_descr *descr)
 
				 
			
 
				 
			
 
				 
			
 
				-double task_22_cost_cpu(struct starpu_buffer_descr *descr)
			
 
				+double task_22_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				 {
			
 
				 	uint32_t nx, ny, nz;
			
 
				 
			
 
				-	nx = starpu_matrix_get_nx(descr[2].handle);
			
 
				-	ny = starpu_matrix_get_ny(descr[2].handle);
			
 
				-	nz = starpu_matrix_get_ny(descr[0].handle);
			
 
				+	nx = starpu_matrix_get_nx(task->buffers[2].handle);
			
 
				+	ny = starpu_matrix_get_ny(task->buffers[2].handle);
			
 
				+	nz = starpu_matrix_get_ny(task->buffers[0].handle);
			
 
				 
			
 
				 	double cost = ((nx*ny*nz)/4203.0175);
			
 
				 
			
@@ -216,11 +216,11 @@ double task_22_cost_cpu(struct starpu_buffer_descr *descr)
 
				 
			
 
				 struct starpu_perfmodel model_11 =
			
 
				 {
			
 
				-	.cost_model = task_11_cost,
			
 
				+	.cost_function = task_11_cost,
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_11_cost_cpu },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_11_cost_cuda }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_11_cost_cpu },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_11_cost_cuda }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
@@ -234,11 +234,11 @@ struct starpu_perfmodel model_11 =
 
				 
			
 
				 struct starpu_perfmodel model_12 =
			
 
				 {
			
 
				-	.cost_model = task_12_cost,
			
 
				+	.cost_function = task_12_cost,
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_12_cost_cpu },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_12_cost_cuda }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_12_cost_cpu },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_12_cost_cuda }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
@@ -252,11 +252,11 @@ struct starpu_perfmodel model_12 =
 
				 
			
 
				 struct starpu_perfmodel model_21 =
			
 
				 {
			
 
				-	.cost_model = task_21_cost,
			
 
				+	.cost_function = task_21_cost,
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_21_cost_cpu },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_21_cost_cuda }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_21_cost_cpu },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_21_cost_cuda }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
@@ -270,11 +270,11 @@ struct starpu_perfmodel model_21 =
 
				 
			
 
				 struct starpu_perfmodel model_22 =
			
 
				 {
			
 
				-	.cost_model = task_22_cost,
			
 
				+	.cost_function = task_22_cost,
			
 
				 	.per_arch =
			
 
				 	{
			
 
				-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_22_cost_cpu },
			
 
				-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_22_cost_cuda }
			
 
				+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_22_cost_cpu },
			
 
				+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_22_cost_cuda }
			
 
				 	},
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -22,6 +22,8 @@
 
				 #include <starpu.h>
			
 
				 #include <stdio.h>
			
 
				 
			
 
				+#include <starpu_util.h>
			
 
				+
			
 
				 #if ! defined(_MSC_VER)
			
 
				 #  include <pthread.h>
			
 
				 #endif
			
@@ -131,7 +133,8 @@ struct starpu_regression_model
 
				 
			
 
				 struct starpu_per_arch_perfmodel
			
 
				 {
			
 
				-	double (*cost_model)(struct starpu_buffer_descr *t); /* returns expected duration in µs */
			
 
				+	double (*cost_model)(struct starpu_buffer_descr *t) STARPU_DEPRECATED; /* returns expected duration in µs */
			
 
				+	double (*cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
			
 
				 
			
 
				 	/* internal variables */
			
 
				 	double alpha;
			
@@ -158,7 +161,8 @@ struct starpu_perfmodel
 
				 	enum starpu_perfmodel_type type;
			
 
				 
			
 
				 	/* single cost model (STARPU_COMMON), returns expected duration in µs */
			
 
				-	double (*cost_model)(struct starpu_buffer_descr *);
			
 
				+	double (*cost_model)(struct starpu_buffer_descr *) STARPU_DEPRECATED;
			
 
				+	double (*cost_function)(struct starpu_task *, unsigned nimpl);
			
 
				 
			
 
				 	/* per-architecture model */
			
 
				 	struct starpu_per_arch_perfmodel per_arch[STARPU_NARCH_VARIATIONS][STARPU_MAXIMPLEMENTATIONS];
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -71,11 +71,15 @@ enum starpu_perf_archtype starpu_worker_get_perf_archtype(int workerid)
 
				 static double per_arch_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	double exp = -1.0;
			
 
				+	double (*per_arch_cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
			
 
				 	double (*per_arch_cost_model)(struct starpu_buffer_descr *);
			
 
				 
			
 
				+	per_arch_cost_function = model->per_arch[arch][nimpl].cost_function;
			
 
				 	per_arch_cost_model = model->per_arch[arch][nimpl].cost_model;
			
 
				 
			
 
				-	if (per_arch_cost_model)
			
 
				+	if (per_arch_cost_function)
			
 
				+		exp = per_arch_cost_function(task, arch, nimpl);
			
 
				+	else if (per_arch_cost_model)
			
 
				 		exp = per_arch_cost_model(task->buffers);
			
 
				 
			
 
				 	return exp;
			
@@ -111,12 +115,21 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 
				 	return -1.0;
			
 
				 }
			
 
				 
			
 
				-static double common_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task)
			
 
				+static double common_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	double exp;
			
 
				 	double alpha;
			
 
				 
			
 
				-	if (model->cost_model)
			
 
				+	if (model->cost_function)
			
 
				+	{
			
 
				+		exp = model->cost_function(task, nimpl);
			
 
				+		alpha = starpu_worker_get_relative_speedup(arch);
			
 
				+
			
 
				+		STARPU_ASSERT(alpha != 0.0f);
			
 
				+
			
 
				+		return (exp/alpha);
			
 
				+	}
			
 
				+	else if (model->cost_model)
			
 
				 	{
			
 
				 		exp = model->cost_model(task->buffers);
			
 
				 		alpha = starpu_worker_get_relative_speedup(arch);
			
@@ -172,7 +185,7 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
				 
			
 
				 				return per_arch_task_expected_perf(model, arch, task, nimpl);
			
 
				 			case STARPU_COMMON:
			
 
				-				return common_task_expected_perf(model, arch, task);
			
 
				+				return common_task_expected_perf(model, arch, task, nimpl);
			
 
				 
			
 
				 			case STARPU_HISTORY_BASED: