Browse Source

Deprecate cost_model and introduce cost_function, which is given the whole task structure, the target architecture and the implementation number (the common, non-per-arch variant takes only the task and the implementation number)

Samuel Thibault 13 years ago
parent
commit
86a13c15d2

+ 0 - 6
TODO

@@ -12,12 +12,6 @@ TODO list
 
 - Update all tests and examples
 
-- Depreciate cost_model field in struct starpu_per_arch_perfmodel
-                                 struct starpu_perfmodel
-
-- Add a field cost_function
-  double (*cost_function)(starpu_data_handle_t *, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
-
 - Update all cost functions in examples
 
 - When cost_model is provided, but not cost_function, need to rebuild a struct starpu_buffer_descr

+ 2 - 2
doc/chapters/advanced-examples.texi

@@ -362,13 +362,13 @@ means that the first execution uses history-based performance model to perform
 scheduling.
 
 @item
-Provided as an estimation from the application itself (@code{STARPU_COMMON} model type and @code{cost_model} field),
+Provided as an estimation from the application itself (@code{STARPU_COMMON} model type and @code{cost_function} field),
 see for instance
 @code{examples/common/blas_model.h} and @code{examples/common/blas_model.c}.
 
 @item
 Provided explicitly by the application (@code{STARPU_PER_ARCH} model type): the
-@code{.per_arch[i].cost_model} fields have to be filled with pointers to
+@code{.per_arch[arch][nimpl].cost_function} fields have to be filled with pointers to
 functions which return the expected duration of the task in microseconds, one
 per architecture and implementation.
 

+ 20 - 20
examples/cholesky/cholesky_models.c

@@ -17,8 +17,8 @@
  */
 
 /*
- * As a convention, in that file, descr[0] is represented by A,
- * 				  descr[1] is B ...
+ * As a convention, in this file, buffers[0] is represented by A,
+ * 				  buffers[1] is B ...
  */
 
 /*
@@ -36,11 +36,11 @@
 #define PERTURBATE(a)	(a)
 #endif
 
-static double cpu_chol_task_11_cost(struct starpu_buffer_descr *descr)
+static double cpu_chol_task_11_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176);
 
@@ -51,11 +51,11 @@ static double cpu_chol_task_11_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double cuda_chol_task_11_cost(struct starpu_buffer_descr *descr)
+static double cuda_chol_task_11_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883);
 
@@ -66,11 +66,11 @@ static double cuda_chol_task_11_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double cpu_chol_task_21_cost(struct starpu_buffer_descr *descr)
+static double cpu_chol_task_21_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965);
 
@@ -81,11 +81,11 @@ static double cpu_chol_task_21_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double cuda_chol_task_21_cost(struct starpu_buffer_descr *descr)
+static double cuda_chol_task_21_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520);
 
@@ -96,11 +96,11 @@ static double cuda_chol_task_21_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double cpu_chol_task_22_cost(struct starpu_buffer_descr *descr)
+static double cpu_chol_task_22_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760);
 
@@ -111,11 +111,11 @@ static double cpu_chol_task_22_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double cuda_chol_task_22_cost(struct starpu_buffer_descr *descr)
+static double cuda_chol_task_22_cost(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666);
 
@@ -130,8 +130,8 @@ struct starpu_perfmodel chol_model_11 =
 {
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_11_cost },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_11_cost }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = cpu_chol_task_11_cost },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = cuda_chol_task_11_cost }
 	},
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "chol_model_11"
@@ -141,8 +141,8 @@ struct starpu_perfmodel chol_model_21 =
 {
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_21_cost },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_21_cost }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = cpu_chol_task_21_cost },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = cuda_chol_task_21_cost }
 	},
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "chol_model_21"
@@ -152,8 +152,8 @@ struct starpu_perfmodel chol_model_22 =
 {
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_22_cost },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_22_cost }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = cpu_chol_task_22_cost },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = cuda_chol_task_22_cost }
 	},
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "chol_model_22"

+ 4 - 4
examples/common/blas_model.c

@@ -27,15 +27,15 @@
  *	Number of flops of Gemm 
  */
 
-double gemm_cost(struct starpu_buffer_descr *descr)
+double gemm_cost(struct starpu_task *task)
 {
 	/* C = A * B */
 	uint32_t nxC, nyC, nxA;
 
 
-	nxC = starpu_matrix_get_nx(descr[2].handle);
-	nyC = starpu_matrix_get_ny(descr[2].handle);
-	nxA = starpu_matrix_get_nx(descr[0].handle);
+	nxC = starpu_matrix_get_nx(task->descr[2].handle);
+	nyC = starpu_matrix_get_ny(task->descr[2].handle);
+	nxA = starpu_matrix_get_nx(task->descr[0].handle);
 
 /*	printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA); */
 

+ 2 - 2
examples/common/blas_model.h

@@ -36,7 +36,7 @@ static struct starpu_perfmodel starpu_sgemm_model =
 
 static struct starpu_perfmodel starpu_sgemm_model_common =
 {
-	.cost_model = gemm_cost,
+	.cost_function = gemm_cost,
 	.type = STARPU_COMMON,
 };
 
@@ -54,7 +54,7 @@ static struct starpu_perfmodel starpu_dgemm_model =
 
 static struct starpu_perfmodel starpu_dgemm_model_common =
 {
-	.cost_model = gemm_cost,
+	.cost_function = gemm_cost,
 	.type = STARPU_COMMON,
 };
 

+ 44 - 44
examples/heat/lu_kernels_model.c

@@ -19,8 +19,8 @@
 #include "lu_kernels_model.h"
 
 /*
- * As a convention, in that file, descr[0] is represented by A,
- * 				  descr[1] is B ...
+ * As a convention, in this file, buffers[0] is represented by A,
+ * 				  buffers[1] is B ...
  */
 
 /*
@@ -42,22 +42,22 @@
  *
  */
 
-double task_11_cost(struct starpu_buffer_descr *descr)
+double task_11_cost(struct starpu_task *task, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/537.5);
 
 	return PERTURBATE(cost);
 }
 
-double task_12_cost(struct starpu_buffer_descr *descr)
+double task_12_cost(struct starpu_task *task, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 /*	double cost = ((n*n*n)/1744.695); */
 	double cost = ((n*n*n)/3210.80);
@@ -67,11 +67,11 @@ double task_12_cost(struct starpu_buffer_descr *descr)
 }
 
 
-double task_21_cost(struct starpu_buffer_descr *descr)
+double task_21_cost(struct starpu_task *task, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 /*	double cost = ((n*n*n)/1744.695); */
 	double cost = ((n*n*n)/3691.53);
@@ -82,13 +82,13 @@ double task_21_cost(struct starpu_buffer_descr *descr)
 
 
 
-double task_22_cost(struct starpu_buffer_descr *descr)
+double task_22_cost(struct starpu_task *task, unsigned nimpl)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_matrix_get_nx(descr[2].handle);
-	ny = starpu_matrix_get_ny(descr[2].handle);
-	nz = starpu_matrix_get_ny(descr[0].handle);
+	nx = starpu_matrix_get_nx(task->buffers[2].handle);
+	ny = starpu_matrix_get_ny(task->buffers[2].handle);
+	nz = starpu_matrix_get_ny(task->buffers[0].handle);
 
 	double cost = ((nx*ny*nz)/4110.0);
 
@@ -102,11 +102,11 @@ double task_22_cost(struct starpu_buffer_descr *descr)
  */
 
 
-double task_11_cost_cuda(struct starpu_buffer_descr *descr)
+double task_11_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/1853.7806);
 
@@ -114,11 +114,11 @@ double task_11_cost_cuda(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-double task_12_cost_cuda(struct starpu_buffer_descr *descr)
+double task_12_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/42838.5718);
 
@@ -127,11 +127,11 @@ double task_12_cost_cuda(struct starpu_buffer_descr *descr)
 }
 
 
-double task_21_cost_cuda(struct starpu_buffer_descr *descr)
+double task_21_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/49208.667);
 
@@ -141,13 +141,13 @@ double task_21_cost_cuda(struct starpu_buffer_descr *descr)
 
 
 
-double task_22_cost_cuda(struct starpu_buffer_descr *descr)
+double task_22_cost_cuda(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_matrix_get_nx(descr[2].handle);
-	ny = starpu_matrix_get_ny(descr[2].handle);
-	nz = starpu_matrix_get_ny(descr[0].handle);
+	nx = starpu_matrix_get_nx(task->buffers[2].handle);
+	ny = starpu_matrix_get_ny(task->buffers[2].handle);
+	nz = starpu_matrix_get_ny(task->buffers[0].handle);
 
 	double cost = ((nx*ny*nz)/57523.560);
 
@@ -161,11 +161,11 @@ double task_22_cost_cuda(struct starpu_buffer_descr *descr)
  *
  */
 
-double task_11_cost_cpu(struct starpu_buffer_descr *descr)
+double task_11_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/537.5);
 
@@ -173,11 +173,11 @@ double task_11_cost_cpu(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-double task_12_cost_cpu(struct starpu_buffer_descr *descr)
+double task_12_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/6668.224);
 
@@ -186,11 +186,11 @@ double task_12_cost_cpu(struct starpu_buffer_descr *descr)
 }
 
 
-double task_21_cost_cpu(struct starpu_buffer_descr *descr)
+double task_21_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t n;
 
-	n = starpu_matrix_get_nx(descr[0].handle);
+	n = starpu_matrix_get_nx(task->buffers[0].handle);
 
 	double cost = ((n*n*n)/6793.8423);
 
@@ -200,13 +200,13 @@ double task_21_cost_cpu(struct starpu_buffer_descr *descr)
 
 
 
-double task_22_cost_cpu(struct starpu_buffer_descr *descr)
+double task_22_cost_cpu(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_matrix_get_nx(descr[2].handle);
-	ny = starpu_matrix_get_ny(descr[2].handle);
-	nz = starpu_matrix_get_ny(descr[0].handle);
+	nx = starpu_matrix_get_nx(task->buffers[2].handle);
+	ny = starpu_matrix_get_ny(task->buffers[2].handle);
+	nz = starpu_matrix_get_ny(task->buffers[0].handle);
 
 	double cost = ((nx*ny*nz)/4203.0175);
 
@@ -216,11 +216,11 @@ double task_22_cost_cpu(struct starpu_buffer_descr *descr)
 
 struct starpu_perfmodel model_11 =
 {
-	.cost_model = task_11_cost,
+	.cost_function = task_11_cost,
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_11_cost_cpu },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_11_cost_cuda }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_11_cost_cpu },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_11_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
@@ -234,11 +234,11 @@ struct starpu_perfmodel model_11 =
 
 struct starpu_perfmodel model_12 =
 {
-	.cost_model = task_12_cost,
+	.cost_function = task_12_cost,
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_12_cost_cpu },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_12_cost_cuda }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_12_cost_cpu },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_12_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
@@ -252,11 +252,11 @@ struct starpu_perfmodel model_12 =
 
 struct starpu_perfmodel model_21 =
 {
-	.cost_model = task_21_cost,
+	.cost_function = task_21_cost,
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_21_cost_cpu },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_21_cost_cuda }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_21_cost_cpu },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_21_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
@@ -270,11 +270,11 @@ struct starpu_perfmodel model_21 =
 
 struct starpu_perfmodel model_22 =
 {
-	.cost_model = task_22_cost,
+	.cost_function = task_22_cost,
 	.per_arch =
 	{
-		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_22_cost_cpu },
-		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_22_cost_cuda }
+		[STARPU_CPU_DEFAULT][0] = { .cost_function = task_22_cost_cpu },
+		[STARPU_CUDA_DEFAULT][0] = { .cost_function = task_22_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS

+ 6 - 2
include/starpu_perfmodel.h

@@ -22,6 +22,8 @@
 #include <starpu.h>
 #include <stdio.h>
 
+#include <starpu_util.h>
+
 #if ! defined(_MSC_VER)
 #  include <pthread.h>
 #endif
@@ -131,7 +133,8 @@ struct starpu_regression_model
 
 struct starpu_per_arch_perfmodel
 {
-	double (*cost_model)(struct starpu_buffer_descr *t); /* returns expected duration in µs */
+	double (*cost_model)(struct starpu_buffer_descr *t) STARPU_DEPRECATED; /* returns expected duration in µs */
+	double (*cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl); /* returns expected duration in µs */
 
 	/* internal variables */
 	double alpha;
@@ -158,7 +161,8 @@ struct starpu_perfmodel
 	enum starpu_perfmodel_type type;
 
 	/* single cost model (STARPU_COMMON), returns expected duration in µs */
-	double (*cost_model)(struct starpu_buffer_descr *);
+	double (*cost_model)(struct starpu_buffer_descr *) STARPU_DEPRECATED;
+	double (*cost_function)(struct starpu_task *, unsigned nimpl);
 
 	/* per-architecture model */
 	struct starpu_per_arch_perfmodel per_arch[STARPU_NARCH_VARIATIONS][STARPU_MAXIMPLEMENTATIONS];

+ 17 - 4
src/core/perfmodel/perfmodel.c

@@ -71,11 +71,15 @@ enum starpu_perf_archtype starpu_worker_get_perf_archtype(int workerid)
 static double per_arch_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
 {
 	double exp = -1.0;
+	double (*per_arch_cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
 	double (*per_arch_cost_model)(struct starpu_buffer_descr *);
 
+	per_arch_cost_function = model->per_arch[arch][nimpl].cost_function;
 	per_arch_cost_model = model->per_arch[arch][nimpl].cost_model;
 
-	if (per_arch_cost_model)
+	if (per_arch_cost_function)
+		exp = per_arch_cost_function(task, arch, nimpl);
+	else if (per_arch_cost_model)
 		exp = per_arch_cost_model(task->buffers);
 
 	return exp;
@@ -111,12 +115,21 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 	return -1.0;
 }
 
-static double common_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task)
+static double common_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
 {
 	double exp;
 	double alpha;
 
-	if (model->cost_model)
+	if (model->cost_function)
+	{
+		exp = model->cost_function(task, nimpl);
+		alpha = starpu_worker_get_relative_speedup(arch);
+
+		STARPU_ASSERT(alpha != 0.0f);
+
+		return (exp/alpha);
+	}
+	else if (model->cost_model)
 	{
 		exp = model->cost_model(task->buffers);
 		alpha = starpu_worker_get_relative_speedup(arch);
@@ -172,7 +185,7 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
 				return per_arch_task_expected_perf(model, arch, task, nimpl);
 			case STARPU_COMMON:
-				return common_task_expected_perf(model, arch, task);
+				return common_task_expected_perf(model, arch, task, nimpl);
 
 			case STARPU_HISTORY_BASED: