Преглед на файлове

Enable codelets using BLAS on MIC

Samuel Thibault преди 9 години
родител
ревизия
a95613dc3b

+ 1 - 0
examples/axpy/axpy.c

@@ -91,6 +91,7 @@ static struct starpu_perfmodel axpy_model =
 static struct starpu_codelet axpy_cl =
 {
 	.cpu_funcs = {axpy_cpu},
+	.cpu_funcs_name = {"axpy_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {axpy_gpu},
 #elif defined(STARPU_SIMGRID)

+ 18 - 9
examples/cg/cg_kernels.c

@@ -85,7 +85,7 @@ static void accumulate_variable_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void accumulate_variable_cpu(void *descr[], void *cl_arg)
+void accumulate_variable_cpu(void *descr[], void *cl_arg)
 {
 	TYPE *v_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	TYPE *v_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);
@@ -103,6 +103,7 @@ struct starpu_codelet accumulate_variable_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {accumulate_variable_cpu},
+	.cpu_funcs_name = {"accumulate_variable_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {accumulate_variable_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -123,7 +124,7 @@ static void accumulate_vector_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void accumulate_vector_cpu(void *descr[], void *cl_arg)
+void accumulate_vector_cpu(void *descr[], void *cl_arg)
 {
 	TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
 	TYPE *v_src = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -142,6 +143,7 @@ struct starpu_codelet accumulate_vector_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {accumulate_vector_cpu},
+	.cpu_funcs_name = {"accumulate_vector_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {accumulate_vector_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -166,7 +168,7 @@ static void bzero_variable_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void bzero_variable_cpu(void *descr[], void *cl_arg)
+void bzero_variable_cpu(void *descr[], void *cl_arg)
 {
 	TYPE *v = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*v = (TYPE)0.0;
@@ -182,6 +184,7 @@ struct starpu_codelet bzero_variable_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {bzero_variable_cpu},
+	.cpu_funcs_name = {"bzero_variable_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {bzero_variable_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -201,7 +204,7 @@ static void bzero_vector_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void bzero_vector_cpu(void *descr[], void *cl_arg)
+void bzero_vector_cpu(void *descr[], void *cl_arg)
 {
 	TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
@@ -219,6 +222,7 @@ struct starpu_codelet bzero_vector_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {bzero_vector_cpu},
+	.cpu_funcs_name = {"bzero_vector_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {bzero_vector_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -250,7 +254,7 @@ static void dot_kernel_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void dot_kernel_cpu(void *descr[], void *cl_arg)
+void dot_kernel_cpu(void *descr[], void *cl_arg)
 {
 	TYPE *dot = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); 
 	TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]);
@@ -276,6 +280,7 @@ static struct starpu_codelet dot_kernel_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {dot_kernel_cpu},
+	.cpu_funcs_name = {"dot_kernel_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dot_kernel_cuda},
 #endif
@@ -334,7 +339,7 @@ static void scal_kernel_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void scal_kernel_cpu(void *descr[], void *cl_arg)
+void scal_kernel_cpu(void *descr[], void *cl_arg)
 {
 	TYPE alpha;
 	starpu_codelet_unpack_args(cl_arg, &alpha);
@@ -356,6 +361,7 @@ static struct starpu_codelet scal_kernel_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {scal_kernel_cpu},
+	.cpu_funcs_name = {"scal_kernel_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {scal_kernel_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -387,7 +393,7 @@ static void gemv_kernel_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void gemv_kernel_cpu(void *descr[], void *cl_arg)
+void gemv_kernel_cpu(void *descr[], void *cl_arg)
 {
 	TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
 	TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]);
@@ -431,6 +437,7 @@ static struct starpu_codelet gemv_kernel_cl =
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {gemv_kernel_cpu},
+	.cpu_funcs_name = {"gemv_kernel_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {gemv_kernel_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -502,7 +509,7 @@ static void scal_axpy_kernel_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void scal_axpy_kernel_cpu(void *descr[], void *cl_arg)
+void scal_axpy_kernel_cpu(void *descr[], void *cl_arg)
 {
 	TYPE p1, p2;
 	starpu_codelet_unpack_args(cl_arg, &p1, &p2);
@@ -530,6 +537,7 @@ static struct starpu_codelet scal_axpy_kernel_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {scal_axpy_kernel_cpu},
+	.cpu_funcs_name = {"scal_axpy_kernel_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {scal_axpy_kernel_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -580,7 +588,7 @@ static void axpy_kernel_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static void axpy_kernel_cpu(void *descr[], void *cl_arg)
+void axpy_kernel_cpu(void *descr[], void *cl_arg)
 {
 	TYPE p1;
 	starpu_codelet_unpack_args(cl_arg, &p1);
@@ -605,6 +613,7 @@ static struct starpu_codelet axpy_kernel_cl =
 {
 	.can_execute = can_execute,
 	.cpu_funcs = {axpy_kernel_cpu},
+	.cpu_funcs_name = {"axpy_kernel_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {axpy_kernel_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},

+ 3 - 0
examples/cholesky/cholesky_kernels.c

@@ -274,6 +274,7 @@ struct starpu_codelet cl11 =
 {
 	.type = STARPU_SEQ,
 	.cpu_funcs = {chol_cpu_codelet_update_u11},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u11"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {chol_cublas_codelet_update_u11},
 #elif defined(STARPU_SIMGRID)
@@ -288,6 +289,7 @@ struct starpu_codelet cl21 =
 {
 	.type = STARPU_SEQ,
 	.cpu_funcs = {chol_cpu_codelet_update_u21},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u21"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {chol_cublas_codelet_update_u21},
 #elif defined(STARPU_SIMGRID)
@@ -304,6 +306,7 @@ struct starpu_codelet cl22 =
 	.type = STARPU_SEQ,
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {chol_cpu_codelet_update_u22},
+	.cpu_funcs_name = {"chol_cpu_codelet_update_u22"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {chol_cublas_codelet_update_u22},
 #elif defined(STARPU_SIMGRID)

+ 4 - 0
examples/heat/dw_factolu.c

@@ -57,6 +57,7 @@ static unsigned no_prio = 0;
 static struct starpu_codelet cl11 =
 {
 	.cpu_funcs = {dw_cpu_codelet_update_u11},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u11"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u11},
 #endif
@@ -68,6 +69,7 @@ static struct starpu_codelet cl11 =
 static struct starpu_codelet cl12 =
 {
 	.cpu_funcs = {dw_cpu_codelet_update_u12},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u12"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u12},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -80,6 +82,7 @@ static struct starpu_codelet cl12 =
 static struct starpu_codelet cl21 =
 {
 	.cpu_funcs = {dw_cpu_codelet_update_u21},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u21"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u21},
 	.cuda_flags = {STARPU_CUDA_ASYNC},
@@ -92,6 +95,7 @@ static struct starpu_codelet cl21 =
 static struct starpu_codelet cl22 =
 {
 	.cpu_funcs = {dw_cpu_codelet_update_u22},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u22"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u22},
 	.cuda_flags = {STARPU_CUDA_ASYNC},

+ 4 - 0
examples/heat/dw_factolu_grain.c

@@ -60,6 +60,7 @@ static struct starpu_codelet cl11 =
 {
 	.modes = { STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u11},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u11"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u11},
 #endif
@@ -94,6 +95,7 @@ static struct starpu_codelet cl12 =
 {
 	.modes = { STARPU_R, STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u12},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u12"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u12},
 #endif
@@ -138,6 +140,7 @@ static struct starpu_codelet cl21 =
 {
 	.modes = { STARPU_R, STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u21},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u21"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u21},
 #endif
@@ -179,6 +182,7 @@ static struct starpu_codelet cl22 =
 {
 	.modes = { STARPU_R, STARPU_R, STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u22},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u22"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u22},
 #endif

+ 4 - 0
examples/heat/dw_factolu_tag.c

@@ -58,6 +58,7 @@ static struct starpu_codelet cl11 =
 {
 	.modes = { STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u11},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u11"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u11},
 #endif
@@ -93,6 +94,7 @@ static struct starpu_codelet cl12 =
 {
 	.modes = { STARPU_R, STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u12},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u12"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u12},
 #endif
@@ -137,6 +139,7 @@ static struct starpu_codelet cl21 =
 {
 	.modes = { STARPU_R, STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u21},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u21"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u21},
 #endif
@@ -178,6 +181,7 @@ static struct starpu_codelet cl22 =
 {
 	.modes = { STARPU_R, STARPU_R, STARPU_RW },
 	.cpu_funcs = {dw_cpu_codelet_update_u22},
+	.cpu_funcs_name = {"dw_cpu_codelet_update_u22"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {dw_cublas_codelet_update_u22},
 #endif

+ 9 - 0
examples/heat/dw_sparse_cg.c

@@ -141,6 +141,7 @@ void init_cg(struct cg_problem *problem)
 	struct starpu_task *task1 = create_task(1UL);
 	task1->cl->where = STARPU_CPU;
 	task1->cl->cpu_funcs[0] = cpu_codelet_func_1;
+	task1->cl->cpu_funcs_name[0] = "cpu_codelet_func_1";
 	task1->cl->nbuffers = 4;
 	task1->cl->modes[0] = STARPU_R;
 	task1->cl->modes[1] = STARPU_R;
@@ -156,6 +157,7 @@ void init_cg(struct cg_problem *problem)
 	struct starpu_task *task2 = create_task(2UL);
 	task2->cl->where = STARPU_CPU;
 	task2->cl->cpu_funcs[0] = cpu_codelet_func_2;
+	task2->cl->cpu_funcs_name[0] = "cpu_codelet_func_2";
 	task2->cl->nbuffers = 2;
 	task2->cl->modes[0] = STARPU_W;
 	task2->cl->modes[1] = STARPU_R;
@@ -172,6 +174,7 @@ void init_cg(struct cg_problem *problem)
 	task3->cl->cuda_funcs[0] = cublas_codelet_func_3;
 #endif
 	task3->cl->cpu_funcs[0] = cpu_codelet_func_3;
+	task3->cl->cpu_funcs_name[0] = "cpu_codelet_func_3";
 	task3->cl_arg = problem;
 	task3->cl->nbuffers = 1;
 	task3->cl->modes[0] = STARPU_R;
@@ -209,6 +212,7 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 	struct starpu_task *task4 = create_task(maskiter | 4UL);
 	task4->cl->where = STARPU_CPU;
 	task4->cl->cpu_funcs[0] = cpu_codelet_func_4;
+	task4->cl->cpu_funcs_name[0] = "cpu_codelet_func_4";
 	task4->cl->nbuffers = 3;
 	task4->cl->modes[0] = STARPU_R;
 	task4->cl->modes[1] = STARPU_R;
@@ -225,6 +229,7 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 	task5->cl->cuda_funcs[0] = cublas_codelet_func_5;
 #endif
 	task5->cl->cpu_funcs[0] = cpu_codelet_func_5;
+	task5->cl->cpu_funcs_name[0] = "cpu_codelet_func_5";
 	task5->cl_arg = problem;
 	task5->cl->nbuffers = 2;
 	task5->cl->modes[0] = STARPU_R;
@@ -242,6 +247,7 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 	task6->cl->cuda_funcs[0] = cublas_codelet_func_6;
 #endif
 	task6->cl->cpu_funcs[0] = cpu_codelet_func_6;
+	task6->cl->cpu_funcs_name[0] = "cpu_codelet_func_6";
 	task6->cl_arg = problem;
 	task6->cl->nbuffers = 2;
 	task6->cl->modes[0] = STARPU_RW;
@@ -259,6 +265,7 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 	task7->cl->cuda_funcs[0] = cublas_codelet_func_7;
 #endif
 	task7->cl->cpu_funcs[0] = cpu_codelet_func_7;
+	task7->cl->cpu_funcs_name[0] = "cpu_codelet_func_7";
 	task7->cl_arg = problem;
 	task7->cl->nbuffers = 2;
 	task7->cl->modes[0] = STARPU_RW;
@@ -276,6 +283,7 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 	task8->cl->cuda_funcs[0] = cublas_codelet_func_8;
 #endif
 	task8->cl->cpu_funcs[0] = cpu_codelet_func_8;
+	task8->cl->cpu_funcs_name[0] = "cpu_codelet_func_8";
 	task8->cl_arg = problem;
 	task8->cl->nbuffers = 1;
 	task8->cl->modes[0] = STARPU_R;
@@ -290,6 +298,7 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 	task9->cl->cuda_funcs[0] = cublas_codelet_func_9;
 #endif
 	task9->cl->cpu_funcs[0] = cpu_codelet_func_9;
+	task9->cl->cpu_funcs_name[0] = "cpu_codelet_func_9";
 	task9->cl_arg = problem;
 	task9->cl->nbuffers = 2;
 	task9->cl->modes[0] = STARPU_RW;

+ 8 - 0
examples/lu/xlu_kernels.c

@@ -132,10 +132,13 @@ static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nim
 }
 #endif
 
+#define STRINGIFY_(x) #x
+#define STRINGIFY(x) STRINGIFY_(x)
 struct starpu_codelet cl22 =
 {
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u22)},
+	.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_u22))},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {STARPU_LU(cublas_u22)},
 	CAN_EXECUTE
@@ -224,6 +227,7 @@ struct starpu_codelet cl12 =
 {
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u12)},
+	.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_u12))},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {STARPU_LU(cublas_u12)},
 	CAN_EXECUTE
@@ -310,6 +314,7 @@ struct starpu_codelet cl21 =
 {
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u21)},
+	.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_u21))},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {STARPU_LU(cublas_u21)},
 	CAN_EXECUTE
@@ -413,6 +418,7 @@ struct starpu_codelet cl11 =
 {
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u11)},
+	.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_u11))},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {STARPU_LU(cublas_u11)},
 	CAN_EXECUTE
@@ -556,6 +562,7 @@ struct starpu_codelet cl11_pivot =
 {
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u11_pivot)},
+	.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_u11_pivot))},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {STARPU_LU(cublas_u11_pivot)},
 	CAN_EXECUTE
@@ -646,6 +653,7 @@ struct starpu_codelet cl_pivot =
 {
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_pivot)},
+	.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_pivot))},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {STARPU_LU(cublas_pivot)},
 	CAN_EXECUTE

+ 2 - 1
examples/mult/xgemm.c

@@ -163,7 +163,7 @@ static void cublas_mult(void *descr[], STARPU_ATTRIBUTE_UNUSED void *arg)
 }
 #endif
 
-static void cpu_mult(void *descr[], STARPU_ATTRIBUTE_UNUSED  void *arg)
+void cpu_mult(void *descr[], STARPU_ATTRIBUTE_UNUSED  void *arg)
 {
 	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
 	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
@@ -212,6 +212,7 @@ static struct starpu_codelet cl =
 	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {cpu_mult},
+	.cpu_funcs_name = {"cpu_mult"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cublas_mult},
 #elif defined(STARPU_SIMGRID)

+ 3 - 0
examples/pipeline/pipeline.c

@@ -78,6 +78,7 @@ static struct starpu_perfmodel pipeline_model_x =
 static struct starpu_codelet pipeline_codelet_x =
 {
 	.cpu_funcs = {pipeline_cpu_x},
+	.cpu_funcs_name = {"pipeline_cpu_x"},
 	.nbuffers = 1,
 	.modes = {STARPU_W},
 	.model = &pipeline_model_x
@@ -114,6 +115,7 @@ static struct starpu_perfmodel pipeline_model_axpy =
 static struct starpu_codelet pipeline_codelet_axpy =
 {
 	.cpu_funcs = {pipeline_cpu_axpy},
+	.cpu_funcs_name = {"pipeline_cpu_axpy"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {pipeline_cublas_axpy},
 #endif
@@ -157,6 +159,7 @@ static struct starpu_perfmodel pipeline_model_sum =
 static struct starpu_codelet pipeline_codelet_sum =
 {
 	.cpu_funcs = {pipeline_cpu_sum},
+	.cpu_funcs_name = {"pipeline_cpu_sum"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {pipeline_cublas_sum},
 #endif

+ 1 - 0
examples/profiling/profiling.c

@@ -65,6 +65,7 @@ int main(int argc, char **argv)
 	struct starpu_codelet cl =
 	{
 		.cpu_funcs = {sleep_codelet},
+		.cpu_funcs_name = {"sleep_codelet"},
 		.cuda_funcs = {sleep_codelet},
 		.opencl_funcs = {sleep_codelet},
 		.nbuffers = 0,

+ 1 - 0
examples/spmv/dw_block_spmv.c

@@ -163,6 +163,7 @@ unsigned totaltasks;
 struct starpu_codelet cl =
 {
 	.cpu_funcs = { cpu_block_spmv},
+	.cpu_funcs_name = { "cpu_block_spmv" },
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {cublas_block_spmv},
 #endif

+ 1 - 0
examples/spmv/spmv.c

@@ -100,6 +100,7 @@ static struct starpu_data_filter vector_f =
 static struct starpu_codelet spmv_cl =
 {
 	.cpu_funcs = {spmv_kernel_cpu},
+	.cpu_funcs_name = {"spmv_kernel_cpu"},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {spmv_kernel_cuda},
 	.cuda_flags = {STARPU_CUDA_ASYNC},