4 年之前 · 18f21ba151
--- a/ChangeLog
+++ b/ChangeLog
@@ -54,6 +54,7 @@ New features:
 
				 
			
 
				 Small changes:
			
 
				   * Add a synthetic energy efficiency testcase.
			
 
				+  * Make reduction codelets expect the STARPU_COMMUTE flag on their accumulation handle.
			
 
				 
			
 
				 StarPU 1.3.8
			
 
				 ====================================================================
			
--- a/doc/doxygen/chapters/310_data_management.doxy
+++ b/doc/doxygen/chapters/310_data_management.doxy
@@ -643,7 +643,8 @@ struct starpu_codelet accumulate_variable_cl =
 
				         .cpu_funcs = { accumulate_variable_cpu },
			
 
				         .cpu_funcs_name = { "accumulate_variable_cpu" },
			
 
				         .cuda_funcs = { accumulate_variable_cuda },
			
 
				-        .nbuffers = 1,
			
 
				+        .nbuffers = 2,
			
 
				+        .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 }
			
 
				 \endcode
			
 
				 
			
--- a/examples/cg/cg_kernels.c
+++ b/examples/cg/cg_kernels.c
@@ -120,7 +120,7 @@ struct starpu_codelet accumulate_variable_cl =
 
				 	.cuda_funcs = {accumulate_variable_cuda},
			
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				 	.model = &accumulate_variable_model
			
 
				 };
			
@@ -164,7 +164,7 @@ struct starpu_codelet accumulate_vector_cl =
 
				 	.cuda_funcs = {accumulate_vector_cuda},
			
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				 	.model = &accumulate_vector_model
			
 
				 };
			
--- a/examples/pi/pi_redux.c
+++ b/examples/pi/pi_redux.c
@@ -322,7 +322,7 @@ static struct starpu_codelet redux_codelet =
 
				 	.cuda_funcs = {redux_cuda_func},
			
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2
			
 
				 };
			
 
				 
			
--- a/examples/reductions/dot_product.c
+++ b/examples/reductions/dot_product.c
@@ -211,7 +211,7 @@ static struct starpu_codelet redux_codelet =
 
				 	.opencl_funcs = {redux_opencl_func},
			
 
				 	.opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				 #endif
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				 	.name = "redux"
			
 
				 };
			
--- a/examples/reductions/minmax_reduction.c
+++ b/examples/reductions/minmax_reduction.c
@@ -95,7 +95,7 @@ static struct starpu_codelet minmax_redux_codelet =
 
				 {
			
 
				 	.cpu_funcs = {minmax_redux_cpu_func},
			
 
				 	.cpu_funcs_name = {"minmax_redux_cpu_func"},
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				 	.name = "redux"
			
 
				 };
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -560,8 +560,10 @@ struct starpu_codelet;
 
				 /**
			
 
				    Set the codelets to be used for \p handle when it is accessed in the
			
 
				    mode ::STARPU_REDUX. Per-worker buffers will be initialized with
			
 
				-   the codelet \p init_cl, and reduction between per-worker buffers will be
			
 
				-   done with the codelet \p redux_cl.
			
 
				+   the codelet \p init_cl (which has to take one handle with ::STARPU_W), and
			
 
				+   reduction between per-worker buffers will be done with the codelet \p
			
 
				+   redux_cl (which has to take a first accumulation handle with
			
 
				+   ::STARPU_RW|::STARPU_COMMUTE, and a second contribution handle with ::STARPU_R).
			
 
				 */
			
 
				 void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl);
			
 
				 
			
--- a/mpi/tests/mpi_reduction.c
+++ b/mpi/tests/mpi_reduction.c
@@ -37,7 +37,7 @@ static struct starpu_codelet init_codelet =
 
				 static struct starpu_codelet redux_codelet =
			
 
				 {
			
 
				 	.cpu_funcs = {redux_cpu_func},
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 	.model = &starpu_perfmodel_nop,
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -280,12 +280,21 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 					redux_task->cl = handle->redux_cl;
			
 
				 					STARPU_ASSERT(redux_task->cl);
			
 
				 					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0)))
			
 
				-						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW, 0);
			
 
				+						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW|STARPU_COMMUTE, 0);
			
 
				 					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1)))
			
 
				 						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1);
			
 
				 
			
 
				-					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet %p has to be RW", redux_task->cl);
			
 
				+					STARPU_ASSERT_MSG((STARPU_CODELET_GET_MODE(redux_task->cl, 0) & ~STARPU_COMMUTE) == STARPU_RW, "First parameter of reduction codelet %p has to be RW", redux_task->cl);
			
 
				 					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet %p has to be R", redux_task->cl);
			
 
				+					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0) & STARPU_COMMUTE))
			
 
				+					{
			
 
				+						static int warned;
			
 
				+						if (!warned)
			
 
				+						{
			
 
				+							warned = 1;
			
 
				+							_STARPU_DISP("Warning: for reductions, codelet %p should have STARPU_COMMUTE along STARPU_RW\n", redux_task->cl);
			
 
				+						}
			
 
				+					}
			
 
				 
			
 
				 					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0);
			
 
				 					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1);
			
--- a/tests/datawizard/increment_redux.c
+++ b/tests/datawizard/increment_redux.c
@@ -137,7 +137,7 @@ static struct starpu_codelet redux_cl =
 
				 #endif
			
 
				 	.cpu_funcs = {redux_cpu_kernel},
			
 
				 	.cpu_funcs_name = {"redux_cpu_kernel"},
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2
			
 
				 };
			
 
				 
			
--- a/tests/datawizard/increment_redux_lazy.c
+++ b/tests/datawizard/increment_redux_lazy.c
@@ -125,7 +125,7 @@ static struct starpu_codelet redux_cl =
 
				 #endif
			
 
				 	.cpu_funcs = {redux_cpu_kernel},
			
 
				 	.cpu_funcs_name = {"redux_cpu_kernel"},
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2
			
 
				 };
			
 
				 
			
--- a/tests/datawizard/increment_redux_v2.c
+++ b/tests/datawizard/increment_redux_v2.c
@@ -138,7 +138,7 @@ static struct starpu_codelet redux_cl =
 
				 #endif
			
 
				 	.cpu_funcs = {redux_cpu_kernel},
			
 
				 	.cpu_funcs_name = {"redux_cpu_kernel"},
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2
			
 
				 };
			
 
				 
			
--- a/tests/datawizard/redux_acquire.c
+++ b/tests/datawizard/redux_acquire.c
@@ -45,7 +45,7 @@ static struct starpu_codelet init_codelet =
 
				 static struct starpu_codelet redux_codelet =
			
 
				 {
			
 
				 	.cpu_funcs = {redux_cpu_func},
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				 	.name = "redux_codelet"
			
 
				 };
			
--- a/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c
+++ b/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c
@@ -127,7 +127,7 @@ static struct starpu_codelet cl_redux =
 
				 	.opencl_funcs = { wait_OPENCL },
			
 
				 	.cpu_funcs_name = { "wait_CPU" },
			
 
				 	.nbuffers = 2,
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
			
 
				 	.model = &perf_model_redux,
			
 
				 	.name = "redux",
			
--- a/tests/microbenchs/parallel_redux_homogeneous_tasks_data.c
+++ b/tests/microbenchs/parallel_redux_homogeneous_tasks_data.c
@@ -96,7 +96,7 @@ static struct starpu_codelet cl_redux =
 
				 	.opencl_funcs = { wait_homogeneous },
			
 
				 	.cpu_funcs_name = { "wait_homogeneous" },
			
 
				 	.nbuffers = 2,
			
 
				-	.modes = {STARPU_RW, STARPU_R},
			
 
				+	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.flags = STARPU_CODELET_SIMGRID_EXECUTE,
			
 
				 	.model = &perf_model_redux,
			
 
				 	.name = "redux",