浏览代码

StarPU automatically calls starpu_data_start_reduction_mode when a piece of
data is acquired in REDUX mode and its previous access mode was not already REDUX.

Cédric Augonnet 15 年之前
父节点
当前提交
af9ea8975f
共有 4 个文件被更改,包括 24 次插入10 次删除
  1. 0 3
      examples/dot_product/dot_product.c
  2. 9 7
      examples/pi/pi_redux.c
  3. 13 0
      src/core/dependencies/data_concurrency.c
  4. 2 0
      src/datawizard/coherency.h

+ 0 - 3
examples/dot_product/dot_product.c

@@ -146,7 +146,6 @@ static struct starpu_codelet_t dot_codelet = {
  *	Tasks initialization
  */
 
-extern void starpu_data_start_reduction_mode(starpu_data_handle handle);
 extern void starpu_data_end_reduction_mode(starpu_data_handle handle);
 
 int main(int argc, char **argv)
@@ -195,8 +194,6 @@ int main(int argc, char **argv)
 	 */
 	starpu_data_set_reduction_methods(dot_handle, &redux_codelet, &init_codelet);
 
-	starpu_data_start_reduction_mode(dot_handle);
-
 	for (block = 0; block < nblocks; block++)
 	{
 		struct starpu_task *task = starpu_task_create();

+ 9 - 7
examples/pi/pi_redux.c

@@ -18,6 +18,8 @@
 #include <starpu_cuda.h>
 #include <stdlib.h>
 
+#define PI	3.14159265358979323846
+
 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CURAND)
 #error CURAND is required to run that example on CUDA devices
 #endif
@@ -30,7 +32,7 @@
 #define NSHOT_PER_TASK	(1024*1024)
 
 /* default value */
-static unsigned ntasks = 128;
+static unsigned ntasks = 1024;
 
 /*
  *	Initialization of the Random Number Generators (RNG)
@@ -187,7 +189,6 @@ static struct starpu_codelet_t init_codelet = {
         .nbuffers = 1
 };
 
-
 void redux_cpu_func(void *descr[], void *cl_arg)
 {
 	unsigned long *a = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]);
@@ -232,7 +233,6 @@ int main(int argc, char **argv)
 
 	starpu_data_set_reduction_methods(shot_cnt_handle,
 					&redux_codelet, &init_codelet);
-	starpu_data_start_reduction_mode(shot_cnt_handle);
 
 	struct timeval start;
 	struct timeval end;
@@ -262,10 +262,12 @@ int main(int argc, char **argv)
 	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
 	starpu_data_unregister(shot_cnt_handle);
 
-	/* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4, probability to impact the disk: pi/4 */
-	unsigned long total = ntasks*NSHOT_PER_TASK;	
-	fprintf(stderr, "Pi approximation : %f (%ld / %ld)\n",
-			((float)shot_cnt*4.0)/total, shot_cnt, total);
+	/* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4,
+	 * probability to impact the disk: pi/4 */
+	unsigned long total = ntasks*NSHOT_PER_TASK;
+	double pi_approx = ((double)shot_cnt*4.0)/total;
+	fprintf(stderr, "Pi approximation : %lf (%ld / %ld)\n", pi_approx, shot_cnt, total);
+	fprintf(stderr, "Error %le \n", pi_approx - PI);
 	fprintf(stderr, "Total time : %f ms\n", timing/1000.0);
 	fprintf(stderr, "Speed : %f GShot/s\n", total/(1e3*timing));
 

+ 13 - 0
src/core/dependencies/data_concurrency.c

@@ -88,8 +88,13 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 	if ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode)))
 	{
 		handle->refcnt++;
+
+		starpu_access_mode previous_mode = handle->current_mode;
 		handle->current_mode = mode;
 
+		if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
+			starpu_data_start_reduction_mode(handle);
+
 		/* success */
 		ret = 0;
 	}
@@ -208,8 +213,16 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 		if (r_mode == STARPU_RW)
 			r_mode = STARPU_W;
 
+		starpu_access_mode previous_mode = handle->current_mode;
 		handle->current_mode = r_mode;
 
+		/* In case we enter in a reduction mode, we invalidate all per
+		 * worker replicates. Note that the "per_node" replicates are
+		 * kept intact because we'll reduce a valid copy of the
+		 * "per-node replicate" with the per-worker replicates .*/
+		if ((r_mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
+			starpu_data_start_reduction_mode(handle);
+
 		_starpu_spin_unlock(&handle->header_lock);
 
 		if (r->is_requested_by_codelet)

+ 2 - 0
src/datawizard/coherency.h

@@ -202,5 +202,7 @@ uint32_t _starpu_select_node_to_handle_request(uint32_t src_node, uint32_t dst_n
 uint32_t _starpu_select_src_node(struct starpu_data_state_t *state);
 
 void _starpu_redux_init_data_replicate(starpu_data_handle handle, struct starpu_data_replicate_s *replicate, int workerid);
+void starpu_data_start_reduction_mode(starpu_data_handle handle);
+void starpu_data_end_reduction_mode(starpu_data_handle handle);
 
 #endif // __COHERENCY__H__