15 years ago · 7eed13a288
--- a/examples/mandelbrot/mandelbrot.c
+++ b/examples/mandelbrot/mandelbrot.c
@@ -233,7 +233,8 @@ static void compute_block_opencl(void *descr[], void *cl_arg)
 
				 {
			
 
				 	int iby, block_size;
			
 
				 	double stepX, stepY;
			
 
				-	starpu_unpack_cl_args(cl_arg, &iby, &block_size, &stepX, &stepY);
			
 
				+	int *pcnt; // unused for OpenCL tasks
			
 
				+	starpu_unpack_cl_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
			
 
				 
			
 
				 	cl_mem data = (cl_mem)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				 
			
@@ -277,7 +278,8 @@ static void compute_block(void *descr[], void *cl_arg)
 
				 
			
 
				 	int iby, block_size;
			
 
				 	double stepX, stepY;
			
 
				-	starpu_unpack_cl_args(cl_arg, &iby, &block_size, &stepX, &stepY);
			
 
				+	int *pcnt; // unused for sequential tasks
			
 
				+	starpu_unpack_cl_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
			
 
				 
			
 
				 	unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				 
			
@@ -317,23 +319,28 @@ static void compute_block(void *descr[], void *cl_arg)
 
				 
			
 
				 static void compute_block_spmd(void *descr[], void *cl_arg)
			
 
				 {
			
 
				-	int ix, iy;
			
 
				 
			
 
				 	int iby, block_size;
			
 
				 	double stepX, stepY;
			
 
				-	starpu_unpack_cl_args(cl_arg, &iby, &block_size, &stepX, &stepY);
			
 
				+	int *pcnt;
			
 
				+	starpu_unpack_cl_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
			
 
				 
			
 
				 	int size = starpu_combined_worker_get_size();
			
 
				 	int rank = starpu_combined_worker_get_rank();
			
 
				 
			
 
				 	unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				 
			
 
				-	int local_block_size = block_size/size;
			
 
				+	int ix, iy; // global coordinates
			
 
				+	int local_iy; // current line
			
 
				 
			
 
				-	int local_iy;
			
 
				-	for (local_iy = rank*local_block_size; local_iy < (rank + 1)*local_block_size; local_iy++)
			
 
				+	while (1)
			
 
				 	{
			
 
				+		local_iy = STARPU_ATOMIC_ADD(pcnt, 1);
			
 
				+		if (local_iy >= size)
			
 
				+			break;
			
 
				+
			
 
				 		iy = iby*block_size + local_iy;
			
 
				+	
			
 
				 		for (ix = 0; ix < width; ix++)
			
 
				 		{
			
 
				 			double cx = leftX + ix * stepX;
			
@@ -493,14 +500,24 @@ int main(int argc, char **argv)
 
				 		double stepX = (rightX - leftX)/width;
			
 
				 		double stepY = (topY - bottomY)/height;
			
 
				 
			
 
				+		/* In case we have a SPMD task, each worker will grab tasks in
			
 
				+		 * a greedy fashion and select which piece of image to compute by
			
 
				+		 * incrementing a counter shared by all the workers within the
			
 
				+		 * parallel task. */
			
 
				+		int per_block_cnt[nblocks];
			
 
				+
			
 
				 		for (iby = 0; iby < nblocks; iby++)
			
 
				 		{
			
 
				+			per_block_cnt[iby] = 0;
			
 
				+			int *pcnt = &per_block_cnt[iby];
			
 
				+
			
 
				 			starpu_insert_task(use_spmd?&spmd_mandelbrot_cl:&mandelbrot_cl,
			
 
				 				STARPU_VALUE, &iby, sizeof(iby),
			
 
				 				STARPU_VALUE, &block_size, sizeof(block_size),
			
 
				 				STARPU_VALUE, &stepX, sizeof(stepX),
			
 
				 				STARPU_VALUE, &stepY, sizeof(stepY),
			
 
				 				STARPU_W, block_handles[iby],
			
 
				+				STARPU_VALUE, &pcnt, sizeof(int *),
			
 
				 				0);
			
 
				 		}