Преглед на файлове

mpi/examples: fix spacing

Nathalie Furmento преди 12 години
родител
ревизия
490532ebd6

+ 13 - 13
mpi/examples/cholesky/mpi_cholesky.c

@@ -51,18 +51,18 @@ int main(int argc, char **argv)
 
 	if (dblockx == -1 || dblocky == -1)
 	{
-	     int factor;
-	     dblockx = nodes;
-	     dblocky = 1;
-	     for(factor=sqrt(nodes) ; factor>1 ; factor--)
-	     {
-		  if (nodes % factor == 0)
-		  {
-		       dblockx = nodes/factor;
-		       dblocky = factor;
-		       break;
-		  }
-	     }
+		int factor;
+		dblockx = nodes;
+		dblocky = 1;
+		for(factor=sqrt(nodes) ; factor>1 ; factor--)
+		{
+			if (nodes % factor == 0)
+			{
+				dblockx = nodes/factor;
+				dblocky = factor;
+				break;
+			}
+		}
 	}
 
 	unsigned i,j,x,y;
@@ -198,7 +198,7 @@ int main(int argc, char **argv)
 	}
 
 	int correctness = 1;
-	for(x = 0; x < nblocks ;  x++)
+	for(x = 0; x < nblocks ; x++)
 	{
 		for (y = 0; y < nblocks; y++)
 		{

+ 7 - 7
mpi/examples/cholesky/mpi_cholesky.h

@@ -57,31 +57,31 @@ static void __attribute__((unused)) parse_args(int argc, char **argv)
 	{
 		if (strcmp(argv[i], "-size") == 0)
 		{
-		        char *argptr;
+			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 		}
 
 		if (strcmp(argv[i], "-dblockx") == 0)
 		{
-		        char *argptr;
+			char *argptr;
 			dblockx = strtol(argv[++i], &argptr, 10);
 		}
-		
+
 		if (strcmp(argv[i], "-dblocky") == 0)
 		{
-		        char *argptr;
+			char *argptr;
 			dblocky = strtol(argv[++i], &argptr, 10);
 		}
-	
+
 		if (strcmp(argv[i], "-nblocks") == 0)
 		{
-		        char *argptr;
+			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 
 		if (strcmp(argv[i], "-nbigblocks") == 0)
 		{
-		        char *argptr;
+			char *argptr;
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 		}
 

+ 2 - 2
mpi/examples/cholesky/mpi_cholesky_codelets.c

@@ -79,7 +79,7 @@ void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblocks, in
 	data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
 	for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle_t));
 
-	for(x = 0; x < nblocks ;  x++)
+	for(x = 0; x < nblocks ; x++)
 	{
 		for (y = 0; y < nblocks; y++)
 		{
@@ -148,7 +148,7 @@ void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblocks, in
 
 	starpu_task_wait_for_all();
 
-	for(x = 0; x < nblocks ;  x++)
+	for(x = 0; x < nblocks ; x++)
 	{
 		for (y = 0; y < nblocks; y++)
 		{

+ 12 - 12
mpi/examples/cholesky/mpi_cholesky_distributed.c

@@ -50,18 +50,18 @@ int main(int argc, char **argv)
 
 	if (dblockx == -1 || dblocky == -1)
 	{
-	     int factor;
-	     dblockx = nodes;
-	     dblocky = 1;
-	     for(factor=sqrt(nodes) ; factor>1 ; factor--)
-	     {
-		  if (nodes % factor == 0)
-		  {
-		       dblockx = nodes/factor;
-		       dblocky = factor;
-		       break;
-		  }
-	     }
+		int factor;
+		dblockx = nodes;
+		dblocky = 1;
+		for(factor=sqrt(nodes) ; factor>1 ; factor--)
+		{
+			if (nodes % factor == 0)
+			{
+				dblockx = nodes/factor;
+				dblocky = factor;
+				break;
+			}
+		}
 	}
 
 	unsigned i,j,x,y;

+ 1 - 1
mpi/examples/cholesky/mpi_cholesky_kernels.c

@@ -29,7 +29,7 @@
 #endif
 
 /*
- *   U22
+ * U22
  */
 
 static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)

+ 12 - 12
mpi/examples/complex/mpi_complex.c

@@ -74,12 +74,12 @@ int main(int argc, char **argv)
 			starpu_insert_task(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
 
 			{
-			     // We send a dummy variable only to check communication with predefined datatypes
-			     int foo=12;
-			     starpu_data_handle_t foo_handle;
-			     starpu_variable_data_register(&foo_handle, 0, (uintptr_t)&foo, sizeof(foo));
-			     starpu_mpi_isend_detached(foo_handle, 1, 40, MPI_COMM_WORLD, NULL, NULL);
-			     starpu_insert_task(&foo_display, STARPU_R, foo_handle, 0);
+				// We send a dummy variable only to check communication with predefined datatypes
+				int foo=12;
+				starpu_data_handle_t foo_handle;
+				starpu_variable_data_register(&foo_handle, 0, (uintptr_t)&foo, sizeof(foo));
+				starpu_mpi_isend_detached(foo_handle, 1, 40, MPI_COMM_WORLD, NULL, NULL);
+				starpu_insert_task(&foo_display, STARPU_R, foo_handle, 0);
 			}
 		}
 		else if (rank == 1)
@@ -94,12 +94,12 @@ int main(int argc, char **argv)
 			starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL);
 
 			{
-			     // We send a dummy variable only to check communication with predefined datatypes
-			     int foo=12;
-			     starpu_data_handle_t foo_handle;
-			     starpu_variable_data_register(&foo_handle, -1, (uintptr_t)NULL, sizeof(foo));
-			     starpu_mpi_irecv_detached(foo_handle, 0, 40, MPI_COMM_WORLD, NULL, NULL);
-			     starpu_insert_task(&foo_display, STARPU_R, foo_handle, 0);
+				// We send a dummy variable only to check communication with predefined datatypes
+				int foo=12;
+				starpu_data_handle_t foo_handle;
+				starpu_variable_data_register(&foo_handle, -1, (uintptr_t)NULL, sizeof(foo));
+				starpu_mpi_irecv_detached(foo_handle, 0, 40, MPI_COMM_WORLD, NULL, NULL);
+				starpu_insert_task(&foo_display, STARPU_R, foo_handle, 0);
 			}
 
 		}

+ 12 - 12
mpi/examples/mpi_lu/plu_example.c

@@ -301,7 +301,7 @@ static void init_matrix(int rank)
 		allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *));
 	}
 #endif
-	
+
 	for (k = 0; k < nblocks; k++)
 	{
 #ifdef SINGLE_TMP1221
@@ -333,7 +333,7 @@ static void init_matrix(int rank)
 			starpu_malloc((void **)&tmp_12_block[i][k], blocksize);
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[i][k]);
-	
+
 			starpu_matrix_data_register(&tmp_12_block_handles[i][k], 0,
 				(uintptr_t)tmp_12_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
@@ -344,7 +344,7 @@ static void init_matrix(int rank)
 			starpu_malloc((void **)&tmp_21_block[i][k], blocksize);
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[i][k]);
-	
+
 			starpu_matrix_data_register(&tmp_21_block_handles[i][k], 0,
 				(uintptr_t)tmp_21_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
@@ -381,7 +381,7 @@ static void display_grid(int rank, unsigned nblocks)
 	//if (rank == 0)
 	{
 		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
-		
+
 		unsigned i, j;
 		for (j = 0; j < nblocks; j++)
 		{
@@ -534,7 +534,7 @@ int main(int argc, char **argv)
 
 		y2 = calloc(size, sizeof(TYPE));
 		STARPU_ASSERT(y);
-		
+
 		if (rank == 0)
 		{
 			for (ind = 0; ind < size; ind++)
@@ -546,13 +546,13 @@ int main(int argc, char **argv)
 		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);
 
 		/* Compute y2 = y2 - y */
-	        CPU_AXPY(size, -1.0, y, 1, y2, 1);
-	
-	        TYPE err = CPU_ASUM(size, y2, 1);
-	        int max = CPU_IAMAX(size, y2, 1);
-	
-	        fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
-	        fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
+		CPU_AXPY(size, -1.0, y, 1, y2, 1);
+
+		TYPE err = CPU_ASUM(size, y2, 1);
+		int max = CPU_IAMAX(size, y2, 1);
+
+		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
+		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
 #endif
 	}
 

+ 54 - 55
mpi/examples/mpi_lu/plu_solve.c

@@ -25,19 +25,19 @@
 
 static double frobenius_norm(TYPE *v, unsigned n)
 {
-        double sum2 = 0.0;
+	double sum2 = 0.0;
 
-        /* compute sqrt(Sum(|x|^2)) */
+	/* compute sqrt(Sum(|x|^2)) */
 
-        unsigned i,j;
-        for (j = 0; j < n; j++)
-        for (i = 0; i < n; i++)
-        {
-                double a = fabsl((double)v[i+n*j]);
-                sum2 += a*a;
-        }
+	unsigned i,j;
+	for (j = 0; j < n; j++)
+		for (i = 0; i < n; i++)
+		{
+			double a = fabsl((double)v[i+n*j]);
+			sum2 += a*a;
+		}
 
-        return sqrt(sum2);
+	return sqrt(sum2);
 }
 
 void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize)
@@ -105,9 +105,9 @@ static void STARPU_PLU(compute_ax_block_upper)(unsigned size, unsigned nblocks,
 	/* Take a copy of the upper part of the diagonal block */
 	TYPE *upper_block_copy = calloc((block_size)*(block_size), sizeof(TYPE));
 	STARPU_PLU(extract_upper)(block_size, block_data, upper_block_copy);
-		
+
 	STARPU_PLU(compute_ax_block)(block_size, upper_block_copy, sub_x, sub_y);
-	
+
 	free(upper_block_copy);
 }
 
@@ -121,7 +121,7 @@ static void STARPU_PLU(compute_ax_block_lower)(unsigned size, unsigned nblocks,
 	STARPU_PLU(extract_lower)(block_size, block_data, lower_block_copy);
 
 	STARPU_PLU(compute_ax_block)(size/nblocks, lower_block_copy, sub_x, sub_y);
-	
+
 	free(lower_block_copy);
 }
 
@@ -242,7 +242,7 @@ TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks)
 		TYPE *block;
 
 		int block_rank = get_block_rank(bi, bj);
-		
+
 		if (block_rank == 0)
 		{
 			block = STARPU_PLU(get_block)(bi, bj);
@@ -335,60 +335,59 @@ void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved
 
 	if (rank == 0)
 	{
-	        TYPE *L = malloc((size_t)size*size*sizeof(TYPE));
-	        TYPE *U = malloc((size_t)size*size*sizeof(TYPE));
-	
-	        memset(L, 0, size*size*sizeof(TYPE));
-	        memset(U, 0, size*size*sizeof(TYPE));
-	
-	        /* only keep the lower part */
+		TYPE *L = malloc((size_t)size*size*sizeof(TYPE));
+		TYPE *U = malloc((size_t)size*size*sizeof(TYPE));
+
+		memset(L, 0, size*size*sizeof(TYPE));
+		memset(U, 0, size*size*sizeof(TYPE));
+
+		/* only keep the lower part */
 		unsigned i, j;
-	        for (j = 0; j < size; j++)
-	        {
-	                for (i = 0; i < j; i++)
-	                {
-	                        L[j+i*size] = all_r[j+i*size];
-	                }
-	
-	                /* diag i = j */
-	                L[j+j*size] = all_r[j+j*size];
-	                U[j+j*size] = 1.0;
-	
-	                for (i = j+1; i < size; i++)
-	                {
-	                        U[j+i*size] = all_r[j+i*size];
-	                }
-	        }
-	
+		for (j = 0; j < size; j++)
+		{
+			for (i = 0; i < j; i++)
+			{
+				L[j+i*size] = all_r[j+i*size];
+			}
+
+			/* diag i = j */
+			L[j+j*size] = all_r[j+j*size];
+			U[j+j*size] = 1.0;
+
+			for (i = j+1; i < size; i++)
+			{
+				U[j+i*size] = all_r[j+i*size];
+			}
+		}
+
 		STARPU_PLU(display_data_content)(L, size);
 		STARPU_PLU(display_data_content)(U, size);
-	
-	        /* now A_err = L, compute L*U */
-	        CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
-	
+
+		/* now A_err = L, compute L*U */
+		CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
+
 		if (display)
 			fprintf(stderr, "\nLU\n");
 
 		STARPU_PLU(display_data_content)(L, size);
-	
-	        /* compute "LU - A" in L*/
-	        CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1);
-	
-	        TYPE err = CPU_ASUM(size*size, L, 1);
-	        int max = CPU_IAMAX(size*size, L, 1);
-	
+
+		/* compute "LU - A" in L*/
+		CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1);
+
+		TYPE err = CPU_ASUM(size*size, L, 1);
+		int max = CPU_IAMAX(size*size, L, 1);
+
 		if (display)
 			fprintf(stderr, "DISPLAY ERROR\n");
 
 		STARPU_PLU(display_data_content)(L, size);
-	
-	        fprintf(stderr, "(A - LU) Avg error : %e\n", err/(size*size));
-	        fprintf(stderr, "(A - LU) Max error : %e\n", L[max]);
-	
+
+		fprintf(stderr, "(A - LU) Avg error : %e\n", err/(size*size));
+		fprintf(stderr, "(A - LU) Max error : %e\n", L[max]);
+
 		double residual = frobenius_norm(L, size);
 		double matnorm = frobenius_norm(Asaved, size);
-	
+
 		fprintf(stderr, "||A-LU|| / (||A||*N) : %e\n", residual/(matnorm*size));
 	}
 }
-

+ 1 - 1
mpi/examples/mpi_lu/pxlu.c

@@ -736,7 +736,7 @@ static void create_task_22_real(unsigned k, unsigned i, unsigned j)
 	STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR);
 	STARPU_ASSERT(task->handles[2] != STARPU_POISON_PTR);
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio && (i == k + 1) && (j == k +1) ) {
 		task->priority = STARPU_MAX_PRIO;
 	}
 

+ 12 - 14
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -22,7 +22,7 @@
 ///#define VERBOSE_KERNELS	1
 
 /*
- *   U22 
+ * U22
  */
 
 static inline void STARPU_PLU(common_u22)(void *descr[],
@@ -55,7 +55,7 @@ static inline void STARPU_PLU(common_u22)(void *descr[],
 
 	switch (s) {
 		case 0:
-			CPU_GEMM("N", "N", dy, dx, dz, 
+			CPU_GEMM("N", "N", dy, dx, dz,
 				(TYPE)-1.0, right, ld21, left, ld12,
 				(TYPE)1.0, center, ld22);
 			break;
@@ -129,7 +129,7 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub12;
 
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);	
+	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
 	sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
 
 	unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]);
@@ -227,7 +227,7 @@ struct starpu_codelet STARPU_PLU(cl12) = {
 };
 
 
-/* 
+/*
  * U21
  */
 
@@ -245,7 +245,7 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 
 	unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]);
 	unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]);
-	
+
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
 
@@ -311,7 +311,7 @@ static void STARPU_PLU(cublas_u21)(void *descr[], void *_args)
 {
 	STARPU_PLU(common_u21)(descr, 1, _args);
 }
-#endif 
+#endif
 
 static struct starpu_perfmodel STARPU_PLU(model_21) = {
 	.type = STARPU_HISTORY_BASED,
@@ -345,7 +345,7 @@ static inline void STARPU_PLU(common_u11)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
 
 	unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]);
 	unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]);
@@ -367,9 +367,9 @@ static inline void STARPU_PLU(common_u11)(void *descr[],
 				TYPE pivot;
 				pivot = sub11[z+z*ld];
 				STARPU_ASSERT(pivot != 0.0);
-		
+
 				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);
-		
+
 				CPU_GER(nx - z - 1, nx - z - 1, -1.0,
 						&sub11[(z+1)+z*ld], 1,
 						&sub11[z+(z+1)*ld], ld,
@@ -385,15 +385,15 @@ static inline void STARPU_PLU(common_u11)(void *descr[],
 				cudaStreamSynchronize(starpu_cuda_get_local_stream());
 
 				STARPU_ASSERT(pivot != 0.0);
-				
+
 				CUBLAS_SCAL(nx - z - 1, 1.0/pivot, &sub11[z+(z+1)*ld], ld);
-				
+
 				CUBLAS_GER(nx - z - 1, nx - z - 1, -1.0,
 						&sub11[(z+1)+z*ld], 1,
 						&sub11[z+(z+1)*ld], ld,
 						&sub11[(z+1) + (z+1)*ld],ld);
 			}
-			
+
 			cudaStreamSynchronize(starpu_cuda_get_local_stream());
 
 			break;
@@ -440,5 +440,3 @@ struct starpu_codelet STARPU_PLU(cl11) = {
 	.modes = {STARPU_RW},
 	.model = &STARPU_PLU(model_11)
 };
-
-

+ 60 - 60
mpi/examples/stencil/stencil5.c

@@ -25,15 +25,15 @@ void stencil5_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 	unsigned *xym1 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[3]);
 	unsigned *xyp1 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[4]);
 
-        //        fprintf(stdout, "VALUES: %d %d %d %d %d\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
-        *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
+	//fprintf(stdout, "VALUES: %d %d %d %d %d\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
+	*xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
 }
 
 struct starpu_codelet stencil5_cl =
 {
 	.where = STARPU_CPU,
 	.cpu_funcs = {stencil5_cpu, NULL},
-        .nbuffers = 5,
+	.nbuffers = 5,
 	.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
 };
 
@@ -75,10 +75,10 @@ static void parse_args(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
-        int my_rank, size, x, y, loop;
-        int value=0, mean=0;
-        unsigned matrix[X][Y];
-        starpu_data_handle_t data_handles[X][Y];
+	int my_rank, size, x, y, loop;
+	int value=0, mean=0;
+	unsigned matrix[X][Y];
+	starpu_data_handle_t data_handles[X][Y];
 
 	int ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
@@ -86,81 +86,81 @@ int main(int argc, char **argv)
 	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
 	MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-        parse_args(argc, argv);
+	parse_args(argc, argv);
 
-        for(x = 0; x < X; x++)
+	for(x = 0; x < X; x++)
 	{
-                for (y = 0; y < Y; y++)
+		for (y = 0; y < Y; y++)
 		{
-                        matrix[x][y] = (my_rank+1)*10 + value;
-                        value++;
-                        mean += matrix[x][y];
-                }
-        }
-        mean /= value;
-
-        for(x = 0; x < X; x++)
+			matrix[x][y] = (my_rank+1)*10 + value;
+			value++;
+			mean += matrix[x][y];
+		}
+	}
+	mean /= value;
+
+	for(x = 0; x < X; x++)
 	{
-                for (y = 0; y < Y; y++)
+		for (y = 0; y < Y; y++)
 		{
-                        int mpi_rank = my_distrib(x, y, size);
-                        if (mpi_rank == my_rank)
+			int mpi_rank = my_distrib(x, y, size);
+			if (mpi_rank == my_rank)
 			{
-                                //fprintf(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
-                        }
+				//fprintf(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
+				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
+			}
 			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
-			      || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
+				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
 			{
-                                /* I don't own that index, but will need it for my computations */
-                                //fprintf(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
-                                starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned));
-                        }
-                        else
+				/* I don't own that index, but will need it for my computations */
+				//fprintf(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
+				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned));
+			}
+			else
 			{
-                                /* I know it's useless to allocate anything for this */
-                                data_handles[x][y] = NULL;
-                        }
-                        if (data_handles[x][y])
+				/* I know it's useless to allocate anything for this */
+				data_handles[x][y] = NULL;
+			}
+			if (data_handles[x][y])
 			{
-                                starpu_data_set_rank(data_handles[x][y], mpi_rank);
-                                starpu_data_set_tag(data_handles[x][y], (y*X)+x);
+				starpu_data_set_rank(data_handles[x][y], mpi_rank);
+				starpu_data_set_tag(data_handles[x][y], (y*X)+x);
 			}
-                }
-        }
+		}
+	}
 
-        for(loop=0 ; loop<niter; loop++)
+	for(loop=0 ; loop<niter; loop++)
 	{
-                for (x = 1; x < X-1; x++)
+		for (x = 1; x < X-1; x++)
 		{
-                        for (y = 1; y < Y-1; y++)
+			for (y = 1; y < Y-1; y++)
 			{
-                                starpu_mpi_insert_task(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
-                                                       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
-                                                       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
-                                                       0);
-                        }
-                }
-        }
-        fprintf(stderr, "Waiting ...\n");
-        starpu_task_wait_for_all();
+				starpu_mpi_insert_task(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
+						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
+						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
+						       0);
+			}
+		}
+	}
+	fprintf(stderr, "Waiting ...\n");
+	starpu_task_wait_for_all();
 
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 
-        if (display)
+	if (display)
 	{
-                fprintf(stdout, "[%d] mean=%d\n", my_rank, mean);
-                for(x = 0; x < X; x++)
+		fprintf(stdout, "[%d] mean=%d\n", my_rank, mean);
+		for(x = 0; x < X; x++)
 		{
-                        fprintf(stdout, "[%d] ", my_rank);
-                        for (y = 0; y < Y; y++)
+			fprintf(stdout, "[%d] ", my_rank);
+			for (y = 0; y < Y; y++)
 			{
-                                fprintf(stdout, "%3u ", matrix[x][y]);
-                        }
-                        fprintf(stdout, "\n");
-                }
-        }
+				fprintf(stdout, "%3u ", matrix[x][y]);
+			}
+			fprintf(stdout, "\n");
+		}
+	}
 
 	return 0;
 }