13 éve · b9476e6d4a
--- a/examples/audio/starpu_audio_processing.c
+++ b/examples/audio/starpu_audio_processing.c
@@ -101,7 +101,8 @@ void read_16bit_wav(FILE *infile, unsigned size, float *arrayout, FILE *save_fil
 
				 	/* we skip the header to only keep the data */
			
 
				 	fseek(infile, headersize, SEEK_SET);
			
 
				 	
			
 
				-	for (v=0;v<size;v++) {
			
 
				+	for (v=0;v<size;v++)
			
 
				+	{
			
 
				 		signed char val = (signed char)fgetc(infile);
			
 
				 		signed char val2 = (signed char)fgetc(infile);
			
 
				 
			
@@ -124,7 +125,8 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
 
				 	/* we assume that the header is copied using copy_wav_header */
			
 
				 	fseek(outfile, headersize, SEEK_SET);
			
 
				 	
			
 
				-	for (v=0;v<size;v++) {
			
 
				+	for (v=0;v<size;v++)
			
 
				+	{
			
 
				 		signed char val = ((int)arrayin[v]) % 256; 
			
 
				 		signed char val2  = ((int)arrayin[v]) / 256;
			
 
				 
			
@@ -146,7 +148,8 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
 
				  */
			
 
				 
			
 
				 /* we don't reinitialize the CUFFT plan for every kernel, so we "cache" it */
			
 
				-typedef struct {
			
 
				+typedef struct
			
 
				+{
			
 
				 	unsigned is_initialized;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	cufftHandle plan;
			
@@ -268,12 +271,14 @@ static void band_filter_kernel_cpu(void *descr[], __attribute__((unused)) void *
 
				 		localA[i] /= nsamples;
			
 
				 }
			
 
				 
			
 
				-struct starpu_perfmodel band_filter_model = {
			
 
				+struct starpu_perfmodel band_filter_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "FFT_band_filter"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet band_filter_cl = {
			
 
				+static struct starpu_codelet band_filter_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	.cuda_funcs = {band_filter_kernel_gpu, NULL},
			
@@ -330,7 +335,8 @@ static void init_problem(void)
 
				 	{
			
 
				 		starpu_malloc((void **)&A, length_data*sizeof(float));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		A = malloc(length_data*sizeof(float));
			
 
				 	}
			
 
				 
			
@@ -344,31 +350,38 @@ static void init_problem(void)
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			fprintf(stderr, "Usage: %s [-pin] [-nsamples block_size] [-i input.wav] [-o output.wav | -no-output] [-h]\n", argv[0]);
			
 
				 			exit(-1);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-i") == 0) {
			
 
				+		if (strcmp(argv[i], "-i") == 0)
			
 
				+		{
			
 
				 			inputfilename = argv[++i];;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-o") == 0) {
			
 
				+		if (strcmp(argv[i], "-o") == 0)
			
 
				+		{
			
 
				 			outputfilename = argv[++i];;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-no-output") == 0) {
			
 
				+		if (strcmp(argv[i], "-no-output") == 0)
			
 
				+		{
			
 
				 			outputfilename = NULL;;
			
 
				 		}
			
 
				 
			
 
				 		/* block size */
			
 
				-		if (strcmp(argv[i], "-nsamples") == 0) {
			
 
				+		if (strcmp(argv[i], "-nsamples") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nsamples = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-pin") == 0) {
			
 
				+		if (strcmp(argv[i], "-pin") == 0)
			
 
				+		{
			
 
				 			use_pin = 1;
			
 
				 		}
			
 
				 	}
			
@@ -393,7 +406,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
			
 
				 
			
 
				-	struct starpu_data_filter f = 
			
 
				+	struct starpu_data_filter f =
			
 
				 	{
			
 
				 		.filter_func = starpu_block_filter_func_vector,
			
 
				 		.nchildren = niter
			
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -70,7 +70,8 @@ void axpy_gpu(void *descr[], __attribute__((unused)) void *arg)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet axpy_cl = {
			
 
				+static struct starpu_codelet axpy_cl =
			
 
				+{
			
 
				         .where =
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				                 STARPU_CUDA|
			
@@ -116,7 +117,8 @@ int main(int argc, char **argv)
 
				 	starpu_vector_data_register(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
			
 
				 
			
 
				 	/* Divide the vector into blocks */
			
 
				-	struct starpu_data_filter block_filter = {
			
 
				+	struct starpu_data_filter block_filter =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func_vector,
			
 
				 		.nchildren = NBLOCKS
			
 
				 	};
			
--- a/examples/basic_examples/block.c
+++ b/examples/basic_examples/block.c
@@ -57,7 +57,8 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 
				 	task->cl_arg_size = sizeof(multiplier);
			
 
				 
			
 
				         int ret = starpu_task_submit(task);
			
 
				-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
			
 
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+	{
			
 
				                 FPRINTF(stderr, "No worker may execute this task\n");
			
 
				                 return 1;
			
 
				 	}
			
@@ -67,8 +68,9 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 
				 	/* update the array in RAM */
			
 
				 	starpu_data_unregister(block_handle);
			
 
				 
			
 
				-        for(i=0 ; i<pnx*pny*pnz; i++) {
			
 
				-          FPRINTF(stderr, "%f ", block[i]);
			
 
				+        for(i=0 ; i<pnx*pny*pnz; i++)
			
 
				+	{
			
 
				+		FPRINTF(stderr, "%f ", block[i]);
			
 
				         }
			
 
				         FPRINTF(stderr, "\n");
			
 
				 
			
@@ -88,9 +90,12 @@ int main(int argc, char **argv)
 
				 
			
 
				         block = (float*)malloc(nx*ny*nz*sizeof(float));
			
 
				         assert(block);
			
 
				-        for(k=0 ; k<nz ; k++) {
			
 
				-                for(j=0 ; j<ny ; j++) {
			
 
				-                        for(i=0 ; i<nx ; i++) {
			
 
				+        for(k=0 ; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0 ; j<ny ; j++)
			
 
				+		{
			
 
				+                        for(i=0 ; i<nx ; i++)
			
 
				+			{
			
 
				                                 block[(k*nx*ny)+(j*nx)+i] = n++;
			
 
				                         }
			
 
				                 }
			
@@ -110,11 +115,13 @@ int main(int argc, char **argv)
 
				 
			
 
				         /* Check result is correct */
			
 
				         ret=1;
			
 
				-        for(i=0 ; i<nx*ny*nz ; i++) {
			
 
				-          if (block[i] != (i+1) * multiplier) {
			
 
				-            ret=0;
			
 
				-            break;
			
 
				-          }
			
 
				+        for(i=0 ; i<nx*ny*nz ; i++)
			
 
				+	{
			
 
				+		if (block[i] != (i+1) * multiplier)
			
 
				+		{
			
 
				+			ret=0;
			
 
				+			break;
			
 
				+		}
			
 
				         }
			
 
				 
			
 
				         FPRINTF(stderr,"TEST %s\n", ret==1?"PASSED":"FAILED");
			
--- a/examples/basic_examples/block_cpu.c
+++ b/examples/basic_examples/block_cpu.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -28,8 +28,10 @@ void cpu_codelet(void *descr[], void *_args)
 
				         float *multiplier = (float *)_args;
			
 
				         unsigned i, j, k;
			
 
				 
			
 
				-        for(k=0; k<nz ; k++) {
			
 
				-                for(j=0; j<ny ; j++) {
			
 
				+        for(k=0; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0; j<ny ; j++)
			
 
				+		{
			
 
				                         for(i=0; i<nx ; i++)
			
 
				                                 block[(k*ldz)+(j*ldy)+i] *= *multiplier;
			
 
				                 }
			
--- a/examples/basic_examples/block_cuda.cu
+++ b/examples/basic_examples/block_cuda.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -20,8 +20,10 @@
 
				 static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
			
 
				 {
			
 
				         int i, j, k;
			
 
				-        for(k=0; k<nz ; k++) {
			
 
				-                for(j=0; j<ny ; j++) {
			
 
				+        for(k=0; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0; j<ny ; j++)
			
 
				+		{
			
 
				                         for(i=0; i<nx ; i++)
			
 
				                                 block[(k*ldz)+(j*ldy)+i] *= multiplier;
			
 
				                 }
			
--- a/examples/basic_examples/block_opencl.c
+++ b/examples/basic_examples/block_opencl.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -19,7 +19,8 @@
 
				 #include <starpu_opencl.h>
			
 
				 
			
 
				 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
			
 
				-do {                                                        \
			
 
				+do						    	    \
			
 
				+{							    \
			
 
				 	int err;                                            \
			
 
				 	err = clSetKernelArg(kernel, n, size, ptr);         \
			
 
				 	if (err != CL_SUCCESS)                              \
			
--- a/examples/basic_examples/block_opencl_kernel.cl
+++ b/examples/basic_examples/block_opencl_kernel.cl
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -17,8 +17,10 @@
 
				 __kernel void block(__global float *b, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
			
 
				 {
			
 
				         int i, j, k;
			
 
				-        for(k=0; k<nz ; k++) {
			
 
				-                for(j=0; j<ny ; j++) {
			
 
				+        for(k=0; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0; j<ny ; j++)
			
 
				+		{
			
 
				                         for(i=0; i<nx ; i++)
			
 
				                                 b[(k*ldz)+(j*ldy)+i] *= multiplier;
			
 
				                 }
			
--- a/examples/basic_examples/hello_world.c
+++ b/examples/basic_examples/hello_world.c
@@ -46,10 +46,12 @@ void callback_func(void *callback_arg)
 
				  * DSM; the second arguments references read-only data that is passed as an
			
 
				  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
			
 
				  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
			
 
				-struct params {
			
 
				+struct params
			
 
				+{
			
 
				 	int i;
			
 
				 	float f;
			
 
				 };
			
 
				+
			
 
				 void cpu_func(void *buffers[], void *cl_arg)
			
 
				 {
			
 
				 	struct params *params = (struct params *) cl_arg;
			
--- a/examples/basic_examples/mult.c
+++ b/examples/basic_examples/mult.c
@@ -133,20 +133,26 @@ static void init_problem_data(void)
 
				 
			
 
				 	/* fill the A and B matrices */
			
 
				 	srand(2009);
			
 
				-	for (j=0; j < ydim; j++) {
			
 
				-		for (i=0; i < zdim; i++) {
			
 
				+	for (j=0; j < ydim; j++)
			
 
				+	{
			
 
				+		for (i=0; i < zdim; i++)
			
 
				+		{
			
 
				 			A[j+i*ydim] = (float)(starpu_drand48());
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	for (j=0; j < zdim; j++) {
			
 
				-		for (i=0; i < xdim; i++) {
			
 
				+	for (j=0; j < zdim; j++)
			
 
				+	{
			
 
				+		for (i=0; i < xdim; i++)
			
 
				+		{
			
 
				 			B[j+i*zdim] = (float)(starpu_drand48());
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	for (j=0; j < ydim; j++) {
			
 
				-		for (i=0; i < xdim; i++) {
			
 
				+	for (j=0; j < ydim; j++)
			
 
				+	{
			
 
				+		for (i=0; i < xdim; i++)
			
 
				+		{
			
 
				 			C[j+i*ydim] = (float)(0);
			
 
				 		}
			
 
				 	}
			
@@ -186,16 +192,18 @@ static void partition_mult_data(void)
 
				 	/* StarPU supplies some basic filters such as the partition of a matrix
			
 
				 	 * into blocks, note that we are using a FORTRAN ordering so that the
			
 
				 	 * name of the filters are a bit misleading */
			
 
				-	struct starpu_data_filter vert = {
			
 
				+	struct starpu_data_filter vert =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nslicesx
			
 
				 	};
			
 
				-		
			
 
				-	struct starpu_data_filter horiz = {
			
 
				+
			
 
				+	struct starpu_data_filter horiz =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nslicesy
			
 
				 	};
			
 
				-		
			
 
				+
			
 
				 /*
			
 
				  *	Illustration with nslicex = 4 and nslicey = 2, it is possible to access
			
 
				  *	sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle,
			
@@ -246,12 +254,14 @@ static void partition_mult_data(void)
 
				 	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel mult_perf_model = {
			
 
				+static struct starpu_perfmodel mult_perf_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "mult_perf_model"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet cl = {
			
 
				+static struct starpu_codelet cl =
			
 
				+{
			
 
				         /* we can only execute that kernel on a CPU yet */
			
 
				         .where = STARPU_CPU,
			
 
				         /* CPU implementation of the codelet */
			
--- a/examples/basic_examples/multiformat.c
+++ b/examples/basic_examples/multiformat.c
@@ -32,7 +32,8 @@ multiformat_scal_cpu_func(void *buffers[], void *args)
 
				 	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				 	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				 
			
 
				-	for (i = 0; i < n; i++) {
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	{
			
 
				 		aos[i].x *= aos[i].y;
			
 
				 	}
			
 
				 }
			
@@ -47,7 +48,8 @@ extern struct starpu_codelet cpu_to_opencl_cl;
 
				 extern struct starpu_codelet opencl_to_cpu_cl;
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_multiformat_data_interface_ops format_ops = {
			
 
				+static struct starpu_multiformat_data_interface_ops format_ops =
			
 
				+{
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	.cuda_elemsize = 2* sizeof(float),
			
 
				 	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
			
@@ -69,7 +71,8 @@ extern void multiformat_scal_cuda_func(void *buffers[], void *arg);
 
				 extern void multiformat_scal_opencl_func(void *buffers[], void *arg);
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet  cl = {
			
 
				+static struct starpu_codelet  cl =
			
 
				+{
			
 
				 	.where = STARPU_CUDA | STARPU_OPENCL,
			
 
				 	.cpu_funcs = {multiformat_scal_cpu_func, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -89,7 +92,8 @@ static void
 
				 init_problem_data(void)
			
 
				 {
			
 
				 	int i; 
			
 
				-	for (i = 0; i < N_ELEMENTS; i++) {
			
 
				+	for (i = 0; i < N_ELEMENTS; i++)
			
 
				+	{
			
 
				 		array_of_structs[i].x = 1.0 + i;
			
 
				 		array_of_structs[i].y = 42.0;
			
 
				 	}
			
@@ -154,7 +158,8 @@ static void
 
				 print_it(void)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 0; i < N_ELEMENTS; i++) {
			
 
				+	for (i = 0; i < N_ELEMENTS; i++)
			
 
				+	{
			
 
				 		FPRINTF(stderr, "(%.2f %.2f) ",
			
 
				 			array_of_structs[i].x,
			
 
				 			array_of_structs[i].y);
			
@@ -166,7 +171,8 @@ static int
 
				 check_it(void)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 0; i < N_ELEMENTS; i++) {
			
 
				+	for (i = 0; i < N_ELEMENTS; i++)
			
 
				+	{
			
 
				 		float expected_value = i + 1.0;
			
 
				 #if STARPU_USE_CUDA
			
 
				 		expected_value *= array_of_structs[i].y;
			
--- a/examples/basic_examples/multiformat_conversion_codelets.c
+++ b/examples/basic_examples/multiformat_conversion_codelets.c
@@ -24,21 +24,24 @@ void cuda_to_cpu(void *buffers[], void *arg)
 
				 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				 	int i;
			
 
				-	for (i = 0; i < n; i++) {
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	{
			
 
				 		dst[i].x = src->x[i];
			
 
				 		dst[i].y = src->y[i];
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
			
 
				-struct starpu_codelet cpu_to_cuda_cl = {
			
 
				+struct starpu_codelet cpu_to_cuda_cl =
			
 
				+{
			
 
				 	.where = STARPU_CUDA,
			
 
				 	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
			
 
				 	.nbuffers = 1,
			
 
				 	.name = "codelet_cpu_to_cuda"
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cuda_to_cpu_cl = {
			
 
				+struct starpu_codelet cuda_to_cpu_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.cpu_funcs = {cuda_to_cpu, NULL},
			
 
				 	.nbuffers = 1,
			
@@ -54,20 +57,23 @@ void opencl_to_cpu(void *buffers[], void *arg)
 
				 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				 	int i;
			
 
				-	for (i = 0; i < n; i++) {
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	{
			
 
				 		dst[i].x = src->x[i];
			
 
				 		dst[i].y = src->y[i];
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
			
 
				-struct starpu_codelet cpu_to_opencl_cl = {
			
 
				+struct starpu_codelet cpu_to_opencl_cl =
			
 
				+{
			
 
				 	.where = STARPU_OPENCL,
			
 
				 	.opencl_funcs = {cpu_to_opencl_opencl_func, NULL},
			
 
				 	.nbuffers = 1
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet opencl_to_cpu_cl = {
			
 
				+struct starpu_codelet opencl_to_cpu_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.cpu_funcs = {opencl_to_cpu, NULL},
			
 
				 	.nbuffers = 1
			
--- a/examples/basic_examples/multiformat_conversion_codelets_cuda.cu
+++ b/examples/basic_examples/multiformat_conversion_codelets_cuda.cu
@@ -23,7 +23,8 @@ static __global__ void cpu_to_cuda_cuda(struct point *src,
 
				 {
			
 
				         unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
			
 
				 
			
 
				-	if (i < n) {
			
 
				+	if (i < n)
			
 
				+	{
			
 
				 		dst->x[i] = src[i].x;
			
 
				 		dst->y[i] = src[i].y;
			
 
				 	}
			
--- a/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
+++ b/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
@@ -21,7 +21,8 @@ __kernel void cpu_to_opencl_opencl(__global struct point *src,
 
				 				   unsigned int n)
			
 
				 {
			
 
				 	const unsigned int i = get_global_id(0);
			
 
				-	if (i < n) {
			
 
				+	if (i < n)
			
 
				+	{
			
 
				 		dst->x[i] = src[i].x;
			
 
				 		dst->y[i] = src[i].y;
			
 
				 	}
			
--- a/examples/basic_examples/multiformat_types.h
+++ b/examples/basic_examples/multiformat_types.h
@@ -18,11 +18,13 @@
 
				 
			
 
				 #define N_ELEMENTS 10
			
 
				 
			
 
				-struct struct_of_arrays{
			
 
				+struct struct_of_arrays
			
 
				+{
			
 
				 	float x[N_ELEMENTS];
			
 
				 	float y[N_ELEMENTS];
			
 
				 };
			
 
				-struct point {
			
 
				+struct point
			
 
				+{
			
 
				 	float x, y;
			
 
				 };
			
 
				 
			
--- a/examples/basic_examples/vector_scal.c
+++ b/examples/basic_examples/vector_scal.c
@@ -37,17 +37,20 @@ extern void scal_sse_func_icc(void *buffers[], void *_args);
 
				 extern void scal_cuda_func(void *buffers[], void *_args);
			
 
				 extern void scal_opencl_func(void *buffers[], void *_args);
			
 
				 
			
 
				-static struct starpu_perfmodel vector_scal_model = {
			
 
				+static struct starpu_perfmodel vector_scal_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "vector_scale"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_perfmodel vector_scal_power_model = {
			
 
				+static struct starpu_perfmodel vector_scal_power_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "vector_scale_power"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet cl = {
			
 
				+static struct starpu_codelet cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
			
 
				 	/* CPU implementation of the codelet */
			
 
				 	.cpu_funcs = {
			
--- a/examples/basic_examples/vector_scal_c.c
+++ b/examples/basic_examples/vector_scal_c.c
@@ -33,12 +33,14 @@
 
				 extern void scal_cpu_func(void *buffers[], void *_args);
			
 
				 extern void scal_cuda_func(void *buffers[], void *_args);
			
 
				 
			
 
				-static struct starpu_perfmodel vector_scal_model = {
			
 
				+static struct starpu_perfmodel vector_scal_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "vector_scale_model"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet cl = {
			
 
				+static struct starpu_codelet cl =
			
 
				+{
			
 
				   .where = STARPU_CPU | STARPU_CUDA,
			
 
				   /* CPU implementation of the codelet */
			
 
				   .cpu_funcs = {scal_cpu_func, NULL},
			
--- a/examples/basic_examples/vector_scal_opencl_kernel.cl
+++ b/examples/basic_examples/vector_scal_opencl_kernel.cl
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -17,7 +17,8 @@
 
				 __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
			
 
				 {
			
 
				         const int i = get_global_id(0);
			
 
				-        if (i < nx) {
			
 
				+        if (i < nx)
			
 
				+	{
			
 
				                 val[i] *= factor;
			
 
				         }
			
 
				 }
			
--- a/examples/cg/cg.c
+++ b/examples/cg/cg.c
@@ -299,7 +299,8 @@ static void cg(void)
 
				 			/* r <- r - A x */
			
 
				 			gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction); 
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* r <- r - alpha q */
			
 
				 			axpy_kernel(r_handle, q_handle, -alpha, nblocks);
			
 
				 		}
			
@@ -342,28 +343,34 @@ static int check(void)
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-	        if (strcmp(argv[i], "-n") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+	        if (strcmp(argv[i], "-n") == 0)
			
 
				+		{
			
 
				 			n = (int long long)atoi(argv[++i]);
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-	        if (strcmp(argv[i], "-maxiter") == 0) {
			
 
				+	        if (strcmp(argv[i], "-maxiter") == 0)
			
 
				+		{
			
 
				 			i_max = atoi(argv[++i]);
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-	        if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+	        if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 			nblocks = atoi(argv[++i]);
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-	        if (strcmp(argv[i], "-no-reduction") == 0) {
			
 
				+	        if (strcmp(argv[i], "-no-reduction") == 0)
			
 
				+		{
			
 
				 			use_reduction = 0;
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-	        if (strcmp(argv[i], "-h") == 0) {
			
 
				+	        if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			FPRINTF(stderr, "usage: %s [-h] [-nblocks #blocks] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]);
			
 
				 			exit(-1);
			
 
				 			continue;
			
--- a/examples/cg/cg_kernels.c
+++ b/examples/cg/cg_kernels.c
@@ -68,12 +68,14 @@ static void accumulate_variable_cpu(void *descr[], void *cl_arg)
 
				 	*v_dst = *v_dst + *v_src;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel accumulate_variable_model = {
			
 
				+static struct starpu_perfmodel accumulate_variable_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "accumulate_variable"
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet accumulate_variable_cl = {
			
 
				+struct starpu_codelet accumulate_variable_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {accumulate_variable_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -104,12 +106,14 @@ static void accumulate_vector_cpu(void *descr[], void *cl_arg)
 
				 	AXPY(n, (TYPE)1.0, v_src, 1, v_dst, 1);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel accumulate_vector_model = {
			
 
				+static struct starpu_perfmodel accumulate_vector_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "accumulate_vector"
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet accumulate_vector_cl = {
			
 
				+struct starpu_codelet accumulate_vector_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {accumulate_vector_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -142,12 +146,14 @@ static void bzero_variable_cpu(void *descr[], void *cl_arg)
 
				 	*v = (TYPE)0.0;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel bzero_variable_model = {
			
 
				+static struct starpu_perfmodel bzero_variable_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "bzero_variable"
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet bzero_variable_cl = {
			
 
				+struct starpu_codelet bzero_variable_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {bzero_variable_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -177,12 +183,14 @@ static void bzero_vector_cpu(void *descr[], void *cl_arg)
 
				 	memset(v, 0, n*sizeof(TYPE));
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel bzero_vector_model = {
			
 
				+static struct starpu_perfmodel bzero_vector_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "bzero_vector"
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet bzero_vector_cl = {
			
 
				+struct starpu_codelet bzero_vector_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {bzero_vector_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -230,12 +238,14 @@ static void dot_kernel_cpu(void *descr[], void *cl_arg)
 
				 	*dot = *dot + local_dot;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel dot_kernel_model = {
			
 
				+static struct starpu_perfmodel dot_kernel_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "dot_kernel"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet dot_kernel_cl = {
			
 
				+static struct starpu_codelet dot_kernel_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dot_kernel_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -297,12 +307,14 @@ static void scal_kernel_cpu(void *descr[], void *cl_arg)
 
				 	SCAL(n, alpha, v1, 1);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel scal_kernel_model = {
			
 
				+static struct starpu_perfmodel scal_kernel_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "scal_kernel"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet scal_kernel_cl = {
			
 
				+static struct starpu_codelet scal_kernel_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {scal_kernel_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -368,12 +380,14 @@ static void gemv_kernel_cpu(void *descr[], void *cl_arg)
 
				 	GEMV("N", nx, ny, alpha, M, ld, v2, 1, beta, v1, 1);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel gemv_kernel_model = {
			
 
				+static struct starpu_perfmodel gemv_kernel_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "gemv_kernel"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet gemv_kernel_cl = {
			
 
				+static struct starpu_codelet gemv_kernel_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.type = STARPU_SPMD,
			
 
				 	.max_parallelism = INT_MAX,
			
@@ -460,12 +474,14 @@ static void scal_axpy_kernel_cpu(void *descr[], void *cl_arg)
 
				 	AXPY(nx, p2, v2, 1, v1, 1);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel scal_axpy_kernel_model = {
			
 
				+static struct starpu_perfmodel scal_axpy_kernel_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "scal_axpy_kernel"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet scal_axpy_kernel_cl = {
			
 
				+static struct starpu_codelet scal_axpy_kernel_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {scal_axpy_kernel_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -528,12 +544,14 @@ static void axpy_kernel_cpu(void *descr[], void *cl_arg)
 
				 	AXPY(nx, p1, v2, 1, v1, 1);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel axpy_kernel_model = {
			
 
				+static struct starpu_perfmodel axpy_kernel_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "axpy_kernel"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet axpy_kernel_cl = {
			
 
				+static struct starpu_codelet axpy_kernel_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {axpy_kernel_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -588,12 +606,14 @@ static void copy_handle_cuda(void *descr[], void *cl_arg)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_perfmodel copy_handle_model = {
			
 
				+static struct starpu_perfmodel copy_handle_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "copy_handle"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet copy_handle_cl = {
			
 
				+static struct starpu_codelet copy_handle_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {copy_handle_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
--- a/examples/cholesky/cholesky.h
+++ b/examples/cholesky/cholesky.h
@@ -81,39 +81,48 @@ extern struct starpu_perfmodel chol_model_22;
 
				 static void __attribute__((unused)) parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-size") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-size") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			size = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nbigblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nbigblocks") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nbigblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-pin") == 0) {
			
 
				+		if (strcmp(argv[i], "-pin") == 0)
			
 
				+		{
			
 
				 			pinned = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-no-prio") == 0) {
			
 
				+		if (strcmp(argv[i], "-no-prio") == 0)
			
 
				+		{
			
 
				 			noprio = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-bound") == 0) {
			
 
				+		if (strcmp(argv[i], "-bound") == 0)
			
 
				+		{
			
 
				 			bound = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-check") == 0) {
			
 
				+		if (strcmp(argv[i], "-check") == 0)
			
 
				+		{
			
 
				 			check = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			printf("usage : %s [-pin] [-size size] [-nblocks nblocks] [-check]\n", argv[0]);
			
 
				 		}
			
 
				 	}
			
--- a/examples/cholesky/cholesky_grain_tag.c
+++ b/examples/cholesky/cholesky_grain_tag.c
@@ -63,7 +63,8 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 
				 	task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
			
 
				 	}
			
 
				 
			
@@ -93,15 +94,18 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j);
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (j == k+1) {
			
 
				+	if (j == k+1)
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 2, TAG11_AUX(k, reclevel), TAG22_AUX(k-1, k, j, reclevel));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
			
 
				 	}
			
 
				 
			
@@ -135,15 +139,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 
				 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j);
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if ( (i == k + 1) && (j == k +1) ) {
			
 
				+	if ( (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 3, TAG22_AUX(k-1, i, j, reclevel), TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
			
 
				 	}
			
 
				 
			
@@ -173,12 +180,14 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
				 
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
@@ -189,10 +198,12 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
				 	{
			
 
				 		struct starpu_task *task = create_task_11(dataA, k, reclevel);
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_task_submit(task);
			
 
				 		}
			
 
				 
			
@@ -223,7 +234,8 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
				 		starpu_data_unpartition(dataA, 0);
			
 
				 		return;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		STARPU_ASSERT(reclevel == 0);
			
 
				 		unsigned ndeps_tags = (nblocks - nbigblocks)*(nblocks - nbigblocks);
			
 
				 
			
@@ -261,7 +273,8 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 
				 	{
			
 
				 		starpu_malloc((void **)A, dim*dim*sizeof(float));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		*A = malloc(dim*dim*sizeof(float));
			
 
				 	}
			
 
				 }
			
@@ -321,10 +334,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			}
			
 
				 		}
			
@@ -342,10 +357,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 				mat[j+i*size] = 0.0f; /* debug */
			
 
				 			}
			
@@ -365,10 +382,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				                                 FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			}
			
 
				 		}
			
--- a/examples/cholesky/cholesky_implicit.c
+++ b/examples/cholesky/cholesky_implicit.c
@@ -137,7 +137,8 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
				 
			
 
				 	double flop = (1.0f*n*n*n)/3.0f;
			
 
				 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
			
 
				-	if (bound) {
			
 
				+	if (bound)
			
 
				+	{
			
 
				 		double res;
			
 
				 		starpu_bound_compute(&res, NULL, 0);
			
 
				 		FPRINTF(stderr, "Theoretical GFlops: %2.2f\n", (flop/res/1000000.0f));
			
@@ -152,12 +153,14 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
@@ -203,10 +206,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			}
			
 
				 		}
			
@@ -222,10 +227,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 				mat[j+i*size] = 0.0f; /* debug */
			
 
				 			}
			
@@ -241,7 +248,8 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			for (i = 0; i < size; i++)
			
 
				 			{
			
 
				-				if (i > j) {
			
 
				+				if (i > j)
			
 
				+				{
			
 
				 					mat[j+i*size] = 0.0f; /* debug */
			
 
				 				}
			
 
				 			}
			
@@ -258,10 +266,12 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			for (i = 0; i < size; i++)
			
 
				 			{
			
 
				-				if (i <= j) {
			
 
				+				if (i <= j)
			
 
				+				{
			
 
				 					FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					FPRINTF(stdout, ".\t");
			
 
				 				}
			
 
				 			}
			
@@ -273,10 +283,12 @@ int main(int argc, char **argv)
 
				 		{
			
 
				 			for (i = 0; i < size; i++)
			
 
				 			{
			
 
				-				if (i <= j) {
			
 
				+				if (i <= j)
			
 
				+				{
			
 
				 	                                float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
			
 
				 	                                float err = abs(test_mat[j +i*size] - orig);
			
 
				-	                                if (err > 0.00001) {
			
 
				+	                                if (err > 0.00001)
			
 
				+					{
			
 
				 	                                        FPRINTF(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
			
 
				 	                                        assert(0);
			
 
				 	                                }
			
--- a/examples/cholesky/cholesky_kernels.c
+++ b/examples/cholesky/cholesky_kernels.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -55,7 +55,8 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __at
 
				 			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
			
 
				 				right, ld12, 1.0f, center, ld22);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* Parallel CPU kernel */
			
 
				 			int rank = starpu_combined_worker_get_rank();
			
 
				 
			
@@ -113,7 +114,8 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, __attrib
 
				 	unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]);
			
 
				 	unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]);
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
@@ -157,7 +159,8 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 
				 
			
 
				 	unsigned z;
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 
			
 
				 			/*
			
@@ -188,7 +191,8 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 
				 			int ret;
			
 
				 			int info;
			
 
				 			ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
			
 
				-			if (ret != MAGMA_SUCCESS) {
			
 
				+			if (ret != MAGMA_SUCCESS)
			
 
				+			{
			
 
				 				fprintf(stderr, "Error in Magma: %d\n", ret);
			
 
				 				STARPU_ABORT();
			
 
				 			}
			
--- a/examples/cholesky/cholesky_models.c
+++ b/examples/cholesky/cholesky_models.c
@@ -126,8 +126,10 @@ static double cuda_chol_task_22_cost(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-struct starpu_perfmodel chol_model_11 = {
			
 
				-	.per_arch = {
			
 
				+struct starpu_perfmodel chol_model_11 =
			
 
				+{
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_11_cost },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_11_cost }
			
 
				 	},
			
@@ -135,8 +137,10 @@ struct starpu_perfmodel chol_model_11 = {
 
				 	.symbol = "chol_model_11"
			
 
				 };
			
 
				 
			
 
				-struct starpu_perfmodel chol_model_21 = {
			
 
				-	.per_arch = {
			
 
				+struct starpu_perfmodel chol_model_21 =
			
 
				+{
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_21_cost },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_21_cost }
			
 
				 	},
			
@@ -144,8 +148,10 @@ struct starpu_perfmodel chol_model_21 = {
 
				 	.symbol = "chol_model_21"
			
 
				 };
			
 
				 
			
 
				-struct starpu_perfmodel chol_model_22 = {
			
 
				-	.per_arch = {
			
 
				+struct starpu_perfmodel chol_model_22 =
			
 
				+{
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_22_cost },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_22_cost }
			
 
				 	},
			
--- a/examples/cholesky/cholesky_tag.c
+++ b/examples/cholesky/cholesky_tag.c
@@ -64,7 +64,8 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
			
 
				 	}
			
 
				 
			
@@ -94,20 +95,24 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!noprio && (j == k+1)) {
			
 
				+	if (!noprio && (j == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
 
				 	int ret = starpu_task_submit(task);
			
 
				-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
			
 
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+	{
			
 
				                 FPRINTF(stderr, "No worker may execute this task\n");
			
 
				                 exit(0);
			
 
				         }
			
@@ -141,20 +146,24 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 
				 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!noprio && (i == k + 1) && (j == k +1) ) {
			
 
				+	if (!noprio && (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
			
 
				 	}
			
 
				 
			
 
				 	int ret = starpu_task_submit(task);
			
 
				-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
			
 
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+	{
			
 
				                 FPRINTF(stderr, "No worker may execute this task\n");
			
 
				                 exit(0);
			
 
				         }
			
@@ -183,12 +192,15 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
				 	{
			
 
				 		struct starpu_task *task = create_task_11(dataA, k);
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			int ret = starpu_task_submit(task);
			
 
				-                        if (STARPU_UNLIKELY(ret == -ENODEV)) {
			
 
				+                        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+			{
			
 
				                                 FPRINTF(stderr, "No worker may execute this task\n");
			
 
				                                 exit(0);
			
 
				                         }
			
@@ -209,7 +221,8 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
				 
			
 
				 	/* schedule the codelet */
			
 
				 	int ret = starpu_task_submit(entry_task);
			
 
				-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
			
 
				+        if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+	{
			
 
				                 FPRINTF(stderr, "No worker may execute this task\n");
			
 
				                 exit(0);
			
 
				         }
			
@@ -243,7 +256,8 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 
				 	{
			
 
				 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
			
 
				 	} 
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		*A = malloc(dim*dim*sizeof(float));
			
 
				 	}
			
 
				 }
			
@@ -258,12 +272,14 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
				 
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
@@ -311,10 +327,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			}
			
 
				 		}
			
@@ -332,10 +350,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 				mat[j+i*size] = 0.0f; /* debug */
			
 
				 			}
			
@@ -355,10 +375,12 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-			if (i <= j) {
			
 
				+			if (i <= j)
			
 
				+			{
			
 
				 				FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			}
			
 
				 		}
			
--- a/examples/cholesky/cholesky_tile_tag.c
+++ b/examples/cholesky/cholesky_tile_tag.c
@@ -73,7 +73,8 @@ static struct starpu_task * create_task_11(unsigned k, unsigned nblocks)
 
				 	task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
			
 
				 	}
			
 
				 
			
@@ -110,15 +111,18 @@ static void create_task_21(unsigned k, unsigned j)
 
				 	task->buffers[1].handle = A_state[j][k]; 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (j == k+1) {
			
 
				+	if (j == k+1)
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
@@ -159,15 +163,18 @@ static void create_task_22(unsigned k, unsigned i, unsigned j)
 
				 	task->buffers[2].handle = A_state[j][i]; 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if ( (i == k + 1) && (j == k +1) ) {
			
 
				+	if ( (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
			
 
				 	}
			
 
				 
			
@@ -195,10 +202,12 @@ static void cholesky_no_stride(void)
 
				 	{
			
 
				 		struct starpu_task *task = create_task_11(k, nblocks);
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_task_submit(task);
			
 
				 		}
			
 
				 		
			
@@ -251,7 +260,8 @@ int main(int argc, char **argv)
 
				 	for (y = 0; y < nblocks; y++)
			
 
				 	for (x = 0; x < nblocks; x++)
			
 
				 	{
			
 
				-		if (x <= y) {
			
 
				+		if (x <= y)
			
 
				+		{
			
 
				 			A[y][x] = malloc(BLOCKSIZE*BLOCKSIZE*sizeof(float));
			
 
				 			assert(A[y][x]);
			
 
				 		}
			
@@ -261,7 +271,8 @@ int main(int argc, char **argv)
 
				 	for (y = 0; y < nblocks; y++)
			
 
				 	for (x = 0; x < nblocks; x++)
			
 
				 	{
			
 
				-		if (x <= y) {
			
 
				+		if (x <= y)
			
 
				+		{
			
 
				 #ifdef STARPU_HAVE_POSIX_MEMALIGN
			
 
				 			posix_memalign((void **)&A[y][x], 128, BLOCKSIZE*BLOCKSIZE*sizeof(float));
			
 
				 #else
			
@@ -277,7 +288,8 @@ int main(int argc, char **argv)
 
				 	 * */
			
 
				 	for (y = 0; y < nblocks; y++)
			
 
				 	for (x = 0; x < nblocks; x++)
			
 
				-	if (x <= y) {
			
 
				+	if (x <= y)
			
 
				+	{
			
 
				 		for (i = 0; i < BLOCKSIZE; i++)
			
 
				 		for (j = 0; j < BLOCKSIZE; j++)
			
 
				 		{
			
@@ -293,7 +305,8 @@ int main(int argc, char **argv)
 
				 	for (y = 0; y < nblocks; y++)
			
 
				 	for (x = 0; x < nblocks; x++)
			
 
				 	{
			
 
				-		if (x <= y) {
			
 
				+		if (x <= y)
			
 
				+		{
			
 
				 			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
			
 
				 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
			
 
				 		}
			
@@ -304,7 +317,8 @@ int main(int argc, char **argv)
 
				 	for (y = 0; y < nblocks; y++)
			
 
				 	for (x = 0; x < nblocks; x++)
			
 
				 	{
			
 
				-		if (x <= y) {
			
 
				+		if (x <= y)
			
 
				+		{
			
 
				 			starpu_data_unregister(A_state[y][x]);
			
 
				 		}
			
 
				 	}
			
--- a/examples/common/blas_model.h
+++ b/examples/common/blas_model.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -22,7 +22,8 @@
 
				 
			
 
				 double gemm_cost(struct starpu_buffer_descr *descr);
			
 
				 
			
 
				-static struct starpu_perfmodel starpu_sgemm_model = {
			
 
				+static struct starpu_perfmodel starpu_sgemm_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = "sgemm_atlas"
			
@@ -33,12 +34,14 @@ static struct starpu_perfmodel starpu_sgemm_model = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-static struct starpu_perfmodel starpu_sgemm_model_common = {
			
 
				+static struct starpu_perfmodel starpu_sgemm_model_common =
			
 
				+{
			
 
				 	.cost_model = gemm_cost,
			
 
				 	.type = STARPU_COMMON,
			
 
				 };
			
 
				 
			
 
				-static struct starpu_perfmodel starpu_dgemm_model = {
			
 
				+static struct starpu_perfmodel starpu_dgemm_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = "dgemm_atlas"
			
@@ -49,7 +52,8 @@ static struct starpu_perfmodel starpu_dgemm_model = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-static struct starpu_perfmodel starpu_dgemm_model_common = {
			
 
				+static struct starpu_perfmodel starpu_dgemm_model_common =
			
 
				+{
			
 
				 	.cost_model = gemm_cost,
			
 
				 	.type = STARPU_COMMON,
			
 
				 };
			
--- a/examples/filters/fblock.c
+++ b/examples/filters/fblock.c
@@ -39,9 +39,12 @@ void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz)
 
				 {
			
 
				         int i, j, k;
			
 
				         FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz);
			
 
				-        for(k=0 ; k<nz ; k++) {
			
 
				-                for(j=0 ; j<ny ; j++) {
			
 
				-                        for(i=0 ; i<nx ; i++) {
			
 
				+        for(k=0 ; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0 ; j<ny ; j++)
			
 
				+		{
			
 
				+                        for(i=0 ; i<nx ; i++)
			
 
				+			{
			
 
				                                 FPRINTF(stderr, "%2d ", block[(k*ldz)+(j*ldy)+i]);
			
 
				                         }
			
 
				                         FPRINTF(stderr,"\n");
			
@@ -74,9 +77,12 @@ int main(int argc, char **argv)
 
				 
			
 
				         block = (int*)malloc(NX*NY*NZ*sizeof(block[0]));
			
 
				         assert(block);
			
 
				-        for(k=0 ; k<NZ ; k++) {
			
 
				-                for(j=0 ; j<NY ; j++) {
			
 
				-                        for(i=0 ; i<NX ; i++) {
			
 
				+        for(k=0 ; k<NZ ; k++)
			
 
				+	{
			
 
				+                for(j=0 ; j<NY ; j++)
			
 
				+		{
			
 
				+                        for(i=0 ; i<NX ; i++)
			
 
				+			{
			
 
				                                 block[(k*NX*NY)+(j*NX)+i] = n++;
			
 
				                         }
			
 
				                 }
			
@@ -138,7 +144,8 @@ int main(int argc, char **argv)
 
				                 task->cl_arg = &multiplier;
			
 
				 
			
 
				                 ret = starpu_task_submit(task);
			
 
				-                if (ret) {
			
 
				+                if (ret)
			
 
				+		{
			
 
				                         FPRINTF(stderr, "Error when submitting task\n");
			
 
				                         exit(ret);
			
 
				                 }
			
--- a/examples/filters/fblock_cpu.c
+++ b/examples/filters/fblock_cpu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -27,8 +27,10 @@ void cpu_func(void *buffers[], void *cl_arg)
 
				         unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
			
 
				         unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
			
 
				 
			
 
				-        for(k=0; k<nz ; k++) {
			
 
				-                for(j=0; j<ny ; j++) {
			
 
				+        for(k=0; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0; j<ny ; j++)
			
 
				+		{
			
 
				                         for(i=0; i<nx ; i++)
			
 
				                                 block[(k*ldz)+(j*ldy)+i] = *factor;
			
 
				                 }
			
--- a/examples/filters/fblock_cuda.cu
+++ b/examples/filters/fblock_cuda.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -20,8 +20,10 @@
 
				 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)
			
 
				 {
			
 
				         int i, j, k;
			
 
				-        for(k=0; k<nz ; k++) {
			
 
				-                for(j=0; j<ny ; j++) {
			
 
				+        for(k=0; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0; j<ny ; j++)
			
 
				+		{
			
 
				                         for(i=0; i<nx ; i++)
			
 
				                                 block[(k*ldz)+(j*ldy)+i] = factor;
			
 
				                 }
			
--- a/examples/filters/fblock_opencl.c
+++ b/examples/filters/fblock_opencl.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Université de Bordeaux 1
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -19,7 +19,8 @@
 
				 #include <starpu_opencl.h>
			
 
				 
			
 
				 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
			
 
				-do {                                                        \
			
 
				+do                                                          \
			
 
				+{							    \
			
 
				 	int err;                                            \
			
 
				 	err = clSetKernelArg(kernel, n, size, ptr);         \
			
 
				 	if (err != CL_SUCCESS)                              \
			
--- a/examples/filters/fblock_opencl_kernel.cl
+++ b/examples/filters/fblock_opencl_kernel.cl
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -17,8 +17,10 @@
 
				 __kernel void fblock_opencl(__global int* block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
			
 
				 {
			
 
				         int i, j, k;
			
 
				-        for(k=0; k<nz ; k++) {
			
 
				-                for(j=0; j<ny ; j++) {
			
 
				+        for(k=0; k<nz ; k++)
			
 
				+	{
			
 
				+                for(j=0; j<ny ; j++)
			
 
				+		{
			
 
				                         for(i=0; i<nx ; i++)
			
 
				                                 block[(k*ldz)+(j*ldy)+i] = factor;
			
 
				                 }
			
--- a/examples/filters/fmatrix.c
+++ b/examples/filters/fmatrix.c
@@ -34,7 +34,8 @@ void cpu_func(void *buffers[], void *cl_arg)
 
				         /* local copy of the matrix pointer */
			
 
				         int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
			
 
				 
			
 
				-        for(j=0; j<ny ; j++) {
			
 
				+        for(j=0; j<ny ; j++)
			
 
				+	{
			
 
				                 for(i=0; i<nx ; i++)
			
 
				                         val[(j*ld)+i] = *factor;
			
 
				         }
			
@@ -46,8 +47,10 @@ int main(int argc, char **argv)
 
				         int matrix[NX*NY];
			
 
				 
			
 
				         FPRINTF(stderr,"IN  Matrix: \n");
			
 
				-        for(j=0 ; j<NY ; j++) {
			
 
				-                for(i=0 ; i<NX ; i++) {
			
 
				+        for(j=0 ; j<NY ; j++)
			
 
				+	{
			
 
				+                for(i=0 ; i<NX ; i++)
			
 
				+		{
			
 
				                         matrix[(j*NX)+i] = n++;
			
 
				                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
			
 
				                 }
			
@@ -56,7 +59,8 @@ int main(int argc, char **argv)
 
				         FPRINTF(stderr,"\n");
			
 
				 
			
 
				         starpu_data_handle_t handle;
			
 
				-        struct starpu_codelet cl = {
			
 
				+        struct starpu_codelet cl =
			
 
				+	{
			
 
				                 .where = STARPU_CPU,
			
 
				                 .cpu_funcs = {cpu_func, NULL},
			
 
				                 .nbuffers = 1
			
@@ -96,8 +100,10 @@ int main(int argc, char **argv)
 
				 
			
 
				         /* Print result matrix */
			
 
				         FPRINTF(stderr,"OUT Matrix: \n");
			
 
				-        for(j=0 ; j<NY ; j++) {
			
 
				-                for(i=0 ; i<NX ; i++) {
			
 
				+        for(j=0 ; j<NY ; j++)
			
 
				+	{
			
 
				+                for(i=0 ; i<NX ; i++)
			
 
				+		{
			
 
				                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
			
 
				                 }
			
 
				                 FPRINTF(stderr,"\n");
			
--- a/examples/filters/fvector.c
+++ b/examples/filters/fvector.c
@@ -42,7 +42,8 @@ int main(int argc, char **argv)
 
				         starpu_data_handle_t handle;
			
 
				         int factor=1;
			
 
				 
			
 
				-        struct starpu_codelet cl = {
			
 
				+        struct starpu_codelet cl =
			
 
				+	{
			
 
				                 .where = STARPU_CPU,
			
 
				                 .cpu_funcs = {cpu_func, NULL},
			
 
				                 .nbuffers = 1
			
--- a/examples/heat/dw_factolu.c
+++ b/examples/heat/dw_factolu.c
@@ -97,7 +97,8 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
				 	/* we did task 22k,i,j */
			
 
				 	advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE;
			
 
				 	
			
 
				-	if ( (i == j) && (i == k+1)) {
			
 
				+	if ( (i == j) && (i == k+1))
			
 
				+	{
			
 
				 		/* we now reduce the LU22 part (recursion appears there) */
			
 
				 		cl_args *u11arg = malloc(sizeof(cl_args));
			
 
				 
			
@@ -123,14 +124,17 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
				 	}
			
 
				 
			
 
				 	/* 11k+1 + 22k,k+1,j => 21 k+1,j */
			
 
				-	if ( i == k + 1) {
			
 
				+	if ( i == k + 1)
			
 
				+	{
			
 
				 		uint8_t dep;
			
 
				 		/* 11 k+1*/
			
 
				 		dep = advance_11[(k+1)];
			
 
				-		if (dep & DONE) {
			
 
				+		if (dep & DONE)
			
 
				+		{
			
 
				 			/* try to push the task */
			
 
				 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1) + j*nblocks], STARTED);
			
 
				-				if ((u & STARTED) == 0) {
			
 
				+				if ((u & STARTED) == 0)
			
 
				+				{
			
 
				 					/* we are the only one that should 
			
 
				 					 * launch that task */
			
 
				 					cl_args *u21a = malloc(sizeof(cl_args));
			
@@ -159,14 +163,17 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 
				 	}
			
 
				 
			
 
				 	/* 11k + 22k-1,i,k => 12 k,i */
			
 
				-	if (j == k + 1) {
			
 
				+	if (j == k + 1)
			
 
				+	{
			
 
				 		uint8_t dep;
			
 
				 		/* 11 k+1*/
			
 
				 		dep = advance_11[(k+1)];
			
 
				-		if (dep & DONE) {
			
 
				+		if (dep & DONE)
			
 
				+		{
			
 
				 			/* try to push the task */
			
 
				 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1)*nblocks + i], STARTED);
			
 
				-				 if ((u & STARTED) == 0) {
			
 
				+				 if ((u & STARTED) == 0)
			
 
				+				 {
			
 
				 					/* we are the only one that should launch that task */
			
 
				 					cl_args *u12a = malloc(sizeof(cl_args));
			
 
				 
			
@@ -217,7 +224,8 @@ void dw_callback_v2_codelet_update_u12(void *argcb)
 
				 		{
			
 
				 			/* perhaps we may schedule the 22 i,args->k,slicey task */
			
 
				 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + slicey*nblocks + k], STARTED);
			
 
				-                        if ((u & STARTED) == 0) {
			
 
				+                        if ((u & STARTED) == 0)
			
 
				+			{
			
 
				 				/* update that square matrix */
			
 
				 				cl_args *u22a = malloc(sizeof(cl_args));
			
 
				 
			
@@ -276,7 +284,8 @@ void dw_callback_v2_codelet_update_u21(void *argcb)
 
				 		{
			
 
				 			/* perhaps we may schedule the 22 i,args->k,slicey task */
			
 
				 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + k*nblocks + slicex], STARTED);
			
 
				-                        if ((u & STARTED) == 0) {
			
 
				+                        if ((u & STARTED) == 0)
			
 
				+			{
			
 
				 				/* update that square matrix */
			
 
				 				cl_args *u22a = malloc(sizeof(cl_args));
			
 
				 
			
@@ -340,16 +349,20 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
				 
			
 
				 			/* can we launch 12i,slice ? */
			
 
				 			uint8_t deps12;
			
 
				-			if (i == 0) {
			
 
				+			if (i == 0)
			
 
				+			{
			
 
				 				deps12 = DONE;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];		
			
 
				 			}
			
 
				-			if (deps12 & DONE) {
			
 
				+			if (deps12 & DONE)
			
 
				+			{
			
 
				 				/* we may perhaps launch the task 12i,slice */
			
 
				 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
			
 
				-				 if ((u & STARTED) == 0) {
			
 
				+				 if ((u & STARTED) == 0)
			
 
				+				 {
			
 
				 					/* we are the only one that should launch that task */
			
 
				 					cl_args *u12a = malloc(sizeof(cl_args));
			
 
				 
			
@@ -377,16 +390,20 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
				 			}
			
 
				 
			
 
				 			/* can we launch 21i,slice ? */
			
 
				-			if (i == 0) {
			
 
				+			if (i == 0)
			
 
				+			{
			
 
				 				deps12 = DONE;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];		
			
 
				 			}
			
 
				-			if (deps12 & DONE) {
			
 
				+			if (deps12 & DONE)
			
 
				+			{
			
 
				 				/* we may perhaps launch the task 12i,slice */
			
 
				 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
			
 
				-				 if ((u & STARTED) == 0) {
			
 
				+				 if ((u & STARTED) == 0)
			
 
				+				 {
			
 
				 					/* we are the only one that should launch that task */
			
 
				 					cl_args *u21a = malloc(sizeof(cl_args));
			
 
				 
			
@@ -700,7 +717,8 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 
				 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
			
 
				 		starpu_malloc((void **)B, (size_t)dim*sizeof(float));
			
 
				 	} 
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		*A = malloc((size_t)dim*dim*sizeof(float));
			
 
				 		STARPU_ASSERT(*A);
			
 
				 		*B = malloc((size_t)dim*sizeof(float));
			
@@ -730,19 +748,22 @@ void dw_factoLU(float *matA, unsigned size,
 
				 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
			
 
				 			size, size, sizeof(float));
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				 	starpu_data_map_filters(dataA, 2, &f, &f2);
			
 
				 
			
 
				-	switch (version) {
			
 
				+	switch (version)
			
 
				+	{
			
 
				 		case 1:
			
 
				 			dw_codelet_facto(dataA, nblocks);
			
 
				 			break;
			
--- a/examples/heat/dw_factolu.h
+++ b/examples/heat/dw_factolu.h
@@ -41,7 +41,8 @@
 
				 #define BLAS3_FLOP(n1,n2,n3)    \
			
 
				         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
			
 
				 
			
 
				-typedef struct {
			
 
				+typedef struct
			
 
				+{
			
 
				 	starpu_data_handle_t dataA;
			
 
				 	unsigned i;
			
 
				 	unsigned j;
			
@@ -89,10 +90,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-/*			if (i <= j) { */
			
 
				+/*			if (i <= j)
			
 
				+			{ */
			
 
				 				FPRINTF(stdout, "%2.2f\t", LU[j +i*size]);
			
 
				 /*			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			} */
			
 
				 		}
			
@@ -107,10 +110,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-/*			if (i <= j) { */
			
 
				+/*			if (i <= j)
			
 
				+			{ */
			
 
				 				FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
			
 
				 /*			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			} */
			
 
				 		}
			
@@ -123,10 +128,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-/*			if (i <= j) { */
			
 
				+/*			if (i <= j)
			
 
				+			{ */
			
 
				 				FPRINTF(stdout, "%2.2f\t", U[j +i*size]);
			
 
				 /*			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			} */
			
 
				 		}
			
@@ -155,10 +162,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-	/*		if (i <= j) { */
			
 
				+	/*		if (i <= j)
			
 
				+			{ */
			
 
				 	      			FPRINTF(stdout, "%2.2f\t", A[j +i*size]);
			
 
				 	/*		}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			} */
			
 
				 		}
			
@@ -172,10 +181,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 
				 	{
			
 
				 		for (i = 0; i < size; i++)
			
 
				 		{
			
 
				-	/*		if (i <= j) { */
			
 
				+	/*		if (i <= j)
			
 
				+			{ */
			
 
				 	      			FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
			
 
				 	/*		}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				FPRINTF(stdout, ".\t");
			
 
				 			} */
			
 
				 		}
			
--- a/examples/heat/dw_factolu_grain.c
+++ b/examples/heat/dw_factolu_grain.c
@@ -42,7 +42,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 
				 	return task;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl11 = {
			
 
				+static struct starpu_codelet cl11 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -68,14 +69,16 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 
				 	task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11(k, tag_prefix), 1, TAG22(k-1, k, k, tag_prefix));
			
 
				 	}
			
 
				 
			
 
				 	return task;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl12 = {
			
 
				+static struct starpu_codelet cl12 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -99,22 +102,26 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (i == k+1) {
			
 
				+	if (i == k+1)
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, i, k, tag_prefix));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 1, TAG11(k, tag_prefix));
			
 
				 	}
			
 
				 
			
 
				 	starpu_task_submit(task);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl21 = {
			
 
				+static struct starpu_codelet cl21 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -136,22 +143,26 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (j == k+1) {
			
 
				+	if (j == k+1)
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, k, j, tag_prefix));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 1, TAG11(k, tag_prefix));
			
 
				 	}
			
 
				 
			
 
				 	starpu_task_submit(task);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl22 = {
			
 
				+static struct starpu_codelet cl22 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -177,15 +188,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 
				 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if ( (i == k + 1) && (j == k +1) ) {
			
 
				+	if ( (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 3, TAG22(k-1, i, j, tag_prefix), TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 2, TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
			
 
				 	}
			
 
				 
			
@@ -207,12 +221,14 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 	unsigned nblocks = size / blocksize;
			
 
				 	unsigned maxk = inner_size / blocksize;
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
@@ -235,10 +251,12 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 		struct starpu_task *task = create_task_11(dataA, k, tag_prefix);
			
 
				 
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_task_submit(task);
			
 
				 		}
			
 
				 		
			
@@ -272,7 +290,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 		starpu_data_unpartition(dataA, 0);		
			
 
				 		return;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/*
			
 
				 		 * call dw_factoLU_grain_inner recursively in the remaining blocks
			
 
				 		 */
			
@@ -301,7 +320,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 
				 		{
			
 
				 			dw_factoLU_grain_inner(newmatA, size-inner_size, (size-inner_size)/2, ld, blocksize/2, tag_prefix+1);
			
 
				 		}
			
 
				-		else { */
			
 
				+		else
			
 
				+		{ */
			
 
				 			dw_factoLU_grain_inner(newmatA, size-inner_size, size-inner_size, ld, blocksize/2, tag_prefix+1);
			
 
				 /*		} */
			
 
				 	}
			
--- a/examples/heat/dw_factolu_kernels.c
+++ b/examples/heat/dw_factolu_kernels.c
@@ -121,7 +121,8 @@ static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attr
 
				 	cublasStatus status;
			
 
				 #endif
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			SGEMM("N", "N",	dy, dx, dz, 
			
 
				 				-1.0f, left, ld21, right, ld12,
			
@@ -168,7 +169,8 @@ void dw_cublas_codelet_update_u22(void *descr[], void *_args)
 
				  * U12
			
 
				  */
			
 
				 
			
 
				-static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args) {
			
 
				+static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args)
			
 
				+{
			
 
				 	float *sub11;
			
 
				 	float *sub12;
			
 
				 
			
@@ -186,7 +188,8 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribut
 
				 #endif
			
 
				 
			
 
				 	/* solve L11 U12 = A12 (find U12) */
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			STRSM("L", "L", "N", "N",
			
 
				 					 nx12, ny12, 1.0f, sub11, ld11, sub12, ld12);
			
@@ -231,7 +234,8 @@ void dw_cublas_codelet_update_u12(void *descr[], void *_args)
 
				  * U21
			
 
				  */
			
 
				 
			
 
				-static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args) {
			
 
				+static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args)
			
 
				+{
			
 
				 	float *sub11;
			
 
				 	float *sub21;
			
 
				 
			
@@ -248,7 +252,8 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribut
 
				 	cublasStatus status;
			
 
				 #endif
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
@@ -317,7 +322,8 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribut
 
				 
			
 
				 	unsigned long z;
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			for (z = 0; z < nx; z++)
			
 
				 			{
			
--- a/examples/heat/dw_factolu_tag.c
+++ b/examples/heat/dw_factolu_tag.c
@@ -44,7 +44,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 
				 	return task;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl11 = {
			
 
				+static struct starpu_codelet cl11 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -71,14 +72,16 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
			
 
				 	}
			
 
				 
			
 
				 	return task;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl12 = {
			
 
				+static struct starpu_codelet cl12 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -102,22 +105,26 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i)
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio && (i == k+1)) {
			
 
				+	if (!no_prio && (i == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, i), 2, TAG11(k), TAG22(k-1, i, k));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, i), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
 
				 	starpu_task_submit(task);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl21 = {
			
 
				+static struct starpu_codelet cl21 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -139,22 +146,26 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio && (j == k+1)) {
			
 
				+	if (!no_prio && (j == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
 
				 	starpu_task_submit(task);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet cl22 = {
			
 
				+static struct starpu_codelet cl22 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -180,15 +191,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 
				 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
			
 
				+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, i), TAG21(k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, i), TAG21(k, j));
			
 
				 	}
			
 
				 
			
@@ -214,10 +228,12 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
				 		struct starpu_task *task = create_task_11(dataA, k);
			
 
				 
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_task_submit(task);
			
 
				 		}
			
 
				 		
			
@@ -280,12 +296,14 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
--- a/examples/heat/dw_sparse_cg.c
+++ b/examples/heat/dw_sparse_cg.c
@@ -63,7 +63,8 @@ static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t
 
				 	{
			
 
				 		rowptr[row] = pos;
			
 
				 
			
 
				-		if (row > 0) {
			
 
				+		if (row > 0)
			
 
				+		{
			
 
				 			nzval[pos] = 1.0f;
			
 
				 			colind[pos] = row-1;
			
 
				 			pos++;
			
@@ -73,7 +74,8 @@ static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t
 
				 		colind[pos] = row;
			
 
				 		pos++;
			
 
				 
			
 
				-		if (row < size - 1) {
			
 
				+		if (row < size - 1)
			
 
				+		{
			
 
				 			nzval[pos] = 1.0f;
			
 
				 			colind[pos] = row+1;
			
 
				 			pos++;
			
@@ -312,7 +314,8 @@ void iteration_cg(void *problem)
 
				 		/* we did not reach the stop condition yet */
			
 
				 		launch_new_cg_iteration(problem);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* we may stop */
			
 
				 		FPRINTF(stdout, "We are done ... after %d iterations \n", pb->i - 1);
			
 
				 		FPRINTF(stdout, "i : %d\n\tdelta_new %2.5f\n", pb->i, pb->delta_new);
			
--- a/examples/heat/dw_sparse_cg.h
+++ b/examples/heat/dw_sparse_cg.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -46,7 +46,8 @@ static unsigned usecpu = 0;
 
				 static unsigned blocks = 512;
			
 
				 static unsigned grids  = 8;
			
 
				 
			
 
				-struct cg_problem {
			
 
				+struct cg_problem
			
 
				+{
			
 
				 	starpu_data_handle_t ds_matrixA;
			
 
				 	starpu_data_handle_t ds_vecx;
			
 
				 	starpu_data_handle_t ds_vecb;
			
@@ -71,23 +72,28 @@ struct cg_problem {
 
				 static void __attribute__((unused)) parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-size") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-size") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			size = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-block") == 0) {
			
 
				+		if (strcmp(argv[i], "-block") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			blocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-grid") == 0) {
			
 
				+		if (strcmp(argv[i], "-grid") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			grids = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-cpu") == 0) {
			
 
				+		if (strcmp(argv[i], "-cpu") == 0)
			
 
				+		{
			
 
				 			usecpu = 1;
			
 
				 		}
			
 
				 	}
			
--- a/examples/heat/heat.c
+++ b/examples/heat/heat.c
@@ -36,65 +36,80 @@ extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint3
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-cg") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-cg") == 0)
			
 
				+		{
			
 
				 			use_cg = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-shape") == 0) {
			
 
				+		if (strcmp(argv[i], "-shape") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			shape = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nthick") == 0) {
			
 
				+		if (strcmp(argv[i], "-nthick") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nthick = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-ntheta") == 0) {
			
 
				+		if (strcmp(argv[i], "-ntheta") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			ntheta = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nbigblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nbigblocks") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nbigblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-v1") == 0) {
			
 
				+		if (strcmp(argv[i], "-v1") == 0)
			
 
				+		{
			
 
				 			version = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-v2") == 0) {
			
 
				+		if (strcmp(argv[i], "-v2") == 0)
			
 
				+		{
			
 
				 			version = 2;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-v3") == 0) {
			
 
				+		if (strcmp(argv[i], "-v3") == 0)
			
 
				+		{
			
 
				 			version = 3;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-v4") == 0) {
			
 
				+		if (strcmp(argv[i], "-v4") == 0)
			
 
				+		{
			
 
				 			version = 4;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-pin") == 0) {
			
 
				+		if (strcmp(argv[i], "-pin") == 0)
			
 
				+		{
			
 
				 			pinned = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-check") == 0) {
			
 
				+		if (strcmp(argv[i], "-check") == 0)
			
 
				+		{
			
 
				 			check = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-no-prio") == 0) {
			
 
				+		if (strcmp(argv[i], "-no-prio") == 0)
			
 
				+		{
			
 
				 			no_prio = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-size") == 0) {
			
 
				+		if (strcmp(argv[i], "-size") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			unsigned size = strtol(argv[++i], &argptr, 10);
			
 
				 			nthick = 130;
			
@@ -102,7 +117,8 @@ static void parse_args(int argc, char **argv)
 
				 			STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
			
 
				 		}
			
 
				 	}
			
@@ -136,11 +152,14 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 
				 	ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
			
 
				 
			
 
				 	/* B */
			
 
				-	if (side_tr) {
			
 
				+	if (side_tr)
			
 
				+	{
			
 
				 		/* lower D is actually B here */
			
 
				 		xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
			
 
				 		yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* upper */
			
 
				 		xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
			
 
				 		yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
			
@@ -150,24 +169,31 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 
				 	yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
			
 
				 
			
 
				 	/* now look for the actual psi node */
			
 
				-	if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) {
			
 
				+	if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))
			
 
				+	{
			
 
				 		/* A nothing to do */
			
 
				-	} else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) {
			
 
				+	}
			
 
				+	else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))
			
 
				+	{
			
 
				 		/* psi matches C */
			
 
				 		/* swap A and C coordinates  */
			
 
				 		tmp = xa; xa = xc; xc = tmp;
			
 
				 		tmp = ya; ya = yc; yc = tmp;
			
 
				-	} else if
			
 
				-		(side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) {
			
 
				+	}
			
 
				+	else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)))
			
 
				+	{
			
 
				 		/* psi is D (that was stored in C) XXX */
			
 
				 		tmp = xa; xa = xb; xb = tmp;
			
 
				 		tmp = ya; ya = yb; yb = tmp;
			
 
				-	} else if
			
 
				-		(!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) {
			
 
				+	}
			
 
				+	else if	(!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)))
			
 
				+	{
			
 
				 		/* psi is C */
			
 
				 		tmp = xa; xa = xb; xb = tmp;
			
 
				 		tmp = ya; ya = yb; yb = tmp;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* the psi node is not a node of the current triangle */
			
 
				 		return 0.0f;
			
 
				 	}
			
@@ -178,7 +204,8 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 
				 
			
 
				 	denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
			
 
				 
			
 
				-	switch (xy) {
			
 
				+	switch (xy)
			
 
				+	{
			
 
				 		case X:
			
 
				 			value = (yc - yb)/denom;
			
 
				 			break;
			
@@ -220,11 +247,14 @@ static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsig
 
				 	xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
			
 
				 	yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
			
 
				 
			
 
				-	if (side_tr) {
			
 
				+	if (side_tr)
			
 
				+	{
			
 
				 		/* lower */
			
 
				 		xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
			
 
				 		yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
			
 
				 		yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
			
 
				 	}
			
@@ -314,8 +344,6 @@ done:
 
				 
			
 
				 static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
			
 
				 {
			
 
				-
			
 
				-
			
 
				 	unsigned i;
			
 
				 
			
 
				 	/* solve the actual problem LU X = B */
			
@@ -452,7 +480,8 @@ void build_mesh(point *mesh)
 
				 			float r;
			
 
				 			r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
			
 
				 
			
 
				-			switch (shape) {
			
 
				+			switch (shape)
			
 
				+			{
			
 
				 				default:
			
 
				 				case 0:
			
 
				 					mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
			
@@ -604,11 +633,13 @@ static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uin
 
				 			float val;
			
 
				 			unsigned nodeneighbour =  neighbours[neighbour];
			
 
				 
			
 
				-			if (nodeneighbour < newsize) {
			
 
				+			if (nodeneighbour < newsize)
			
 
				+			{
			
 
				 
			
 
				 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
			
 
				 	
			
 
				-				if (val != 0.0f) {
			
 
				+				if (val != 0.0f)
			
 
				+				{
			
 
				 					*nzval = realloc(*nzval, (pos+1)*sizeof(float));
			
 
				 					*colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
			
 
				 	
			
@@ -648,7 +679,8 @@ static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned news
 
				 		{
			
 
				 			unsigned long nodeneighbour =  neighbours[neighbour];
			
 
				 
			
 
				-			if (nodeneighbour < newsize) {
			
 
				+			if (nodeneighbour < newsize)
			
 
				+			{
			
 
				 				float val;
			
 
				 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
			
 
				 				A[j+ (unsigned long)newsize*nodeneighbour] = val;
			
@@ -686,7 +718,8 @@ int main(int argc, char **argv)
 
				 
			
 
				 	/* we can either use a direct method (LU decomposition here) or an 
			
 
				 	 * iterative method (conjugate gradient here) */
			
 
				-	if (use_cg) {
			
 
				+	if (use_cg)
			
 
				+	{
			
 
				 		unsigned nnz;
			
 
				 		float *nzval;
			
 
				 		uint32_t *colind;
			
@@ -718,7 +751,8 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 	
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 
			
 
				 		/* unfortunately CUDA does not allow late memory registration, 
			
 
				 		 * we need to do the malloc using CUDA itself ... */
			
@@ -733,7 +767,8 @@ int main(int argc, char **argv)
 
				 
			
 
				 		STARPU_ASSERT(newsize % nblocks == 0);
			
 
				 
			
 
				-		switch (version) {
			
 
				+		switch (version)
			
 
				+		{
			
 
				 			case 1:
			
 
				 			case 2:
			
 
				 				dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);
			
--- a/examples/heat/heat.h
+++ b/examples/heat/heat.h
@@ -52,7 +52,8 @@
 
				 #define NODE_TO_THICK(n)		((n) % nthick)
			
 
				 #define NODE_TO_THETA(n)		((n) / nthick)
			
 
				 
			
 
				-typedef struct point_t {
			
 
				+typedef struct point_t
			
 
				+{
			
 
				 	float x;
			
 
				 	float y;
			
 
				 } point;
			
--- a/examples/heat/heat_display.c
+++ b/examples/heat/heat_display.c
@@ -52,13 +52,15 @@ static void generate_graph(void)
 
				 			float colorA_G, colorB_G, colorC_G, colorD_G;
			
 
				 			float colorA_B, colorB_B, colorC_B, colorD_B;
			
 
				 
			
 
				-			if (maxval == minval) {
			
 
				+			if (maxval == minval)
			
 
				+			{
			
 
				 				colorA_R = 1.0f; colorA_G = 1.0f; colorA_B = 1.0f;
			
 
				 				colorB_R = 1.0f; colorB_G = 1.0f; colorB_B = 1.0f;
			
 
				 				colorC_R = 1.0f; colorC_G = 1.0f; colorC_B = 1.0f;
			
 
				 				colorD_R = 1.0f; colorD_G = 1.0f; colorD_B = 1.0f;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				float amplitude = maxval - minval;
			
 
				 
			
 
				 				float coeffA, coeffB, coeffC, coeffD;
			
@@ -84,7 +86,8 @@ static void generate_graph(void)
 
				 				colorD_G = coeffD<0.5f?1.0f:2.0*(1 - coeffD)*1.0f;
			
 
				 			}
			
 
				 
			
 
				-			if (printmesh) {
			
 
				+			if (printmesh)
			
 
				+			{
			
 
				 				glColor3f (0.0f, 0.0f, 0.0f);
			
 
				 				glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
			
 
				 				glLineWidth(3.0f);
			
@@ -142,7 +145,8 @@ static void display(void)
 
				 
			
 
				 static void pressKey(unsigned char key, int x __attribute__ ((unused)), int y  __attribute__ ((unused)))
			
 
				 {
			
 
				-	switch (key) {
			
 
				+	switch (key)
			
 
				+	{
			
 
				 		case 'q':
			
 
				 			exit(0);
			
 
				 		default:
			
--- a/examples/heat/lu_kernels_model.c
+++ b/examples/heat/lu_kernels_model.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -214,9 +214,11 @@ double task_22_cost_cpu(struct starpu_buffer_descr *descr)
 
				 	return PERTURBATE(cost);
			
 
				 }
			
 
				 
			
 
				-struct starpu_perfmodel model_11 = {
			
 
				+struct starpu_perfmodel model_11 =
			
 
				+{
			
 
				 	.cost_model = task_11_cost,
			
 
				-	.per_arch = {
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_11_cost_cpu },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_11_cost_cuda }
			
 
				 	},
			
@@ -230,9 +232,11 @@ struct starpu_perfmodel model_11 = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_perfmodel model_12 = {
			
 
				+struct starpu_perfmodel model_12 =
			
 
				+{
			
 
				 	.cost_model = task_12_cost,
			
 
				-	.per_arch = {
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_12_cost_cpu },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_12_cost_cuda }
			
 
				 	},
			
@@ -246,9 +250,11 @@ struct starpu_perfmodel model_12 = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_perfmodel model_21 = {
			
 
				+struct starpu_perfmodel model_21 =
			
 
				+{
			
 
				 	.cost_model = task_21_cost,
			
 
				-	.per_arch = {
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_21_cost_cpu },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_21_cost_cuda }
			
 
				 	},
			
@@ -262,9 +268,11 @@ struct starpu_perfmodel model_21 = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_perfmodel model_22 = {
			
 
				+struct starpu_perfmodel model_22 =
			
 
				+{
			
 
				 	.cost_model = task_22_cost,
			
 
				-	.per_arch = {
			
 
				+	.per_arch =
			
 
				+	{
			
 
				 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_22_cost_cpu },
			
 
				 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_22_cost_cuda }
			
 
				 	},
			
--- a/examples/incrementer/incrementer.c
+++ b/examples/incrementer/incrementer.c
@@ -109,7 +109,8 @@ int main(int argc, char **argv)
 
				 	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0],
			
 
				                 float_array[1], float_array[2], float_array[3]);
			
 
				 
			
 
				-	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) {
			
 
				+	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3])
			
 
				+	{
			
 
				 		FPRINTF(stderr, "Incorrect result\n");
			
 
				 		ret = 1;
			
 
				 	}
			
--- a/examples/lu/lu_example.c
+++ b/examples/lu/lu_example.c
@@ -46,41 +46,51 @@ TYPE **A_blocks;
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-size") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-size") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			size = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-check") == 0) {
			
 
				+		if (strcmp(argv[i], "-check") == 0)
			
 
				+		{
			
 
				 			check = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-piv") == 0) {
			
 
				+		if (strcmp(argv[i], "-piv") == 0)
			
 
				+		{
			
 
				 			pivot = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-no-stride") == 0) {
			
 
				+		if (strcmp(argv[i], "-no-stride") == 0)
			
 
				+		{
			
 
				 			no_stride = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-profile") == 0) {
			
 
				+		if (strcmp(argv[i], "-profile") == 0)
			
 
				+		{
			
 
				 			profile = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-bound") == 0) {
			
 
				+		if (strcmp(argv[i], "-bound") == 0)
			
 
				+		{
			
 
				 			bound = 1;
			
 
				 		}
			
 
				-		if (strcmp(argv[i], "-bounddeps") == 0) {
			
 
				+		if (strcmp(argv[i], "-bounddeps") == 0)
			
 
				+		{
			
 
				 			bound = 1;
			
 
				 			bounddeps = 1;
			
 
				 		}
			
 
				-		if (strcmp(argv[i], "-bounddepsprio") == 0) {
			
 
				+		if (strcmp(argv[i], "-bounddepsprio") == 0)
			
 
				+		{
			
 
				 			bound = 1;
			
 
				 			bounddeps = 1;
			
 
				 			boundprio = 1;
			
@@ -344,14 +354,18 @@ int main(int argc, char **argv)
 
				 		starpu_bus_profiling_helper_display_summary();
			
 
				 	}
			
 
				 
			
 
				-	if (bound) {
			
 
				+	if (bound)
			
 
				+	{
			
 
				 		double min;
			
 
				 		starpu_bound_stop();
			
 
				-		if (bounddeps) {
			
 
				+		if (bounddeps)
			
 
				+		{
			
 
				 			FILE *f = fopen("lu.pl", "w");
			
 
				 			starpu_bound_print_lp(f);
			
 
				 			FPRINTF(stderr,"system printed to lu.pl\n");
			
 
				-		} else {
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_bound_compute(&min, NULL, 0);
			
 
				 			if (min != 0.)
			
 
				 				FPRINTF(stderr, "theoretical min: %f ms\n", min);
			
--- a/examples/lu/xlu.c
+++ b/examples/lu/xlu.c
@@ -65,7 +65,8 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
			
 
				 	}
			
 
				 
			
@@ -86,15 +87,18 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio && (j == k+1)) {
			
 
				+	if (!no_prio && (j == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
@@ -113,15 +117,18 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 
				 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio && (i == k+1)) {
			
 
				+	if (!no_prio && (i == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, i), 2, TAG11(k), TAG22(k-1, i, k));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG21(k, i), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
@@ -144,15 +151,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 
				 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
			
 
				+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
			
 
				 	}
			
 
				 
			
@@ -178,10 +188,12 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
				 		struct starpu_task *task = create_task_11(dataA, k);
			
 
				 
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_task_submit(task);
			
 
				 		}
			
 
				 		
			
@@ -236,12 +248,14 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
				 	/* We already enforce deps by hand */
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
--- a/examples/lu/xlu.h
+++ b/examples/lu/xlu.h
@@ -106,7 +106,8 @@ extern struct starpu_perfmodel model_12;
 
				 extern struct starpu_perfmodel model_21;
			
 
				 extern struct starpu_perfmodel model_22;
			
 
				 
			
 
				-struct piv_s {
			
 
				+struct piv_s
			
 
				+{
			
 
				 	unsigned *piv; /* complete pivot array */
			
 
				 	unsigned first; /* first element */
			
 
				 	unsigned last; /* last element */
			
--- a/examples/lu/xlu_implicit.c
+++ b/examples/lu/xlu_implicit.c
@@ -143,12 +143,14 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 	
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
--- a/examples/lu/xlu_implicit_pivot.c
+++ b/examples/lu/xlu_implicit_pivot.c
@@ -189,12 +189,14 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
				 	 * one block is now determined by 2 unsigned (i,j) */
			
 
				 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
--- a/examples/lu/xlu_kernels.c
+++ b/examples/lu/xlu_kernels.c
@@ -51,7 +51,8 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 
				 	cudaError_t cures;
			
 
				 #endif
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			CPU_GEMM("N", "N", dy, dx, dz, 
			
 
				 				(TYPE)-1.0, right, ld21, left, ld12,
			
@@ -59,7 +60,8 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 
				 			break;
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case 1: {
			
 
				+		case 1:
			
 
				+		{
			
 
				 			CUBLAS_GEMM('n', 'n', dx, dy, dz,
			
 
				 				*(CUBLAS_TYPE*)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12,
			
 
				 				*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE *)center, ld22);
			
@@ -92,7 +94,8 @@ void STARPU_LU(cublas_u22)(void *descr[], void *_args)
 
				 }
			
 
				 #endif /* STARPU_USE_CUDA */
			
 
				 
			
 
				-static struct starpu_perfmodel STARPU_LU(model_22) = {
			
 
				+static struct starpu_perfmodel STARPU_LU(model_22) =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
			
@@ -103,7 +106,8 @@ static struct starpu_perfmodel STARPU_LU(model_22) = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl22 = {
			
 
				+struct starpu_codelet cl22 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {STARPU_LU(cpu_u22), NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -138,7 +142,8 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 
				 #endif
			
 
				 
			
 
				 	/* solve L11 U12 = A12 (find U12) */
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			CPU_TRSM("L", "L", "N", "N", nx12, ny12,
			
 
				 					(TYPE)1.0, sub11, ld11, sub12, ld12);
			
@@ -175,7 +180,8 @@ void STARPU_LU(cublas_u12)(void *descr[], void *_args)
 
				 }
			
 
				 #endif /* STARPU_USE_CUDA */
			
 
				 
			
 
				-static struct starpu_perfmodel STARPU_LU(model_12) = {
			
 
				+static struct starpu_perfmodel STARPU_LU(model_12) =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
			
@@ -186,7 +192,8 @@ static struct starpu_perfmodel STARPU_LU(model_12) = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl12 = {
			
 
				+struct starpu_codelet cl12 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {STARPU_LU(cpu_u12), NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -219,7 +226,8 @@ static inline void STARPU_LU(common_u21)(void *descr[],
 
				 	cublasStatus status;
			
 
				 #endif
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			CPU_TRSM("R", "U", "N", "U", nx21, ny21,
			
 
				 					(TYPE)1.0, sub11, ld11, sub21, ld21);
			
@@ -255,7 +263,8 @@ void STARPU_LU(cublas_u21)(void *descr[], void *_args)
 
				 }
			
 
				 #endif 
			
 
				 
			
 
				-static struct starpu_perfmodel STARPU_LU(model_21) = {
			
 
				+static struct starpu_perfmodel STARPU_LU(model_21) =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
			
@@ -266,7 +275,8 @@ static struct starpu_perfmodel STARPU_LU(model_21) = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl21 = {
			
 
				+struct starpu_codelet cl21 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {STARPU_LU(cpu_u21), NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -292,7 +302,8 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 
				 
			
 
				 	unsigned long z;
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			for (z = 0; z < nx; z++)
			
 
				 			{
			
@@ -350,7 +361,8 @@ void STARPU_LU(cublas_u11)(void *descr[], void *_args)
 
				 }
			
 
				 #endif /* STARPU_USE_CUDA */
			
 
				 
			
 
				-static struct starpu_perfmodel STARPU_LU(model_11) = {
			
 
				+static struct starpu_perfmodel STARPU_LU(model_11) =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
			
@@ -361,7 +373,8 @@ static struct starpu_perfmodel STARPU_LU(model_11) = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl11 = {
			
 
				+struct starpu_codelet cl11 =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {STARPU_LU(cpu_u11), NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -391,7 +404,8 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 
				 	unsigned *ipiv = piv->piv;
			
 
				 	unsigned first = piv->first;
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			for (z = 0; z < nx; z++)
			
 
				 			{
			
@@ -486,7 +500,8 @@ void STARPU_LU(cublas_u11_pivot)(void *descr[], void *_args)
 
				 }
			
 
				 #endif /* STARPU_USE_CUDA */
			
 
				 
			
 
				-static struct starpu_perfmodel STARPU_LU(model_11_pivot) = {
			
 
				+static struct starpu_perfmodel STARPU_LU(model_11_pivot) =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
			
@@ -497,7 +512,8 @@ static struct starpu_perfmodel STARPU_LU(model_11_pivot) = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl11_pivot = {
			
 
				+struct starpu_codelet cl11_pivot =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {STARPU_LU(cpu_u11_pivot), NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -526,7 +542,8 @@ static inline void STARPU_LU(common_pivot)(void *descr[],
 
				 	unsigned *ipiv = piv->piv;
			
 
				 	unsigned first = piv->first;
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			for (row = 0; row < nx; row++)
			
 
				 			{
			
@@ -571,7 +588,8 @@ void STARPU_LU(cublas_pivot)(void *descr[], void *_args)
 
				 
			
 
				 #endif /* STARPU_USE_CUDA */
			
 
				 
			
 
				-static struct starpu_perfmodel STARPU_LU(model_pivot) = {
			
 
				+static struct starpu_perfmodel STARPU_LU(model_pivot) =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 #ifdef STARPU_ATLAS
			
 
				 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
			
@@ -582,7 +600,8 @@ static struct starpu_perfmodel STARPU_LU(model_pivot) = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl_pivot = {
			
 
				+struct starpu_codelet cl_pivot =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {STARPU_LU(cpu_pivot), NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
--- a/examples/lu/xlu_pivot.c
+++ b/examples/lu/xlu_pivot.c
@@ -66,15 +66,18 @@ static void create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k == 0) {
			
 
				+	if (k == 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(PIVOT(k, i), 1, TAG11(k));
			
 
				 	}
			
 
				-	else 
			
 
				+	else
			
 
				 	{
			
 
				-		if (i > k) {
			
 
				+		if (i > k)
			
 
				+		{
			
 
				 			starpu_tag_declare_deps(PIVOT(k, i), 2, TAG11(k), TAG22(k-1, i, k));
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t));
			
 
				 			
			
 
				 			tags[0] = TAG11(k);
			
@@ -111,7 +114,8 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, un
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
			
 
				 	}
			
 
				 
			
@@ -135,7 +139,8 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 
				 	task->buffers[1].handle = get_block(dataAp, nblocks, j, k);
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio && (j == k+1)) {
			
 
				+	if (!no_prio && (j == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
@@ -143,10 +148,12 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 
				 #if 0
			
 
				 	starpu_tag_declare_deps(TAG12(k, i), 1, PIVOT(k, i));
			
 
				 #endif
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
			
 
				 	}
			
 
				 
			
@@ -166,7 +173,8 @@ static void create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 
				 	task->buffers[1].handle = get_block(dataAp, nblocks, k, i); 
			
 
				 	task->buffers[1].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio && (i == k+1)) {
			
 
				+	if (!no_prio && (i == k+1))
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
@@ -197,15 +205,18 @@ static void create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 
				 	task->buffers[2].handle = get_block(dataAp, nblocks, j, i);  /* produced by TAG22(k-1, i, j) */
			
 
				 	task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
			
 
				+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
			
 
				+	{
			
 
				 		task->priority = STARPU_MAX_PRIO;
			
 
				 	}
			
 
				 
			
 
				 	/* enforce dependencies ... */
			
 
				-	if (k > 0) {
			
 
				+	if (k > 0)
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
			
 
				 	}
			
 
				 
			
@@ -234,10 +245,12 @@ static double dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
				 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
			
 
				 
			
 
				 		/* we defer the launch of the first task */
			
 
				-		if (k == 0) {
			
 
				+		if (k == 0)
			
 
				+		{
			
 
				 			entry_task = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_task_submit(task);
			
 
				 		}
			
 
				 
			
@@ -314,12 +327,14 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
				 	/* We already enforce deps by hand */
			
 
				 	starpu_data_set_sequential_consistency_flag(dataA, 0);
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_vertical_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_data_filter f2 = {
			
 
				+	struct starpu_data_filter f2 =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func,
			
 
				 		.nchildren = nblocks
			
 
				 	};
			
--- a/examples/mandelbrot/mandelbrot.c
+++ b/examples/mandelbrot/mandelbrot.c
@@ -155,7 +155,8 @@ static int handle_events(void)
 
				 			topY -= 0.25*heightY;
			
 
				 			bottomY -= 0.25*heightY;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			double widthX = rightX - leftX;
			
 
				 			double heightY = topY - bottomY;
			
 
				 
			
@@ -177,13 +178,15 @@ static int handle_events(void)
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		if (text[0]=='q') {
			
 
				+		if (text[0]=='q')
			
 
				+		{
			
 
				 			return -1;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (event.type==ButtonPress) {
			
 
				-	/* tell where the mouse Button was Pressed */
			
 
				+	if (event.type==ButtonPress)
			
 
				+	{
			
 
				+		/* tell where the mouse Button was Pressed */
			
 
				 		printf("You pressed a button at (%i,%i)\n",
			
 
				 			event.xbutton.x,event.xbutton.y);
			
 
				 	}
			
@@ -371,7 +374,8 @@ static void compute_block_spmd(void *descr[], void *cl_arg)
 
				 
			
 
				 
			
 
				 
			
 
				-static struct starpu_codelet spmd_mandelbrot_cl = {
			
 
				+static struct starpu_codelet spmd_mandelbrot_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_OPENCL,
			
 
				 	.type = STARPU_SPMD,
			
 
				 	.max_parallelism = INT_MAX,
			
@@ -382,7 +386,8 @@ static struct starpu_codelet spmd_mandelbrot_cl = {
 
				 	.nbuffers = 1
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet mandelbrot_cl = {
			
 
				+static struct starpu_codelet mandelbrot_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_OPENCL,
			
 
				 	.type = STARPU_SEQ,
			
 
				 	.cpu_funcs = {compute_block, NULL},
			
@@ -395,38 +400,46 @@ static struct starpu_codelet mandelbrot_cl = {
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
			
 
				 			exit(-1);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-width") == 0) {
			
 
				+		if (strcmp(argv[i], "-width") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			width = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-height") == 0) {
			
 
				+		if (strcmp(argv[i], "-height") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			height = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-niter") == 0) {
			
 
				+		if (strcmp(argv[i], "-niter") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			niter = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-pos") == 0) {
			
 
				+		if (strcmp(argv[i], "-pos") == 0)
			
 
				+		{
			
 
				 			int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
			
 
				 			assert(ret == 4);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-demo") == 0) {
			
 
				+		if (strcmp(argv[i], "-demo") == 0)
			
 
				+		{
			
 
				 			demo = 1;
			
 
				 			leftX = -50.22749575062760;
			
 
				 			rightX = 48.73874621262927;
			
@@ -435,18 +448,21 @@ static void parse_args(int argc, char **argv)
 
				 
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-demozoom") == 0) {
			
 
				+		if (strcmp(argv[i], "-demozoom") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			demozoom = strtof(argv[++i], &argptr);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-no-x11") == 0) {
			
 
				+		if (strcmp(argv[i], "-no-x11") == 0)
			
 
				+		{
			
 
				 #ifdef STARPU_HAVE_X11
			
 
				 			use_x11 = 0;
			
 
				 #endif
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-spmd") == 0) {
			
 
				+		if (strcmp(argv[i], "-spmd") == 0)
			
 
				+		{
			
 
				 			use_spmd = 1;
			
 
				 		}
			
 
				 	}
			
@@ -567,7 +583,8 @@ int main(int argc, char **argv)
 
				 				iter = 0;
			
 
				 				gettimeofday(&start, NULL);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				leftX += (zoom_factor/2)*widthX;
			
 
				 				rightX -= (zoom_factor/2)*widthX;
			
 
				 				topY -= (zoom_factor/2)*heightY;
			
--- a/examples/matvecmult/matvecmult.c
+++ b/examples/matvecmult/matvecmult.c
@@ -64,27 +64,34 @@ void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-void fillArray(float* pfData, int iSize) {
			
 
				+void fillArray(float* pfData, int iSize)
			
 
				+{
			
 
				     int i;
			
 
				     const float fScale = 1.0f / (float)RAND_MAX;
			
 
				-    for (i = 0; i < iSize; ++i) {
			
 
				+    for (i = 0; i < iSize; ++i)
			
 
				+    {
			
 
				             pfData[i] = fScale * rand();
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-void printArray(float* pfData, int iSize) {
			
 
				+void printArray(float* pfData, int iSize)
			
 
				+{
			
 
				     int i;
			
 
				-    for (i = 0; i < iSize; ++i) {
			
 
				+    for (i = 0; i < iSize; ++i)
			
 
				+    {
			
 
				             FPRINTF(stderr, "%f ", pfData[i]);
			
 
				     }
			
 
				     FPRINTF(stderr, "\n");
			
 
				 }
			
 
				 
			
 
				-void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult) {
			
 
				+void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult)
			
 
				+{
			
 
				     int i, j;
			
 
				-    for (i = 0; i < height; ++i) {
			
 
				+    for (i = 0; i < height; ++i)
			
 
				+    {
			
 
				         double sum = 0;
			
 
				-        for (j = 0; j < width; ++j) {
			
 
				+        for (j = 0; j < width; ++j)
			
 
				+	{
			
 
				             double a = matrix[i * width + j];
			
 
				             double b = vector[j];
			
 
				             sum += a * b;
			
@@ -93,12 +100,14 @@ void matVecMult(const float *matrix, const float *vector, int width, int height,
 
				     }
			
 
				 }
			
 
				 
			
 
				-int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon) {
			
 
				+int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon)
			
 
				+{
			
 
				     float error = 0;
			
 
				     float ref = 0;
			
 
				     unsigned int i;
			
 
				 
			
 
				-    for(i = 0; i < len; ++i) {
			
 
				+    for(i = 0; i < len; ++i)
			
 
				+    {
			
 
				         float diff = reference[i] - data[i];
			
 
				         error += diff * diff;
			
 
				         ref += reference[i] * reference[i];
			
@@ -117,7 +126,8 @@ int main(int argc, char **argv)
 
				 {
			
 
				 	struct starpu_codelet cl = {};
			
 
				 
			
 
				-	struct starpu_conf conf = {
			
 
				+	struct starpu_conf conf =
			
 
				+	{
			
 
				 		.ncpus = 0,
			
 
				 		.ncuda = 0,
			
 
				                 .nopencl = 1,
			
@@ -136,7 +146,8 @@ int main(int argc, char **argv)
 
				 	int ret, submit;
			
 
				 
			
 
				         ret = starpu_init(&conf);
			
 
				-	if (STARPU_UNLIKELY(ret == -ENODEV)) {
			
 
				+	if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				+	{
			
 
				                 FPRINTF(stderr, "This application requires an OpenCL worker.\n");
			
 
				 		starpu_shutdown();
			
 
				 		return 77;
			
@@ -186,10 +197,12 @@ int main(int argc, char **argv)
 
				         task->buffers[2].mode = STARPU_RW;
			
 
				 
			
 
				         submit = starpu_task_submit(task);
			
 
				-        if (STARPU_UNLIKELY(submit == -ENODEV)) {
			
 
				+        if (STARPU_UNLIKELY(submit == -ENODEV))
			
 
				+	{
			
 
				                 FPRINTF(stderr, "No worker may execute this task. This application requires an OpenCL worker.\n");
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		starpu_task_wait_for_all();
			
 
				 	}
			
 
				 
			
@@ -197,7 +210,8 @@ int main(int argc, char **argv)
 
				 	starpu_data_unregister(vector_handle);
			
 
				 	starpu_data_unregister(mult_handle);
			
 
				 
			
 
				-        if (STARPU_LIKELY(submit != -ENODEV)) {
			
 
				+        if (STARPU_LIKELY(submit != -ENODEV))
			
 
				+	{
			
 
				 		int res = compareL2fe(correctResult, mult, height, 1e-6f);
			
 
				 		FPRINTF(stdout, "TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!");
			
 
				 	}
			
--- a/examples/matvecmult/matvecmult_kernel.cl
+++ b/examples/matvecmult/matvecmult_kernel.cl
@@ -33,7 +33,8 @@ __kernel void matVecMult(
 
				 {
			
 
				         // Row index
			
 
				         uint y = get_global_id(0);
			
 
				-        if (y < height) {
			
 
				+        if (y < height)
			
 
				+	{
			
 
				                 // Row pointer
			
 
				                 const __global float* row = M + y * width;
			
 
				 
			
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -53,10 +53,12 @@ static void check_output(void)
 
				 	TYPE err;
			
 
				 	err = CPU_ASUM(xdim*ydim, C, 1);
			
 
				 
			
 
				-	if (err < xdim*ydim*0.001) {
			
 
				+	if (err < xdim*ydim*0.001)
			
 
				+	{
			
 
				 		FPRINTF(stderr, "Results are OK\n");
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		int max;
			
 
				 		max = CPU_IAMAX(xdim*ydim, C, 1);
			
 
				 
			
@@ -74,20 +76,26 @@ static void init_problem_data(void)
 
				 	starpu_malloc((void **)&C, xdim*ydim*sizeof(TYPE));
			
 
				 
			
 
				 	/* fill the A and B matrices */
			
 
				-	for (j=0; j < ydim; j++) {
			
 
				-		for (i=0; i < zdim; i++) {
			
 
				+	for (j=0; j < ydim; j++)
			
 
				+	{
			
 
				+		for (i=0; i < zdim; i++)
			
 
				+		{
			
 
				 			A[j+i*ydim] = (TYPE)(starpu_drand48());
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	for (j=0; j < zdim; j++) {
			
 
				-		for (i=0; i < xdim; i++) {
			
 
				+	for (j=0; j < zdim; j++)
			
 
				+	{
			
 
				+		for (i=0; i < xdim; i++)
			
 
				+		{
			
 
				 			B[j+i*zdim] = (TYPE)(starpu_drand48());
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	for (j=0; j < ydim; j++) {
			
 
				-		for (i=0; i < xdim; i++) {
			
 
				+	for (j=0; j < ydim; j++)
			
 
				+	{
			
 
				+		for (i=0; i < xdim; i++)
			
 
				+		{
			
 
				 			C[j+i*ydim] = (TYPE)(0);
			
 
				 		}
			
 
				 	}
			
@@ -132,7 +140,8 @@ static void mult_kernel_common(void *descr[], int type)
 
				 	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
			
 
				 	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);
			
 
				 
			
 
				-	if (type == STARPU_CPU) {
			
 
				+	if (type == STARPU_CPU)
			
 
				+	{
			
 
				 		int worker_size = starpu_combined_worker_get_size();
			
 
				 
			
 
				 		if (worker_size == 1)
			
@@ -140,7 +149,8 @@ static void mult_kernel_common(void *descr[], int type)
 
				 			/* Sequential CPU task */
			
 
				 			CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* Parallel CPU task */
			
 
				 			int rank = starpu_combined_worker_get_rank();
			
 
				 		
			
@@ -156,7 +166,8 @@ static void mult_kernel_common(void *descr[], int type)
 
				 		}
			
 
				 	}
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		CUBLAS_GEMM('n', 'n', nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB,
			
 
				 					     (TYPE)0.0, subC, ldC);
			
 
				 		cudaStreamSynchronize(starpu_cuda_get_local_stream());
			
@@ -176,12 +187,14 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 
				 	mult_kernel_common(descr, STARPU_CPU);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel starpu_gemm_model = {
			
 
				+static struct starpu_perfmodel starpu_gemm_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = STARPU_GEMM_STR(gemm)
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet cl = {
			
 
				+static struct starpu_codelet cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
			
 
				 	.max_parallelism = INT_MAX,
			
@@ -196,48 +209,58 @@ static struct starpu_codelet cl = {
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nslicesx = strtol(argv[++i], &argptr, 10);
			
 
				 			nslicesy = nslicesx;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocksx") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocksx") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nslicesx = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocksy") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocksy") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nslicesy = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-x") == 0) {
			
 
				+		if (strcmp(argv[i], "-x") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			xdim = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-y") == 0) {
			
 
				+		if (strcmp(argv[i], "-y") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			ydim = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-z") == 0) {
			
 
				+		if (strcmp(argv[i], "-z") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			zdim = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-iter") == 0) {
			
 
				+		if (strcmp(argv[i], "-iter") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			niter = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-check") == 0) {
			
 
				+		if (strcmp(argv[i], "-check") == 0)
			
 
				+		{
			
 
				 			check = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-spmd") == 0) {
			
 
				+		if (strcmp(argv[i], "-spmd") == 0)
			
 
				+		{
			
 
				 			cl.type = STARPU_SPMD;
			
 
				 		}
			
 
				 	}
			
--- a/examples/openmp/vector_scal.c
+++ b/examples/openmp/vector_scal.c
@@ -28,7 +28,8 @@
 
				 #define	NX	2048
			
 
				 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
			
 
				 
			
 
				-void scal_cpu_func(void *buffers[], void *_args) {
			
 
				+void scal_cpu_func(void *buffers[], void *_args)
			
 
				+{
			
 
				 	unsigned i;
			
 
				 	float *factor = _args;
			
 
				 	struct starpu_vector_interface *vector = buffers[0];
			
@@ -42,12 +43,14 @@ void scal_cpu_func(void *buffers[], void *_args) {
 
				 		val[i] *= *factor;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_perfmodel vector_scal_model = {
			
 
				+static struct starpu_perfmodel vector_scal_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "vector_scale_parallel"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet cl = {
			
 
				+static struct starpu_codelet cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.type = STARPU_FORKJOIN,
			
 
				 	.max_parallelism = INT_MAX,
			
--- a/examples/opt/pi/pi.c
+++ b/examples/opt/pi/pi.c
@@ -64,8 +64,10 @@ static void cpu_kernel(void *descr[], void *cl_arg)
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-ntasks") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-ntasks") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			ntasks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
@@ -101,19 +103,22 @@ int main(int argc, char **argv)
 
				 	 * accessed by the CPU later on */
			
 
				 	starpu_data_set_wt_mask(cnt_array_handle, (1<<0));
			
 
				 
			
 
				-	struct starpu_data_filter f = {
			
 
				+	struct starpu_data_filter f =
			
 
				+	{
			
 
				 		.filter_func = starpu_block_filter_func_vector,
			
 
				 		.nchildren = ntasks
			
 
				 	};
			
 
				 	
			
 
				 	starpu_data_partition(cnt_array_handle, &f);
			
 
				 
			
 
				-	static struct starpu_perfmodel model = {
			
 
				+	static struct starpu_perfmodel model =
			
 
				+	{
			
 
				 		.type = STARPU_HISTORY_BASED,
			
 
				 		.symbol = "monte_carlo_pi"
			
 
				 	};
			
 
				 
			
 
				-	struct starpu_codelet cl = {
			
 
				+	struct starpu_codelet cl =
			
 
				+	{
			
 
				 		.where = STARPU_CPU|STARPU_CUDA,
			
 
				 		.cpu_funcs = {cpu_kernel, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
--- a/examples/opt/pi/pi_redux.c
+++ b/examples/opt/pi/pi_redux.c
@@ -64,7 +64,8 @@ static void init_rng(void *arg __attribute__((unused)))
 
				 
			
 
				 	int workerid = starpu_worker_get_id();
			
 
				 
			
 
				-	switch (starpu_worker_get_type(workerid)) {
			
 
				+	switch (starpu_worker_get_type(workerid))
			
 
				+	{
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 			/* create a seed */
			
 
				 			starpu_srand48_r((long int)workerid, &randbuffer[PADDING*workerid]);
			
@@ -96,22 +97,27 @@ static void init_rng(void *arg __attribute__((unused)))
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-ntasks") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-ntasks") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			ntasks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-noredux") == 0) {
			
 
				+		if (strcmp(argv[i], "-noredux") == 0)
			
 
				+		{
			
 
				 			use_redux = 0;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-warmup") == 0) {
			
 
				+		if (strcmp(argv[i], "-warmup") == 0)
			
 
				+		{
			
 
				 			do_warmup = 1;
			
 
				 			ntasks_warmup = 8; /* arbitrary number of warmup tasks */
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			fprintf(stderr, "Usage: %s [-ntasks n] [-noredux] [-warmup] [-h]\n", argv[0]);
			
 
				 			exit(-1);
			
 
				 		}
			
@@ -183,7 +189,8 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet pi_cl = {
			
 
				+static struct starpu_codelet pi_cl =
			
 
				+{
			
 
				 	.where =
			
 
				 #ifdef STARPU_HAVE_CURAND
			
 
				 		STARPU_CUDA|
			
@@ -216,7 +223,8 @@ static void init_cuda_func(void *descr[], void *cl_arg)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet init_codelet = {
			
 
				+static struct starpu_codelet init_codelet =
			
 
				+{
			
 
				 	.where =
			
 
				 #ifdef STARPU_HAVE_CURAND
			
 
				 		STARPU_CUDA|
			
@@ -255,7 +263,8 @@ static void redux_cpu_func(void *descr[], void *cl_arg)
 
				 	*a = *a + *b;
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet redux_codelet = {
			
 
				+static struct starpu_codelet redux_codelet =
			
 
				+{
			
 
				 	.where =
			
 
				 #ifdef STARPU_HAVE_CURAND
			
 
				 		STARPU_CUDA|
			
--- a/examples/ppm_downscaler/ppm_downscaler.c
+++ b/examples/ppm_downscaler/ppm_downscaler.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -57,7 +57,8 @@ struct ppm_image *file_to_ppm(char *filename)
 
				 
			
 
				 	/* read the file's dimensions */
			
 
				 	ret = fscanf(file, "P6\n%d %d\n%d\n", &ppm->ncols, &ppm->nlines, &ppm->coldepth);
			
 
				-	if (ret != 3) {
			
 
				+	if (ret != 3)
			
 
				+	{
			
 
				 		fclose(file);
			
 
				 		fprintf(stderr, "file %s is not valid\n", filename);
			
 
				 		exit(-1);
			
@@ -105,7 +106,8 @@ char *filename_out = "serpents.small.ppm";
 
				 
			
 
				 void parse_args(int argc, char **argv)
			
 
				 {
			
 
				-	if (argc == 3) {
			
 
				+	if (argc == 3)
			
 
				+	{
			
 
				 		filename_in = argv[1];
			
 
				 		filename_out = argv[2];
			
 
				 	}
			
--- a/examples/ppm_downscaler/ppm_downscaler.h
+++ b/examples/ppm_downscaler/ppm_downscaler.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -16,13 +16,15 @@
 
				  */
			
 
				 
			
 
				 /* we make the asumption that there are 256 color levels at most */
			
 
				-struct ppm_color {
			
 
				+struct ppm_color
			
 
				+{
			
 
				 	unsigned char r;
			
 
				 	unsigned char g;
			
 
				 	unsigned char b;
			
 
				 };
			
 
				 
			
 
				-struct ppm_image {
			
 
				+struct ppm_image
			
 
				+{
			
 
				 	int nlines;
			
 
				 	int ncols;
			
 
				 	int coldepth;
			
--- a/examples/ppm_downscaler/yuv_downscaler.c
+++ b/examples/ppm_downscaler/yuv_downscaler.c
@@ -38,11 +38,13 @@ char filename_out[1024];
 
				 
			
 
				 void parse_args(int argc, char **argv)
			
 
				 {
			
 
				-	if (argc == 3) {
			
 
				+	if (argc == 3)
			
 
				+	{
			
 
				 		strcpy(filename_in, argv[1]);
			
 
				 		strcpy(filename_out, argv[2]);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		sprintf(filename_in, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_in_default);
			
 
				 		sprintf(filename_out, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_out_default);
			
 
				 	}
			
@@ -82,7 +84,8 @@ static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet ds_codelet = {
			
 
				+static struct starpu_codelet ds_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.cpu_funcs = {ds_kernel_cpu, NULL},
			
 
				 	.nbuffers = 2, /* input -> output */
			
@@ -90,12 +93,14 @@ static struct starpu_codelet ds_codelet = {
 
				 };
			
 
				 
			
 
				 /* each block contains BLOCK_HEIGHT consecutive lines */
			
 
				-static struct starpu_data_filter filter_y = {
			
 
				+static struct starpu_data_filter filter_y =
			
 
				+{
			
 
				 	.filter_func = starpu_block_filter_func,
			
 
				 	.nchildren= HEIGHT/BLOCK_HEIGHT
			
 
				 };
			
 
				-	
			
 
				-static struct starpu_data_filter filter_uv = {
			
 
				+
			
 
				+static struct starpu_data_filter filter_uv =
			
 
				+{
			
 
				 	.filter_func = starpu_block_filter_func,
			
 
				 	.nchildren = (HEIGHT/2)/BLOCK_HEIGHT
			
 
				 };
			
--- a/examples/ppm_downscaler/yuv_downscaler.h
+++ b/examples/ppm_downscaler/yuv_downscaler.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -27,13 +27,15 @@
 
				 
			
 
				 #include <stdint.h>
			
 
				 
			
 
				-struct yuv_frame {
			
 
				+struct yuv_frame
			
 
				+{
			
 
				 	uint8_t y[WIDTH*HEIGHT];
			
 
				 	uint8_t u[(WIDTH*HEIGHT)/4];
			
 
				 	uint8_t v[(WIDTH*HEIGHT)/4];
			
 
				 };
			
 
				 
			
 
				-struct yuv_new_frame {
			
 
				+struct yuv_new_frame
			
 
				+{
			
 
				 	uint8_t y[NEW_WIDTH*NEW_HEIGHT];
			
 
				 	uint8_t u[(NEW_WIDTH*NEW_HEIGHT)/4];
			
 
				 	uint8_t v[(NEW_WIDTH*NEW_HEIGHT)/4];
			
--- a/examples/reductions/dot_product.c
+++ b/examples/reductions/dot_product.c
@@ -73,7 +73,8 @@ void init_cuda_func(void *descr[], void *cl_arg)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet init_codelet = {
			
 
				+static struct starpu_codelet init_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.can_execute = can_execute,
			
 
				 	.cpu_funcs = {init_cpu_func, NULL},
			
@@ -99,7 +100,8 @@ void redux_cpu_func(void *descr[], void *cl_arg)
 
				 extern void redux_cuda_func(void *descr[], void *_args);
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet redux_codelet = {
			
 
				+static struct starpu_codelet redux_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.can_execute = can_execute,
			
 
				 	.cpu_funcs = {redux_cpu_func, NULL},
			
@@ -161,7 +163,8 @@ void dot_cuda_func(void *descr[], void *cl_arg)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_codelet dot_codelet = {
			
 
				+static struct starpu_codelet dot_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.can_execute = can_execute,
			
 
				 	.cpu_funcs = {dot_cpu_func, NULL},
			
--- a/examples/reductions/minmax_reduction.c
+++ b/examples/reductions/minmax_reduction.c
@@ -50,7 +50,8 @@ static void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
 
				 	array[1] = TYPE_MIN;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet minmax_init_codelet = {
			
 
				+static struct starpu_codelet minmax_init_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.cpu_funcs = {minmax_neutral_cpu_func, NULL},
			
 
				 	.nbuffers = 1
			
@@ -76,7 +77,8 @@ void minmax_redux_cpu_func(void *descr[], void *cl_arg)
 
				 	array_dst[1] = STARPU_MAX(max_dst, max_src);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet minmax_redux_codelet = {
			
 
				+static struct starpu_codelet minmax_redux_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.cpu_funcs = {minmax_redux_cpu_func, NULL},
			
 
				 	.nbuffers = 2
			
@@ -110,7 +112,8 @@ void minmax_cpu_func(void *descr[], void *cl_arg)
 
				 	minmax[1] = local_max;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet minmax_codelet = {
			
 
				+static struct starpu_codelet minmax_codelet =
			
 
				+{
			
 
				 	.where = STARPU_CPU,
			
 
				 	.cpu_funcs = {minmax_cpu_func, NULL},
			
 
				 	.nbuffers = 2
			
--- a/examples/scheduler/dummy_sched.c
+++ b/examples/scheduler/dummy_sched.c
@@ -77,7 +77,8 @@ static struct starpu_task *pop_task_dummy(void)
 
				 	return starpu_task_list_pop_back(&sched_list);
			
 
				 }
			
 
				 
			
 
				-static struct starpu_sched_policy dummy_sched_policy = {
			
 
				+static struct starpu_sched_policy dummy_sched_policy =
			
 
				+{
			
 
				 	.init_sched = init_dummy_sched,
			
 
				 	.deinit_sched = deinit_dummy_sched,
			
 
				 	.push_task = push_task_dummy,
			
@@ -88,7 +89,8 @@ static struct starpu_sched_policy dummy_sched_policy = {
 
				 	.policy_description = "dummy scheduling strategy"
			
 
				 };
			
 
				 
			
 
				-static struct starpu_conf conf = {
			
 
				+static struct starpu_conf conf =
			
 
				+{
			
 
				 	.sched_policy_name = NULL,
			
 
				 	.sched_policy = &dummy_sched_policy,
			
 
				 	.ncpus = -1,
			
@@ -105,7 +107,7 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
				 {
			
 
				 }
			
 
				 
			
 
				-static struct starpu_codelet dummy_codelet = 
			
 
				+static struct starpu_codelet dummy_codelet =
			
 
				 {
			
 
				 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
			
 
				 	.cpu_funcs = {dummy_func, NULL},
			
--- a/examples/spmv/dw_block_spmv.c
+++ b/examples/spmv/dw_block_spmv.c
@@ -141,7 +141,8 @@ void call_filters(void)
 
				 #define NSPMV	32
			
 
				 unsigned totaltasks;
			
 
				 
			
 
				-struct starpu_codelet cl = {
			
 
				+struct starpu_codelet cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = { cpu_block_spmv, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -224,7 +225,8 @@ void launch_spmv_codelets(void)
 
				 
			
 
				 					is_entry_tab[taskid] = 0;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					/* this is an entry task */
			
 
				 					is_entry_tab[taskid] = 1;
			
 
				 				}
			
@@ -241,7 +243,8 @@ void launch_spmv_codelets(void)
 
				 	unsigned task;
			
 
				 	for (task = 0; task < totaltasks; task++)
			
 
				 	{
			
 
				-		if (is_entry_tab[task]) {
			
 
				+		if (is_entry_tab[task])
			
 
				+		{
			
 
				 			nchains++;
			
 
				 		}
			
 
				 
			
--- a/examples/spmv/dw_block_spmv_kernels.c
+++ b/examples/spmv/dw_block_spmv_kernels.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -33,7 +33,8 @@ static inline void common_block_spmv(void *descr[], int s, __attribute__((unused
 
				 
			
 
				 	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
			
 
				 
			
 
				-	switch (s) {
			
 
				+	switch (s)
			
 
				+	{
			
 
				 		case 0:
			
 
				 			cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1);
			
 
				 			break;
			
--- a/examples/spmv/matrix_market/mm_to_bcsr.c
+++ b/examples/spmv/matrix_market/mm_to_bcsr.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -22,8 +22,10 @@ static void print_block(tmp_block_t *block, unsigned r, unsigned c)
 
				 	printf(" **** block %d %d **** \n", block->i, block->j);
			
 
				 
			
 
				 	unsigned i, j;
			
 
				-	for (j = 0; j < r; j++) {
			
 
				-		for (i = 0; i < c; i++) {
			
 
				+	for (j = 0; j < r; j++)
			
 
				+	{
			
 
				+		for (i = 0; i < c; i++)
			
 
				+		{
			
 
				 			printf("%2.2f\t", block->val[i + j*c]);
			
 
				 		}
			
 
				 		printf("\n");
			
@@ -34,11 +36,12 @@ static void print_all_blocks(tmp_block_t *block_list, unsigned r, unsigned c)
 
				 {
			
 
				 	tmp_block_t *current_block = block_list;
			
 
				 
			
 
				-	while(current_block) {
			
 
				+	while(current_block)
			
 
				+	{
			
 
				 		print_block(current_block, r, c);
			
 
				 
			
 
				 		current_block = current_block->next;
			
 
				-	};
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void print_bcsr(bcsr_t *bcsr)
			
@@ -54,10 +57,11 @@ static unsigned count_blocks(tmp_block_t *block_list)
 
				 	unsigned count = 0;
			
 
				 	tmp_block_t *current_block = block_list;
			
 
				 
			
 
				-	while(current_block) {
			
 
				+	while(current_block)
			
 
				+	{
			
 
				 		count++;
			
 
				 		current_block = current_block->next;
			
 
				-	};
			
 
				+	}
			
 
				 
			
 
				 	return count;
			
 
				 }
			
@@ -67,12 +71,13 @@ static unsigned count_row_blocks(tmp_block_t *block_list)
 
				 	unsigned maxrow = 0;
			
 
				 	tmp_block_t *current_block = block_list;
			
 
				 
			
 
				-	while(current_block) {
			
 
				+	while(current_block)
			
 
				+	{
			
 
				 		if (current_block->j > maxrow)
			
 
				 			maxrow = current_block->j;
			
 
				 
			
 
				 		current_block = current_block->next;
			
 
				-	};
			
 
				+	}
			
 
				 
			
 
				 	return (maxrow+1);
			
 
				 }
			
@@ -86,7 +91,8 @@ static tmp_block_t *search_block(tmp_block_t *block_list, unsigned i, unsigned j
 
				 	tmp_block_t *current_block = block_list;
			
 
				 	/* printf("search %d %d\n", i, j); */
			
 
				 
			
 
				-	while (current_block) {
			
 
				+	while (current_block)
			
 
				+	{
			
 
				 		if ((current_block->i == i) && (current_block->j == j)) 
			
 
				 		{
			
 
				 			/* we found the block */
			
@@ -143,15 +149,18 @@ static void insert_block(tmp_block_t *block, tmp_block_t **block_list, unsigned
 
				 	/* first find an element that is bigger, then insert the block just before it */
			
 
				 	tmp_block_t *current_block = *block_list;
			
 
				 
			
 
				-	if (!current_block) {
			
 
				+	if (!current_block)
			
 
				+	{
			
 
				 		/* list was empty */
			
 
				 		*block_list = block;
			
 
				 		block->next = NULL;
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	while (current_block) {
			
 
				-		if (next_block_is_bigger(current_block, i, j)) {
			
 
				+	while (current_block)
			
 
				+	{
			
 
				+		if (next_block_is_bigger(current_block, i, j))
			
 
				+		{
			
 
				 			/* insert block here */
			
 
				 			block->next = current_block->next;
			
 
				 			current_block->next = block;
			
@@ -177,7 +186,8 @@ static void insert_elem(tmp_block_t **block_list, unsigned abs_i, unsigned abs_j
 
				 
			
 
				 	block = search_block(*block_list, i, j);
			
 
				 
			
 
				-	if (!block) {
			
 
				+	if (!block)
			
 
				+	{
			
 
				 		/* the block does not exist yet */
			
 
				 		/* create it */
			
 
				 		block = create_block(c, r);
			
@@ -225,7 +235,8 @@ static void fill_bcsr(tmp_block_t *block_list, unsigned c, unsigned r, bcsr_t *b
 
				 
			
 
				 	tmp_block_t *current_block = block_list;
			
 
				 
			
 
				-	while(current_block) {
			
 
				+	while(current_block)
			
 
				+	{
			
 
				 		/* copy the val from the block to the contiguous area in the BCSR */
			
 
				 		memcpy(&bcsr->val[current_offset], current_block->val, block_size);
			
 
				 
			
@@ -314,9 +325,9 @@ bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r)
 
				 		exit(1);
			
 
				 
			
 
				 	if (mm_read_banner(f, &matcode) != 0)
			
 
				-	{                                                       	
			
 
				+	{
			
 
				 		printf("Could not process Matrix Market banner.\n");
			
 
				-		exit(1);                                            	
			
 
				+		exit(1);
			
 
				 	}
			
 
				 
			
 
				 	/*  This is how one can screen matrix types if their application */
			
--- a/examples/spmv/matrix_market/mm_to_bcsr.h
+++ b/examples/spmv/matrix_market/mm_to_bcsr.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -22,7 +22,8 @@
 
				 /* convert a matrix stored in a file with the matrix market format into the 
			
 
				  * BCSR format */
			
 
				 
			
 
				-typedef struct tmp_block {
			
 
				+typedef struct tmp_block
			
 
				+{
			
 
				 	/* we have a linked list of blocks */
			
 
				 	struct tmp_block *next;
			
 
				 
			
@@ -33,7 +34,8 @@ typedef struct tmp_block {
 
				 
			
 
				 } tmp_block_t;
			
 
				 
			
 
				-typedef struct {
			
 
				+typedef struct
			
 
				+{
			
 
				 	unsigned r,c;
			
 
				 	unsigned nnz_blocks;
			
 
				 	unsigned nrows_blocks;
			
--- a/examples/spmv/spmv.c
+++ b/examples/spmv/spmv.c
@@ -27,13 +27,16 @@ starpu_data_handle_t vector_in, vector_out;
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-size") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-size") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			size = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nblocks") == 0) {
			
 
				+		if (strcmp(argv[i], "-nblocks") == 0)
			
 
				+		{
			
 
				 			char *argptr;
			
 
				 			nblocks = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
@@ -66,7 +69,8 @@ static void csr_filter_func(void *father_interface, void *child_interface, struc
 
				 	csr_child->firstentry = local_firstentry;
			
 
				 	csr_child->elemsize = elemsize;
			
 
				 	
			
 
				-	if (csr_father->nzval) {
			
 
				+	if (csr_father->nzval)
			
 
				+	{
			
 
				 		csr_child->rowptr = &csr_father->rowptr[first_index];
			
 
				 		csr_child->colind = &csr_father->colind[local_firstentry];
			
 
				 		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
			
@@ -74,20 +78,23 @@ static void csr_filter_func(void *father_interface, void *child_interface, struc
 
				 }
			
 
				 
			
 
				 /* partition the CSR matrix along a block distribution */
			
 
				-static struct starpu_data_filter csr_f = {
			
 
				+static struct starpu_data_filter csr_f =
			
 
				+{
			
 
				 	.filter_func = csr_filter_func,
			
 
				 	/* This value is defined later on */
			
 
				 	.nchildren = -1,
			
 
				 	/* the children also use a csr interface */
			
 
				 };
			
 
				 
			
 
				-static struct starpu_data_filter vector_f = {
			
 
				+static struct starpu_data_filter vector_f =
			
 
				+{
			
 
				 	.filter_func = starpu_block_filter_func_vector,
			
 
				 	/* This value is defined later on */
			
 
				 	.nchildren = -1,
			
 
				 };
			
 
				 
			
 
				-static struct starpu_codelet spmv_cl = {
			
 
				+static struct starpu_codelet spmv_cl =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
			
 
				 	.cpu_funcs = {spmv_kernel_cpu, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -143,7 +150,8 @@ int main(int argc, char **argv)
 
				 	{
			
 
				 		rowptr[row] = pos;
			
 
				 
			
 
				-		if (row > 0) {
			
 
				+		if (row > 0)
			
 
				+		{
			
 
				 			nzval[pos] = 1.0f;
			
 
				 			colind[pos] = row-1;
			
 
				 			pos++;
			
@@ -153,7 +161,8 @@ int main(int argc, char **argv)
 
				 		colind[pos] = row;
			
 
				 		pos++;
			
 
				 
			
 
				-		if (row < size - 1) {
			
 
				+		if (row < size - 1)
			
 
				+		{
			
 
				 			nzval[pos] = 1.0f;
			
 
				 			colind[pos] = row+1;
			
 
				 			pos++;
			
--- a/examples/starpufft/cudax_kernels.cu
+++ b/examples/starpufft/cudax_kernels.cu
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -27,10 +27,13 @@
 
				 #define DISTRIB_1d(n, func,args) \
			
 
				 	unsigned threads_per_block = 128; \
			
 
				 \
			
 
				-	if (n < threads_per_block) { \
			
 
				+	if (n < threads_per_block) \
			
 
				+	{			   \
			
 
				 		dim3 dimGrid(n); \
			
 
				 		func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
			
 
				-	} else { \
			
 
				+	} 					\
			
 
				+	else 					\
			
 
				+	{				     \
			
 
				 		dim3 dimGrid(n / threads_per_block); \
			
 
				 		dim3 dimBlock(threads_per_block); \
			
 
				 		func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
			
@@ -81,21 +84,30 @@ STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsign
 
				 /* FIXME: introduce threads_per_dim_n / m instead */
			
 
				 #define DISTRIB_2d(n, m, func, args) \
			
 
				 	unsigned threads_per_dim = 16; \
			
 
				-	if (n < threads_per_dim) { \
			
 
				-		if (m < threads_per_dim) { \
			
 
				+	if (n < threads_per_dim) \
			
 
				+	{				   \
			
 
				+		if (m < threads_per_dim) \
			
 
				+		{			    \
			
 
				 			dim3 dimGrid(n, m); \
			
 
				 			func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
			
 
				-		} else { \
			
 
				+		} \
			
 
				+		else \
			
 
				+		{					      \
			
 
				 			dim3 dimGrid(1, m / threads_per_dim); \
			
 
				 			dim3 dimBlock(n, threads_per_dim); \
			
 
				 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
			
 
				 		} \
			
 
				-	} else {  \
			
 
				-		if (m < threads_per_dim) { \
			
 
				+	} \
			
 
				+	else \
			
 
				+	{				   \
			
 
				+		if (m < threads_per_dim) \
			
 
				+		{					      \
			
 
				 			dim3 dimGrid(n / threads_per_dim, 1); \
			
 
				 			dim3 dimBlock(threads_per_dim, m); \
			
 
				 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
			
 
				-		} else { \
			
 
				+		} \
			
 
				+		else \
			
 
				+		{							\
			
 
				 			dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \
			
 
				 			dim3 dimBlock(threads_per_dim, threads_per_dim); \
			
 
				 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
			
--- a/examples/starpufft/starpufftx.c
+++ b/examples/starpufft/starpufftx.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -34,7 +34,8 @@
 
				 #define _FFTW_FLAGS FFTW_ESTIMATE
			
 
				 
			
 
				 /* Steps for the parallel variant */
			
 
				-enum steps {
			
 
				+enum steps
			
 
				+{
			
 
				 	SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END
			
 
				 };
			
 
				 
			
@@ -49,7 +50,8 @@ enum steps {
 
				 
			
 
				 #define I_BITS STEP_SHIFT
			
 
				 
			
 
				-enum type {
			
 
				+enum type
			
 
				+{
			
 
				 	R2C,
			
 
				 	C2R,
			
 
				 	C2C
			
@@ -65,7 +67,8 @@ static struct timeval start, submit_tasks, end;
 
				  *
			
 
				  */
			
 
				 
			
 
				-struct STARPUFFT(plan) {
			
 
				+struct STARPUFFT(plan)
			
 
				+{
			
 
				 	int number;	/* uniquely identifies the plan, for starpu tags */
			
 
				 
			
 
				 	int *n;
			
@@ -84,7 +87,8 @@ struct STARPUFFT(plan) {
 
				 	starpu_data_handle_t roots_handle[2];
			
 
				 
			
 
				 	/* For each worker, we need some data */
			
 
				-	struct {
			
 
				+	struct
			
 
				+	{
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		/* CUFFT plans */
			
 
				 		cufftHandle plan1_cuda, plan2_cuda;
			
@@ -113,7 +117,8 @@ struct STARPUFFT(plan) {
 
				 	struct STARPUFFT(args) *fft1_args, *fft2_args;
			
 
				 };
			
 
				 
			
 
				-struct STARPUFFT(args) {
			
 
				+struct STARPUFFT(args)
			
 
				+{
			
 
				 	struct STARPUFFT(plan) *plan;
			
 
				 	int i, j, jj, kk, ll, *iv, *kkv;
			
 
				 };
			
@@ -123,7 +128,8 @@ check_dims(STARPUFFT(plan) plan)
 
				 {
			
 
				 	int dim;
			
 
				 	for (dim = 0; dim < plan->dim; dim++)
			
 
				-		if (plan->n[dim] & (plan->n[dim]-1)) {
			
 
				+		if (plan->n[dim] & (plan->n[dim]-1))
			
 
				+		{
			
 
				 			fprintf(stderr,"can't cope with non-power-of-2\n");
			
 
				 			STARPU_ABORT();
			
 
				 		}
			
@@ -135,7 +141,8 @@ compute_roots(STARPUFFT(plan) plan)
 
				 	int dim, k;
			
 
				 
			
 
				 	/* Compute the n-roots and m-roots of unity for twiddling */
			
 
				-	for (dim = 0; dim < plan->dim; dim++) {
			
 
				+	for (dim = 0; dim < plan->dim; dim++)
			
 
				+	{
			
 
				 		STARPUFFT(complex) exp = (plan->sign * 2. * 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim];
			
 
				 		plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots));
			
 
				 		for (k = 0; k < plan->n[dim]; k++)
			
@@ -143,7 +150,8 @@ compute_roots(STARPUFFT(plan) plan)
 
				 		starpu_vector_data_register(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		if (plan->n[dim] > 100000) {
			
 
				+		if (plan->n[dim] > 100000)
			
 
				+		{
			
 
				 			/* prefetch the big root array on GPUs */
			
 
				 			unsigned worker;
			
 
				 			unsigned nworkers = starpu_worker_get_count();
			
@@ -170,17 +178,21 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 
				 	plan->in = _in;
			
 
				 	plan->out = _out;
			
 
				 
			
 
				-	switch (plan->dim) {
			
 
				-		case 1: {
			
 
				-			switch (plan->type) {
			
 
				+	switch (plan->dim)
			
 
				+	{
			
 
				+		case 1:
			
 
				+		{
			
 
				+			switch (plan->type)
			
 
				+			{
			
 
				 			case C2C:
			
 
				 				starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				-if (!PARALLEL)
			
 
				-				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				-if (PARALLEL) {
			
 
				-				for (z = 0; z < plan->totsize1; z++)
			
 
				-					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
			
 
				-}
			
 
				+				if (!PARALLEL)
			
 
				+					starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				+				if (PARALLEL)
			
 
				+				{
			
 
				+					for (z = 0; z < plan->totsize1; z++)
			
 
				+						plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
			
 
				+				}
			
 
				 				task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle);
			
 
				 				break;
			
 
				 			default:
			
@@ -191,12 +203,13 @@ if (PARALLEL) {
 
				 		}
			
 
				 		case 2:
			
 
				 			starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				-if (!PARALLEL)
			
 
				-			starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				-if (PARALLEL) {
			
 
				-			for (z = 0; z < plan->totsize1; z++)
			
 
				-				plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
			
 
				-}
			
 
				+			if (!PARALLEL)
			
 
				+				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
			
 
				+			if (PARALLEL)
			
 
				+			{
			
 
				+				for (z = 0; z < plan->totsize1; z++)
			
 
				+					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
			
 
				+			}
			
 
				 			task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle);
			
 
				 			break;
			
 
				 		default:
			
@@ -211,10 +224,11 @@ STARPUFFT(cleanup)(STARPUFFT(plan) plan)
 
				 {
			
 
				 	if (plan->in_handle)
			
 
				 		starpu_data_unregister(plan->in_handle);
			
 
				-if (!PARALLEL) {
			
 
				-	if (plan->out_handle)
			
 
				-		starpu_data_unregister(plan->out_handle);
			
 
				-}
			
 
				+	if (!PARALLEL)
			
 
				+	{
			
 
				+		if (plan->out_handle)
			
 
				+			starpu_data_unregister(plan->out_handle);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 struct starpu_task *
			
@@ -253,16 +267,21 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 
				 {
			
 
				 	int workerid, dim, i;
			
 
				 
			
 
				-	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
			
 
				-		switch (starpu_worker_get_type(workerid)) {
			
 
				+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
			
 
				+	{
			
 
				+		switch (starpu_worker_get_type(workerid))
			
 
				+		{
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 #ifdef STARPU_HAVE_FFTW
			
 
				-if (PARALLEL) {
			
 
				-			_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
			
 
				-			_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
			
 
				-} else {
			
 
				-			_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
			
 
				-}
			
 
				+			if (PARALLEL)
			
 
				+			{
			
 
				+				_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
			
 
				+				_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
			
 
				+			}
			
 
				 #endif
			
 
				 			break;
			
 
				 		case STARPU_CUDA_WORKER:
			
@@ -276,45 +295,50 @@ if (PARALLEL) {
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-if (PARALLEL) {
			
 
				-	for (i = 0; i < plan->totsize1; i++) {
			
 
				-		starpu_data_unregister(plan->twisted1_handle[i]);
			
 
				-		free(plan->twist1_tasks[i]);
			
 
				-		starpu_data_unregister(plan->fft1_handle[i]);
			
 
				-		free(plan->fft1_tasks[i]);
			
 
				-	}
			
 
				+	if (PARALLEL)
			
 
				+	{
			
 
				+		for (i = 0; i < plan->totsize1; i++)
			
 
				+		{
			
 
				+			starpu_data_unregister(plan->twisted1_handle[i]);
			
 
				+			free(plan->twist1_tasks[i]);
			
 
				+			starpu_data_unregister(plan->fft1_handle[i]);
			
 
				+			free(plan->fft1_tasks[i]);
			
 
				+		}
			
 
				 
			
 
				-	free(plan->twisted1_handle);
			
 
				-	free(plan->twist1_tasks);
			
 
				-	free(plan->fft1_handle);
			
 
				-	free(plan->fft1_tasks);
			
 
				-	free(plan->fft1_args);
			
 
				+		free(plan->twisted1_handle);
			
 
				+		free(plan->twist1_tasks);
			
 
				+		free(plan->fft1_handle);
			
 
				+		free(plan->fft1_tasks);
			
 
				+		free(plan->fft1_args);
			
 
				 
			
 
				-	free(plan->join_task);
			
 
				+		free(plan->join_task);
			
 
				 
			
 
				-	for (i = 0; i < plan->totsize3; i++) {
			
 
				-		starpu_data_unregister(plan->twisted2_handle[i]);
			
 
				-		free(plan->twist2_tasks[i]);
			
 
				-		starpu_data_unregister(plan->fft2_handle[i]);
			
 
				-		free(plan->fft2_tasks[i]);
			
 
				-		free(plan->twist3_tasks[i]);
			
 
				-	}
			
 
				+		for (i = 0; i < plan->totsize3; i++)
			
 
				+		{
			
 
				+			starpu_data_unregister(plan->twisted2_handle[i]);
			
 
				+			free(plan->twist2_tasks[i]);
			
 
				+			starpu_data_unregister(plan->fft2_handle[i]);
			
 
				+			free(plan->fft2_tasks[i]);
			
 
				+			free(plan->twist3_tasks[i]);
			
 
				+		}
			
 
				 
			
 
				-	free(plan->end_task);
			
 
				+		free(plan->end_task);
			
 
				 
			
 
				-	free(plan->twisted2_handle);
			
 
				-	free(plan->twist2_tasks);
			
 
				-	free(plan->fft2_handle);
			
 
				-	free(plan->fft2_tasks);
			
 
				-	free(plan->twist3_tasks);
			
 
				-	free(plan->fft2_args);
			
 
				+		free(plan->twisted2_handle);
			
 
				+		free(plan->twist2_tasks);
			
 
				+		free(plan->fft2_handle);
			
 
				+		free(plan->fft2_tasks);
			
 
				+		free(plan->twist3_tasks);
			
 
				+		free(plan->fft2_args);
			
 
				 
			
 
				-	for (dim = 0; dim < plan->dim; dim++) {
			
 
				-		starpu_data_unregister(plan->roots_handle[dim]);
			
 
				-		free(plan->roots[dim]);
			
 
				-	}
			
 
				+		for (dim = 0; dim < plan->dim; dim++)
			
 
				+		{
			
 
				+			starpu_data_unregister(plan->roots_handle[dim]);
			
 
				+			free(plan->roots[dim]);
			
 
				+		}
			
 
				 
			
 
				-	switch (plan->dim) {
			
 
				+		switch (plan->dim)
			
 
				+		{
			
 
				 		case 1:
			
 
				 			STARPUFFT(free_1d_tags)(plan);
			
 
				 			break;
			
@@ -324,15 +348,15 @@ if (PARALLEL) {
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 			break;
			
 
				-	}
			
 
				+		}
			
 
				 
			
 
				-	free(plan->n1);
			
 
				-	free(plan->n2);
			
 
				-	STARPUFFT(free)(plan->twisted1);
			
 
				-	STARPUFFT(free)(plan->fft1);
			
 
				-	STARPUFFT(free)(plan->twisted2);
			
 
				-	STARPUFFT(free)(plan->fft2);
			
 
				-}
			
 
				+		free(plan->n1);
			
 
				+		free(plan->n2);
			
 
				+		STARPUFFT(free)(plan->twisted1);
			
 
				+		STARPUFFT(free)(plan->fft1);
			
 
				+		STARPUFFT(free)(plan->twisted2);
			
 
				+		STARPUFFT(free)(plan->fft2);
			
 
				+	}
			
 
				 	free(plan->n);
			
 
				 	free(plan);
			
 
				 }
			
--- a/examples/starpufft/testx.c
+++ b/examples/starpufft/testx.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -44,7 +44,8 @@ static void check_fftw(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 
				 {
			
 
				 	int i;
			
 
				 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
			
 
				-	for (i = 0; i < size; i++) {
			
 
				+	for (i = 0; i < size; i++)
			
 
				+	{
			
 
				 		double diff = cabs(out[i]-out_fftw[i]);
			
 
				 		double diff2 = diff * diff;
			
 
				 		double size = cabs(out_fftw[i]);
			
@@ -74,7 +75,8 @@ static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 
				 {
			
 
				 	int i;
			
 
				 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
			
 
				-	for (i = 0; i < size; i++) {
			
 
				+	for (i = 0; i < size; i++)
			
 
				+	{
			
 
				 		double diff = cabs(out_cuda[i]-out_fftw[i]);
			
 
				 		double diff2 = diff * diff;
			
 
				 		double size = cabs(out_fftw[i]);
			
@@ -99,7 +101,8 @@ static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-int main(int argc, char *argv[]) {
			
 
				+int main(int argc, char *argv[])
			
 
				+{
			
 
				 	int i;
			
 
				 	struct timeval begin, end;
			
 
				 	int size;
			
@@ -116,25 +119,31 @@ int main(int argc, char *argv[]) {
 
				 #endif
			
 
				 	double timing;
			
 
				 
			
 
				-	if (argc < 2 || argc > 3) {
			
 
				+	if (argc < 2 || argc > 3)
			
 
				+	{
			
 
				 		fprintf(stderr,"need one or two size of vector\n");
			
 
				 		exit(EXIT_FAILURE);
			
 
				 	}
			
 
				 
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				-	if (argc == 2) {
			
 
				+	if (argc == 2)
			
 
				+	{
			
 
				 		n = atoi(argv[1]);
			
 
				 
			
 
				 		/* 1D */
			
 
				 		size = n;
			
 
				-	} else if (argc == 3) {
			
 
				+	}
			
 
				+	else if (argc == 3)
			
 
				+	{
			
 
				 		n = atoi(argv[1]);
			
 
				 		m = atoi(argv[2]);
			
 
				 
			
 
				 		/* 2D */
			
 
				 		size = n * m;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		assert(0);
			
 
				 	}
			
 
				 
			
@@ -155,7 +164,8 @@ int main(int argc, char *argv[]) {
 
				 	STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda));
			
 
				 #endif
			
 
				 
			
 
				-	if (argc == 2) {
			
 
				+	if (argc == 2)
			
 
				+	{
			
 
				 		plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0);
			
 
				 #ifdef STARPU_HAVE_FFTW
			
 
				 		fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
			
@@ -165,7 +175,9 @@ int main(int argc, char *argv[]) {
 
				 			printf("erf\n");
			
 
				 #endif
			
 
				 
			
 
				-	} else if (argc == 3) {
			
 
				+	}
			
 
				+	else if (argc == 3)
			
 
				+	{
			
 
				 		plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0);
			
 
				 #ifdef STARPU_HAVE_FFTW
			
 
				 		fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
			
@@ -173,7 +185,9 @@ int main(int argc, char *argv[]) {
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS);
			
 
				 #endif
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		assert(0);
			
 
				 	}
			
 
				 
			
--- a/examples/starpufft/testx_threads.c
+++ b/examples/starpufft/testx_threads.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -32,7 +32,8 @@
 
				 #define SIGN (-1)
			
 
				 /* #define SIGN (1) */
			
 
				 
			
 
				-int main(int argc, char *argv[]) {
			
 
				+int main(int argc, char *argv[])
			
 
				+{
			
 
				 	int i;
			
 
				 	struct timeval begin, end;
			
 
				 	int size;
			
@@ -50,23 +51,29 @@ int main(int argc, char *argv[]) {
 
				 		num_threads = atoi(num);
			
 
				 	_FFTW(plan_with_nthreads)(num_threads);
			
 
				 
			
 
				-	if (argc < 2 || argc > 3) {
			
 
				+	if (argc < 2 || argc > 3)
			
 
				+	{
			
 
				 		fprintf(stderr,"need one or two size of vector\n");
			
 
				 		exit(EXIT_FAILURE);
			
 
				 	}
			
 
				 
			
 
				-	if (argc == 2) {
			
 
				+	if (argc == 2)
			
 
				+	{
			
 
				 		n = atoi(argv[1]);
			
 
				 
			
 
				 		/* 1D */
			
 
				 		size = n;
			
 
				-	} else if (argc == 3) {
			
 
				+	}
			
 
				+	else if (argc == 3)
			
 
				+	{
			
 
				 		n = atoi(argv[1]);
			
 
				 		m = atoi(argv[2]);
			
 
				 
			
 
				 		/* 2D */
			
 
				 		size = n * m;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		assert(0);
			
 
				 	}
			
 
				 
			
@@ -79,12 +86,17 @@ int main(int argc, char *argv[]) {
 
				 
			
 
				 	_FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw));
			
 
				 
			
 
				-	if (argc == 2) {
			
 
				+	if (argc == 2)
			
 
				+	{
			
 
				 		fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE);
			
 
				 
			
 
				-	} else if (argc == 3) {
			
 
				+	}
			
 
				+	else if (argc == 3)
			
 
				+	{
			
 
				 		fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE);
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		assert(0);
			
 
				 	}
			
 
				 
			
--- a/examples/stencil/life.c
+++ b/examples/stencil/life.c
@@ -22,9 +22,12 @@ void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 
				 {
			
 
				 	unsigned x, y, z, num, alive;
			
 
				 
			
 
				-	for (z = iter; z < nz - iter; z++) {
			
 
				-		for (y = K; y < ny - K; y++) {
			
 
				-			for (x = K; x < nx - K; x++) {
			
 
				+	for (z = iter; z < nz - iter; z++)
			
 
				+	{
			
 
				+		for (y = K; y < ny - K; y++)
			
 
				+		{
			
 
				+			for (x = K; x < nx - K; x++)
			
 
				+			{
			
 
				 				num = 0
			
 
				                                         + old[x+(y+1)*ldy+(z+0)*ldz]
			
 
				                                         + old[x+(y+1)*ldy+(z+1)*ldz]
			
--- a/examples/stencil/life_cuda.cu
+++ b/examples/stencil/life_cuda.cu
@@ -35,8 +35,10 @@ cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 
				 	unsigned num, alive;
			
 
				 
			
 
				 	for (z = iter + idz; z < nz - iter; z += stepz)
			
 
				-		for (y = K + idy; y < ny - K; y += stepy) {
			
 
				-			for (x = K + idx; x < nx - K; x += stepx) {
			
 
				+		for (y = K + idy; y < ny - K; y += stepy)
			
 
				+		{
			
 
				+			for (x = K + idx; x < nx - K; x += stepx)
			
 
				+			{
			
 
				 				unsigned index = x + y*ldy + z*ldz;
			
 
				 				num = 0
			
 
				                                         + old[index+1*ldy+0*ldz]
			
--- a/examples/stencil/life_opencl.c
+++ b/examples/stencil/life_opencl.c
@@ -42,8 +42,10 @@ life_update(int bz, __global const TYPE *old, __global TYPE *newp, int nx, int n
 
				 	unsigned num, alive;\n\
			
 
				 \n\
			
 
				 	for (z = iter + idz; z < nz - iter; z += stepz)\n\
			
 
				-		for (y = K + idy; y < ny - K; y += stepy) {\n\
			
 
				-			for (x = K + idx; x < nx - K; x += stepx) {\n\
			
 
				+		for (y = K + idy; y < ny - K; y += stepy) \n\
			
 
				+		{\n									\
			
 
				+			for (x = K + idx; x < nx - K; x += stepx) \
			
 
				+			{\n					\
			
 
				 				unsigned index = x + y*ldy + z*ldz;\n\
			
 
				 				num = 0\n\
			
 
				                                         + old[index+1*ldy+0*ldz]\n\
			
@@ -66,11 +68,13 @@ static const char * src = clsrc(TYPE,K);
 
				 static struct starpu_opencl_program program;
			
 
				 
			
 
				 void
			
 
				-opencl_life_init(void) {
			
 
				+opencl_life_init(void)
			
 
				+{
			
 
				   starpu_opencl_load_opencl_from_string(src, &program, NULL);
			
 
				 }
			
 
				 
			
 
				-void opencl_life_free(void) {
			
 
				+void opencl_life_free(void)
			
 
				+{
			
 
				   starpu_opencl_unload_opencl(&program);
			
 
				 }
			
 
				 
			
--- a/examples/stencil/shadow.h
+++ b/examples/stencil/shadow.h
@@ -20,7 +20,8 @@
 
				 /* TODO: rather use a dummy for loop, to assign the job to the threads that will work on it? */
			
 
				 	if (idy == 0)
			
 
				 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
			
 
				-			for (x = K + idx; x < nx-K; x += stepx) {
			
 
				+			for (x = K + idx; x < nx-K; x += stepx)
			
 
				+			{
			
 
				 				unsigned index = x+z*ldz;
			
 
				 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];
			
 
				 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];
			
@@ -28,14 +29,16 @@
 
				 
			
 
				 	if (idx == 0)
			
 
				 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
			
 
				-			for (y = K + idy; y < ny-K; y += stepy) {
			
 
				+			for (y = K + idy; y < ny-K; y += stepy)
			
 
				+			{
			
 
				 				unsigned index = y*ldy+z*ldz;
			
 
				 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];
			
 
				 				ptr[(nx-K)+index] = ptr[K+index];
			
 
				 			}
			
 
				 
			
 
				 	if (idx == 0 && idy == 0)
			
 
				-		for (z = i-1 + idz; z < nz-(i-1); z += stepz) {
			
 
				+		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
			
 
				+		{
			
 
				 			unsigned index = z*ldz;
			
 
				 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];
			
 
				 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];
			
--- a/examples/stencil/shadow_opencl.c
+++ b/examples/stencil/shadow_opencl.c
@@ -38,7 +38,8 @@ shadow( int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, in
 
				 	unsigned x, y, z;\n\
			
 
				 	if (idy == 0)\n\
			
 
				 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
			
 
				-			for (x = K + idx; x < nx-K; x += stepx) {\n\
			
 
				+			for (x = K + idx; x < nx-K; x += stepx) \
			
 
				+			{\n								\
			
 
				 				unsigned index = x+z*ldz;\n\
			
 
				 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];\n\
			
 
				 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];\n\
			
@@ -46,14 +47,16 @@ shadow( int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, in
 
				 \n\
			
 
				 	if (idx == 0)\n\
			
 
				 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
			
 
				-			for (y = K + idy; y < ny-K; y += stepy) {\n\
			
 
				+			for (y = K + idy; y < ny-K; y += stepy) \
			
 
				+			{\n					\
			
 
				 				unsigned index = y*ldy+z*ldz;\n\
			
 
				 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];\n\
			
 
				 				ptr[(nx-K)+index] = ptr[K+index];\n\
			
 
				 			}\n\
			
 
				 \n\
			
 
				 	if (idx == 0 && idy == 0)\n\
			
 
				-		for (z = i-1 + idz; z < nz-(i-1); z += stepz) {\n\
			
 
				+		for (z = i-1 + idz; z < nz-(i-1); z += stepz) \
			
 
				+		{\n					      \
			
 
				 			unsigned index = z*ldz;\n\
			
 
				 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];\n\
			
 
				 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];\n\
			
@@ -66,11 +69,13 @@ static const char * src = clsrc(TYPE,K);
 
				 static struct starpu_opencl_program program;
			
 
				 
			
 
				 void
			
 
				-opencl_shadow_init(void) {
			
 
				+opencl_shadow_init(void)
			
 
				+{
			
 
				   starpu_opencl_load_opencl_from_string(src, &program, NULL);
			
 
				 }
			
 
				 
			
 
				-void opencl_shadow_free(void) {
			
 
				+void opencl_shadow_free(void)
			
 
				+{
			
 
				   starpu_opencl_unload_opencl(&program);
			
 
				 }
			
 
				 
			
--- a/examples/stencil/stencil-kernels.c
+++ b/examples/stencil/stencil-kernels.c
@@ -23,10 +23,12 @@
 
				 
			
 
				 #ifndef timersub
			
 
				 #define	timersub(x, y, res) \
			
 
				-	do { \
			
 
				+	do \
			
 
				+	{						   \
			
 
				 		(res)->tv_sec = (x)->tv_sec - (y)->tv_sec; \
			
 
				 		(res)->tv_usec = (x)->tv_usec - (y)->tv_usec; \
			
 
				-		if ((res)->tv_usec < 0) { \
			
 
				+		if ((res)->tv_usec < 0) \
			
 
				+		{			 \
			
 
				 			(res)->tv_sec--; \
			
 
				 			(res)->tv_usec += 1000000; \
			
 
				 		} \
			
@@ -34,10 +36,12 @@
 
				 #endif
			
 
				 #ifndef timeradd
			
 
				 #define	timeradd(x, y, res) \
			
 
				-	do { \
			
 
				+	do \
			
 
				+	{						   \
			
 
				 		(res)->tv_sec = (x)->tv_sec + (y)->tv_sec; \
			
 
				 		(res)->tv_usec = (x)->tv_usec + (y)->tv_usec; \
			
 
				-		if ((res)->tv_usec >= 1000000) { \
			
 
				+		if ((res)->tv_usec >= 1000000) \
			
 
				+		{			       \
			
 
				 			(res)->tv_sec++; \
			
 
				 			(res)->tv_usec -= 1000000; \
			
 
				 		} \
			
@@ -138,7 +142,8 @@ static void record_who_runs_what(struct block_description *block)
 
				 	gettimeofday(&tv, NULL);
			
 
				 	timersub(&tv, &start, &tv2);
			
 
				 	timersub(&tv2, &last_tick[block->bz], &diff);
			
 
				-	while (timercmp(&diff, &delta, >=)) {
			
 
				+	while (timercmp(&diff, &delta, >=))
			
 
				+	{
			
 
				 		timeradd(&last_tick[block->bz], &delta, &last_tick[block->bz]);
			
 
				 		timersub(&tv2, &last_tick[block->bz], &diff);
			
 
				 		if (who_runs_what_index[block->bz] < who_runs_what_len)
			
@@ -439,12 +444,14 @@ fprintf(stderr,"!!! DO update_func_cpu z %d CPU%d !!!\n", block->bz, workerid);
 
				 }
			
 
				 
			
 
				 /* Performance model and codelet structure */
			
 
				-static struct starpu_perfmodel cl_update_model = {
			
 
				+static struct starpu_perfmodel cl_update_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "cl_update" 
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet cl_update = {
			
 
				+struct starpu_codelet cl_update =
			
 
				+{
			
 
				 	.where = 0 |
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		STARPU_CUDA|
			
@@ -634,17 +641,20 @@ static void dummy_func_bottom_opencl(void *descr[] __attribute__((unused)), void
 
				 #endif /* STARPU_USE_OPENCL */
			
 
				 
			
 
				 /* Performance models and codelet for save */
			
 
				-static struct starpu_perfmodel save_cl_bottom_model = {
			
 
				+static struct starpu_perfmodel save_cl_bottom_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "save_cl_bottom" 
			
 
				 };
			
 
				 
			
 
				-static struct starpu_perfmodel save_cl_top_model = {
			
 
				+static struct starpu_perfmodel save_cl_top_model =
			
 
				+{
			
 
				 	.type = STARPU_HISTORY_BASED,
			
 
				 	.symbol = "save_cl_top" 
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet save_cl_bottom = {
			
 
				+struct starpu_codelet save_cl_bottom =
			
 
				+{
			
 
				 	.where = 0 |
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		STARPU_CUDA|
			
@@ -664,7 +674,8 @@ struct starpu_codelet save_cl_bottom = {
 
				 	.nbuffers = 4
			
 
				 };
			
 
				 
			
 
				-struct starpu_codelet save_cl_top = {
			
 
				+struct starpu_codelet save_cl_top =
			
 
				+{
			
 
				 	.where = 0|
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		STARPU_CUDA|
			
--- a/examples/stencil/stencil-tasks.c
+++ b/examples/stencil/stencil-tasks.c
@@ -76,7 +76,8 @@ static void create_task_save_local(unsigned iter, unsigned z, int dir, unsigned
 
				 /* R(z) = local & R(z+d) != local */
			
 
				 /* We need to send our save over MPI */
			
 
				 
			
 
				-static void send_done(void *arg) {
			
 
				+static void send_done(void *arg)
			
 
				+{
			
 
				 	uintptr_t z = (uintptr_t) arg;
			
 
				 	DEBUG("DO SEND %d\n", (int)z);
			
 
				 }
			
@@ -103,7 +104,8 @@ static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, unsign
 
				 /* R(z) != local & R(z+d) = local */
			
 
				 /* We need to receive over MPI */
			
 
				 
			
 
				-static void recv_done(void *arg) {
			
 
				+static void recv_done(void *arg)
			
 
				+{
			
 
				 	uintptr_t z = (uintptr_t) arg;
			
 
				 	DEBUG("DO RECV %d\n", (int)z);
			
 
				 }
			
@@ -146,12 +148,14 @@ void create_task_save(unsigned iter, unsigned z, int dir, unsigned local_rank)
 
				 		}
			
 
				 
			
 
				 	}
			
 
				-	else {	/* node_z != local_rank, this MPI node doesn't have the saved data */
			
 
				+	else
			
 
				+	{	/* node_z != local_rank, this MPI node doesn't have the saved data */
			
 
				 		if (node_z_and_d == local_rank)
			
 
				 		{
			
 
				 			create_task_save_mpi_recv(iter, z, dir, local_rank);
			
 
				 		}
			
 
				-		else { /* R(z) != local & R(z+d) != local We don't have
			
 
				+		else
			
 
				+		{ /* R(z) != local & R(z+d) != local We don't have
			
 
				 			      the saved data and don't need it, we shouldn't
			
 
				 			      even have been called! */
			
 
				 			STARPU_ASSERT(0);
			
@@ -176,7 +180,8 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 
				 	unsigned niter = get_niter();
			
 
				 
			
 
				 	/* We are going to synchronize with the last tasks */
			
 
				-	if (iter == niter) {
			
 
				+	if (iter == niter)
			
 
				+	{
			
 
				 		task->detach = 0;
			
 
				 		task->use_tag = 1;
			
 
				 		task->tag_id = TAG_FINISH(z);
			
@@ -218,7 +223,8 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 
				 
			
 
				 /* Dummy empty codelet taking one buffer */
			
 
				 static void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }
			
 
				-static struct starpu_codelet null = {
			
 
				+static struct starpu_codelet null =
			
 
				+{
			
 
				 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
			
 
				 	.cpu_funcs = {null_func, NULL},
			
 
				 	.cuda_funcs = {null_func, NULL},
			
@@ -226,7 +232,8 @@ static struct starpu_codelet null = {
 
				 	.nbuffers = 2
			
 
				 };
			
 
				 
			
 
				-void create_start_task(int z, int dir) {
			
 
				+void create_start_task(int z, int dir)
			
 
				+{
			
 
				 	/* Dumb task depending on the init task and simulating writing the
			
 
				 	   neighbour buffers, to avoid communications and computation running
			
 
				 	   before we start measuring time */
			
@@ -261,7 +268,8 @@ void create_tasks(int rank)
 
				 	int niter = get_niter();
			
 
				 	int nbz = get_nbz();
			
 
				 
			
 
				-	for (bz = 0; bz < nbz; bz++) {
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
			
 
				 			create_start_task(bz, +1);
			
 
				 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
			
--- a/examples/stencil/stencil.c
+++ b/examples/stencil/stencil.c
@@ -67,36 +67,45 @@ unsigned get_ticks(void)
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-b") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-b") == 0)
			
 
				+		{
			
 
				 			bind_tasks = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-nbz") == 0) {
			
 
				+		if (strcmp(argv[i], "-nbz") == 0)
			
 
				+		{
			
 
				 			nbz = atoi(argv[++i]);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-sizex") == 0) {
			
 
				+		if (strcmp(argv[i], "-sizex") == 0)
			
 
				+		{
			
 
				 			sizex = atoi(argv[++i]);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-sizey") == 0) {
			
 
				+		if (strcmp(argv[i], "-sizey") == 0)
			
 
				+		{
			
 
				 			sizey = atoi(argv[++i]);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-sizez") == 0) {
			
 
				+		if (strcmp(argv[i], "-sizez") == 0)
			
 
				+		{
			
 
				 			sizez = atoi(argv[++i]);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-niter") == 0) {
			
 
				+		if (strcmp(argv[i], "-niter") == 0)
			
 
				+		{
			
 
				 			niter = atoi(argv[++i]);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-ticks") == 0) {
			
 
				+		if (strcmp(argv[i], "-ticks") == 0)
			
 
				+		{
			
 
				 			ticks = atoi(argv[++i]);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			 fprintf(stderr, "Usage : %s [options...]\n", argv[0]);
			
 
				 			 fprintf(stderr, "\n");
			
 
				 			 fprintf(stderr, "Options:\n");
			
@@ -114,7 +123,8 @@ static void init_problem(int argc, char **argv, int rank, int world_size)
 
				 {
			
 
				 	parse_args(argc, argv);
			
 
				 
			
 
				-	if (getenv("STARPU_TOP")) {
			
 
				+	if (getenv("STARPU_TOP"))
			
 
				+	{
			
 
				 		starpu_top_init_loop = starpu_top_add_data_integer("Task creation iter", 0, niter, 1);
			
 
				 		starpu_top_achieved_loop = starpu_top_add_data_integer("Task achieved iter", 0, niter, 1);
			
 
				 		starpu_top_init_and_wait("stencil_top example");
			
@@ -152,8 +162,10 @@ void f(unsigned task_per_worker[STARPU_NMAXWORKERS])
 
				 
			
 
				 	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
			
 
				 		total += task_per_worker[worker];
			
 
				-	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) {
			
 
				-		if (task_per_worker[worker]) {
			
 
				+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
			
 
				+	{
			
 
				+		if (task_per_worker[worker])
			
 
				+		{
			
 
				 			char name[32];
			
 
				 			starpu_worker_get_name(worker, name, sizeof(name));
			
 
				 			fprintf(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
			
@@ -181,7 +193,8 @@ int main(int argc, char **argv)
 
				 
			
 
				 #ifdef STARPU_USE_MPI
			
 
				 	int thread_support;
			
 
				-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) {
			
 
				+	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support))
			
 
				+	{
			
 
				 		fprintf(stderr, "MPI_Init_thread failed\n");
			
 
				 	}
			
 
				 	if (thread_support == MPI_THREAD_FUNNELED)
			
@@ -295,15 +308,18 @@ int main(int argc, char **argv)
 
				 
			
 
				 		unsigned bz, iter;
			
 
				 		unsigned last;
			
 
				-		for (iter = 0; iter < who_runs_what_len; iter++) {
			
 
				+		for (iter = 0; iter < who_runs_what_len; iter++)
			
 
				+		{
			
 
				 			last = 1;
			
 
				-			for (bz = 0; bz < nbz; bz++) {
			
 
				+			for (bz = 0; bz < nbz; bz++)
			
 
				+			{
			
 
				 				if ((bz % nzblocks_per_process) == 0)
			
 
				 					fprintf(stderr, "| ");
			
 
				 
			
 
				 				if (who_runs_what_index[bz] <= iter)
			
 
				 					fprintf(stderr,"_ ");
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					last = 0;
			
 
				 					if (who_runs_what[bz + iter * nbz] == -1)
			
 
				 						fprintf(stderr,"* ");
			
--- a/examples/stencil/stencil.h
+++ b/examples/stencil/stencil.h
@@ -50,13 +50,15 @@ extern struct starpu_top_data* starpu_top_achieved_loop;
 
				 
			
 
				 
			
 
				 /* Split only on the z axis to make things simple */
			
 
				-typedef enum {
			
 
				+typedef enum
			
 
				+{
			
 
				 	B = 0,
			
 
				 	T = 1
			
 
				 } direction;
			
 
				 
			
 
				 /* Description of a domain block */
			
 
				-struct block_description {
			
 
				+struct block_description
			
 
				+{
			
 
				 	/* Which MPI node should process that block ? */
			
 
				 	unsigned mpi_node;
			
 
				 	
			
--- a/examples/tag_example/tag_example.c
+++ b/examples/tag_example/tag_example.c
@@ -43,23 +43,28 @@ static unsigned iter = 0;
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-iter") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-iter") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nk = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-i") == 0) {
			
 
				+		if (strcmp(argv[i], "-i") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			ni = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-j") == 0) {
			
 
				+		if (strcmp(argv[i], "-j") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nj = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			printf("usage : %s [-iter iter] [-i i] [-j j]\n", argv[0]);
			
 
				 		}
			
 
				 	}
			
@@ -153,27 +158,29 @@ void cpu_codelet(void *descr[] __attribute__((unused)),
 
				 
			
 
				 static void express_deps(unsigned i, unsigned j, unsigned iter)
			
 
				 {
			
 
				-	if (j > 0) {
			
 
				+	if (j > 0)
			
 
				+	{
			
 
				 		/* (i,j-1) exists */
			
 
				 		if (j < nj - 1)
			
 
				 		{
			
 
				 			/* (i,j+1) exists */
			
 
				 			starpu_tag_declare_deps(TAG(i,j,iter), 2, TAG(i-1,j-1,iter), TAG(i-1,j+1,iter));
			
 
				 		}
			
 
				-		else 
			
 
				+		else
			
 
				 		{
			
 
				 			/* (i,j+1) does not exist */
			
 
				 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j-1,iter));
			
 
				 		}
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* (i, (j-1) does not exist */
			
 
				 		if (j < nj - 1)
			
 
				 		{
			
 
				 			/* (i,j+1) exists */
			
 
				 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j+1,iter));
			
 
				 		}
			
 
				-		else 
			
 
				+		else
			
 
				 		{
			
 
				 			/* (i,j+1) does not exist */
			
 
				 			STARPU_ABORT();
			
--- a/examples/tag_example/tag_example2.c
+++ b/examples/tag_example/tag_example2.c
@@ -40,18 +40,22 @@ static unsigned ni = Ni, nk = Nk;
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-iter") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-iter") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nk = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-i") == 0) {
			
 
				+		if (strcmp(argv[i], "-i") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			ni = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
			
 
				 		}
			
 
				 	}
			
--- a/examples/tag_example/tag_example3.c
+++ b/examples/tag_example/tag_example3.c
@@ -40,18 +40,22 @@ static unsigned ni = Ni, nk = Nk;
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-iter") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-iter") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nk = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-i") == 0) {
			
 
				+		if (strcmp(argv[i], "-i") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			ni = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
			
 
				 		}
			
 
				 	}
			
--- a/examples/tag_example/tag_restartable.c
+++ b/examples/tag_example/tag_restartable.c
@@ -46,18 +46,22 @@ struct starpu_task **tasks[Nrolls];
 
				 static void parse_args(int argc, char **argv)
			
 
				 {
			
 
				 	int i;
			
 
				-	for (i = 1; i < argc; i++) {
			
 
				-		if (strcmp(argv[i], "-iter") == 0) {
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-iter") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			nk = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-i") == 0) {
			
 
				+		if (strcmp(argv[i], "-i") == 0)
			
 
				+		{
			
 
				 		        char *argptr;
			
 
				 			ni = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				-		if (strcmp(argv[i], "-h") == 0) {
			
 
				+		if (strcmp(argv[i], "-h") == 0)
			
 
				+		{
			
 
				 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
			
 
				 		}
			
 
				 	}
			
@@ -133,7 +137,8 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
				 
			
 
				 	FPRINTF(stderr, "ITER : %u\n", nk);
			
 
				 
			
 
				-	for (i = 0; i < Nrolls; i++) {
			
 
				+	for (i = 0; i < Nrolls; i++)
			
 
				+	{
			
 
				 		tasks[i] = (struct starpu_task **) malloc(ni * sizeof(*tasks[i]));
			
 
				 
			
 
				 		create_task_grid(i);
			
--- a/examples/top/hello_world_top.c
+++ b/examples/top/hello_world_top.c
@@ -56,10 +56,12 @@ void callback_func(void *callback_arg)
 
				  * DSM; the second arguments references read-only data that is passed as an
			
 
				  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
			
 
				  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
			
 
				-struct params {
			
 
				+struct params
			
 
				+{
			
 
				 	int i;
			
 
				 	float f;
			
 
				 };
			
 
				+
			
 
				 void cpu_func(void *buffers[], void *cl_arg)
			
 
				 {
			
 
				 	struct params *params = (struct params *) cl_arg;