浏览代码

update code w.r.t coding style

Nathalie Furmento 13 年之前
父节点
当前提交
b9476e6d4a
共有 90 个文件被更改,包括 1393 次插入和 736 次删除
  1. 27 14
      examples/audio/starpu_audio_processing.c
  2. 4 2
      examples/axpy/axpy.c
  3. 18 11
      examples/basic_examples/block.c
  4. 5 3
      examples/basic_examples/block_cpu.c
  5. 5 3
      examples/basic_examples/block_cuda.cu
  6. 3 2
      examples/basic_examples/block_opencl.c
  7. 5 3
      examples/basic_examples/block_opencl_kernel.cl
  8. 3 1
      examples/basic_examples/hello_world.c
  9. 22 12
      examples/basic_examples/mult.c
  10. 12 6
      examples/basic_examples/multiformat.c
  11. 12 6
      examples/basic_examples/multiformat_conversion_codelets.c
  12. 2 1
      examples/basic_examples/multiformat_conversion_codelets_cuda.cu
  13. 2 1
      examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
  14. 4 2
      examples/basic_examples/multiformat_types.h
  15. 6 3
      examples/basic_examples/vector_scal.c
  16. 4 2
      examples/basic_examples/vector_scal_c.c
  17. 3 2
      examples/basic_examples/vector_scal_opencl_kernel.cl
  18. 14 7
      examples/cg/cg.c
  19. 40 20
      examples/cg/cg_kernels.c
  20. 18 9
      examples/cholesky/cholesky.h
  21. 38 19
      examples/cholesky/cholesky_grain_tag.c
  22. 24 12
      examples/cholesky/cholesky_implicit.c
  23. 9 5
      examples/cholesky/cholesky_kernels.c
  24. 12 6
      examples/cholesky/cholesky_models.c
  25. 44 22
      examples/cholesky/cholesky_tag.c
  26. 28 14
      examples/cholesky/cholesky_tile_tag.c
  27. 9 5
      examples/common/blas_model.h
  28. 14 7
      examples/filters/fblock.c
  29. 5 3
      examples/filters/fblock_cpu.c
  30. 5 3
      examples/filters/fblock_cuda.cu
  31. 3 2
      examples/filters/fblock_opencl.c
  32. 5 3
      examples/filters/fblock_opencl_kernel.cl
  33. 12 6
      examples/filters/fmatrix.c
  34. 2 1
      examples/filters/fvector.c
  35. 42 21
      examples/heat/dw_factolu.c
  36. 22 11
      examples/heat/dw_factolu.h
  37. 40 20
      examples/heat/dw_factolu_grain.c
  38. 12 6
      examples/heat/dw_factolu_kernels.c
  39. 36 18
      examples/heat/dw_factolu_tag.c
  40. 6 3
      examples/heat/dw_sparse_cg.c
  41. 13 7
      examples/heat/dw_sparse_cg.h
  42. 72 37
      examples/heat/heat.c
  43. 2 1
      examples/heat/heat.h
  44. 8 4
      examples/heat/heat_display.c
  45. 17 9
      examples/heat/lu_kernels_model.c
  46. 2 1
      examples/incrementer/incrementer.c
  47. 27 13
      examples/lu/lu_example.c
  48. 28 14
      examples/lu/xlu.c
  49. 2 1
      examples/lu/xlu.h
  50. 4 2
      examples/lu/xlu_implicit.c
  51. 4 2
      examples/lu/xlu_implicit_pivot.c
  52. 38 19
      examples/lu/xlu_kernels.c
  53. 31 16
      examples/lu/xlu_pivot.c
  54. 35 18
      examples/mandelbrot/mandelbrot.c
  55. 28 14
      examples/matvecmult/matvecmult.c
  56. 2 1
      examples/matvecmult/matvecmult_kernel.cl
  57. 46 23
      examples/mult/xgemm.c
  58. 6 3
      examples/openmp/vector_scal.c
  59. 10 5
      examples/opt/pi/pi.c
  60. 18 9
      examples/opt/pi/pi_redux.c
  61. 5 3
      examples/ppm_downscaler/ppm_downscaler.c
  62. 5 3
      examples/ppm_downscaler/ppm_downscaler.h
  63. 11 6
      examples/ppm_downscaler/yuv_downscaler.c
  64. 5 3
      examples/ppm_downscaler/yuv_downscaler.h
  65. 6 3
      examples/reductions/dot_product.c
  66. 6 3
      examples/reductions/minmax_reduction.c
  67. 5 3
      examples/scheduler/dummy_sched.c
  68. 6 3
      examples/spmv/dw_block_spmv.c
  69. 3 2
      examples/spmv/dw_block_spmv_kernels.c
  70. 28 17
      examples/spmv/matrix_market/mm_to_bcsr.c
  71. 5 3
      examples/spmv/matrix_market/mm_to_bcsr.h
  72. 18 9
      examples/spmv/spmv.c
  73. 21 9
      examples/starpufft/cudax_kernels.cu
  74. 100 76
      examples/starpufft/starpufftx.c
  75. 25 11
      examples/starpufft/testx.c
  76. 21 9
      examples/starpufft/testx_threads.c
  77. 6 3
      examples/stencil/life.c
  78. 4 2
      examples/stencil/life_cuda.cu
  79. 8 4
      examples/stencil/life_opencl.c
  80. 6 3
      examples/stencil/shadow.h
  81. 10 5
      examples/stencil/shadow_opencl.c
  82. 22 11
      examples/stencil/stencil-kernels.c
  83. 16 8
      examples/stencil/stencil-tasks.c
  84. 32 16
      examples/stencil/stencil.c
  85. 4 2
      examples/stencil/stencil.h
  86. 16 9
      examples/tag_example/tag_example.c
  87. 8 4
      examples/tag_example/tag_example2.c
  88. 8 4
      examples/tag_example/tag_example3.c
  89. 10 5
      examples/tag_example/tag_restartable.c
  90. 3 1
      examples/top/hello_world_top.c

+ 27 - 14
examples/audio/starpu_audio_processing.c

@@ -101,7 +101,8 @@ void read_16bit_wav(FILE *infile, unsigned size, float *arrayout, FILE *save_fil
 	/* we skip the header to only keep the data */
 	fseek(infile, headersize, SEEK_SET);
 	
-	for (v=0;v<size;v++) {
+	for (v=0;v<size;v++)
+	{
 		signed char val = (signed char)fgetc(infile);
 		signed char val2 = (signed char)fgetc(infile);
 
@@ -124,7 +125,8 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
 	/* we assume that the header is copied using copy_wav_header */
 	fseek(outfile, headersize, SEEK_SET);
 	
-	for (v=0;v<size;v++) {
+	for (v=0;v<size;v++)
+	{
 		signed char val = ((int)arrayin[v]) % 256; 
 		signed char val2  = ((int)arrayin[v]) / 256;
 
@@ -146,7 +148,8 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
  */
 
 /* we don't reinitialize the CUFFT plan for every kernel, so we "cache" it */
-typedef struct {
+typedef struct
+{
 	unsigned is_initialized;
 #ifdef STARPU_USE_CUDA
 	cufftHandle plan;
@@ -268,12 +271,14 @@ static void band_filter_kernel_cpu(void *descr[], __attribute__((unused)) void *
 		localA[i] /= nsamples;
 }
 
-struct starpu_perfmodel band_filter_model = {
+struct starpu_perfmodel band_filter_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "FFT_band_filter"
 };
 
-static struct starpu_codelet band_filter_cl = {
+static struct starpu_codelet band_filter_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {band_filter_kernel_gpu, NULL},
@@ -330,7 +335,8 @@ static void init_problem(void)
 	{
 		starpu_malloc((void **)&A, length_data*sizeof(float));
 	}
-	else {
+	else
+	{
 		A = malloc(length_data*sizeof(float));
 	}
 
@@ -344,31 +350,38 @@ static void init_problem(void)
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-h") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			fprintf(stderr, "Usage: %s [-pin] [-nsamples block_size] [-i input.wav] [-o output.wav | -no-output] [-h]\n", argv[0]);
 			exit(-1);
 		}
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 			inputfilename = argv[++i];;
 		}
 
-		if (strcmp(argv[i], "-o") == 0) {
+		if (strcmp(argv[i], "-o") == 0)
+		{
 			outputfilename = argv[++i];;
 		}
 
-		if (strcmp(argv[i], "-no-output") == 0) {
+		if (strcmp(argv[i], "-no-output") == 0)
+		{
 			outputfilename = NULL;;
 		}
 
 		/* block size */
-		if (strcmp(argv[i], "-nsamples") == 0) {
+		if (strcmp(argv[i], "-nsamples") == 0)
+		{
 			char *argptr;
 			nsamples = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-pin") == 0) {
+		if (strcmp(argv[i], "-pin") == 0)
+		{
 			use_pin = 1;
 		}
 	}
@@ -393,7 +406,7 @@ int main(int argc, char **argv)
 
 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
 
-	struct starpu_data_filter f = 
+	struct starpu_data_filter f =
 	{
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = niter

+ 4 - 2
examples/axpy/axpy.c

@@ -70,7 +70,8 @@ void axpy_gpu(void *descr[], __attribute__((unused)) void *arg)
 }
 #endif
 
-static struct starpu_codelet axpy_cl = {
+static struct starpu_codelet axpy_cl =
+{
         .where =
 #ifdef STARPU_USE_CUDA
                 STARPU_CUDA|
@@ -116,7 +117,8 @@ int main(int argc, char **argv)
 	starpu_vector_data_register(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
 
 	/* Divide the vector into blocks */
-	struct starpu_data_filter block_filter = {
+	struct starpu_data_filter block_filter =
+	{
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = NBLOCKS
 	};

+ 18 - 11
examples/basic_examples/block.c

@@ -57,7 +57,8 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 	task->cl_arg_size = sizeof(multiplier);
 
         int ret = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 return 1;
 	}
@@ -67,8 +68,9 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 	/* update the array in RAM */
 	starpu_data_unregister(block_handle);
 
-        for(i=0 ; i<pnx*pny*pnz; i++) {
-          FPRINTF(stderr, "%f ", block[i]);
+        for(i=0 ; i<pnx*pny*pnz; i++)
+	{
+		FPRINTF(stderr, "%f ", block[i]);
         }
         FPRINTF(stderr, "\n");
 
@@ -88,9 +90,12 @@ int main(int argc, char **argv)
 
         block = (float*)malloc(nx*ny*nz*sizeof(float));
         assert(block);
-        for(k=0 ; k<nz ; k++) {
-                for(j=0 ; j<ny ; j++) {
-                        for(i=0 ; i<nx ; i++) {
+        for(k=0 ; k<nz ; k++)
+	{
+                for(j=0 ; j<ny ; j++)
+		{
+                        for(i=0 ; i<nx ; i++)
+			{
                                 block[(k*nx*ny)+(j*nx)+i] = n++;
                         }
                 }
@@ -110,11 +115,13 @@ int main(int argc, char **argv)
 
         /* Check result is correct */
         ret=1;
-        for(i=0 ; i<nx*ny*nz ; i++) {
-          if (block[i] != (i+1) * multiplier) {
-            ret=0;
-            break;
-          }
+        for(i=0 ; i<nx*ny*nz ; i++)
+	{
+		if (block[i] != (i+1) * multiplier)
+		{
+			ret=0;
+			break;
+		}
         }
 
         FPRINTF(stderr,"TEST %s\n", ret==1?"PASSED":"FAILED");

+ 5 - 3
examples/basic_examples/block_cpu.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,8 +28,10 @@ void cpu_codelet(void *descr[], void *_args)
         float *multiplier = (float *)_args;
         unsigned i, j, k;
 
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] *= *multiplier;
                 }

+ 5 - 3
examples/basic_examples/block_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,8 +20,10 @@
 static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] *= multiplier;
                 }

+ 3 - 2
examples/basic_examples/block_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,7 +19,8 @@
 #include <starpu_opencl.h>
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
-do {                                                        \
+do						    	    \
+{							    \
 	int err;                                            \
 	err = clSetKernelArg(kernel, n, size, ptr);         \
 	if (err != CL_SUCCESS)                              \

+ 5 - 3
examples/basic_examples/block_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,8 +17,10 @@
 __kernel void block(__global float *b, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                                 b[(k*ldz)+(j*ldy)+i] *= multiplier;
                 }

+ 3 - 1
examples/basic_examples/hello_world.c

@@ -46,10 +46,12 @@ void callback_func(void *callback_arg)
  * DSM; the second arguments references read-only data that is passed as an
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
-struct params {
+struct params
+{
 	int i;
 	float f;
 };
+
 void cpu_func(void *buffers[], void *cl_arg)
 {
 	struct params *params = (struct params *) cl_arg;

+ 22 - 12
examples/basic_examples/mult.c

@@ -133,20 +133,26 @@ static void init_problem_data(void)
 
 	/* fill the A and B matrices */
 	srand(2009);
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < zdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < zdim; i++)
+		{
 			A[j+i*ydim] = (float)(starpu_drand48());
 		}
 	}
 
-	for (j=0; j < zdim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < zdim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			B[j+i*zdim] = (float)(starpu_drand48());
 		}
 	}
 
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			C[j+i*ydim] = (float)(0);
 		}
 	}
@@ -186,16 +192,18 @@ static void partition_mult_data(void)
 	/* StarPU supplies some basic filters such as the partition of a matrix
 	 * into blocks, note that we are using a FORTRAN ordering so that the
 	 * name of the filters are a bit misleading */
-	struct starpu_data_filter vert = {
+	struct starpu_data_filter vert =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nslicesx
 	};
-		
-	struct starpu_data_filter horiz = {
+
+	struct starpu_data_filter horiz =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nslicesy
 	};
-		
+
 /*
  *	Illustration with nslicex = 4 and nslicey = 2, it is possible to access
  *	sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle,
@@ -246,12 +254,14 @@ static void partition_mult_data(void)
 	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
 }
 
-static struct starpu_perfmodel mult_perf_model = {
+static struct starpu_perfmodel mult_perf_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "mult_perf_model"
 };
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
         /* we can only execute that kernel on a CPU yet */
         .where = STARPU_CPU,
         /* CPU implementation of the codelet */

+ 12 - 6
examples/basic_examples/multiformat.c

@@ -32,7 +32,8 @@ multiformat_scal_cpu_func(void *buffers[], void *args)
 	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		aos[i].x *= aos[i].y;
 	}
 }
@@ -47,7 +48,8 @@ extern struct starpu_codelet cpu_to_opencl_cl;
 extern struct starpu_codelet opencl_to_cpu_cl;
 #endif
 
-static struct starpu_multiformat_data_interface_ops format_ops = {
+static struct starpu_multiformat_data_interface_ops format_ops =
+{
 #ifdef STARPU_USE_CUDA
 	.cuda_elemsize = 2* sizeof(float),
 	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
@@ -69,7 +71,8 @@ extern void multiformat_scal_cuda_func(void *buffers[], void *arg);
 extern void multiformat_scal_opencl_func(void *buffers[], void *arg);
 #endif
 
-static struct starpu_codelet  cl = {
+static struct starpu_codelet  cl =
+{
 	.where = STARPU_CUDA | STARPU_OPENCL,
 	.cpu_funcs = {multiformat_scal_cpu_func, NULL},
 #ifdef STARPU_USE_CUDA
@@ -89,7 +92,8 @@ static void
 init_problem_data(void)
 {
 	int i; 
-	for (i = 0; i < N_ELEMENTS; i++) {
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
 		array_of_structs[i].x = 1.0 + i;
 		array_of_structs[i].y = 42.0;
 	}
@@ -154,7 +158,8 @@ static void
 print_it(void)
 {
 	int i;
-	for (i = 0; i < N_ELEMENTS; i++) {
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
 		FPRINTF(stderr, "(%.2f %.2f) ",
 			array_of_structs[i].x,
 			array_of_structs[i].y);
@@ -166,7 +171,8 @@ static int
 check_it(void)
 {
 	int i;
-	for (i = 0; i < N_ELEMENTS; i++) {
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
 		float expected_value = i + 1.0;
 #if STARPU_USE_CUDA
 		expected_value *= array_of_structs[i].y;

+ 12 - 6
examples/basic_examples/multiformat_conversion_codelets.c

@@ -24,21 +24,24 @@ void cuda_to_cpu(void *buffers[], void *arg)
 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		dst[i].x = src->x[i];
 		dst[i].y = src->y[i];
 	}
 }
 
 extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
-struct starpu_codelet cpu_to_cuda_cl = {
+struct starpu_codelet cpu_to_cuda_cl =
+{
 	.where = STARPU_CUDA,
 	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
 	.nbuffers = 1,
 	.name = "codelet_cpu_to_cuda"
 };
 
-struct starpu_codelet cuda_to_cpu_cl = {
+struct starpu_codelet cuda_to_cpu_cl =
+{
 	.where = STARPU_CPU,
 	.cpu_funcs = {cuda_to_cpu, NULL},
 	.nbuffers = 1,
@@ -54,20 +57,23 @@ void opencl_to_cpu(void *buffers[], void *arg)
 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		dst[i].x = src->x[i];
 		dst[i].y = src->y[i];
 	}
 }
 
 extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
-struct starpu_codelet cpu_to_opencl_cl = {
+struct starpu_codelet cpu_to_opencl_cl =
+{
 	.where = STARPU_OPENCL,
 	.opencl_funcs = {cpu_to_opencl_opencl_func, NULL},
 	.nbuffers = 1
 };
 
-struct starpu_codelet opencl_to_cpu_cl = {
+struct starpu_codelet opencl_to_cpu_cl =
+{
 	.where = STARPU_CPU,
 	.cpu_funcs = {opencl_to_cpu, NULL},
 	.nbuffers = 1

+ 2 - 1
examples/basic_examples/multiformat_conversion_codelets_cuda.cu

@@ -23,7 +23,8 @@ static __global__ void cpu_to_cuda_cuda(struct point *src,
 {
         unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
 
-	if (i < n) {
+	if (i < n)
+	{
 		dst->x[i] = src[i].x;
 		dst->y[i] = src[i].y;
 	}

+ 2 - 1
examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl

@@ -21,7 +21,8 @@ __kernel void cpu_to_opencl_opencl(__global struct point *src,
 				   unsigned int n)
 {
 	const unsigned int i = get_global_id(0);
-	if (i < n) {
+	if (i < n)
+	{
 		dst->x[i] = src[i].x;
 		dst->y[i] = src[i].y;
 	}

+ 4 - 2
examples/basic_examples/multiformat_types.h

@@ -18,11 +18,13 @@
 
 #define N_ELEMENTS 10
 
-struct struct_of_arrays{
+struct struct_of_arrays
+{
 	float x[N_ELEMENTS];
 	float y[N_ELEMENTS];
 };
-struct point {
+struct point
+{
 	float x, y;
 };
 

+ 6 - 3
examples/basic_examples/vector_scal.c

@@ -37,17 +37,20 @@ extern void scal_sse_func_icc(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 
-static struct starpu_perfmodel vector_scal_model = {
+static struct starpu_perfmodel vector_scal_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale"
 };
 
-static struct starpu_perfmodel vector_scal_power_model = {
+static struct starpu_perfmodel vector_scal_power_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale_power"
 };
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
 	/* CPU implementation of the codelet */
 	.cpu_funcs = {

+ 4 - 2
examples/basic_examples/vector_scal_c.c

@@ -33,12 +33,14 @@
 extern void scal_cpu_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 
-static struct starpu_perfmodel vector_scal_model = {
+static struct starpu_perfmodel vector_scal_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale_model"
 };
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
   .where = STARPU_CPU | STARPU_CUDA,
   /* CPU implementation of the codelet */
   .cpu_funcs = {scal_cpu_func, NULL},

+ 3 - 2
examples/basic_examples/vector_scal_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,7 +17,8 @@
 __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
 {
         const int i = get_global_id(0);
-        if (i < nx) {
+        if (i < nx)
+	{
                 val[i] *= factor;
         }
 }

+ 14 - 7
examples/cg/cg.c

@@ -299,7 +299,8 @@ static void cg(void)
 			/* r <- r - A x */
 			gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction); 
 		}
-		else {
+		else
+		{
 			/* r <- r - alpha q */
 			axpy_kernel(r_handle, q_handle, -alpha, nblocks);
 		}
@@ -342,28 +343,34 @@ static int check(void)
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-	        if (strcmp(argv[i], "-n") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+	        if (strcmp(argv[i], "-n") == 0)
+		{
 			n = (int long long)atoi(argv[++i]);
 			continue;
 		}
 
-	        if (strcmp(argv[i], "-maxiter") == 0) {
+	        if (strcmp(argv[i], "-maxiter") == 0)
+		{
 			i_max = atoi(argv[++i]);
 			continue;
 		}
 
-	        if (strcmp(argv[i], "-nblocks") == 0) {
+	        if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			nblocks = atoi(argv[++i]);
 			continue;
 		}
 
-	        if (strcmp(argv[i], "-no-reduction") == 0) {
+	        if (strcmp(argv[i], "-no-reduction") == 0)
+		{
 			use_reduction = 0;
 			continue;
 		}
 
-	        if (strcmp(argv[i], "-h") == 0) {
+	        if (strcmp(argv[i], "-h") == 0)
+		{
 			FPRINTF(stderr, "usage: %s [-h] [-nblocks #blocks] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]);
 			exit(-1);
 			continue;

+ 40 - 20
examples/cg/cg_kernels.c

@@ -68,12 +68,14 @@ static void accumulate_variable_cpu(void *descr[], void *cl_arg)
 	*v_dst = *v_dst + *v_src;
 }
 
-static struct starpu_perfmodel accumulate_variable_model = {
+static struct starpu_perfmodel accumulate_variable_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "accumulate_variable"
 };
 
-struct starpu_codelet accumulate_variable_cl = {
+struct starpu_codelet accumulate_variable_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {accumulate_variable_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -104,12 +106,14 @@ static void accumulate_vector_cpu(void *descr[], void *cl_arg)
 	AXPY(n, (TYPE)1.0, v_src, 1, v_dst, 1);
 }
 
-static struct starpu_perfmodel accumulate_vector_model = {
+static struct starpu_perfmodel accumulate_vector_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "accumulate_vector"
 };
 
-struct starpu_codelet accumulate_vector_cl = {
+struct starpu_codelet accumulate_vector_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {accumulate_vector_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -142,12 +146,14 @@ static void bzero_variable_cpu(void *descr[], void *cl_arg)
 	*v = (TYPE)0.0;
 }
 
-static struct starpu_perfmodel bzero_variable_model = {
+static struct starpu_perfmodel bzero_variable_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "bzero_variable"
 };
 
-struct starpu_codelet bzero_variable_cl = {
+struct starpu_codelet bzero_variable_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {bzero_variable_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -177,12 +183,14 @@ static void bzero_vector_cpu(void *descr[], void *cl_arg)
 	memset(v, 0, n*sizeof(TYPE));
 }
 
-static struct starpu_perfmodel bzero_vector_model = {
+static struct starpu_perfmodel bzero_vector_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "bzero_vector"
 };
 
-struct starpu_codelet bzero_vector_cl = {
+struct starpu_codelet bzero_vector_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {bzero_vector_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -230,12 +238,14 @@ static void dot_kernel_cpu(void *descr[], void *cl_arg)
 	*dot = *dot + local_dot;
 }
 
-static struct starpu_perfmodel dot_kernel_model = {
+static struct starpu_perfmodel dot_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "dot_kernel"
 };
 
-static struct starpu_codelet dot_kernel_cl = {
+static struct starpu_codelet dot_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dot_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -297,12 +307,14 @@ static void scal_kernel_cpu(void *descr[], void *cl_arg)
 	SCAL(n, alpha, v1, 1);
 }
 
-static struct starpu_perfmodel scal_kernel_model = {
+static struct starpu_perfmodel scal_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "scal_kernel"
 };
 
-static struct starpu_codelet scal_kernel_cl = {
+static struct starpu_codelet scal_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {scal_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -368,12 +380,14 @@ static void gemv_kernel_cpu(void *descr[], void *cl_arg)
 	GEMV("N", nx, ny, alpha, M, ld, v2, 1, beta, v1, 1);
 }
 
-static struct starpu_perfmodel gemv_kernel_model = {
+static struct starpu_perfmodel gemv_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "gemv_kernel"
 };
 
-static struct starpu_codelet gemv_kernel_cl = {
+static struct starpu_codelet gemv_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
@@ -460,12 +474,14 @@ static void scal_axpy_kernel_cpu(void *descr[], void *cl_arg)
 	AXPY(nx, p2, v2, 1, v1, 1);
 }
 
-static struct starpu_perfmodel scal_axpy_kernel_model = {
+static struct starpu_perfmodel scal_axpy_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "scal_axpy_kernel"
 };
 
-static struct starpu_codelet scal_axpy_kernel_cl = {
+static struct starpu_codelet scal_axpy_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {scal_axpy_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -528,12 +544,14 @@ static void axpy_kernel_cpu(void *descr[], void *cl_arg)
 	AXPY(nx, p1, v2, 1, v1, 1);
 }
 
-static struct starpu_perfmodel axpy_kernel_model = {
+static struct starpu_perfmodel axpy_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "axpy_kernel"
 };
 
-static struct starpu_codelet axpy_kernel_cl = {
+static struct starpu_codelet axpy_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {axpy_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -588,12 +606,14 @@ static void copy_handle_cuda(void *descr[], void *cl_arg)
 }
 #endif
 
-static struct starpu_perfmodel copy_handle_model = {
+static struct starpu_perfmodel copy_handle_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "copy_handle"
 };
 
-static struct starpu_codelet copy_handle_cl = {
+static struct starpu_codelet copy_handle_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {copy_handle_cpu, NULL},
 #ifdef STARPU_USE_CUDA

+ 18 - 9
examples/cholesky/cholesky.h

@@ -81,39 +81,48 @@ extern struct starpu_perfmodel chol_model_22;
 static void __attribute__((unused)) parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 		        char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 		        char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nbigblocks") == 0) {
+		if (strcmp(argv[i], "-nbigblocks") == 0)
+		{
 		        char *argptr;
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-pin") == 0) {
+		if (strcmp(argv[i], "-pin") == 0)
+		{
 			pinned = 1;
 		}
 
-		if (strcmp(argv[i], "-no-prio") == 0) {
+		if (strcmp(argv[i], "-no-prio") == 0)
+		{
 			noprio = 1;
 		}
 
-		if (strcmp(argv[i], "-bound") == 0) {
+		if (strcmp(argv[i], "-bound") == 0)
+		{
 			bound = 1;
 		}
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-pin] [-size size] [-nblocks nblocks] [-check]\n", argv[0]);
 		}
 	}

+ 38 - 19
examples/cholesky/cholesky_grain_tag.c

@@ -63,7 +63,8 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 	task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
 	}
 
@@ -93,15 +94,18 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j);
 	task->buffers[1].mode = STARPU_RW;
 
-	if (j == k+1) {
+	if (j == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 2, TAG11_AUX(k, reclevel), TAG22_AUX(k-1, k, j, reclevel));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
 	}
 
@@ -135,15 +139,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j);
 	task->buffers[2].mode = STARPU_RW;
 
-	if ( (i == k + 1) && (j == k +1) ) {
+	if ( (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 3, TAG22_AUX(k-1, i, j, reclevel), TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 	}
 
@@ -173,12 +180,14 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};
@@ -189,10 +198,12 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 	{
 		struct starpu_task *task = create_task_11(dataA, k, reclevel);
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 		}
 
@@ -223,7 +234,8 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 		starpu_data_unpartition(dataA, 0);
 		return;
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(reclevel == 0);
 		unsigned ndeps_tags = (nblocks - nbigblocks)*(nblocks - nbigblocks);
 
@@ -261,7 +273,8 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 	{
 		starpu_malloc((void **)A, dim*dim*sizeof(float));
 	}
-	else {
+	else
+	{
 		*A = malloc(dim*dim*sizeof(float));
 	}
 }
@@ -321,10 +334,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			}
 		}
@@ -342,10 +357,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				mat[j+i*size] = 0.0f; /* debug */
 			}
@@ -365,10 +382,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
                                 FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			}
 		}

+ 24 - 12
examples/cholesky/cholesky_implicit.c

@@ -137,7 +137,8 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
 	double flop = (1.0f*n*n*n)/3.0f;
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
-	if (bound) {
+	if (bound)
+	{
 		double res;
 		starpu_bound_compute(&res, NULL, 0);
 		FPRINTF(stderr, "Theoretical GFlops: %2.2f\n", (flop/res/1000000.0f));
@@ -152,12 +153,14 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};
@@ -203,10 +206,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			}
 		}
@@ -222,10 +227,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				mat[j+i*size] = 0.0f; /* debug */
 			}
@@ -241,7 +248,8 @@ int main(int argc, char **argv)
 		{
 			for (i = 0; i < size; i++)
 			{
-				if (i > j) {
+				if (i > j)
+				{
 					mat[j+i*size] = 0.0f; /* debug */
 				}
 			}
@@ -258,10 +266,12 @@ int main(int argc, char **argv)
 		{
 			for (i = 0; i < size; i++)
 			{
-				if (i <= j) {
+				if (i <= j)
+				{
 					FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 				}
-				else {
+				else
+				{
 					FPRINTF(stdout, ".\t");
 				}
 			}
@@ -273,10 +283,12 @@ int main(int argc, char **argv)
 		{
 			for (i = 0; i < size; i++)
 			{
-				if (i <= j) {
+				if (i <= j)
+				{
 	                                float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
 	                                float err = abs(test_mat[j +i*size] - orig);
-	                                if (err > 0.00001) {
+	                                if (err > 0.00001)
+					{
 	                                        FPRINTF(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
 	                                        assert(0);
 	                                }

+ 9 - 5
examples/cholesky/cholesky_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,7 +55,8 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __at
 			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
 				right, ld12, 1.0f, center, ld22);
 		}
-		else {
+		else
+		{
 			/* Parallel CPU kernel */
 			int rank = starpu_combined_worker_get_rank();
 
@@ -113,7 +114,8 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, __attrib
 	unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]);
 	unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]);
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
 			break;
@@ -157,7 +159,8 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 
 	unsigned z;
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 
 			/*
@@ -188,7 +191,8 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 			int ret;
 			int info;
 			ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
-			if (ret != MAGMA_SUCCESS) {
+			if (ret != MAGMA_SUCCESS)
+			{
 				fprintf(stderr, "Error in Magma: %d\n", ret);
 				STARPU_ABORT();
 			}

+ 12 - 6
examples/cholesky/cholesky_models.c

@@ -126,8 +126,10 @@ static double cuda_chol_task_22_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-struct starpu_perfmodel chol_model_11 = {
-	.per_arch = {
+struct starpu_perfmodel chol_model_11 =
+{
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_11_cost },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_11_cost }
 	},
@@ -135,8 +137,10 @@ struct starpu_perfmodel chol_model_11 = {
 	.symbol = "chol_model_11"
 };
 
-struct starpu_perfmodel chol_model_21 = {
-	.per_arch = {
+struct starpu_perfmodel chol_model_21 =
+{
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_21_cost },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_21_cost }
 	},
@@ -144,8 +148,10 @@ struct starpu_perfmodel chol_model_21 = {
 	.symbol = "chol_model_21"
 };
 
-struct starpu_perfmodel chol_model_22 = {
-	.per_arch = {
+struct starpu_perfmodel chol_model_22 =
+{
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_22_cost },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_22_cost }
 	},

+ 44 - 22
examples/cholesky/cholesky_tag.c

@@ -64,7 +64,8 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 		task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
@@ -94,20 +95,24 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!noprio && (j == k+1)) {
+	if (!noprio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 
 	int ret = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 exit(0);
         }
@@ -141,20 +146,24 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].mode = STARPU_RW;
 
-	if (!noprio && (i == k + 1) && (j == k +1) ) {
+	if (!noprio && (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 
 	int ret = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 exit(0);
         }
@@ -183,12 +192,15 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	{
 		struct starpu_task *task = create_task_11(dataA, k);
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			int ret = starpu_task_submit(task);
-                        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+                        if (STARPU_UNLIKELY(ret == -ENODEV))
+			{
                                 FPRINTF(stderr, "No worker may execute this task\n");
                                 exit(0);
                         }
@@ -209,7 +221,8 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
 	/* schedule the codelet */
 	int ret = starpu_task_submit(entry_task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 exit(0);
         }
@@ -243,7 +256,8 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 	{
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 	} 
-	else {
+	else
+	{
 		*A = malloc(dim*dim*sizeof(float));
 	}
 }
@@ -258,12 +272,14 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};
@@ -311,10 +327,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			}
 		}
@@ -332,10 +350,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				mat[j+i*size] = 0.0f; /* debug */
 			}
@@ -355,10 +375,12 @@ int main(int argc, char **argv)
 	{
 		for (i = 0; i < size; i++)
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			}
 		}

+ 28 - 14
examples/cholesky/cholesky_tile_tag.c

@@ -73,7 +73,8 @@ static struct starpu_task * create_task_11(unsigned k, unsigned nblocks)
 	task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
@@ -110,15 +111,18 @@ static void create_task_21(unsigned k, unsigned j)
 	task->buffers[1].handle = A_state[j][k]; 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (j == k+1) {
+	if (j == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 
@@ -159,15 +163,18 @@ static void create_task_22(unsigned k, unsigned i, unsigned j)
 	task->buffers[2].handle = A_state[j][i]; 
 	task->buffers[2].mode = STARPU_RW;
 
-	if ( (i == k + 1) && (j == k +1) ) {
+	if ( (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 
@@ -195,10 +202,12 @@ static void cholesky_no_stride(void)
 	{
 		struct starpu_task *task = create_task_11(k, nblocks);
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 		}
 		
@@ -251,7 +260,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 			A[y][x] = malloc(BLOCKSIZE*BLOCKSIZE*sizeof(float));
 			assert(A[y][x]);
 		}
@@ -261,7 +271,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 #ifdef STARPU_HAVE_POSIX_MEMALIGN
 			posix_memalign((void **)&A[y][x], 128, BLOCKSIZE*BLOCKSIZE*sizeof(float));
 #else
@@ -277,7 +288,8 @@ int main(int argc, char **argv)
 	 * */
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
-	if (x <= y) {
+	if (x <= y)
+	{
 		for (i = 0; i < BLOCKSIZE; i++)
 		for (j = 0; j < BLOCKSIZE; j++)
 		{
@@ -293,7 +305,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
 		}
@@ -304,7 +317,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 			starpu_data_unregister(A_state[y][x]);
 		}
 	}

+ 9 - 5
examples/common/blas_model.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,7 +22,8 @@
 
 double gemm_cost(struct starpu_buffer_descr *descr);
 
-static struct starpu_perfmodel starpu_sgemm_model = {
+static struct starpu_perfmodel starpu_sgemm_model =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = "sgemm_atlas"
@@ -33,12 +34,14 @@ static struct starpu_perfmodel starpu_sgemm_model = {
 #endif
 };
 
-static struct starpu_perfmodel starpu_sgemm_model_common = {
+static struct starpu_perfmodel starpu_sgemm_model_common =
+{
 	.cost_model = gemm_cost,
 	.type = STARPU_COMMON,
 };
 
-static struct starpu_perfmodel starpu_dgemm_model = {
+static struct starpu_perfmodel starpu_dgemm_model =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = "dgemm_atlas"
@@ -49,7 +52,8 @@ static struct starpu_perfmodel starpu_dgemm_model = {
 #endif
 };
 
-static struct starpu_perfmodel starpu_dgemm_model_common = {
+static struct starpu_perfmodel starpu_dgemm_model_common =
+{
 	.cost_model = gemm_cost,
 	.type = STARPU_COMMON,
 };

+ 14 - 7
examples/filters/fblock.c

@@ -39,9 +39,12 @@ void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz)
 {
         int i, j, k;
         FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz);
-        for(k=0 ; k<nz ; k++) {
-                for(j=0 ; j<ny ; j++) {
-                        for(i=0 ; i<nx ; i++) {
+        for(k=0 ; k<nz ; k++)
+	{
+                for(j=0 ; j<ny ; j++)
+		{
+                        for(i=0 ; i<nx ; i++)
+			{
                                 FPRINTF(stderr, "%2d ", block[(k*ldz)+(j*ldy)+i]);
                         }
                         FPRINTF(stderr,"\n");
@@ -74,9 +77,12 @@ int main(int argc, char **argv)
 
         block = (int*)malloc(NX*NY*NZ*sizeof(block[0]));
         assert(block);
-        for(k=0 ; k<NZ ; k++) {
-                for(j=0 ; j<NY ; j++) {
-                        for(i=0 ; i<NX ; i++) {
+        for(k=0 ; k<NZ ; k++)
+	{
+                for(j=0 ; j<NY ; j++)
+		{
+                        for(i=0 ; i<NX ; i++)
+			{
                                 block[(k*NX*NY)+(j*NX)+i] = n++;
                         }
                 }
@@ -138,7 +144,8 @@ int main(int argc, char **argv)
                 task->cl_arg = &multiplier;
 
                 ret = starpu_task_submit(task);
-                if (ret) {
+                if (ret)
+		{
                         FPRINTF(stderr, "Error when submitting task\n");
                         exit(ret);
                 }

+ 5 - 3
examples/filters/fblock_cpu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,8 +27,10 @@ void cpu_func(void *buffers[], void *cl_arg)
         unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
         unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
 
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] = *factor;
                 }

+ 5 - 3
examples/filters/fblock_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,8 +20,10 @@
 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)
 {
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] = factor;
                 }

+ 3 - 2
examples/filters/fblock_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,7 +19,8 @@
 #include <starpu_opencl.h>
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
-do {                                                        \
+do                                                          \
+{							    \
 	int err;                                            \
 	err = clSetKernelArg(kernel, n, size, ptr);         \
 	if (err != CL_SUCCESS)                              \

+ 5 - 3
examples/filters/fblock_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,8 +17,10 @@
 __kernel void fblock_opencl(__global int* block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
 {
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] = factor;
                 }

+ 12 - 6
examples/filters/fmatrix.c

@@ -34,7 +34,8 @@ void cpu_func(void *buffers[], void *cl_arg)
         /* local copy of the matrix pointer */
         int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
 
-        for(j=0; j<ny ; j++) {
+        for(j=0; j<ny ; j++)
+	{
                 for(i=0; i<nx ; i++)
                         val[(j*ld)+i] = *factor;
         }
@@ -46,8 +47,10 @@ int main(int argc, char **argv)
         int matrix[NX*NY];
 
         FPRINTF(stderr,"IN  Matrix: \n");
-        for(j=0 ; j<NY ; j++) {
-                for(i=0 ; i<NX ; i++) {
+        for(j=0 ; j<NY ; j++)
+	{
+                for(i=0 ; i<NX ; i++)
+		{
                         matrix[(j*NX)+i] = n++;
                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
                 }
@@ -56,7 +59,8 @@ int main(int argc, char **argv)
         FPRINTF(stderr,"\n");
 
         starpu_data_handle_t handle;
-        struct starpu_codelet cl = {
+        struct starpu_codelet cl =
+	{
                 .where = STARPU_CPU,
                 .cpu_funcs = {cpu_func, NULL},
                 .nbuffers = 1
@@ -96,8 +100,10 @@ int main(int argc, char **argv)
 
         /* Print result matrix */
         FPRINTF(stderr,"OUT Matrix: \n");
-        for(j=0 ; j<NY ; j++) {
-                for(i=0 ; i<NX ; i++) {
+        for(j=0 ; j<NY ; j++)
+	{
+                for(i=0 ; i<NX ; i++)
+		{
                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
                 }
                 FPRINTF(stderr,"\n");

+ 2 - 1
examples/filters/fvector.c

@@ -42,7 +42,8 @@ int main(int argc, char **argv)
         starpu_data_handle_t handle;
         int factor=1;
 
-        struct starpu_codelet cl = {
+        struct starpu_codelet cl =
+	{
                 .where = STARPU_CPU,
                 .cpu_funcs = {cpu_func, NULL},
                 .nbuffers = 1

+ 42 - 21
examples/heat/dw_factolu.c

@@ -97,7 +97,8 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 	/* we did task 22k,i,j */
 	advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE;
 	
-	if ( (i == j) && (i == k+1)) {
+	if ( (i == j) && (i == k+1))
+	{
 		/* we now reduce the LU22 part (recursion appears there) */
 		cl_args *u11arg = malloc(sizeof(cl_args));
 
@@ -123,14 +124,17 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 	}
 
 	/* 11k+1 + 22k,k+1,j => 21 k+1,j */
-	if ( i == k + 1) {
+	if ( i == k + 1)
+	{
 		uint8_t dep;
 		/* 11 k+1*/
 		dep = advance_11[(k+1)];
-		if (dep & DONE) {
+		if (dep & DONE)
+		{
 			/* try to push the task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1) + j*nblocks], STARTED);
-				if ((u & STARTED) == 0) {
+				if ((u & STARTED) == 0)
+				{
 					/* we are the only one that should 
 					 * launch that task */
 					cl_args *u21a = malloc(sizeof(cl_args));
@@ -159,14 +163,17 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 	}
 
 	/* 11k + 22k-1,i,k => 12 k,i */
-	if (j == k + 1) {
+	if (j == k + 1)
+	{
 		uint8_t dep;
 		/* 11 k+1*/
 		dep = advance_11[(k+1)];
-		if (dep & DONE) {
+		if (dep & DONE)
+		{
 			/* try to push the task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1)*nblocks + i], STARTED);
-				 if ((u & STARTED) == 0) {
+				 if ((u & STARTED) == 0)
+				 {
 					/* we are the only one that should launch that task */
 					cl_args *u12a = malloc(sizeof(cl_args));
 
@@ -217,7 +224,8 @@ void dw_callback_v2_codelet_update_u12(void *argcb)
 		{
 			/* perhaps we may schedule the 22 i,args->k,slicey task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + slicey*nblocks + k], STARTED);
-                        if ((u & STARTED) == 0) {
+                        if ((u & STARTED) == 0)
+			{
 				/* update that square matrix */
 				cl_args *u22a = malloc(sizeof(cl_args));
 
@@ -276,7 +284,8 @@ void dw_callback_v2_codelet_update_u21(void *argcb)
 		{
 			/* perhaps we may schedule the 22 i,args->k,slicey task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + k*nblocks + slicex], STARTED);
-                        if ((u & STARTED) == 0) {
+                        if ((u & STARTED) == 0)
+			{
 				/* update that square matrix */
 				cl_args *u22a = malloc(sizeof(cl_args));
 
@@ -340,16 +349,20 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
 			/* can we launch 12i,slice ? */
 			uint8_t deps12;
-			if (i == 0) {
+			if (i == 0)
+			{
 				deps12 = DONE;
 			}
-			else {
+			else
+			{
 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];		
 			}
-			if (deps12 & DONE) {
+			if (deps12 & DONE)
+			{
 				/* we may perhaps launch the task 12i,slice */
 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
-				 if ((u & STARTED) == 0) {
+				 if ((u & STARTED) == 0)
+				 {
 					/* we are the only one that should launch that task */
 					cl_args *u12a = malloc(sizeof(cl_args));
 
@@ -377,16 +390,20 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 			}
 
 			/* can we launch 21i,slice ? */
-			if (i == 0) {
+			if (i == 0)
+			{
 				deps12 = DONE;
 			}
-			else {
+			else
+			{
 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];		
 			}
-			if (deps12 & DONE) {
+			if (deps12 & DONE)
+			{
 				/* we may perhaps launch the task 12i,slice */
 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
-				 if ((u & STARTED) == 0) {
+				 if ((u & STARTED) == 0)
+				 {
 					/* we are the only one that should launch that task */
 					cl_args *u21a = malloc(sizeof(cl_args));
 
@@ -700,7 +717,8 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 		starpu_malloc((void **)B, (size_t)dim*sizeof(float));
 	} 
-	else {
+	else
+	{
 		*A = malloc((size_t)dim*dim*sizeof(float));
 		STARPU_ASSERT(*A);
 		*B = malloc((size_t)dim*sizeof(float));
@@ -730,19 +748,22 @@ void dw_factoLU(float *matA, unsigned size,
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
 			size, size, sizeof(float));
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
-	switch (version) {
+	switch (version)
+	{
 		case 1:
 			dw_codelet_facto(dataA, nblocks);
 			break;

+ 22 - 11
examples/heat/dw_factolu.h

@@ -41,7 +41,8 @@
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
-typedef struct {
+typedef struct
+{
 	starpu_data_handle_t dataA;
 	unsigned i;
 	unsigned j;
@@ -89,10 +90,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 		for (i = 0; i < size; i++)
 		{
-/*			if (i <= j) { */
+/*			if (i <= j)
+			{ */
 				FPRINTF(stdout, "%2.2f\t", LU[j +i*size]);
 /*			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			} */
 		}
@@ -107,10 +110,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 		for (i = 0; i < size; i++)
 		{
-/*			if (i <= j) { */
+/*			if (i <= j)
+			{ */
 				FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
 /*			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			} */
 		}
@@ -123,10 +128,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 		for (i = 0; i < size; i++)
 		{
-/*			if (i <= j) { */
+/*			if (i <= j)
+			{ */
 				FPRINTF(stdout, "%2.2f\t", U[j +i*size]);
 /*			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			} */
 		}
@@ -155,10 +162,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 		for (i = 0; i < size; i++)
 		{
-	/*		if (i <= j) { */
+	/*		if (i <= j)
+			{ */
 	      			FPRINTF(stdout, "%2.2f\t", A[j +i*size]);
 	/*		}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			} */
 		}
@@ -172,10 +181,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 		for (i = 0; i < size; i++)
 		{
-	/*		if (i <= j) { */
+	/*		if (i <= j)
+			{ */
 	      			FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
 	/*		}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 			} */
 		}

+ 40 - 20
examples/heat/dw_factolu_grain.c

@@ -42,7 +42,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 	return task;
 }
 
-static struct starpu_codelet cl11 = {
+static struct starpu_codelet cl11 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
 #ifdef STARPU_USE_CUDA
@@ -68,14 +69,16 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 	task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k, tag_prefix), 1, TAG22(k-1, k, k, tag_prefix));
 	}
 
 	return task;
 }
 
-static struct starpu_codelet cl12 = {
+static struct starpu_codelet cl12 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
 #ifdef STARPU_USE_CUDA
@@ -99,22 +102,26 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (i == k+1) {
+	if (i == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, i, k, tag_prefix));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 1, TAG11(k, tag_prefix));
 	}
 
 	starpu_task_submit(task);
 }
 
-static struct starpu_codelet cl21 = {
+static struct starpu_codelet cl21 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
 #ifdef STARPU_USE_CUDA
@@ -136,22 +143,26 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (j == k+1) {
+	if (j == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, k, j, tag_prefix));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 1, TAG11(k, tag_prefix));
 	}
 
 	starpu_task_submit(task);
 }
 
-static struct starpu_codelet cl22 = {
+static struct starpu_codelet cl22 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
 #ifdef STARPU_USE_CUDA
@@ -177,15 +188,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].mode = STARPU_RW;
 
-	if ( (i == k + 1) && (j == k +1) ) {
+	if ( (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 3, TAG22(k-1, i, j, tag_prefix), TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 2, TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
 	}
 
@@ -207,12 +221,14 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 	unsigned nblocks = size / blocksize;
 	unsigned maxk = inner_size / blocksize;
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};
@@ -235,10 +251,12 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 		struct starpu_task *task = create_task_11(dataA, k, tag_prefix);
 
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 		}
 		
@@ -272,7 +290,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 		starpu_data_unpartition(dataA, 0);		
 		return;
 	}
-	else {
+	else
+	{
 		/*
 		 * call dw_factoLU_grain_inner recursively in the remaining blocks
 		 */
@@ -301,7 +320,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 		{
 			dw_factoLU_grain_inner(newmatA, size-inner_size, (size-inner_size)/2, ld, blocksize/2, tag_prefix+1);
 		}
-		else { */
+		else
+		{ */
 			dw_factoLU_grain_inner(newmatA, size-inner_size, size-inner_size, ld, blocksize/2, tag_prefix+1);
 /*		} */
 	}

+ 12 - 6
examples/heat/dw_factolu_kernels.c

@@ -121,7 +121,8 @@ static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attr
 	cublasStatus status;
 #endif
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			SGEMM("N", "N",	dy, dx, dz, 
 				-1.0f, left, ld21, right, ld12,
@@ -168,7 +169,8 @@ void dw_cublas_codelet_update_u22(void *descr[], void *_args)
  * U12
  */
 
-static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args) {
+static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args)
+{
 	float *sub11;
 	float *sub12;
 
@@ -186,7 +188,8 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribut
 #endif
 
 	/* solve L11 U12 = A12 (find U12) */
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			STRSM("L", "L", "N", "N",
 					 nx12, ny12, 1.0f, sub11, ld11, sub12, ld12);
@@ -231,7 +234,8 @@ void dw_cublas_codelet_update_u12(void *descr[], void *_args)
  * U21
  */
 
-static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args) {
+static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args)
+{
 	float *sub11;
 	float *sub21;
 
@@ -248,7 +252,8 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribut
 	cublasStatus status;
 #endif
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
 			break;
@@ -317,7 +322,8 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribut
 
 	unsigned long z;
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			for (z = 0; z < nx; z++)
 			{

+ 36 - 18
examples/heat/dw_factolu_tag.c

@@ -44,7 +44,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 	return task;
 }
 
-static struct starpu_codelet cl11 = {
+static struct starpu_codelet cl11 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
 #ifdef STARPU_USE_CUDA
@@ -71,14 +72,16 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 		task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
 	return task;
 }
 
-static struct starpu_codelet cl12 = {
+static struct starpu_codelet cl12 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
 #ifdef STARPU_USE_CUDA
@@ -102,22 +105,26 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!no_prio && (i == k+1)) {
+	if (!no_prio && (i == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, i), 1, TAG11(k));
 	}
 
 	starpu_task_submit(task);
 }
 
-static struct starpu_codelet cl21 = {
+static struct starpu_codelet cl21 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
 #ifdef STARPU_USE_CUDA
@@ -139,22 +146,26 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!no_prio && (j == k+1)) {
+	if (!no_prio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 
 	starpu_task_submit(task);
 }
 
-static struct starpu_codelet cl22 = {
+static struct starpu_codelet cl22 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
 #ifdef STARPU_USE_CUDA
@@ -180,15 +191,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].mode = STARPU_RW;
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, i), TAG21(k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, i), TAG21(k, j));
 	}
 
@@ -214,10 +228,12 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		struct starpu_task *task = create_task_11(dataA, k);
 
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 		}
 		
@@ -280,12 +296,14 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};

+ 6 - 3
examples/heat/dw_sparse_cg.c

@@ -63,7 +63,8 @@ static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t
 	{
 		rowptr[row] = pos;
 
-		if (row > 0) {
+		if (row > 0)
+		{
 			nzval[pos] = 1.0f;
 			colind[pos] = row-1;
 			pos++;
@@ -73,7 +74,8 @@ static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t
 		colind[pos] = row;
 		pos++;
 
-		if (row < size - 1) {
+		if (row < size - 1)
+		{
 			nzval[pos] = 1.0f;
 			colind[pos] = row+1;
 			pos++;
@@ -312,7 +314,8 @@ void iteration_cg(void *problem)
 		/* we did not reach the stop condition yet */
 		launch_new_cg_iteration(problem);
 	}
-	else {
+	else
+	{
 		/* we may stop */
 		FPRINTF(stdout, "We are done ... after %d iterations \n", pb->i - 1);
 		FPRINTF(stdout, "i : %d\n\tdelta_new %2.5f\n", pb->i, pb->delta_new);

+ 13 - 7
examples/heat/dw_sparse_cg.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -46,7 +46,8 @@ static unsigned usecpu = 0;
 static unsigned blocks = 512;
 static unsigned grids  = 8;
 
-struct cg_problem {
+struct cg_problem
+{
 	starpu_data_handle_t ds_matrixA;
 	starpu_data_handle_t ds_vecx;
 	starpu_data_handle_t ds_vecb;
@@ -71,23 +72,28 @@ struct cg_problem {
 static void __attribute__((unused)) parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-block") == 0) {
+		if (strcmp(argv[i], "-block") == 0)
+		{
 			char *argptr;
 			blocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-grid") == 0) {
+		if (strcmp(argv[i], "-grid") == 0)
+		{
 			char *argptr;
 			grids = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-cpu") == 0) {
+		if (strcmp(argv[i], "-cpu") == 0)
+		{
 			usecpu = 1;
 		}
 	}

+ 72 - 37
examples/heat/heat.c

@@ -36,65 +36,80 @@ extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint3
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-cg") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-cg") == 0)
+		{
 			use_cg = 1;
 		}
 
-		if (strcmp(argv[i], "-shape") == 0) {
+		if (strcmp(argv[i], "-shape") == 0)
+		{
 		        char *argptr;
 			shape = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nthick") == 0) {
+		if (strcmp(argv[i], "-nthick") == 0)
+		{
 		        char *argptr;
 			nthick = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-ntheta") == 0) {
+		if (strcmp(argv[i], "-ntheta") == 0)
+		{
 		        char *argptr;
 			ntheta = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 		        char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nbigblocks") == 0) {
+		if (strcmp(argv[i], "-nbigblocks") == 0)
+		{
 		        char *argptr;
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-v1") == 0) {
+		if (strcmp(argv[i], "-v1") == 0)
+		{
 			version = 1;
 		}
 
-		if (strcmp(argv[i], "-v2") == 0) {
+		if (strcmp(argv[i], "-v2") == 0)
+		{
 			version = 2;
 		}
 
-		if (strcmp(argv[i], "-v3") == 0) {
+		if (strcmp(argv[i], "-v3") == 0)
+		{
 			version = 3;
 		}
 
-		if (strcmp(argv[i], "-v4") == 0) {
+		if (strcmp(argv[i], "-v4") == 0)
+		{
 			version = 4;
 		}
 
-		if (strcmp(argv[i], "-pin") == 0) {
+		if (strcmp(argv[i], "-pin") == 0)
+		{
 			pinned = 1;
 		}
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 		}
 
-		if (strcmp(argv[i], "-no-prio") == 0) {
+		if (strcmp(argv[i], "-no-prio") == 0)
+		{
 			no_prio = 1;
 		}
 
-		if (strcmp(argv[i], "-size") == 0) {
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			unsigned size = strtol(argv[++i], &argptr, 10);
 			nthick = 130;
@@ -102,7 +117,8 @@ static void parse_args(int argc, char **argv)
 			STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
 		}
 	}
@@ -136,11 +152,14 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 	ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
 
 	/* B */
-	if (side_tr) {
+	if (side_tr)
+	{
 		/* lower D is actually B here */
 		xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
 		yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
-	} else {
+	}
+	else
+	{
 		/* upper */
 		xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
 		yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
@@ -150,24 +169,31 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 	yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
 
 	/* now look for the actual psi node */
-	if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) {
+	if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))
+	{
 		/* A nothing to do */
-	} else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) {
+	}
+	else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))
+	{
 		/* psi matches C */
 		/* swap A and C coordinates  */
 		tmp = xa; xa = xc; xc = tmp;
 		tmp = ya; ya = yc; yc = tmp;
-	} else if
-		(side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) {
+	}
+	else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)))
+	{
 		/* psi is D (that was stored in C) XXX */
 		tmp = xa; xa = xb; xb = tmp;
 		tmp = ya; ya = yb; yb = tmp;
-	} else if
-		(!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) {
+	}
+	else if	(!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)))
+	{
 		/* psi is C */
 		tmp = xa; xa = xb; xb = tmp;
 		tmp = ya; ya = yb; yb = tmp;
-	} else {
+	}
+	else
+	{
 		/* the psi node is not a node of the current triangle */
 		return 0.0f;
 	}
@@ -178,7 +204,8 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 
 	denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
 
-	switch (xy) {
+	switch (xy)
+	{
 		case X:
 			value = (yc - yb)/denom;
 			break;
@@ -220,11 +247,14 @@ static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsig
 	xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
 	yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
 
-	if (side_tr) {
+	if (side_tr)
+	{
 		/* lower */
 		xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
 		yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
-	} else {
+	}
+	else
+	{
 		xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
 		yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
 	}
@@ -314,8 +344,6 @@ done:
 
 static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
 {
-
-
 	unsigned i;
 
 	/* solve the actual problem LU X = B */
@@ -452,7 +480,8 @@ void build_mesh(point *mesh)
 			float r;
 			r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
 
-			switch (shape) {
+			switch (shape)
+			{
 				default:
 				case 0:
 					mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
@@ -604,11 +633,13 @@ static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uin
 			float val;
 			unsigned nodeneighbour =  neighbours[neighbour];
 
-			if (nodeneighbour < newsize) {
+			if (nodeneighbour < newsize)
+			{
 
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 	
-				if (val != 0.0f) {
+				if (val != 0.0f)
+				{
 					*nzval = realloc(*nzval, (pos+1)*sizeof(float));
 					*colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
 	
@@ -648,7 +679,8 @@ static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned news
 		{
 			unsigned long nodeneighbour =  neighbours[neighbour];
 
-			if (nodeneighbour < newsize) {
+			if (nodeneighbour < newsize)
+			{
 				float val;
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 				A[j+ (unsigned long)newsize*nodeneighbour] = val;
@@ -686,7 +718,8 @@ int main(int argc, char **argv)
 
 	/* we can either use a direct method (LU decomposition here) or an 
 	 * iterative method (conjugate gradient here) */
-	if (use_cg) {
+	if (use_cg)
+	{
 		unsigned nnz;
 		float *nzval;
 		uint32_t *colind;
@@ -718,7 +751,8 @@ int main(int argc, char **argv)
 		}
 	
 	}
-	else {
+	else
+	{
 
 		/* unfortunately CUDA does not allow late memory registration, 
 		 * we need to do the malloc using CUDA itself ... */
@@ -733,7 +767,8 @@ int main(int argc, char **argv)
 
 		STARPU_ASSERT(newsize % nblocks == 0);
 
-		switch (version) {
+		switch (version)
+		{
 			case 1:
 			case 2:
 				dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);

+ 2 - 1
examples/heat/heat.h

@@ -52,7 +52,8 @@
 #define NODE_TO_THICK(n)		((n) % nthick)
 #define NODE_TO_THETA(n)		((n) / nthick)
 
-typedef struct point_t {
+typedef struct point_t
+{
 	float x;
 	float y;
 } point;

+ 8 - 4
examples/heat/heat_display.c

@@ -52,13 +52,15 @@ static void generate_graph(void)
 			float colorA_G, colorB_G, colorC_G, colorD_G;
 			float colorA_B, colorB_B, colorC_B, colorD_B;
 
-			if (maxval == minval) {
+			if (maxval == minval)
+			{
 				colorA_R = 1.0f; colorA_G = 1.0f; colorA_B = 1.0f;
 				colorB_R = 1.0f; colorB_G = 1.0f; colorB_B = 1.0f;
 				colorC_R = 1.0f; colorC_G = 1.0f; colorC_B = 1.0f;
 				colorD_R = 1.0f; colorD_G = 1.0f; colorD_B = 1.0f;
 			}
-			else {
+			else
+			{
 				float amplitude = maxval - minval;
 
 				float coeffA, coeffB, coeffC, coeffD;
@@ -84,7 +86,8 @@ static void generate_graph(void)
 				colorD_G = coeffD<0.5f?1.0f:2.0*(1 - coeffD)*1.0f;
 			}
 
-			if (printmesh) {
+			if (printmesh)
+			{
 				glColor3f (0.0f, 0.0f, 0.0f);
 				glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
 				glLineWidth(3.0f);
@@ -142,7 +145,8 @@ static void display(void)
 
 static void pressKey(unsigned char key, int x __attribute__ ((unused)), int y  __attribute__ ((unused)))
 {
-	switch (key) {
+	switch (key)
+	{
 		case 'q':
 			exit(0);
 		default:

+ 17 - 9
examples/heat/lu_kernels_model.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -214,9 +214,11 @@ double task_22_cost_cpu(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-struct starpu_perfmodel model_11 = {
+struct starpu_perfmodel model_11 =
+{
 	.cost_model = task_11_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_11_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_11_cost_cuda }
 	},
@@ -230,9 +232,11 @@ struct starpu_perfmodel model_11 = {
 #endif
 };
 
-struct starpu_perfmodel model_12 = {
+struct starpu_perfmodel model_12 =
+{
 	.cost_model = task_12_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_12_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_12_cost_cuda }
 	},
@@ -246,9 +250,11 @@ struct starpu_perfmodel model_12 = {
 #endif
 };
 
-struct starpu_perfmodel model_21 = {
+struct starpu_perfmodel model_21 =
+{
 	.cost_model = task_21_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_21_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_21_cost_cuda }
 	},
@@ -262,9 +268,11 @@ struct starpu_perfmodel model_21 = {
 #endif
 };
 
-struct starpu_perfmodel model_22 = {
+struct starpu_perfmodel model_22 =
+{
 	.cost_model = task_22_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_22_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_22_cost_cuda }
 	},

+ 2 - 1
examples/incrementer/incrementer.c

@@ -109,7 +109,8 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0],
                 float_array[1], float_array[2], float_array[3]);
 
-	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) {
+	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3])
+	{
 		FPRINTF(stderr, "Incorrect result\n");
 		ret = 1;
 	}

+ 27 - 13
examples/lu/lu_example.c

@@ -46,41 +46,51 @@ TYPE **A_blocks;
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 		}
 
-		if (strcmp(argv[i], "-piv") == 0) {
+		if (strcmp(argv[i], "-piv") == 0)
+		{
 			pivot = 1;
 		}
 
-		if (strcmp(argv[i], "-no-stride") == 0) {
+		if (strcmp(argv[i], "-no-stride") == 0)
+		{
 			no_stride = 1;
 		}
 
-		if (strcmp(argv[i], "-profile") == 0) {
+		if (strcmp(argv[i], "-profile") == 0)
+		{
 			profile = 1;
 		}
 
-		if (strcmp(argv[i], "-bound") == 0) {
+		if (strcmp(argv[i], "-bound") == 0)
+		{
 			bound = 1;
 		}
-		if (strcmp(argv[i], "-bounddeps") == 0) {
+		if (strcmp(argv[i], "-bounddeps") == 0)
+		{
 			bound = 1;
 			bounddeps = 1;
 		}
-		if (strcmp(argv[i], "-bounddepsprio") == 0) {
+		if (strcmp(argv[i], "-bounddepsprio") == 0)
+		{
 			bound = 1;
 			bounddeps = 1;
 			boundprio = 1;
@@ -344,14 +354,18 @@ int main(int argc, char **argv)
 		starpu_bus_profiling_helper_display_summary();
 	}
 
-	if (bound) {
+	if (bound)
+	{
 		double min;
 		starpu_bound_stop();
-		if (bounddeps) {
+		if (bounddeps)
+		{
 			FILE *f = fopen("lu.pl", "w");
 			starpu_bound_print_lp(f);
 			FPRINTF(stderr,"system printed to lu.pl\n");
-		} else {
+		}
+		else
+		{
 			starpu_bound_compute(&min, NULL, 0);
 			if (min != 0.)
 				FPRINTF(stderr, "theoretical min: %f ms\n", min);

+ 28 - 14
examples/lu/xlu.c

@@ -65,7 +65,8 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 		task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
@@ -86,15 +87,18 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!no_prio && (j == k+1)) {
+	if (!no_prio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
 	}
 
@@ -113,15 +117,18 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!no_prio && (i == k+1)) {
+	if (!no_prio && (i == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, i), 1, TAG11(k));
 	}
 
@@ -144,15 +151,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
 	task->buffers[2].mode = STARPU_RW;
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
 	}
 
@@ -178,10 +188,12 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		struct starpu_task *task = create_task_11(dataA, k);
 
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 		}
 		
@@ -236,12 +248,14 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 	/* We already enforce deps by hand */
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};

+ 2 - 1
examples/lu/xlu.h

@@ -106,7 +106,8 @@ extern struct starpu_perfmodel model_12;
 extern struct starpu_perfmodel model_21;
 extern struct starpu_perfmodel model_22;
 
-struct piv_s {
+struct piv_s
+{
 	unsigned *piv; /* complete pivot array */
 	unsigned first; /* first element */
 	unsigned last; /* last element */

+ 4 - 2
examples/lu/xlu_implicit.c

@@ -143,12 +143,14 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 	
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};

+ 4 - 2
examples/lu/xlu_implicit_pivot.c

@@ -189,12 +189,14 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};

+ 38 - 19
examples/lu/xlu_kernels.c

@@ -51,7 +51,8 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 	cudaError_t cures;
 #endif
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			CPU_GEMM("N", "N", dy, dx, dz, 
 				(TYPE)-1.0, right, ld21, left, ld12,
@@ -59,7 +60,8 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 			break;
 
 #ifdef STARPU_USE_CUDA
-		case 1: {
+		case 1:
+		{
 			CUBLAS_GEMM('n', 'n', dx, dy, dz,
 				*(CUBLAS_TYPE*)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12,
 				*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE *)center, ld22);
@@ -92,7 +94,8 @@ void STARPU_LU(cublas_u22)(void *descr[], void *_args)
 }
 #endif /* STARPU_USE_CUDA */
 
-static struct starpu_perfmodel STARPU_LU(model_22) = {
+static struct starpu_perfmodel STARPU_LU(model_22) =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
@@ -103,7 +106,8 @@ static struct starpu_perfmodel STARPU_LU(model_22) = {
 #endif
 };
 
-struct starpu_codelet cl22 = {
+struct starpu_codelet cl22 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u22), NULL},
 #ifdef STARPU_USE_CUDA
@@ -138,7 +142,8 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 #endif
 
 	/* solve L11 U12 = A12 (find U12) */
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			CPU_TRSM("L", "L", "N", "N", nx12, ny12,
 					(TYPE)1.0, sub11, ld11, sub12, ld12);
@@ -175,7 +180,8 @@ void STARPU_LU(cublas_u12)(void *descr[], void *_args)
 }
 #endif /* STARPU_USE_CUDA */
 
-static struct starpu_perfmodel STARPU_LU(model_12) = {
+static struct starpu_perfmodel STARPU_LU(model_12) =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
@@ -186,7 +192,8 @@ static struct starpu_perfmodel STARPU_LU(model_12) = {
 #endif
 };
 
-struct starpu_codelet cl12 = {
+struct starpu_codelet cl12 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u12), NULL},
 #ifdef STARPU_USE_CUDA
@@ -219,7 +226,8 @@ static inline void STARPU_LU(common_u21)(void *descr[],
 	cublasStatus status;
 #endif
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			CPU_TRSM("R", "U", "N", "U", nx21, ny21,
 					(TYPE)1.0, sub11, ld11, sub21, ld21);
@@ -255,7 +263,8 @@ void STARPU_LU(cublas_u21)(void *descr[], void *_args)
 }
 #endif 
 
-static struct starpu_perfmodel STARPU_LU(model_21) = {
+static struct starpu_perfmodel STARPU_LU(model_21) =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
@@ -266,7 +275,8 @@ static struct starpu_perfmodel STARPU_LU(model_21) = {
 #endif
 };
 
-struct starpu_codelet cl21 = {
+struct starpu_codelet cl21 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u21), NULL},
 #ifdef STARPU_USE_CUDA
@@ -292,7 +302,8 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 
 	unsigned long z;
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			for (z = 0; z < nx; z++)
 			{
@@ -350,7 +361,8 @@ void STARPU_LU(cublas_u11)(void *descr[], void *_args)
 }
 #endif /* STARPU_USE_CUDA */
 
-static struct starpu_perfmodel STARPU_LU(model_11) = {
+static struct starpu_perfmodel STARPU_LU(model_11) =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
@@ -361,7 +373,8 @@ static struct starpu_perfmodel STARPU_LU(model_11) = {
 #endif
 };
 
-struct starpu_codelet cl11 = {
+struct starpu_codelet cl11 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u11), NULL},
 #ifdef STARPU_USE_CUDA
@@ -391,7 +404,8 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 	unsigned *ipiv = piv->piv;
 	unsigned first = piv->first;
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			for (z = 0; z < nx; z++)
 			{
@@ -486,7 +500,8 @@ void STARPU_LU(cublas_u11_pivot)(void *descr[], void *_args)
 }
 #endif /* STARPU_USE_CUDA */
 
-static struct starpu_perfmodel STARPU_LU(model_11_pivot) = {
+static struct starpu_perfmodel STARPU_LU(model_11_pivot) =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
@@ -497,7 +512,8 @@ static struct starpu_perfmodel STARPU_LU(model_11_pivot) = {
 #endif
 };
 
-struct starpu_codelet cl11_pivot = {
+struct starpu_codelet cl11_pivot =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u11_pivot), NULL},
 #ifdef STARPU_USE_CUDA
@@ -526,7 +542,8 @@ static inline void STARPU_LU(common_pivot)(void *descr[],
 	unsigned *ipiv = piv->piv;
 	unsigned first = piv->first;
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			for (row = 0; row < nx; row++)
 			{
@@ -571,7 +588,8 @@ void STARPU_LU(cublas_pivot)(void *descr[], void *_args)
 
 #endif /* STARPU_USE_CUDA */
 
-static struct starpu_perfmodel STARPU_LU(model_pivot) = {
+static struct starpu_perfmodel STARPU_LU(model_pivot) =
+{
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
@@ -582,7 +600,8 @@ static struct starpu_perfmodel STARPU_LU(model_pivot) = {
 #endif
 };
 
-struct starpu_codelet cl_pivot = {
+struct starpu_codelet cl_pivot =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_pivot), NULL},
 #ifdef STARPU_USE_CUDA

+ 31 - 16
examples/lu/xlu_pivot.c

@@ -66,15 +66,18 @@ static void create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 		task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k == 0) {
+	if (k == 0)
+	{
 		starpu_tag_declare_deps(PIVOT(k, i), 1, TAG11(k));
 	}
-	else 
+	else
 	{
-		if (i > k) {
+		if (i > k)
+		{
 			starpu_tag_declare_deps(PIVOT(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 		}
-		else {
+		else
+		{
 			starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t));
 			
 			tags[0] = TAG11(k);
@@ -111,7 +114,8 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, un
 		task->priority = STARPU_MAX_PRIO;
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 
@@ -135,7 +139,8 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 	task->buffers[1].handle = get_block(dataAp, nblocks, j, k);
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!no_prio && (j == k+1)) {
+	if (!no_prio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
@@ -143,10 +148,12 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 #if 0
 	starpu_tag_declare_deps(TAG12(k, i), 1, PIVOT(k, i));
 #endif
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
 	}
 
@@ -166,7 +173,8 @@ static void create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 	task->buffers[1].handle = get_block(dataAp, nblocks, k, i); 
 	task->buffers[1].mode = STARPU_RW;
 
-	if (!no_prio && (i == k+1)) {
+	if (!no_prio && (i == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
@@ -197,15 +205,18 @@ static void create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 	task->buffers[2].handle = get_block(dataAp, nblocks, j, i);  /* produced by TAG22(k-1, i, j) */
 	task->buffers[2].mode = STARPU_RW;
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 	}
 
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
 	}
 
@@ -234,10 +245,12 @@ static double dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 		}
 
@@ -314,12 +327,14 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	/* We already enforce deps by hand */
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 	};
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 	};

+ 35 - 18
examples/mandelbrot/mandelbrot.c

@@ -155,7 +155,8 @@ static int handle_events(void)
 			topY -= 0.25*heightY;
 			bottomY -= 0.25*heightY;
 		}
-		else {
+		else
+		{
 			double widthX = rightX - leftX;
 			double heightY = topY - bottomY;
 
@@ -177,13 +178,15 @@ static int handle_events(void)
 			}
 		}
 
-		if (text[0]=='q') {
+		if (text[0]=='q')
+		{
 			return -1;
 		}
 	}
 
-	if (event.type==ButtonPress) {
-	/* tell where the mouse Button was Pressed */
+	if (event.type==ButtonPress)
+	{
+		/* tell where the mouse Button was Pressed */
 		printf("You pressed a button at (%i,%i)\n",
 			event.xbutton.x,event.xbutton.y);
 	}
@@ -371,7 +374,8 @@ static void compute_block_spmd(void *descr[], void *cl_arg)
 
 
 
-static struct starpu_codelet spmd_mandelbrot_cl = {
+static struct starpu_codelet spmd_mandelbrot_cl =
+{
 	.where = STARPU_CPU|STARPU_OPENCL,
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
@@ -382,7 +386,8 @@ static struct starpu_codelet spmd_mandelbrot_cl = {
 	.nbuffers = 1
 };
 
-static struct starpu_codelet mandelbrot_cl = {
+static struct starpu_codelet mandelbrot_cl =
+{
 	.where = STARPU_CPU|STARPU_OPENCL,
 	.type = STARPU_SEQ,
 	.cpu_funcs = {compute_block, NULL},
@@ -395,38 +400,46 @@ static struct starpu_codelet mandelbrot_cl = {
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-h") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
 			exit(-1);
 		}
 
-		if (strcmp(argv[i], "-width") == 0) {
+		if (strcmp(argv[i], "-width") == 0)
+		{
 			char *argptr;
 			width = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-height") == 0) {
+		if (strcmp(argv[i], "-height") == 0)
+		{
 			char *argptr;
 			height = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-niter") == 0) {
+		if (strcmp(argv[i], "-niter") == 0)
+		{
 			char *argptr;
 			niter = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-pos") == 0) {
+		if (strcmp(argv[i], "-pos") == 0)
+		{
 			int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
 			assert(ret == 4);
 		}
 
-		if (strcmp(argv[i], "-demo") == 0) {
+		if (strcmp(argv[i], "-demo") == 0)
+		{
 			demo = 1;
 			leftX = -50.22749575062760;
 			rightX = 48.73874621262927;
@@ -435,18 +448,21 @@ static void parse_args(int argc, char **argv)
 
 		}
 
-		if (strcmp(argv[i], "-demozoom") == 0) {
+		if (strcmp(argv[i], "-demozoom") == 0)
+		{
 			char *argptr;
 			demozoom = strtof(argv[++i], &argptr);
 		}
 
-		if (strcmp(argv[i], "-no-x11") == 0) {
+		if (strcmp(argv[i], "-no-x11") == 0)
+		{
 #ifdef STARPU_HAVE_X11
 			use_x11 = 0;
 #endif
 		}
 
-		if (strcmp(argv[i], "-spmd") == 0) {
+		if (strcmp(argv[i], "-spmd") == 0)
+		{
 			use_spmd = 1;
 		}
 	}
@@ -567,7 +583,8 @@ int main(int argc, char **argv)
 				iter = 0;
 				gettimeofday(&start, NULL);
 			}
-			else {
+			else
+			{
 				leftX += (zoom_factor/2)*widthX;
 				rightX -= (zoom_factor/2)*widthX;
 				topY -= (zoom_factor/2)*heightY;

+ 28 - 14
examples/matvecmult/matvecmult.c

@@ -64,27 +64,34 @@ void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 }
 #endif
 
-void fillArray(float* pfData, int iSize) {
+void fillArray(float* pfData, int iSize)
+{
     int i;
     const float fScale = 1.0f / (float)RAND_MAX;
-    for (i = 0; i < iSize; ++i) {
+    for (i = 0; i < iSize; ++i)
+    {
             pfData[i] = fScale * rand();
     }
 }
 
-void printArray(float* pfData, int iSize) {
+void printArray(float* pfData, int iSize)
+{
     int i;
-    for (i = 0; i < iSize; ++i) {
+    for (i = 0; i < iSize; ++i)
+    {
             FPRINTF(stderr, "%f ", pfData[i]);
     }
     FPRINTF(stderr, "\n");
 }
 
-void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult) {
+void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult)
+{
     int i, j;
-    for (i = 0; i < height; ++i) {
+    for (i = 0; i < height; ++i)
+    {
         double sum = 0;
-        for (j = 0; j < width; ++j) {
+        for (j = 0; j < width; ++j)
+	{
             double a = matrix[i * width + j];
             double b = vector[j];
             sum += a * b;
@@ -93,12 +100,14 @@ void matVecMult(const float *matrix, const float *vector, int width, int height,
     }
 }
 
-int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon) {
+int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon)
+{
     float error = 0;
     float ref = 0;
     unsigned int i;
 
-    for(i = 0; i < len; ++i) {
+    for(i = 0; i < len; ++i)
+    {
         float diff = reference[i] - data[i];
         error += diff * diff;
         ref += reference[i] * reference[i];
@@ -117,7 +126,8 @@ int main(int argc, char **argv)
 {
 	struct starpu_codelet cl = {};
 
-	struct starpu_conf conf = {
+	struct starpu_conf conf =
+	{
 		.ncpus = 0,
 		.ncuda = 0,
                 .nopencl = 1,
@@ -136,7 +146,8 @@ int main(int argc, char **argv)
 	int ret, submit;
 
         ret = starpu_init(&conf);
-	if (STARPU_UNLIKELY(ret == -ENODEV)) {
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "This application requires an OpenCL worker.\n");
 		starpu_shutdown();
 		return 77;
@@ -186,10 +197,12 @@ int main(int argc, char **argv)
         task->buffers[2].mode = STARPU_RW;
 
         submit = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(submit == -ENODEV)) {
+        if (STARPU_UNLIKELY(submit == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task. This application requires an OpenCL worker.\n");
 	}
-	else {
+	else
+	{
 		starpu_task_wait_for_all();
 	}
 
@@ -197,7 +210,8 @@ int main(int argc, char **argv)
 	starpu_data_unregister(vector_handle);
 	starpu_data_unregister(mult_handle);
 
-        if (STARPU_LIKELY(submit != -ENODEV)) {
+        if (STARPU_LIKELY(submit != -ENODEV))
+	{
 		int res = compareL2fe(correctResult, mult, height, 1e-6f);
 		FPRINTF(stdout, "TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!");
 	}

+ 2 - 1
examples/matvecmult/matvecmult_kernel.cl

@@ -33,7 +33,8 @@ __kernel void matVecMult(
 {
         // Row index
         uint y = get_global_id(0);
-        if (y < height) {
+        if (y < height)
+	{
                 // Row pointer
                 const __global float* row = M + y * width;
 

+ 46 - 23
examples/mult/xgemm.c

@@ -53,10 +53,12 @@ static void check_output(void)
 	TYPE err;
 	err = CPU_ASUM(xdim*ydim, C, 1);
 
-	if (err < xdim*ydim*0.001) {
+	if (err < xdim*ydim*0.001)
+	{
 		FPRINTF(stderr, "Results are OK\n");
 	}
-	else {
+	else
+	{
 		int max;
 		max = CPU_IAMAX(xdim*ydim, C, 1);
 
@@ -74,20 +76,26 @@ static void init_problem_data(void)
 	starpu_malloc((void **)&C, xdim*ydim*sizeof(TYPE));
 
 	/* fill the A and B matrices */
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < zdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < zdim; i++)
+		{
 			A[j+i*ydim] = (TYPE)(starpu_drand48());
 		}
 	}
 
-	for (j=0; j < zdim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < zdim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			B[j+i*zdim] = (TYPE)(starpu_drand48());
 		}
 	}
 
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			C[j+i*ydim] = (TYPE)(0);
 		}
 	}
@@ -132,7 +140,8 @@ static void mult_kernel_common(void *descr[], int type)
 	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
 	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);
 
-	if (type == STARPU_CPU) {
+	if (type == STARPU_CPU)
+	{
 		int worker_size = starpu_combined_worker_get_size();
 
 		if (worker_size == 1)
@@ -140,7 +149,8 @@ static void mult_kernel_common(void *descr[], int type)
 			/* Sequential CPU task */
 			CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC);
 		}
-		else {
+		else
+		{
 			/* Parallel CPU task */
 			int rank = starpu_combined_worker_get_rank();
 		
@@ -156,7 +166,8 @@ static void mult_kernel_common(void *descr[], int type)
 		}
 	}
 #ifdef STARPU_USE_CUDA
-	else {
+	else
+	{
 		CUBLAS_GEMM('n', 'n', nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB,
 					     (TYPE)0.0, subC, ldC);
 		cudaStreamSynchronize(starpu_cuda_get_local_stream());
@@ -176,12 +187,14 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 	mult_kernel_common(descr, STARPU_CPU);
 }
 
-static struct starpu_perfmodel starpu_gemm_model = {
+static struct starpu_perfmodel starpu_gemm_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = STARPU_GEMM_STR(gemm)
 };
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
 	.max_parallelism = INT_MAX,
@@ -196,48 +209,58 @@ static struct starpu_codelet cl = {
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-nblocks") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			nslicesx = strtol(argv[++i], &argptr, 10);
 			nslicesy = nslicesx;
 		}
 
-		if (strcmp(argv[i], "-nblocksx") == 0) {
+		if (strcmp(argv[i], "-nblocksx") == 0)
+		{
 			char *argptr;
 			nslicesx = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nblocksy") == 0) {
+		if (strcmp(argv[i], "-nblocksy") == 0)
+		{
 			char *argptr;
 			nslicesy = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-x") == 0) {
+		if (strcmp(argv[i], "-x") == 0)
+		{
 			char *argptr;
 			xdim = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-y") == 0) {
+		if (strcmp(argv[i], "-y") == 0)
+		{
 			char *argptr;
 			ydim = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-z") == 0) {
+		if (strcmp(argv[i], "-z") == 0)
+		{
 			char *argptr;
 			zdim = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-iter") == 0) {
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 			char *argptr;
 			niter = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 		}
 
-		if (strcmp(argv[i], "-spmd") == 0) {
+		if (strcmp(argv[i], "-spmd") == 0)
+		{
 			cl.type = STARPU_SPMD;
 		}
 	}

+ 6 - 3
examples/openmp/vector_scal.c

@@ -28,7 +28,8 @@
 #define	NX	2048
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
-void scal_cpu_func(void *buffers[], void *_args) {
+void scal_cpu_func(void *buffers[], void *_args)
+{
 	unsigned i;
 	float *factor = _args;
 	struct starpu_vector_interface *vector = buffers[0];
@@ -42,12 +43,14 @@ void scal_cpu_func(void *buffers[], void *_args) {
 		val[i] *= *factor;
 }
 
-static struct starpu_perfmodel vector_scal_model = {
+static struct starpu_perfmodel vector_scal_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale_parallel"
 };
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU,
 	.type = STARPU_FORKJOIN,
 	.max_parallelism = INT_MAX,

+ 10 - 5
examples/opt/pi/pi.c

@@ -64,8 +64,10 @@ static void cpu_kernel(void *descr[], void *cl_arg)
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-ntasks") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-ntasks") == 0)
+		{
 			char *argptr;
 			ntasks = strtol(argv[++i], &argptr, 10);
 		}
@@ -101,19 +103,22 @@ int main(int argc, char **argv)
 	 * accessed by the CPU later on */
 	starpu_data_set_wt_mask(cnt_array_handle, (1<<0));
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = ntasks
 	};
 	
 	starpu_data_partition(cnt_array_handle, &f);
 
-	static struct starpu_perfmodel model = {
+	static struct starpu_perfmodel model =
+	{
 		.type = STARPU_HISTORY_BASED,
 		.symbol = "monte_carlo_pi"
 	};
 
-	struct starpu_codelet cl = {
+	struct starpu_codelet cl =
+	{
 		.where = STARPU_CPU|STARPU_CUDA,
 		.cpu_funcs = {cpu_kernel, NULL},
 #ifdef STARPU_USE_CUDA

+ 18 - 9
examples/opt/pi/pi_redux.c

@@ -64,7 +64,8 @@ static void init_rng(void *arg __attribute__((unused)))
 
 	int workerid = starpu_worker_get_id();
 
-	switch (starpu_worker_get_type(workerid)) {
+	switch (starpu_worker_get_type(workerid))
+	{
 		case STARPU_CPU_WORKER:
 			/* create a seed */
 			starpu_srand48_r((long int)workerid, &randbuffer[PADDING*workerid]);
@@ -96,22 +97,27 @@ static void init_rng(void *arg __attribute__((unused)))
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-ntasks") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-ntasks") == 0)
+		{
 			char *argptr;
 			ntasks = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-noredux") == 0) {
+		if (strcmp(argv[i], "-noredux") == 0)
+		{
 			use_redux = 0;
 		}
 
-		if (strcmp(argv[i], "-warmup") == 0) {
+		if (strcmp(argv[i], "-warmup") == 0)
+		{
 			do_warmup = 1;
 			ntasks_warmup = 8; /* arbitrary number of warmup tasks */
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			fprintf(stderr, "Usage: %s [-ntasks n] [-noredux] [-warmup] [-h]\n", argv[0]);
 			exit(-1);
 		}
@@ -183,7 +189,8 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 }
 #endif
 
-static struct starpu_codelet pi_cl = {
+static struct starpu_codelet pi_cl =
+{
 	.where =
 #ifdef STARPU_HAVE_CURAND
 		STARPU_CUDA|
@@ -216,7 +223,8 @@ static void init_cuda_func(void *descr[], void *cl_arg)
 }
 #endif
 
-static struct starpu_codelet init_codelet = {
+static struct starpu_codelet init_codelet =
+{
 	.where =
 #ifdef STARPU_HAVE_CURAND
 		STARPU_CUDA|
@@ -255,7 +263,8 @@ static void redux_cpu_func(void *descr[], void *cl_arg)
 	*a = *a + *b;
 };
 
-static struct starpu_codelet redux_codelet = {
+static struct starpu_codelet redux_codelet =
+{
 	.where =
 #ifdef STARPU_HAVE_CURAND
 		STARPU_CUDA|

+ 5 - 3
examples/ppm_downscaler/ppm_downscaler.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -57,7 +57,8 @@ struct ppm_image *file_to_ppm(char *filename)
 
 	/* read the file's dimensions */
 	ret = fscanf(file, "P6\n%d %d\n%d\n", &ppm->ncols, &ppm->nlines, &ppm->coldepth);
-	if (ret != 3) {
+	if (ret != 3)
+	{
 		fclose(file);
 		fprintf(stderr, "file %s is not valid\n", filename);
 		exit(-1);
@@ -105,7 +106,8 @@ char *filename_out = "serpents.small.ppm";
 
 void parse_args(int argc, char **argv)
 {
-	if (argc == 3) {
+	if (argc == 3)
+	{
 		filename_in = argv[1];
 		filename_out = argv[2];
 	}

+ 5 - 3
examples/ppm_downscaler/ppm_downscaler.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,13 +16,15 @@
  */
 
 /* we make the asumption that there are 256 color levels at most */
-struct ppm_color {
+struct ppm_color
+{
 	unsigned char r;
 	unsigned char g;
 	unsigned char b;
 };
 
-struct ppm_image {
+struct ppm_image
+{
 	int nlines;
 	int ncols;
 	int coldepth;

+ 11 - 6
examples/ppm_downscaler/yuv_downscaler.c

@@ -38,11 +38,13 @@ char filename_out[1024];
 
 void parse_args(int argc, char **argv)
 {
-	if (argc == 3) {
+	if (argc == 3)
+	{
 		strcpy(filename_in, argv[1]);
 		strcpy(filename_out, argv[2]);
 	}
-	else {
+	else
+	{
 		sprintf(filename_in, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_in_default);
 		sprintf(filename_out, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_out_default);
 	}
@@ -82,7 +84,8 @@ static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 	}
 }
 
-static struct starpu_codelet ds_codelet = {
+static struct starpu_codelet ds_codelet =
+{
 	.where = STARPU_CPU,
 	.cpu_funcs = {ds_kernel_cpu, NULL},
 	.nbuffers = 2, /* input -> output */
@@ -90,12 +93,14 @@ static struct starpu_codelet ds_codelet = {
 };
 
 /* each block contains BLOCK_HEIGHT consecutive lines */
-static struct starpu_data_filter filter_y = {
+static struct starpu_data_filter filter_y =
+{
 	.filter_func = starpu_block_filter_func,
 	.nchildren= HEIGHT/BLOCK_HEIGHT
 };
-	
-static struct starpu_data_filter filter_uv = {
+
+static struct starpu_data_filter filter_uv =
+{
 	.filter_func = starpu_block_filter_func,
 	.nchildren = (HEIGHT/2)/BLOCK_HEIGHT
 };

+ 5 - 3
examples/ppm_downscaler/yuv_downscaler.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,13 +27,15 @@
 
 #include <stdint.h>
 
-struct yuv_frame {
+struct yuv_frame
+{
 	uint8_t y[WIDTH*HEIGHT];
 	uint8_t u[(WIDTH*HEIGHT)/4];
 	uint8_t v[(WIDTH*HEIGHT)/4];
 };
 
-struct yuv_new_frame {
+struct yuv_new_frame
+{
 	uint8_t y[NEW_WIDTH*NEW_HEIGHT];
 	uint8_t u[(NEW_WIDTH*NEW_HEIGHT)/4];
 	uint8_t v[(NEW_WIDTH*NEW_HEIGHT)/4];

+ 6 - 3
examples/reductions/dot_product.c

@@ -73,7 +73,8 @@ void init_cuda_func(void *descr[], void *cl_arg)
 }
 #endif
 
-static struct starpu_codelet init_codelet = {
+static struct starpu_codelet init_codelet =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.can_execute = can_execute,
 	.cpu_funcs = {init_cpu_func, NULL},
@@ -99,7 +100,8 @@ void redux_cpu_func(void *descr[], void *cl_arg)
 extern void redux_cuda_func(void *descr[], void *_args);
 #endif
 
-static struct starpu_codelet redux_codelet = {
+static struct starpu_codelet redux_codelet =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.can_execute = can_execute,
 	.cpu_funcs = {redux_cpu_func, NULL},
@@ -161,7 +163,8 @@ void dot_cuda_func(void *descr[], void *cl_arg)
 }
 #endif
 
-static struct starpu_codelet dot_codelet = {
+static struct starpu_codelet dot_codelet =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.can_execute = can_execute,
 	.cpu_funcs = {dot_cpu_func, NULL},

+ 6 - 3
examples/reductions/minmax_reduction.c

@@ -50,7 +50,8 @@ static void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
 	array[1] = TYPE_MIN;
 }
 
-static struct starpu_codelet minmax_init_codelet = {
+static struct starpu_codelet minmax_init_codelet =
+{
 	.where = STARPU_CPU,
 	.cpu_funcs = {minmax_neutral_cpu_func, NULL},
 	.nbuffers = 1
@@ -76,7 +77,8 @@ void minmax_redux_cpu_func(void *descr[], void *cl_arg)
 	array_dst[1] = STARPU_MAX(max_dst, max_src);
 }
 
-static struct starpu_codelet minmax_redux_codelet = {
+static struct starpu_codelet minmax_redux_codelet =
+{
 	.where = STARPU_CPU,
 	.cpu_funcs = {minmax_redux_cpu_func, NULL},
 	.nbuffers = 2
@@ -110,7 +112,8 @@ void minmax_cpu_func(void *descr[], void *cl_arg)
 	minmax[1] = local_max;
 }
 
-static struct starpu_codelet minmax_codelet = {
+static struct starpu_codelet minmax_codelet =
+{
 	.where = STARPU_CPU,
 	.cpu_funcs = {minmax_cpu_func, NULL},
 	.nbuffers = 2

+ 5 - 3
examples/scheduler/dummy_sched.c

@@ -77,7 +77,8 @@ static struct starpu_task *pop_task_dummy(void)
 	return starpu_task_list_pop_back(&sched_list);
 }
 
-static struct starpu_sched_policy dummy_sched_policy = {
+static struct starpu_sched_policy dummy_sched_policy =
+{
 	.init_sched = init_dummy_sched,
 	.deinit_sched = deinit_dummy_sched,
 	.push_task = push_task_dummy,
@@ -88,7 +89,8 @@ static struct starpu_sched_policy dummy_sched_policy = {
 	.policy_description = "dummy scheduling strategy"
 };
 
-static struct starpu_conf conf = {
+static struct starpu_conf conf =
+{
 	.sched_policy_name = NULL,
 	.sched_policy = &dummy_sched_policy,
 	.ncpus = -1,
@@ -105,7 +107,7 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 {
 }
 
-static struct starpu_codelet dummy_codelet = 
+static struct starpu_codelet dummy_codelet =
 {
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_funcs = {dummy_func, NULL},

+ 6 - 3
examples/spmv/dw_block_spmv.c

@@ -141,7 +141,8 @@ void call_filters(void)
 #define NSPMV	32
 unsigned totaltasks;
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = { cpu_block_spmv, NULL},
 #ifdef STARPU_USE_CUDA
@@ -224,7 +225,8 @@ void launch_spmv_codelets(void)
 
 					is_entry_tab[taskid] = 0;
 				}
-				else {
+				else
+				{
 					/* this is an entry task */
 					is_entry_tab[taskid] = 1;
 				}
@@ -241,7 +243,8 @@ void launch_spmv_codelets(void)
 	unsigned task;
 	for (task = 0; task < totaltasks; task++)
 	{
-		if (is_entry_tab[task]) {
+		if (is_entry_tab[task])
+		{
 			nchains++;
 		}
 

+ 3 - 2
examples/spmv/dw_block_spmv_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -33,7 +33,8 @@ static inline void common_block_spmv(void *descr[], int s, __attribute__((unused
 
 	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 			cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1);
 			break;

+ 28 - 17
examples/spmv/matrix_market/mm_to_bcsr.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,8 +22,10 @@ static void print_block(tmp_block_t *block, unsigned r, unsigned c)
 	printf(" **** block %d %d **** \n", block->i, block->j);
 
 	unsigned i, j;
-	for (j = 0; j < r; j++) {
-		for (i = 0; i < c; i++) {
+	for (j = 0; j < r; j++)
+	{
+		for (i = 0; i < c; i++)
+		{
 			printf("%2.2f\t", block->val[i + j*c]);
 		}
 		printf("\n");
@@ -34,11 +36,12 @@ static void print_all_blocks(tmp_block_t *block_list, unsigned r, unsigned c)
 {
 	tmp_block_t *current_block = block_list;
 
-	while(current_block) {
+	while(current_block)
+	{
 		print_block(current_block, r, c);
 
 		current_block = current_block->next;
-	};
+	}
 }
 
 static void print_bcsr(bcsr_t *bcsr)
@@ -54,10 +57,11 @@ static unsigned count_blocks(tmp_block_t *block_list)
 	unsigned count = 0;
 	tmp_block_t *current_block = block_list;
 
-	while(current_block) {
+	while(current_block)
+	{
 		count++;
 		current_block = current_block->next;
-	};
+	}
 
 	return count;
 }
@@ -67,12 +71,13 @@ static unsigned count_row_blocks(tmp_block_t *block_list)
 	unsigned maxrow = 0;
 	tmp_block_t *current_block = block_list;
 
-	while(current_block) {
+	while(current_block)
+	{
 		if (current_block->j > maxrow)
 			maxrow = current_block->j;
 
 		current_block = current_block->next;
-	};
+	}
 
 	return (maxrow+1);
 }
@@ -86,7 +91,8 @@ static tmp_block_t *search_block(tmp_block_t *block_list, unsigned i, unsigned j
 	tmp_block_t *current_block = block_list;
 	/* printf("search %d %d\n", i, j); */
 
-	while (current_block) {
+	while (current_block)
+	{
 		if ((current_block->i == i) && (current_block->j == j)) 
 		{
 			/* we found the block */
@@ -143,15 +149,18 @@ static void insert_block(tmp_block_t *block, tmp_block_t **block_list, unsigned
 	/* first find an element that is bigger, then insert the block just before it */
 	tmp_block_t *current_block = *block_list;
 
-	if (!current_block) {
+	if (!current_block)
+	{
 		/* list was empty */
 		*block_list = block;
 		block->next = NULL;
 		return;
 	}
 
-	while (current_block) {
-		if (next_block_is_bigger(current_block, i, j)) {
+	while (current_block)
+	{
+		if (next_block_is_bigger(current_block, i, j))
+		{
 			/* insert block here */
 			block->next = current_block->next;
 			current_block->next = block;
@@ -177,7 +186,8 @@ static void insert_elem(tmp_block_t **block_list, unsigned abs_i, unsigned abs_j
 
 	block = search_block(*block_list, i, j);
 
-	if (!block) {
+	if (!block)
+	{
 		/* the block does not exist yet */
 		/* create it */
 		block = create_block(c, r);
@@ -225,7 +235,8 @@ static void fill_bcsr(tmp_block_t *block_list, unsigned c, unsigned r, bcsr_t *b
 
 	tmp_block_t *current_block = block_list;
 
-	while(current_block) {
+	while(current_block)
+	{
 		/* copy the val from the block to the contiguous area in the BCSR */
 		memcpy(&bcsr->val[current_offset], current_block->val, block_size);
 
@@ -314,9 +325,9 @@ bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r)
 		exit(1);
 
 	if (mm_read_banner(f, &matcode) != 0)
-	{                                                       	
+	{
 		printf("Could not process Matrix Market banner.\n");
-		exit(1);                                            	
+		exit(1);
 	}
 
 	/*  This is how one can screen matrix types if their application */

+ 5 - 3
examples/spmv/matrix_market/mm_to_bcsr.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,7 +22,8 @@
 /* convert a matrix stored in a file with the matrix market format into the 
  * BCSR format */
 
-typedef struct tmp_block {
+typedef struct tmp_block
+{
 	/* we have a linked list of blocks */
 	struct tmp_block *next;
 
@@ -33,7 +34,8 @@ typedef struct tmp_block {
 
 } tmp_block_t;
 
-typedef struct {
+typedef struct
+{
 	unsigned r,c;
 	unsigned nnz_blocks;
 	unsigned nrows_blocks;

+ 18 - 9
examples/spmv/spmv.c

@@ -27,13 +27,16 @@ starpu_data_handle_t vector_in, vector_out;
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
@@ -66,7 +69,8 @@ static void csr_filter_func(void *father_interface, void *child_interface, struc
 	csr_child->firstentry = local_firstentry;
 	csr_child->elemsize = elemsize;
 	
-	if (csr_father->nzval) {
+	if (csr_father->nzval)
+	{
 		csr_child->rowptr = &csr_father->rowptr[first_index];
 		csr_child->colind = &csr_father->colind[local_firstentry];
 		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
@@ -74,20 +78,23 @@ static void csr_filter_func(void *father_interface, void *child_interface, struc
 }
 
 /* partition the CSR matrix along a block distribution */
-static struct starpu_data_filter csr_f = {
+static struct starpu_data_filter csr_f =
+{
 	.filter_func = csr_filter_func,
 	/* This value is defined later on */
 	.nchildren = -1,
 	/* the children also use a csr interface */
 };
 
-static struct starpu_data_filter vector_f = {
+static struct starpu_data_filter vector_f =
+{
 	.filter_func = starpu_block_filter_func_vector,
 	/* This value is defined later on */
 	.nchildren = -1,
 };
 
-static struct starpu_codelet spmv_cl = {
+static struct starpu_codelet spmv_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_funcs = {spmv_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -143,7 +150,8 @@ int main(int argc, char **argv)
 	{
 		rowptr[row] = pos;
 
-		if (row > 0) {
+		if (row > 0)
+		{
 			nzval[pos] = 1.0f;
 			colind[pos] = row-1;
 			pos++;
@@ -153,7 +161,8 @@ int main(int argc, char **argv)
 		colind[pos] = row;
 		pos++;
 
-		if (row < size - 1) {
+		if (row < size - 1)
+		{
 			nzval[pos] = 1.0f;
 			colind[pos] = row+1;
 			pos++;

+ 21 - 9
examples/starpufft/cudax_kernels.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,10 +27,13 @@
 #define DISTRIB_1d(n, func,args) \
 	unsigned threads_per_block = 128; \
 \
-	if (n < threads_per_block) { \
+	if (n < threads_per_block) \
+	{			   \
 		dim3 dimGrid(n); \
 		func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
-	} else { \
+	} 					\
+	else 					\
+	{				     \
 		dim3 dimGrid(n / threads_per_block); \
 		dim3 dimBlock(threads_per_block); \
 		func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
@@ -81,21 +84,30 @@ STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsign
 /* FIXME: introduce threads_per_dim_n / m instead */
 #define DISTRIB_2d(n, m, func, args) \
 	unsigned threads_per_dim = 16; \
-	if (n < threads_per_dim) { \
-		if (m < threads_per_dim) { \
+	if (n < threads_per_dim) \
+	{				   \
+		if (m < threads_per_dim) \
+		{			    \
 			dim3 dimGrid(n, m); \
 			func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
-		} else { \
+		} \
+		else \
+		{					      \
 			dim3 dimGrid(1, m / threads_per_dim); \
 			dim3 dimBlock(n, threads_per_dim); \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
 		} \
-	} else {  \
-		if (m < threads_per_dim) { \
+	} \
+	else \
+	{				   \
+		if (m < threads_per_dim) \
+		{					      \
 			dim3 dimGrid(n / threads_per_dim, 1); \
 			dim3 dimBlock(threads_per_dim, m); \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
-		} else { \
+		} \
+		else \
+		{							\
 			dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \
 			dim3 dimBlock(threads_per_dim, threads_per_dim); \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \

+ 100 - 76
examples/starpufft/starpufftx.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,7 +34,8 @@
 #define _FFTW_FLAGS FFTW_ESTIMATE
 
 /* Steps for the parallel variant */
-enum steps {
+enum steps
+{
 	SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END
 };
 
@@ -49,7 +50,8 @@ enum steps {
 
 #define I_BITS STEP_SHIFT
 
-enum type {
+enum type
+{
 	R2C,
 	C2R,
 	C2C
@@ -65,7 +67,8 @@ static struct timeval start, submit_tasks, end;
  *
  */
 
-struct STARPUFFT(plan) {
+struct STARPUFFT(plan)
+{
 	int number;	/* uniquely identifies the plan, for starpu tags */
 
 	int *n;
@@ -84,7 +87,8 @@ struct STARPUFFT(plan) {
 	starpu_data_handle_t roots_handle[2];
 
 	/* For each worker, we need some data */
-	struct {
+	struct
+	{
 #ifdef STARPU_USE_CUDA
 		/* CUFFT plans */
 		cufftHandle plan1_cuda, plan2_cuda;
@@ -113,7 +117,8 @@ struct STARPUFFT(plan) {
 	struct STARPUFFT(args) *fft1_args, *fft2_args;
 };
 
-struct STARPUFFT(args) {
+struct STARPUFFT(args)
+{
 	struct STARPUFFT(plan) *plan;
 	int i, j, jj, kk, ll, *iv, *kkv;
 };
@@ -123,7 +128,8 @@ check_dims(STARPUFFT(plan) plan)
 {
 	int dim;
 	for (dim = 0; dim < plan->dim; dim++)
-		if (plan->n[dim] & (plan->n[dim]-1)) {
+		if (plan->n[dim] & (plan->n[dim]-1))
+		{
 			fprintf(stderr,"can't cope with non-power-of-2\n");
 			STARPU_ABORT();
 		}
@@ -135,7 +141,8 @@ compute_roots(STARPUFFT(plan) plan)
 	int dim, k;
 
 	/* Compute the n-roots and m-roots of unity for twiddling */
-	for (dim = 0; dim < plan->dim; dim++) {
+	for (dim = 0; dim < plan->dim; dim++)
+	{
 		STARPUFFT(complex) exp = (plan->sign * 2. * 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim];
 		plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots));
 		for (k = 0; k < plan->n[dim]; k++)
@@ -143,7 +150,8 @@ compute_roots(STARPUFFT(plan) plan)
 		starpu_vector_data_register(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
 
 #ifdef STARPU_USE_CUDA
-		if (plan->n[dim] > 100000) {
+		if (plan->n[dim] > 100000)
+		{
 			/* prefetch the big root array on GPUs */
 			unsigned worker;
 			unsigned nworkers = starpu_worker_get_count();
@@ -170,17 +178,21 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 	plan->in = _in;
 	plan->out = _out;
 
-	switch (plan->dim) {
-		case 1: {
-			switch (plan->type) {
+	switch (plan->dim)
+	{
+		case 1:
+		{
+			switch (plan->type)
+			{
 			case C2C:
 				starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
-if (!PARALLEL)
-				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
-if (PARALLEL) {
-				for (z = 0; z < plan->totsize1; z++)
-					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
-}
+				if (!PARALLEL)
+					starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+				if (PARALLEL)
+				{
+					for (z = 0; z < plan->totsize1; z++)
+						plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
+				}
 				task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle);
 				break;
 			default:
@@ -191,12 +203,13 @@ if (PARALLEL) {
 		}
 		case 2:
 			starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
-if (!PARALLEL)
-			starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
-if (PARALLEL) {
-			for (z = 0; z < plan->totsize1; z++)
-				plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
-}
+			if (!PARALLEL)
+				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+			if (PARALLEL)
+			{
+				for (z = 0; z < plan->totsize1; z++)
+					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
+			}
 			task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle);
 			break;
 		default:
@@ -211,10 +224,11 @@ STARPUFFT(cleanup)(STARPUFFT(plan) plan)
 {
 	if (plan->in_handle)
 		starpu_data_unregister(plan->in_handle);
-if (!PARALLEL) {
-	if (plan->out_handle)
-		starpu_data_unregister(plan->out_handle);
-}
+	if (!PARALLEL)
+	{
+		if (plan->out_handle)
+			starpu_data_unregister(plan->out_handle);
+	}
 }
 
 struct starpu_task *
@@ -253,16 +267,21 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 {
 	int workerid, dim, i;
 
-	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
-		switch (starpu_worker_get_type(workerid)) {
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
+	{
+		switch (starpu_worker_get_type(workerid))
+		{
 		case STARPU_CPU_WORKER:
 #ifdef STARPU_HAVE_FFTW
-if (PARALLEL) {
-			_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
-			_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
-} else {
-			_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
-}
+			if (PARALLEL)
+			{
+				_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
+				_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
+			}
+			else
+			{
+				_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
+			}
 #endif
 			break;
 		case STARPU_CUDA_WORKER:
@@ -276,45 +295,50 @@ if (PARALLEL) {
 		}
 	}
 
-if (PARALLEL) {
-	for (i = 0; i < plan->totsize1; i++) {
-		starpu_data_unregister(plan->twisted1_handle[i]);
-		free(plan->twist1_tasks[i]);
-		starpu_data_unregister(plan->fft1_handle[i]);
-		free(plan->fft1_tasks[i]);
-	}
+	if (PARALLEL)
+	{
+		for (i = 0; i < plan->totsize1; i++)
+		{
+			starpu_data_unregister(plan->twisted1_handle[i]);
+			free(plan->twist1_tasks[i]);
+			starpu_data_unregister(plan->fft1_handle[i]);
+			free(plan->fft1_tasks[i]);
+		}
 
-	free(plan->twisted1_handle);
-	free(plan->twist1_tasks);
-	free(plan->fft1_handle);
-	free(plan->fft1_tasks);
-	free(plan->fft1_args);
+		free(plan->twisted1_handle);
+		free(plan->twist1_tasks);
+		free(plan->fft1_handle);
+		free(plan->fft1_tasks);
+		free(plan->fft1_args);
 
-	free(plan->join_task);
+		free(plan->join_task);
 
-	for (i = 0; i < plan->totsize3; i++) {
-		starpu_data_unregister(plan->twisted2_handle[i]);
-		free(plan->twist2_tasks[i]);
-		starpu_data_unregister(plan->fft2_handle[i]);
-		free(plan->fft2_tasks[i]);
-		free(plan->twist3_tasks[i]);
-	}
+		for (i = 0; i < plan->totsize3; i++)
+		{
+			starpu_data_unregister(plan->twisted2_handle[i]);
+			free(plan->twist2_tasks[i]);
+			starpu_data_unregister(plan->fft2_handle[i]);
+			free(plan->fft2_tasks[i]);
+			free(plan->twist3_tasks[i]);
+		}
 
-	free(plan->end_task);
+		free(plan->end_task);
 
-	free(plan->twisted2_handle);
-	free(plan->twist2_tasks);
-	free(plan->fft2_handle);
-	free(plan->fft2_tasks);
-	free(plan->twist3_tasks);
-	free(plan->fft2_args);
+		free(plan->twisted2_handle);
+		free(plan->twist2_tasks);
+		free(plan->fft2_handle);
+		free(plan->fft2_tasks);
+		free(plan->twist3_tasks);
+		free(plan->fft2_args);
 
-	for (dim = 0; dim < plan->dim; dim++) {
-		starpu_data_unregister(plan->roots_handle[dim]);
-		free(plan->roots[dim]);
-	}
+		for (dim = 0; dim < plan->dim; dim++)
+		{
+			starpu_data_unregister(plan->roots_handle[dim]);
+			free(plan->roots[dim]);
+		}
 
-	switch (plan->dim) {
+		switch (plan->dim)
+		{
 		case 1:
 			STARPUFFT(free_1d_tags)(plan);
 			break;
@@ -324,15 +348,15 @@ if (PARALLEL) {
 		default:
 			STARPU_ABORT();
 			break;
-	}
+		}
 
-	free(plan->n1);
-	free(plan->n2);
-	STARPUFFT(free)(plan->twisted1);
-	STARPUFFT(free)(plan->fft1);
-	STARPUFFT(free)(plan->twisted2);
-	STARPUFFT(free)(plan->fft2);
-}
+		free(plan->n1);
+		free(plan->n2);
+		STARPUFFT(free)(plan->twisted1);
+		STARPUFFT(free)(plan->fft1);
+		STARPUFFT(free)(plan->twisted2);
+		STARPUFFT(free)(plan->fft2);
+	}
 	free(plan->n);
 	free(plan);
 }

+ 25 - 11
examples/starpufft/testx.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,7 +44,8 @@ static void check_fftw(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 {
 	int i;
 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; i++)
+	{
 		double diff = cabs(out[i]-out_fftw[i]);
 		double diff2 = diff * diff;
 		double size = cabs(out_fftw[i]);
@@ -74,7 +75,8 @@ static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 {
 	int i;
 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; i++)
+	{
 		double diff = cabs(out_cuda[i]-out_fftw[i]);
 		double diff2 = diff * diff;
 		double size = cabs(out_fftw[i]);
@@ -99,7 +101,8 @@ static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 }
 #endif
 
-int main(int argc, char *argv[]) {
+int main(int argc, char *argv[])
+{
 	int i;
 	struct timeval begin, end;
 	int size;
@@ -116,25 +119,31 @@ int main(int argc, char *argv[]) {
 #endif
 	double timing;
 
-	if (argc < 2 || argc > 3) {
+	if (argc < 2 || argc > 3)
+	{
 		fprintf(stderr,"need one or two size of vector\n");
 		exit(EXIT_FAILURE);
 	}
 
 	starpu_init(NULL);
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		n = atoi(argv[1]);
 
 		/* 1D */
 		size = n;
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		n = atoi(argv[1]);
 		m = atoi(argv[2]);
 
 		/* 2D */
 		size = n * m;
-	} else {
+	}
+	else
+	{
 		assert(0);
 	}
 
@@ -155,7 +164,8 @@ int main(int argc, char *argv[]) {
 	STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda));
 #endif
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0);
 #ifdef STARPU_HAVE_FFTW
 		fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
@@ -165,7 +175,9 @@ int main(int argc, char *argv[]) {
 			printf("erf\n");
 #endif
 
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0);
 #ifdef STARPU_HAVE_FFTW
 		fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
@@ -173,7 +185,9 @@ int main(int argc, char *argv[]) {
 #ifdef STARPU_USE_CUDA
 		STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS);
 #endif
-	} else {
+	}
+	else
+	{
 		assert(0);
 	}
 

+ 21 - 9
examples/starpufft/testx_threads.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,7 +32,8 @@
 #define SIGN (-1)
 /* #define SIGN (1) */
 
-int main(int argc, char *argv[]) {
+int main(int argc, char *argv[])
+{
 	int i;
 	struct timeval begin, end;
 	int size;
@@ -50,23 +51,29 @@ int main(int argc, char *argv[]) {
 		num_threads = atoi(num);
 	_FFTW(plan_with_nthreads)(num_threads);
 
-	if (argc < 2 || argc > 3) {
+	if (argc < 2 || argc > 3)
+	{
 		fprintf(stderr,"need one or two size of vector\n");
 		exit(EXIT_FAILURE);
 	}
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		n = atoi(argv[1]);
 
 		/* 1D */
 		size = n;
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		n = atoi(argv[1]);
 		m = atoi(argv[2]);
 
 		/* 2D */
 		size = n * m;
-	} else {
+	}
+	else
+	{
 		assert(0);
 	}
 
@@ -79,12 +86,17 @@ int main(int argc, char *argv[]) {
 
 	_FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw));
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE);
 
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE);
-	} else {
+	}
+	else
+	{
 		assert(0);
 	}
 

+ 6 - 3
examples/stencil/life.c

@@ -22,9 +22,12 @@ void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 {
 	unsigned x, y, z, num, alive;
 
-	for (z = iter; z < nz - iter; z++) {
-		for (y = K; y < ny - K; y++) {
-			for (x = K; x < nx - K; x++) {
+	for (z = iter; z < nz - iter; z++)
+	{
+		for (y = K; y < ny - K; y++)
+		{
+			for (x = K; x < nx - K; x++)
+			{
 				num = 0
                                         + old[x+(y+1)*ldy+(z+0)*ldz]
                                         + old[x+(y+1)*ldy+(z+1)*ldz]

+ 4 - 2
examples/stencil/life_cuda.cu

@@ -35,8 +35,10 @@ cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 	unsigned num, alive;
 
 	for (z = iter + idz; z < nz - iter; z += stepz)
-		for (y = K + idy; y < ny - K; y += stepy) {
-			for (x = K + idx; x < nx - K; x += stepx) {
+		for (y = K + idy; y < ny - K; y += stepy)
+		{
+			for (x = K + idx; x < nx - K; x += stepx)
+			{
 				unsigned index = x + y*ldy + z*ldz;
 				num = 0
                                         + old[index+1*ldy+0*ldz]

+ 8 - 4
examples/stencil/life_opencl.c

@@ -42,8 +42,10 @@ life_update(int bz, __global const TYPE *old, __global TYPE *newp, int nx, int n
 	unsigned num, alive;\n\
 \n\
 	for (z = iter + idz; z < nz - iter; z += stepz)\n\
-		for (y = K + idy; y < ny - K; y += stepy) {\n\
-			for (x = K + idx; x < nx - K; x += stepx) {\n\
+		for (y = K + idy; y < ny - K; y += stepy) \n\
+		{\n									\
+			for (x = K + idx; x < nx - K; x += stepx) \
+			{\n					\
 				unsigned index = x + y*ldy + z*ldz;\n\
 				num = 0\n\
                                         + old[index+1*ldy+0*ldz]\n\
@@ -66,11 +68,13 @@ static const char * src = clsrc(TYPE,K);
 static struct starpu_opencl_program program;
 
 void
-opencl_life_init(void) {
+opencl_life_init(void)
+{
   starpu_opencl_load_opencl_from_string(src, &program, NULL);
 }
 
-void opencl_life_free(void) {
+void opencl_life_free(void)
+{
   starpu_opencl_unload_opencl(&program);
 }
 

+ 6 - 3
examples/stencil/shadow.h

@@ -20,7 +20,8 @@
 /* TODO: rather use a dummy for loop, to assign the job to the threads that will work on it? */
 	if (idy == 0)
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
-			for (x = K + idx; x < nx-K; x += stepx) {
+			for (x = K + idx; x < nx-K; x += stepx)
+			{
 				unsigned index = x+z*ldz;
 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];
 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];
@@ -28,14 +29,16 @@
 
 	if (idx == 0)
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
-			for (y = K + idy; y < ny-K; y += stepy) {
+			for (y = K + idy; y < ny-K; y += stepy)
+			{
 				unsigned index = y*ldy+z*ldz;
 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];
 				ptr[(nx-K)+index] = ptr[K+index];
 			}
 
 	if (idx == 0 && idy == 0)
-		for (z = i-1 + idz; z < nz-(i-1); z += stepz) {
+		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
+		{
 			unsigned index = z*ldz;
 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];
 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];

+ 10 - 5
examples/stencil/shadow_opencl.c

@@ -38,7 +38,8 @@ shadow( int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, in
 	unsigned x, y, z;\n\
 	if (idy == 0)\n\
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
-			for (x = K + idx; x < nx-K; x += stepx) {\n\
+			for (x = K + idx; x < nx-K; x += stepx) \
+			{\n								\
 				unsigned index = x+z*ldz;\n\
 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];\n\
 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];\n\
@@ -46,14 +47,16 @@ shadow( int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, in
 \n\
 	if (idx == 0)\n\
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
-			for (y = K + idy; y < ny-K; y += stepy) {\n\
+			for (y = K + idy; y < ny-K; y += stepy) \
+			{\n					\
 				unsigned index = y*ldy+z*ldz;\n\
 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];\n\
 				ptr[(nx-K)+index] = ptr[K+index];\n\
 			}\n\
 \n\
 	if (idx == 0 && idy == 0)\n\
-		for (z = i-1 + idz; z < nz-(i-1); z += stepz) {\n\
+		for (z = i-1 + idz; z < nz-(i-1); z += stepz) \
+		{\n					      \
 			unsigned index = z*ldz;\n\
 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];\n\
 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];\n\
@@ -66,11 +69,13 @@ static const char * src = clsrc(TYPE,K);
 static struct starpu_opencl_program program;
 
 void
-opencl_shadow_init(void) {
+opencl_shadow_init(void)
+{
   starpu_opencl_load_opencl_from_string(src, &program, NULL);
 }
 
-void opencl_shadow_free(void) {
+void opencl_shadow_free(void)
+{
   starpu_opencl_unload_opencl(&program);
 }
 

+ 22 - 11
examples/stencil/stencil-kernels.c

@@ -23,10 +23,12 @@
 
 #ifndef timersub
 #define	timersub(x, y, res) \
-	do { \
+	do \
+	{						   \
 		(res)->tv_sec = (x)->tv_sec - (y)->tv_sec; \
 		(res)->tv_usec = (x)->tv_usec - (y)->tv_usec; \
-		if ((res)->tv_usec < 0) { \
+		if ((res)->tv_usec < 0) \
+		{			 \
 			(res)->tv_sec--; \
 			(res)->tv_usec += 1000000; \
 		} \
@@ -34,10 +36,12 @@
 #endif
 #ifndef timeradd
 #define	timeradd(x, y, res) \
-	do { \
+	do \
+	{						   \
 		(res)->tv_sec = (x)->tv_sec + (y)->tv_sec; \
 		(res)->tv_usec = (x)->tv_usec + (y)->tv_usec; \
-		if ((res)->tv_usec >= 1000000) { \
+		if ((res)->tv_usec >= 1000000) \
+		{			       \
 			(res)->tv_sec++; \
 			(res)->tv_usec -= 1000000; \
 		} \
@@ -138,7 +142,8 @@ static void record_who_runs_what(struct block_description *block)
 	gettimeofday(&tv, NULL);
 	timersub(&tv, &start, &tv2);
 	timersub(&tv2, &last_tick[block->bz], &diff);
-	while (timercmp(&diff, &delta, >=)) {
+	while (timercmp(&diff, &delta, >=))
+	{
 		timeradd(&last_tick[block->bz], &delta, &last_tick[block->bz]);
 		timersub(&tv2, &last_tick[block->bz], &diff);
 		if (who_runs_what_index[block->bz] < who_runs_what_len)
@@ -439,12 +444,14 @@ fprintf(stderr,"!!! DO update_func_cpu z %d CPU%d !!!\n", block->bz, workerid);
 }
 
 /* Performance model and codelet structure */
-static struct starpu_perfmodel cl_update_model = {
+static struct starpu_perfmodel cl_update_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "cl_update" 
 };
 
-struct starpu_codelet cl_update = {
+struct starpu_codelet cl_update =
+{
 	.where = 0 |
 #ifdef STARPU_USE_CUDA
 		STARPU_CUDA|
@@ -634,17 +641,20 @@ static void dummy_func_bottom_opencl(void *descr[] __attribute__((unused)), void
 #endif /* STARPU_USE_OPENCL */
 
 /* Performance models and codelet for save */
-static struct starpu_perfmodel save_cl_bottom_model = {
+static struct starpu_perfmodel save_cl_bottom_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "save_cl_bottom" 
 };
 
-static struct starpu_perfmodel save_cl_top_model = {
+static struct starpu_perfmodel save_cl_top_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "save_cl_top" 
 };
 
-struct starpu_codelet save_cl_bottom = {
+struct starpu_codelet save_cl_bottom =
+{
 	.where = 0 |
 #ifdef STARPU_USE_CUDA
 		STARPU_CUDA|
@@ -664,7 +674,8 @@ struct starpu_codelet save_cl_bottom = {
 	.nbuffers = 4
 };
 
-struct starpu_codelet save_cl_top = {
+struct starpu_codelet save_cl_top =
+{
 	.where = 0|
 #ifdef STARPU_USE_CUDA
 		STARPU_CUDA|

+ 16 - 8
examples/stencil/stencil-tasks.c

@@ -76,7 +76,8 @@ static void create_task_save_local(unsigned iter, unsigned z, int dir, unsigned
 /* R(z) = local & R(z+d) != local */
 /* We need to send our save over MPI */
 
-static void send_done(void *arg) {
+static void send_done(void *arg)
+{
 	uintptr_t z = (uintptr_t) arg;
 	DEBUG("DO SEND %d\n", (int)z);
 }
@@ -103,7 +104,8 @@ static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, unsign
 /* R(z) != local & R(z+d) = local */
 /* We need to receive over MPI */
 
-static void recv_done(void *arg) {
+static void recv_done(void *arg)
+{
 	uintptr_t z = (uintptr_t) arg;
 	DEBUG("DO RECV %d\n", (int)z);
 }
@@ -146,12 +148,14 @@ void create_task_save(unsigned iter, unsigned z, int dir, unsigned local_rank)
 		}
 
 	}
-	else {	/* node_z != local_rank, this MPI node doesn't have the saved data */
+	else
+	{	/* node_z != local_rank, this MPI node doesn't have the saved data */
 		if (node_z_and_d == local_rank)
 		{
 			create_task_save_mpi_recv(iter, z, dir, local_rank);
 		}
-		else { /* R(z) != local & R(z+d) != local We don't have
+		else
+		{ /* R(z) != local & R(z+d) != local We don't have
 			      the saved data and don't need it, we shouldn't
 			      even have been called! */
 			STARPU_ASSERT(0);
@@ -176,7 +180,8 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 	unsigned niter = get_niter();
 
 	/* We are going to synchronize with the last tasks */
-	if (iter == niter) {
+	if (iter == niter)
+	{
 		task->detach = 0;
 		task->use_tag = 1;
 		task->tag_id = TAG_FINISH(z);
@@ -218,7 +223,8 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 
 /* Dummy empty codelet taking one buffer */
 static void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }
-static struct starpu_codelet null = {
+static struct starpu_codelet null =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_funcs = {null_func, NULL},
 	.cuda_funcs = {null_func, NULL},
@@ -226,7 +232,8 @@ static struct starpu_codelet null = {
 	.nbuffers = 2
 };
 
-void create_start_task(int z, int dir) {
+void create_start_task(int z, int dir)
+{
 	/* Dumb task depending on the init task and simulating writing the
 	   neighbour buffers, to avoid communications and computation running
 	   before we start measuring time */
@@ -261,7 +268,8 @@ void create_tasks(int rank)
 	int niter = get_niter();
 	int nbz = get_nbz();
 
-	for (bz = 0; bz < nbz; bz++) {
+	for (bz = 0; bz < nbz; bz++)
+	{
 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
 			create_start_task(bz, +1);
 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))

+ 32 - 16
examples/stencil/stencil.c

@@ -67,36 +67,45 @@ unsigned get_ticks(void)
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-b") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-b") == 0)
+		{
 			bind_tasks = 1;
 		}
 
-		if (strcmp(argv[i], "-nbz") == 0) {
+		if (strcmp(argv[i], "-nbz") == 0)
+		{
 			nbz = atoi(argv[++i]);
 		}
 
-		if (strcmp(argv[i], "-sizex") == 0) {
+		if (strcmp(argv[i], "-sizex") == 0)
+		{
 			sizex = atoi(argv[++i]);
 		}
 
-		if (strcmp(argv[i], "-sizey") == 0) {
+		if (strcmp(argv[i], "-sizey") == 0)
+		{
 			sizey = atoi(argv[++i]);
 		}
 
-		if (strcmp(argv[i], "-sizez") == 0) {
+		if (strcmp(argv[i], "-sizez") == 0)
+		{
 			sizez = atoi(argv[++i]);
 		}
 
-		if (strcmp(argv[i], "-niter") == 0) {
+		if (strcmp(argv[i], "-niter") == 0)
+		{
 			niter = atoi(argv[++i]);
 		}
 
-		if (strcmp(argv[i], "-ticks") == 0) {
+		if (strcmp(argv[i], "-ticks") == 0)
+		{
 			ticks = atoi(argv[++i]);
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			 fprintf(stderr, "Usage : %s [options...]\n", argv[0]);
 			 fprintf(stderr, "\n");
 			 fprintf(stderr, "Options:\n");
@@ -114,7 +123,8 @@ static void init_problem(int argc, char **argv, int rank, int world_size)
 {
 	parse_args(argc, argv);
 
-	if (getenv("STARPU_TOP")) {
+	if (getenv("STARPU_TOP"))
+	{
 		starpu_top_init_loop = starpu_top_add_data_integer("Task creation iter", 0, niter, 1);
 		starpu_top_achieved_loop = starpu_top_add_data_integer("Task achieved iter", 0, niter, 1);
 		starpu_top_init_and_wait("stencil_top example");
@@ -152,8 +162,10 @@ void f(unsigned task_per_worker[STARPU_NMAXWORKERS])
 
 	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
 		total += task_per_worker[worker];
-	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) {
-		if (task_per_worker[worker]) {
+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+	{
+		if (task_per_worker[worker])
+		{
 			char name[32];
 			starpu_worker_get_name(worker, name, sizeof(name));
 			fprintf(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
@@ -181,7 +193,8 @@ int main(int argc, char **argv)
 
 #ifdef STARPU_USE_MPI
 	int thread_support;
-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) {
+	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support))
+	{
 		fprintf(stderr, "MPI_Init_thread failed\n");
 	}
 	if (thread_support == MPI_THREAD_FUNNELED)
@@ -295,15 +308,18 @@ int main(int argc, char **argv)
 
 		unsigned bz, iter;
 		unsigned last;
-		for (iter = 0; iter < who_runs_what_len; iter++) {
+		for (iter = 0; iter < who_runs_what_len; iter++)
+		{
 			last = 1;
-			for (bz = 0; bz < nbz; bz++) {
+			for (bz = 0; bz < nbz; bz++)
+			{
 				if ((bz % nzblocks_per_process) == 0)
 					fprintf(stderr, "| ");
 
 				if (who_runs_what_index[bz] <= iter)
 					fprintf(stderr,"_ ");
-				else {
+				else
+				{
 					last = 0;
 					if (who_runs_what[bz + iter * nbz] == -1)
 						fprintf(stderr,"* ");

+ 4 - 2
examples/stencil/stencil.h

@@ -50,13 +50,15 @@ extern struct starpu_top_data* starpu_top_achieved_loop;
 
 
 /* Split only on the z axis to make things simple */
-typedef enum {
+typedef enum
+{
 	B = 0,
 	T = 1
 } direction;
 
 /* Description of a domain block */
-struct block_description {
+struct block_description
+{
 	/* Which MPI node should process that block ? */
 	unsigned mpi_node;
 	

+ 16 - 9
examples/tag_example/tag_example.c

@@ -43,23 +43,28 @@ static unsigned iter = 0;
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-j") == 0) {
+		if (strcmp(argv[i], "-j") == 0)
+		{
 		        char *argptr;
 			nj = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i] [-j j]\n", argv[0]);
 		}
 	}
@@ -153,27 +158,29 @@ void cpu_codelet(void *descr[] __attribute__((unused)),
 
 static void express_deps(unsigned i, unsigned j, unsigned iter)
 {
-	if (j > 0) {
+	if (j > 0)
+	{
 		/* (i,j-1) exists */
 		if (j < nj - 1)
 		{
 			/* (i,j+1) exists */
 			starpu_tag_declare_deps(TAG(i,j,iter), 2, TAG(i-1,j-1,iter), TAG(i-1,j+1,iter));
 		}
-		else 
+		else
 		{
 			/* (i,j+1) does not exist */
 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j-1,iter));
 		}
 	}
-	else {
+	else
+	{
 		/* (i, (j-1) does not exist */
 		if (j < nj - 1)
 		{
 			/* (i,j+1) exists */
 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j+1,iter));
 		}
-		else 
+		else
 		{
 			/* (i,j+1) does not exist */
 			STARPU_ABORT();

+ 8 - 4
examples/tag_example/tag_example2.c

@@ -40,18 +40,22 @@ static unsigned ni = Ni, nk = Nk;
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 		}
 	}

+ 8 - 4
examples/tag_example/tag_example3.c

@@ -40,18 +40,22 @@ static unsigned ni = Ni, nk = Nk;
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 		}
 	}

+ 10 - 5
examples/tag_example/tag_restartable.c

@@ -46,18 +46,22 @@ struct starpu_task **tasks[Nrolls];
 static void parse_args(int argc, char **argv)
 {
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 		}
 	}
@@ -133,7 +137,8 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	FPRINTF(stderr, "ITER : %u\n", nk);
 
-	for (i = 0; i < Nrolls; i++) {
+	for (i = 0; i < Nrolls; i++)
+	{
 		tasks[i] = (struct starpu_task **) malloc(ni * sizeof(*tasks[i]));
 
 		create_task_grid(i);

+ 3 - 1
examples/top/hello_world_top.c

@@ -56,10 +56,12 @@ void callback_func(void *callback_arg)
  * DSM; the second arguments references read-only data that is passed as an
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
-struct params {
+struct params
+{
 	int i;
 	float f;
 };
+
 void cpu_func(void *buffers[], void *cl_arg)
 {
 	struct params *params = (struct params *) cl_arg;