Browse Source

Update code to conform to the coding style

Nathalie Furmento 13 years ago
parent
commit
b9476e6d4a
90 changed files with 1393 additions and 736 deletions
  1. 27 14
      examples/audio/starpu_audio_processing.c
  2. 4 2
      examples/axpy/axpy.c
  3. 18 11
      examples/basic_examples/block.c
  4. 5 3
      examples/basic_examples/block_cpu.c
  5. 5 3
      examples/basic_examples/block_cuda.cu
  6. 3 2
      examples/basic_examples/block_opencl.c
  7. 5 3
      examples/basic_examples/block_opencl_kernel.cl
  8. 3 1
      examples/basic_examples/hello_world.c
  9. 22 12
      examples/basic_examples/mult.c
  10. 12 6
      examples/basic_examples/multiformat.c
  11. 12 6
      examples/basic_examples/multiformat_conversion_codelets.c
  12. 2 1
      examples/basic_examples/multiformat_conversion_codelets_cuda.cu
  13. 2 1
      examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
  14. 4 2
      examples/basic_examples/multiformat_types.h
  15. 6 3
      examples/basic_examples/vector_scal.c
  16. 4 2
      examples/basic_examples/vector_scal_c.c
  17. 3 2
      examples/basic_examples/vector_scal_opencl_kernel.cl
  18. 14 7
      examples/cg/cg.c
  19. 40 20
      examples/cg/cg_kernels.c
  20. 18 9
      examples/cholesky/cholesky.h
  21. 38 19
      examples/cholesky/cholesky_grain_tag.c
  22. 24 12
      examples/cholesky/cholesky_implicit.c
  23. 9 5
      examples/cholesky/cholesky_kernels.c
  24. 12 6
      examples/cholesky/cholesky_models.c
  25. 44 22
      examples/cholesky/cholesky_tag.c
  26. 28 14
      examples/cholesky/cholesky_tile_tag.c
  27. 9 5
      examples/common/blas_model.h
  28. 14 7
      examples/filters/fblock.c
  29. 5 3
      examples/filters/fblock_cpu.c
  30. 5 3
      examples/filters/fblock_cuda.cu
  31. 3 2
      examples/filters/fblock_opencl.c
  32. 5 3
      examples/filters/fblock_opencl_kernel.cl
  33. 12 6
      examples/filters/fmatrix.c
  34. 2 1
      examples/filters/fvector.c
  35. 42 21
      examples/heat/dw_factolu.c
  36. 22 11
      examples/heat/dw_factolu.h
  37. 40 20
      examples/heat/dw_factolu_grain.c
  38. 12 6
      examples/heat/dw_factolu_kernels.c
  39. 36 18
      examples/heat/dw_factolu_tag.c
  40. 6 3
      examples/heat/dw_sparse_cg.c
  41. 13 7
      examples/heat/dw_sparse_cg.h
  42. 72 37
      examples/heat/heat.c
  43. 2 1
      examples/heat/heat.h
  44. 8 4
      examples/heat/heat_display.c
  45. 17 9
      examples/heat/lu_kernels_model.c
  46. 2 1
      examples/incrementer/incrementer.c
  47. 27 13
      examples/lu/lu_example.c
  48. 28 14
      examples/lu/xlu.c
  49. 2 1
      examples/lu/xlu.h
  50. 4 2
      examples/lu/xlu_implicit.c
  51. 4 2
      examples/lu/xlu_implicit_pivot.c
  52. 38 19
      examples/lu/xlu_kernels.c
  53. 31 16
      examples/lu/xlu_pivot.c
  54. 35 18
      examples/mandelbrot/mandelbrot.c
  55. 28 14
      examples/matvecmult/matvecmult.c
  56. 2 1
      examples/matvecmult/matvecmult_kernel.cl
  57. 46 23
      examples/mult/xgemm.c
  58. 6 3
      examples/openmp/vector_scal.c
  59. 10 5
      examples/opt/pi/pi.c
  60. 18 9
      examples/opt/pi/pi_redux.c
  61. 5 3
      examples/ppm_downscaler/ppm_downscaler.c
  62. 5 3
      examples/ppm_downscaler/ppm_downscaler.h
  63. 11 6
      examples/ppm_downscaler/yuv_downscaler.c
  64. 5 3
      examples/ppm_downscaler/yuv_downscaler.h
  65. 6 3
      examples/reductions/dot_product.c
  66. 6 3
      examples/reductions/minmax_reduction.c
  67. 5 3
      examples/scheduler/dummy_sched.c
  68. 6 3
      examples/spmv/dw_block_spmv.c
  69. 3 2
      examples/spmv/dw_block_spmv_kernels.c
  70. 28 17
      examples/spmv/matrix_market/mm_to_bcsr.c
  71. 5 3
      examples/spmv/matrix_market/mm_to_bcsr.h
  72. 18 9
      examples/spmv/spmv.c
  73. 21 9
      examples/starpufft/cudax_kernels.cu
  74. 100 76
      examples/starpufft/starpufftx.c
  75. 25 11
      examples/starpufft/testx.c
  76. 21 9
      examples/starpufft/testx_threads.c
  77. 6 3
      examples/stencil/life.c
  78. 4 2
      examples/stencil/life_cuda.cu
  79. 8 4
      examples/stencil/life_opencl.c
  80. 6 3
      examples/stencil/shadow.h
  81. 10 5
      examples/stencil/shadow_opencl.c
  82. 22 11
      examples/stencil/stencil-kernels.c
  83. 16 8
      examples/stencil/stencil-tasks.c
  84. 32 16
      examples/stencil/stencil.c
  85. 4 2
      examples/stencil/stencil.h
  86. 16 9
      examples/tag_example/tag_example.c
  87. 8 4
      examples/tag_example/tag_example2.c
  88. 8 4
      examples/tag_example/tag_example3.c
  89. 10 5
      examples/tag_example/tag_restartable.c
  90. 3 1
      examples/top/hello_world_top.c

+ 27 - 14
examples/audio/starpu_audio_processing.c

@@ -101,7 +101,8 @@ void read_16bit_wav(FILE *infile, unsigned size, float *arrayout, FILE *save_fil
 	/* we skip the header to only keep the data */
 	/* we skip the header to only keep the data */
 	fseek(infile, headersize, SEEK_SET);
 	fseek(infile, headersize, SEEK_SET);
 	
 	
-	for (v=0;v<size;v++) {
+	for (v=0;v<size;v++)
+	{
 		signed char val = (signed char)fgetc(infile);
 		signed char val = (signed char)fgetc(infile);
 		signed char val2 = (signed char)fgetc(infile);
 		signed char val2 = (signed char)fgetc(infile);
 
 
@@ -124,7 +125,8 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
 	/* we assume that the header is copied using copy_wav_header */
 	/* we assume that the header is copied using copy_wav_header */
 	fseek(outfile, headersize, SEEK_SET);
 	fseek(outfile, headersize, SEEK_SET);
 	
 	
-	for (v=0;v<size;v++) {
+	for (v=0;v<size;v++)
+	{
 		signed char val = ((int)arrayin[v]) % 256; 
 		signed char val = ((int)arrayin[v]) % 256; 
 		signed char val2  = ((int)arrayin[v]) / 256;
 		signed char val2  = ((int)arrayin[v]) / 256;
 
 
@@ -146,7 +148,8 @@ void write_16bit_wav(FILE *outfile, unsigned size, float *arrayin, FILE *save_fi
  */
  */
 
 
 /* we don't reinitialize the CUFFT plan for every kernel, so we "cache" it */
 /* we don't reinitialize the CUFFT plan for every kernel, so we "cache" it */
-typedef struct {
+typedef struct
+{
 	unsigned is_initialized;
 	unsigned is_initialized;
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	cufftHandle plan;
 	cufftHandle plan;
@@ -268,12 +271,14 @@ static void band_filter_kernel_cpu(void *descr[], __attribute__((unused)) void *
 		localA[i] /= nsamples;
 		localA[i] /= nsamples;
 }
 }
 
 
-struct starpu_perfmodel band_filter_model = {
+struct starpu_perfmodel band_filter_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "FFT_band_filter"
 	.symbol = "FFT_band_filter"
 };
 };
 
 
-static struct starpu_codelet band_filter_cl = {
+static struct starpu_codelet band_filter_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {band_filter_kernel_gpu, NULL},
 	.cuda_funcs = {band_filter_kernel_gpu, NULL},
@@ -330,7 +335,8 @@ static void init_problem(void)
 	{
 	{
 		starpu_malloc((void **)&A, length_data*sizeof(float));
 		starpu_malloc((void **)&A, length_data*sizeof(float));
 	}
 	}
-	else {
+	else
+	{
 		A = malloc(length_data*sizeof(float));
 		A = malloc(length_data*sizeof(float));
 	}
 	}
 
 
@@ -344,31 +350,38 @@ static void init_problem(void)
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-h") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			fprintf(stderr, "Usage: %s [-pin] [-nsamples block_size] [-i input.wav] [-o output.wav | -no-output] [-h]\n", argv[0]);
 			fprintf(stderr, "Usage: %s [-pin] [-nsamples block_size] [-i input.wav] [-o output.wav | -no-output] [-h]\n", argv[0]);
 			exit(-1);
 			exit(-1);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 			inputfilename = argv[++i];;
 			inputfilename = argv[++i];;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-o") == 0) {
+		if (strcmp(argv[i], "-o") == 0)
+		{
 			outputfilename = argv[++i];;
 			outputfilename = argv[++i];;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-no-output") == 0) {
+		if (strcmp(argv[i], "-no-output") == 0)
+		{
 			outputfilename = NULL;;
 			outputfilename = NULL;;
 		}
 		}
 
 
 		/* block size */
 		/* block size */
-		if (strcmp(argv[i], "-nsamples") == 0) {
+		if (strcmp(argv[i], "-nsamples") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nsamples = strtol(argv[++i], &argptr, 10);
 			nsamples = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-pin") == 0) {
+		if (strcmp(argv[i], "-pin") == 0)
+		{
 			use_pin = 1;
 			use_pin = 1;
 		}
 		}
 	}
 	}
@@ -393,7 +406,7 @@ int main(int argc, char **argv)
 
 
 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));
 
 
-	struct starpu_data_filter f = 
+	struct starpu_data_filter f =
 	{
 	{
 		.filter_func = starpu_block_filter_func_vector,
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = niter
 		.nchildren = niter

+ 4 - 2
examples/axpy/axpy.c

@@ -70,7 +70,8 @@ void axpy_gpu(void *descr[], __attribute__((unused)) void *arg)
 }
 }
 #endif
 #endif
 
 
-static struct starpu_codelet axpy_cl = {
+static struct starpu_codelet axpy_cl =
+{
         .where =
         .where =
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
                 STARPU_CUDA|
                 STARPU_CUDA|
@@ -116,7 +117,8 @@ int main(int argc, char **argv)
 	starpu_vector_data_register(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
 	starpu_vector_data_register(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
 
 
 	/* Divide the vector into blocks */
 	/* Divide the vector into blocks */
-	struct starpu_data_filter block_filter = {
+	struct starpu_data_filter block_filter =
+	{
 		.filter_func = starpu_block_filter_func_vector,
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = NBLOCKS
 		.nchildren = NBLOCKS
 	};
 	};

+ 18 - 11
examples/basic_examples/block.c

@@ -57,7 +57,8 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 	task->cl_arg_size = sizeof(multiplier);
 	task->cl_arg_size = sizeof(multiplier);
 
 
         int ret = starpu_task_submit(task);
         int ret = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 FPRINTF(stderr, "No worker may execute this task\n");
                 return 1;
                 return 1;
 	}
 	}
@@ -67,8 +68,9 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 	/* update the array in RAM */
 	/* update the array in RAM */
 	starpu_data_unregister(block_handle);
 	starpu_data_unregister(block_handle);
 
 
-        for(i=0 ; i<pnx*pny*pnz; i++) {
-          FPRINTF(stderr, "%f ", block[i]);
+        for(i=0 ; i<pnx*pny*pnz; i++)
+	{
+		FPRINTF(stderr, "%f ", block[i]);
         }
         }
         FPRINTF(stderr, "\n");
         FPRINTF(stderr, "\n");
 
 
@@ -88,9 +90,12 @@ int main(int argc, char **argv)
 
 
         block = (float*)malloc(nx*ny*nz*sizeof(float));
         block = (float*)malloc(nx*ny*nz*sizeof(float));
         assert(block);
         assert(block);
-        for(k=0 ; k<nz ; k++) {
-                for(j=0 ; j<ny ; j++) {
-                        for(i=0 ; i<nx ; i++) {
+        for(k=0 ; k<nz ; k++)
+	{
+                for(j=0 ; j<ny ; j++)
+		{
+                        for(i=0 ; i<nx ; i++)
+			{
                                 block[(k*nx*ny)+(j*nx)+i] = n++;
                                 block[(k*nx*ny)+(j*nx)+i] = n++;
                         }
                         }
                 }
                 }
@@ -110,11 +115,13 @@ int main(int argc, char **argv)
 
 
         /* Check result is correct */
         /* Check result is correct */
         ret=1;
         ret=1;
-        for(i=0 ; i<nx*ny*nz ; i++) {
-          if (block[i] != (i+1) * multiplier) {
-            ret=0;
-            break;
-          }
+        for(i=0 ; i<nx*ny*nz ; i++)
+	{
+		if (block[i] != (i+1) * multiplier)
+		{
+			ret=0;
+			break;
+		}
         }
         }
 
 
         FPRINTF(stderr,"TEST %s\n", ret==1?"PASSED":"FAILED");
         FPRINTF(stderr,"TEST %s\n", ret==1?"PASSED":"FAILED");

+ 5 - 3
examples/basic_examples/block_cpu.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,8 +28,10 @@ void cpu_codelet(void *descr[], void *_args)
         float *multiplier = (float *)_args;
         float *multiplier = (float *)_args;
         unsigned i, j, k;
         unsigned i, j, k;
 
 
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] *= *multiplier;
                                 block[(k*ldz)+(j*ldy)+i] *= *multiplier;
                 }
                 }

+ 5 - 3
examples/basic_examples/block_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,8 +20,10 @@
 static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {
 {
         int i, j, k;
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] *= multiplier;
                                 block[(k*ldz)+(j*ldy)+i] *= multiplier;
                 }
                 }

+ 3 - 2
examples/basic_examples/block_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010, 2011  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,7 +19,8 @@
 #include <starpu_opencl.h>
 #include <starpu_opencl.h>
 
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
-do {                                                        \
+do						    	    \
+{							    \
 	int err;                                            \
 	int err;                                            \
 	err = clSetKernelArg(kernel, n, size, ptr);         \
 	err = clSetKernelArg(kernel, n, size, ptr);         \
 	if (err != CL_SUCCESS)                              \
 	if (err != CL_SUCCESS)                              \

+ 5 - 3
examples/basic_examples/block_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,8 +17,10 @@
 __kernel void block(__global float *b, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 __kernel void block(__global float *b, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {
 {
         int i, j, k;
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                         for(i=0; i<nx ; i++)
                                 b[(k*ldz)+(j*ldy)+i] *= multiplier;
                                 b[(k*ldz)+(j*ldy)+i] *= multiplier;
                 }
                 }

+ 3 - 1
examples/basic_examples/hello_world.c

@@ -46,10 +46,12 @@ void callback_func(void *callback_arg)
 * DSM; the second argument references read-only data that is passed as an
 * DSM; the second argument references read-only data that is passed as an
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
-struct params {
+struct params
+{
 	int i;
 	int i;
 	float f;
 	float f;
 };
 };
+
 void cpu_func(void *buffers[], void *cl_arg)
 void cpu_func(void *buffers[], void *cl_arg)
 {
 {
 	struct params *params = (struct params *) cl_arg;
 	struct params *params = (struct params *) cl_arg;

+ 22 - 12
examples/basic_examples/mult.c

@@ -133,20 +133,26 @@ static void init_problem_data(void)
 
 
 	/* fill the A and B matrices */
 	/* fill the A and B matrices */
 	srand(2009);
 	srand(2009);
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < zdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < zdim; i++)
+		{
 			A[j+i*ydim] = (float)(starpu_drand48());
 			A[j+i*ydim] = (float)(starpu_drand48());
 		}
 		}
 	}
 	}
 
 
-	for (j=0; j < zdim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < zdim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			B[j+i*zdim] = (float)(starpu_drand48());
 			B[j+i*zdim] = (float)(starpu_drand48());
 		}
 		}
 	}
 	}
 
 
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			C[j+i*ydim] = (float)(0);
 			C[j+i*ydim] = (float)(0);
 		}
 		}
 	}
 	}
@@ -186,16 +192,18 @@ static void partition_mult_data(void)
 	/* StarPU supplies some basic filters such as the partition of a matrix
 	/* StarPU supplies some basic filters such as the partition of a matrix
 	 * into blocks, note that we are using a FORTRAN ordering so that the
 	 * into blocks, note that we are using a FORTRAN ordering so that the
 	 * name of the filters are a bit misleading */
 	 * name of the filters are a bit misleading */
-	struct starpu_data_filter vert = {
+	struct starpu_data_filter vert =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nslicesx
 		.nchildren = nslicesx
 	};
 	};
-		
-	struct starpu_data_filter horiz = {
+
+	struct starpu_data_filter horiz =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nslicesy
 		.nchildren = nslicesy
 	};
 	};
-		
+
 /*
 /*
  *	Illustration with nslicex = 4 and nslicey = 2, it is possible to access
  *	Illustration with nslicex = 4 and nslicey = 2, it is possible to access
  *	sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle,
  *	sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle,
@@ -246,12 +254,14 @@ static void partition_mult_data(void)
 	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
 	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
 }
 }
 
 
-static struct starpu_perfmodel mult_perf_model = {
+static struct starpu_perfmodel mult_perf_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "mult_perf_model"
 	.symbol = "mult_perf_model"
 };
 };
 
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
         /* we can only execute that kernel on a CPU yet */
         /* we can only execute that kernel on a CPU yet */
         .where = STARPU_CPU,
         .where = STARPU_CPU,
         /* CPU implementation of the codelet */
         /* CPU implementation of the codelet */

+ 12 - 6
examples/basic_examples/multiformat.c

@@ -32,7 +32,8 @@ multiformat_scal_cpu_func(void *buffers[], void *args)
 	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 
 
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		aos[i].x *= aos[i].y;
 		aos[i].x *= aos[i].y;
 	}
 	}
 }
 }
@@ -47,7 +48,8 @@ extern struct starpu_codelet cpu_to_opencl_cl;
 extern struct starpu_codelet opencl_to_cpu_cl;
 extern struct starpu_codelet opencl_to_cpu_cl;
 #endif
 #endif
 
 
-static struct starpu_multiformat_data_interface_ops format_ops = {
+static struct starpu_multiformat_data_interface_ops format_ops =
+{
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	.cuda_elemsize = 2* sizeof(float),
 	.cuda_elemsize = 2* sizeof(float),
 	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
 	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
@@ -69,7 +71,8 @@ extern void multiformat_scal_cuda_func(void *buffers[], void *arg);
 extern void multiformat_scal_opencl_func(void *buffers[], void *arg);
 extern void multiformat_scal_opencl_func(void *buffers[], void *arg);
 #endif
 #endif
 
 
-static struct starpu_codelet  cl = {
+static struct starpu_codelet  cl =
+{
 	.where = STARPU_CUDA | STARPU_OPENCL,
 	.where = STARPU_CUDA | STARPU_OPENCL,
 	.cpu_funcs = {multiformat_scal_cpu_func, NULL},
 	.cpu_funcs = {multiformat_scal_cpu_func, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -89,7 +92,8 @@ static void
 init_problem_data(void)
 init_problem_data(void)
 {
 {
 	int i; 
 	int i; 
-	for (i = 0; i < N_ELEMENTS; i++) {
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
 		array_of_structs[i].x = 1.0 + i;
 		array_of_structs[i].x = 1.0 + i;
 		array_of_structs[i].y = 42.0;
 		array_of_structs[i].y = 42.0;
 	}
 	}
@@ -154,7 +158,8 @@ static void
 print_it(void)
 print_it(void)
 {
 {
 	int i;
 	int i;
-	for (i = 0; i < N_ELEMENTS; i++) {
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
 		FPRINTF(stderr, "(%.2f %.2f) ",
 		FPRINTF(stderr, "(%.2f %.2f) ",
 			array_of_structs[i].x,
 			array_of_structs[i].x,
 			array_of_structs[i].y);
 			array_of_structs[i].y);
@@ -166,7 +171,8 @@ static int
 check_it(void)
 check_it(void)
 {
 {
 	int i;
 	int i;
-	for (i = 0; i < N_ELEMENTS; i++) {
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
 		float expected_value = i + 1.0;
 		float expected_value = i + 1.0;
 #if STARPU_USE_CUDA
 #if STARPU_USE_CUDA
 		expected_value *= array_of_structs[i].y;
 		expected_value *= array_of_structs[i].y;

+ 12 - 6
examples/basic_examples/multiformat_conversion_codelets.c

@@ -24,21 +24,24 @@ void cuda_to_cpu(void *buffers[], void *arg)
 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
 	int i;
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		dst[i].x = src->x[i];
 		dst[i].x = src->x[i];
 		dst[i].y = src->y[i];
 		dst[i].y = src->y[i];
 	}
 	}
 }
 }
 
 
 extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
 extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
-struct starpu_codelet cpu_to_cuda_cl = {
+struct starpu_codelet cpu_to_cuda_cl =
+{
 	.where = STARPU_CUDA,
 	.where = STARPU_CUDA,
 	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
 	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
 	.nbuffers = 1,
 	.nbuffers = 1,
 	.name = "codelet_cpu_to_cuda"
 	.name = "codelet_cpu_to_cuda"
 };
 };
 
 
-struct starpu_codelet cuda_to_cpu_cl = {
+struct starpu_codelet cuda_to_cpu_cl =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.cpu_funcs = {cuda_to_cpu, NULL},
 	.cpu_funcs = {cuda_to_cpu, NULL},
 	.nbuffers = 1,
 	.nbuffers = 1,
@@ -54,20 +57,23 @@ void opencl_to_cpu(void *buffers[], void *arg)
 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
 	int i;
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++)
+	{
 		dst[i].x = src->x[i];
 		dst[i].x = src->x[i];
 		dst[i].y = src->y[i];
 		dst[i].y = src->y[i];
 	}
 	}
 }
 }
 
 
 extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
 extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
-struct starpu_codelet cpu_to_opencl_cl = {
+struct starpu_codelet cpu_to_opencl_cl =
+{
 	.where = STARPU_OPENCL,
 	.where = STARPU_OPENCL,
 	.opencl_funcs = {cpu_to_opencl_opencl_func, NULL},
 	.opencl_funcs = {cpu_to_opencl_opencl_func, NULL},
 	.nbuffers = 1
 	.nbuffers = 1
 };
 };
 
 
-struct starpu_codelet opencl_to_cpu_cl = {
+struct starpu_codelet opencl_to_cpu_cl =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.cpu_funcs = {opencl_to_cpu, NULL},
 	.cpu_funcs = {opencl_to_cpu, NULL},
 	.nbuffers = 1
 	.nbuffers = 1

+ 2 - 1
examples/basic_examples/multiformat_conversion_codelets_cuda.cu

@@ -23,7 +23,8 @@ static __global__ void cpu_to_cuda_cuda(struct point *src,
 {
 {
         unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
         unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
 
 
-	if (i < n) {
+	if (i < n)
+	{
 		dst->x[i] = src[i].x;
 		dst->x[i] = src[i].x;
 		dst->y[i] = src[i].y;
 		dst->y[i] = src[i].y;
 	}
 	}

+ 2 - 1
examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl

@@ -21,7 +21,8 @@ __kernel void cpu_to_opencl_opencl(__global struct point *src,
 				   unsigned int n)
 				   unsigned int n)
 {
 {
 	const unsigned int i = get_global_id(0);
 	const unsigned int i = get_global_id(0);
-	if (i < n) {
+	if (i < n)
+	{
 		dst->x[i] = src[i].x;
 		dst->x[i] = src[i].x;
 		dst->y[i] = src[i].y;
 		dst->y[i] = src[i].y;
 	}
 	}

+ 4 - 2
examples/basic_examples/multiformat_types.h

@@ -18,11 +18,13 @@
 
 
 #define N_ELEMENTS 10
 #define N_ELEMENTS 10
 
 
-struct struct_of_arrays{
+struct struct_of_arrays
+{
 	float x[N_ELEMENTS];
 	float x[N_ELEMENTS];
 	float y[N_ELEMENTS];
 	float y[N_ELEMENTS];
 };
 };
-struct point {
+struct point
+{
 	float x, y;
 	float x, y;
 };
 };
 
 

+ 6 - 3
examples/basic_examples/vector_scal.c

@@ -37,17 +37,20 @@ extern void scal_sse_func_icc(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 
 
-static struct starpu_perfmodel vector_scal_model = {
+static struct starpu_perfmodel vector_scal_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale"
 	.symbol = "vector_scale"
 };
 };
 
 
-static struct starpu_perfmodel vector_scal_power_model = {
+static struct starpu_perfmodel vector_scal_power_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale_power"
 	.symbol = "vector_scale_power"
 };
 };
 
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
 	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
 	/* CPU implementation of the codelet */
 	/* CPU implementation of the codelet */
 	.cpu_funcs = {
 	.cpu_funcs = {

+ 4 - 2
examples/basic_examples/vector_scal_c.c

@@ -33,12 +33,14 @@
 extern void scal_cpu_func(void *buffers[], void *_args);
 extern void scal_cpu_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 
 
-static struct starpu_perfmodel vector_scal_model = {
+static struct starpu_perfmodel vector_scal_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale_model"
 	.symbol = "vector_scale_model"
 };
 };
 
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
   .where = STARPU_CPU | STARPU_CUDA,
   .where = STARPU_CPU | STARPU_CUDA,
   /* CPU implementation of the codelet */
   /* CPU implementation of the codelet */
   .cpu_funcs = {scal_cpu_func, NULL},
   .cpu_funcs = {scal_cpu_func, NULL},

+ 3 - 2
examples/basic_examples/vector_scal_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,7 +17,8 @@
 __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
 __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
 {
 {
         const int i = get_global_id(0);
         const int i = get_global_id(0);
-        if (i < nx) {
+        if (i < nx)
+	{
                 val[i] *= factor;
                 val[i] *= factor;
         }
         }
 }
 }

+ 14 - 7
examples/cg/cg.c

@@ -299,7 +299,8 @@ static void cg(void)
 			/* r <- r - A x */
 			/* r <- r - A x */
 			gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction); 
 			gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks, use_reduction); 
 		}
 		}
-		else {
+		else
+		{
 			/* r <- r - alpha q */
 			/* r <- r - alpha q */
 			axpy_kernel(r_handle, q_handle, -alpha, nblocks);
 			axpy_kernel(r_handle, q_handle, -alpha, nblocks);
 		}
 		}
@@ -342,28 +343,34 @@ static int check(void)
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-	        if (strcmp(argv[i], "-n") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+	        if (strcmp(argv[i], "-n") == 0)
+		{
 			n = (int long long)atoi(argv[++i]);
 			n = (int long long)atoi(argv[++i]);
 			continue;
 			continue;
 		}
 		}
 
 
-	        if (strcmp(argv[i], "-maxiter") == 0) {
+	        if (strcmp(argv[i], "-maxiter") == 0)
+		{
 			i_max = atoi(argv[++i]);
 			i_max = atoi(argv[++i]);
 			continue;
 			continue;
 		}
 		}
 
 
-	        if (strcmp(argv[i], "-nblocks") == 0) {
+	        if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			nblocks = atoi(argv[++i]);
 			nblocks = atoi(argv[++i]);
 			continue;
 			continue;
 		}
 		}
 
 
-	        if (strcmp(argv[i], "-no-reduction") == 0) {
+	        if (strcmp(argv[i], "-no-reduction") == 0)
+		{
 			use_reduction = 0;
 			use_reduction = 0;
 			continue;
 			continue;
 		}
 		}
 
 
-	        if (strcmp(argv[i], "-h") == 0) {
+	        if (strcmp(argv[i], "-h") == 0)
+		{
 			FPRINTF(stderr, "usage: %s [-h] [-nblocks #blocks] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]);
 			FPRINTF(stderr, "usage: %s [-h] [-nblocks #blocks] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]);
 			exit(-1);
 			exit(-1);
 			continue;
 			continue;

+ 40 - 20
examples/cg/cg_kernels.c

@@ -68,12 +68,14 @@ static void accumulate_variable_cpu(void *descr[], void *cl_arg)
 	*v_dst = *v_dst + *v_src;
 	*v_dst = *v_dst + *v_src;
 }
 }
 
 
-static struct starpu_perfmodel accumulate_variable_model = {
+static struct starpu_perfmodel accumulate_variable_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "accumulate_variable"
 	.symbol = "accumulate_variable"
 };
 };
 
 
-struct starpu_codelet accumulate_variable_cl = {
+struct starpu_codelet accumulate_variable_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {accumulate_variable_cpu, NULL},
 	.cpu_funcs = {accumulate_variable_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -104,12 +106,14 @@ static void accumulate_vector_cpu(void *descr[], void *cl_arg)
 	AXPY(n, (TYPE)1.0, v_src, 1, v_dst, 1);
 	AXPY(n, (TYPE)1.0, v_src, 1, v_dst, 1);
 }
 }
 
 
-static struct starpu_perfmodel accumulate_vector_model = {
+static struct starpu_perfmodel accumulate_vector_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "accumulate_vector"
 	.symbol = "accumulate_vector"
 };
 };
 
 
-struct starpu_codelet accumulate_vector_cl = {
+struct starpu_codelet accumulate_vector_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {accumulate_vector_cpu, NULL},
 	.cpu_funcs = {accumulate_vector_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -142,12 +146,14 @@ static void bzero_variable_cpu(void *descr[], void *cl_arg)
 	*v = (TYPE)0.0;
 	*v = (TYPE)0.0;
 }
 }
 
 
-static struct starpu_perfmodel bzero_variable_model = {
+static struct starpu_perfmodel bzero_variable_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "bzero_variable"
 	.symbol = "bzero_variable"
 };
 };
 
 
-struct starpu_codelet bzero_variable_cl = {
+struct starpu_codelet bzero_variable_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {bzero_variable_cpu, NULL},
 	.cpu_funcs = {bzero_variable_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -177,12 +183,14 @@ static void bzero_vector_cpu(void *descr[], void *cl_arg)
 	memset(v, 0, n*sizeof(TYPE));
 	memset(v, 0, n*sizeof(TYPE));
 }
 }
 
 
-static struct starpu_perfmodel bzero_vector_model = {
+static struct starpu_perfmodel bzero_vector_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "bzero_vector"
 	.symbol = "bzero_vector"
 };
 };
 
 
-struct starpu_codelet bzero_vector_cl = {
+struct starpu_codelet bzero_vector_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {bzero_vector_cpu, NULL},
 	.cpu_funcs = {bzero_vector_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -230,12 +238,14 @@ static void dot_kernel_cpu(void *descr[], void *cl_arg)
 	*dot = *dot + local_dot;
 	*dot = *dot + local_dot;
 }
 }
 
 
-static struct starpu_perfmodel dot_kernel_model = {
+static struct starpu_perfmodel dot_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "dot_kernel"
 	.symbol = "dot_kernel"
 };
 };
 
 
-static struct starpu_codelet dot_kernel_cl = {
+static struct starpu_codelet dot_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dot_kernel_cpu, NULL},
 	.cpu_funcs = {dot_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -297,12 +307,14 @@ static void scal_kernel_cpu(void *descr[], void *cl_arg)
 	SCAL(n, alpha, v1, 1);
 	SCAL(n, alpha, v1, 1);
 }
 }
 
 
-static struct starpu_perfmodel scal_kernel_model = {
+static struct starpu_perfmodel scal_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "scal_kernel"
 	.symbol = "scal_kernel"
 };
 };
 
 
-static struct starpu_codelet scal_kernel_cl = {
+static struct starpu_codelet scal_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {scal_kernel_cpu, NULL},
 	.cpu_funcs = {scal_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -368,12 +380,14 @@ static void gemv_kernel_cpu(void *descr[], void *cl_arg)
 	GEMV("N", nx, ny, alpha, M, ld, v2, 1, beta, v1, 1);
 	GEMV("N", nx, ny, alpha, M, ld, v2, 1, beta, v1, 1);
 }
 }
 
 
-static struct starpu_perfmodel gemv_kernel_model = {
+static struct starpu_perfmodel gemv_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "gemv_kernel"
 	.symbol = "gemv_kernel"
 };
 };
 
 
-static struct starpu_codelet gemv_kernel_cl = {
+static struct starpu_codelet gemv_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.type = STARPU_SPMD,
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
 	.max_parallelism = INT_MAX,
@@ -460,12 +474,14 @@ static void scal_axpy_kernel_cpu(void *descr[], void *cl_arg)
 	AXPY(nx, p2, v2, 1, v1, 1);
 	AXPY(nx, p2, v2, 1, v1, 1);
 }
 }
 
 
-static struct starpu_perfmodel scal_axpy_kernel_model = {
+static struct starpu_perfmodel scal_axpy_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "scal_axpy_kernel"
 	.symbol = "scal_axpy_kernel"
 };
 };
 
 
-static struct starpu_codelet scal_axpy_kernel_cl = {
+static struct starpu_codelet scal_axpy_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {scal_axpy_kernel_cpu, NULL},
 	.cpu_funcs = {scal_axpy_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -528,12 +544,14 @@ static void axpy_kernel_cpu(void *descr[], void *cl_arg)
 	AXPY(nx, p1, v2, 1, v1, 1);
 	AXPY(nx, p1, v2, 1, v1, 1);
 }
 }
 
 
-static struct starpu_perfmodel axpy_kernel_model = {
+static struct starpu_perfmodel axpy_kernel_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "axpy_kernel"
 	.symbol = "axpy_kernel"
 };
 };
 
 
-static struct starpu_codelet axpy_kernel_cl = {
+static struct starpu_codelet axpy_kernel_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {axpy_kernel_cpu, NULL},
 	.cpu_funcs = {axpy_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -588,12 +606,14 @@ static void copy_handle_cuda(void *descr[], void *cl_arg)
 }
 }
 #endif
 #endif
 
 
-static struct starpu_perfmodel copy_handle_model = {
+static struct starpu_perfmodel copy_handle_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "copy_handle"
 	.symbol = "copy_handle"
 };
 };
 
 
-static struct starpu_codelet copy_handle_cl = {
+static struct starpu_codelet copy_handle_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {copy_handle_cpu, NULL},
 	.cpu_funcs = {copy_handle_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA

+ 18 - 9
examples/cholesky/cholesky.h

@@ -81,39 +81,48 @@ extern struct starpu_perfmodel chol_model_22;
 static void __attribute__((unused)) parse_args(int argc, char **argv)
 static void __attribute__((unused)) parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 			size = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nbigblocks") == 0) {
+		if (strcmp(argv[i], "-nbigblocks") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-pin") == 0) {
+		if (strcmp(argv[i], "-pin") == 0)
+		{
 			pinned = 1;
 			pinned = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-no-prio") == 0) {
+		if (strcmp(argv[i], "-no-prio") == 0)
+		{
 			noprio = 1;
 			noprio = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-bound") == 0) {
+		if (strcmp(argv[i], "-bound") == 0)
+		{
 			bound = 1;
 			bound = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 			check = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-pin] [-size size] [-nblocks nblocks] [-check]\n", argv[0]);
 			printf("usage : %s [-pin] [-size size] [-nblocks nblocks] [-check]\n", argv[0]);
 		}
 		}
 	}
 	}

+ 38 - 19
examples/cholesky/cholesky_grain_tag.c

@@ -63,7 +63,8 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 	task->priority = STARPU_MAX_PRIO;
 	task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
 		starpu_tag_declare_deps(TAG11_AUX(k, reclevel), 1, TAG22_AUX(k-1, k, k, reclevel));
 	}
 	}
 
 
@@ -93,15 +94,18 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j);
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j);
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (j == k+1) {
+	if (j == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 2, TAG11_AUX(k, reclevel), TAG22_AUX(k-1, k, j, reclevel));
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 2, TAG11_AUX(k, reclevel), TAG22_AUX(k-1, k, j, reclevel));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
 		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
 	}
 	}
 
 
@@ -135,15 +139,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j);
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j);
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if ( (i == k + 1) && (j == k +1) ) {
+	if ( (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 3, TAG22_AUX(k-1, i, j, reclevel), TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 3, TAG22_AUX(k-1, i, j, reclevel), TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
 	}
 	}
 
 
@@ -173,12 +180,14 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
 
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
@@ -189,10 +198,12 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 	{
 	{
 		struct starpu_task *task = create_task_11(dataA, k, reclevel);
 		struct starpu_task *task = create_task_11(dataA, k, reclevel);
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 			starpu_task_submit(task);
 		}
 		}
 
 
@@ -223,7 +234,8 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 		starpu_data_unpartition(dataA, 0);
 		starpu_data_unpartition(dataA, 0);
 		return;
 		return;
 	}
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(reclevel == 0);
 		STARPU_ASSERT(reclevel == 0);
 		unsigned ndeps_tags = (nblocks - nbigblocks)*(nblocks - nbigblocks);
 		unsigned ndeps_tags = (nblocks - nbigblocks)*(nblocks - nbigblocks);
 
 
@@ -261,7 +273,8 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 	{
 	{
 		starpu_malloc((void **)A, dim*dim*sizeof(float));
 		starpu_malloc((void **)A, dim*dim*sizeof(float));
 	}
 	}
-	else {
+	else
+	{
 		*A = malloc(dim*dim*sizeof(float));
 		*A = malloc(dim*dim*sizeof(float));
 	}
 	}
 }
 }
@@ -321,10 +334,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			}
 			}
 		}
 		}
@@ -342,10 +357,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 				mat[j+i*size] = 0.0f; /* debug */
 				mat[j+i*size] = 0.0f; /* debug */
 			}
 			}
@@ -365,10 +382,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
                                 FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
                                 FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			}
 			}
 		}
 		}

+ 24 - 12
examples/cholesky/cholesky_implicit.c

@@ -137,7 +137,8 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 	double flop = (1.0f*n*n*n)/3.0f;
 	double flop = (1.0f*n*n*n)/3.0f;
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
-	if (bound) {
+	if (bound)
+	{
 		double res;
 		double res;
 		starpu_bound_compute(&res, NULL, 0);
 		starpu_bound_compute(&res, NULL, 0);
 		FPRINTF(stderr, "Theoretical GFlops: %2.2f\n", (flop/res/1000000.0f));
 		FPRINTF(stderr, "Theoretical GFlops: %2.2f\n", (flop/res/1000000.0f));
@@ -152,12 +153,14 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 	 * one block is now determined by 2 unsigned (i,j) */
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
@@ -203,10 +206,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			}
 			}
 		}
 		}
@@ -222,10 +227,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 				mat[j+i*size] = 0.0f; /* debug */
 				mat[j+i*size] = 0.0f; /* debug */
 			}
 			}
@@ -241,7 +248,8 @@ int main(int argc, char **argv)
 		{
 		{
 			for (i = 0; i < size; i++)
 			for (i = 0; i < size; i++)
 			{
 			{
-				if (i > j) {
+				if (i > j)
+				{
 					mat[j+i*size] = 0.0f; /* debug */
 					mat[j+i*size] = 0.0f; /* debug */
 				}
 				}
 			}
 			}
@@ -258,10 +266,12 @@ int main(int argc, char **argv)
 		{
 		{
 			for (i = 0; i < size; i++)
 			for (i = 0; i < size; i++)
 			{
 			{
-				if (i <= j) {
+				if (i <= j)
+				{
 					FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 					FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 				}
 				}
-				else {
+				else
+				{
 					FPRINTF(stdout, ".\t");
 					FPRINTF(stdout, ".\t");
 				}
 				}
 			}
 			}
@@ -273,10 +283,12 @@ int main(int argc, char **argv)
 		{
 		{
 			for (i = 0; i < size; i++)
 			for (i = 0; i < size; i++)
 			{
 			{
-				if (i <= j) {
+				if (i <= j)
+				{
 	                                float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
 	                                float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
 	                                float err = abs(test_mat[j +i*size] - orig);
 	                                float err = abs(test_mat[j +i*size] - orig);
-	                                if (err > 0.00001) {
+	                                if (err > 0.00001)
+					{
 	                                        FPRINTF(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
 	                                        FPRINTF(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
 	                                        assert(0);
 	                                        assert(0);
 	                                }
 	                                }

+ 9 - 5
examples/cholesky/cholesky_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -55,7 +55,8 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __at
 			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
 			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
 				right, ld12, 1.0f, center, ld22);
 				right, ld12, 1.0f, center, ld22);
 		}
 		}
-		else {
+		else
+		{
 			/* Parallel CPU kernel */
 			/* Parallel CPU kernel */
 			int rank = starpu_combined_worker_get_rank();
 			int rank = starpu_combined_worker_get_rank();
 
 
@@ -113,7 +114,8 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, __attrib
 	unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]);
 	unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]);
 	unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]);
 	unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]);
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
 			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
 			break;
 			break;
@@ -157,7 +159,8 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 
 
 	unsigned z;
 	unsigned z;
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 
 
 			/*
 			/*
@@ -188,7 +191,8 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 			int ret;
 			int ret;
 			int info;
 			int info;
 			ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
 			ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
-			if (ret != MAGMA_SUCCESS) {
+			if (ret != MAGMA_SUCCESS)
+			{
 				fprintf(stderr, "Error in Magma: %d\n", ret);
 				fprintf(stderr, "Error in Magma: %d\n", ret);
 				STARPU_ABORT();
 				STARPU_ABORT();
 			}
 			}

+ 12 - 6
examples/cholesky/cholesky_models.c

@@ -126,8 +126,10 @@ static double cuda_chol_task_22_cost(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 	return PERTURBATE(cost);
 }
 }
 
 
-struct starpu_perfmodel chol_model_11 = {
-	.per_arch = {
+struct starpu_perfmodel chol_model_11 =
+{
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_11_cost },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_11_cost },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_11_cost }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_11_cost }
 	},
 	},
@@ -135,8 +137,10 @@ struct starpu_perfmodel chol_model_11 = {
 	.symbol = "chol_model_11"
 	.symbol = "chol_model_11"
 };
 };
 
 
-struct starpu_perfmodel chol_model_21 = {
-	.per_arch = {
+struct starpu_perfmodel chol_model_21 =
+{
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_21_cost },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_21_cost },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_21_cost }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_21_cost }
 	},
 	},
@@ -144,8 +148,10 @@ struct starpu_perfmodel chol_model_21 = {
 	.symbol = "chol_model_21"
 	.symbol = "chol_model_21"
 };
 };
 
 
-struct starpu_perfmodel chol_model_22 = {
-	.per_arch = {
+struct starpu_perfmodel chol_model_22 =
+{
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_22_cost },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = cpu_chol_task_22_cost },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_22_cost }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = cuda_chol_task_22_cost }
 	},
 	},

+ 44 - 22
examples/cholesky/cholesky_tag.c

@@ -64,7 +64,8 @@ static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
@@ -94,20 +95,24 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!noprio && (j == k+1)) {
+	if (!noprio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 	}
 
 
 	int ret = starpu_task_submit(task);
 	int ret = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 FPRINTF(stderr, "No worker may execute this task\n");
                 exit(0);
                 exit(0);
         }
         }
@@ -141,20 +146,24 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if (!noprio && (i == k + 1) && (j == k +1) ) {
+	if (!noprio && (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 	}
 
 
 	int ret = starpu_task_submit(task);
 	int ret = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 FPRINTF(stderr, "No worker may execute this task\n");
                 exit(0);
                 exit(0);
         }
         }
@@ -183,12 +192,15 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	{
 	{
 		struct starpu_task *task = create_task_11(dataA, k);
 		struct starpu_task *task = create_task_11(dataA, k);
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			int ret = starpu_task_submit(task);
 			int ret = starpu_task_submit(task);
-                        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+                        if (STARPU_UNLIKELY(ret == -ENODEV))
+			{
                                 FPRINTF(stderr, "No worker may execute this task\n");
                                 FPRINTF(stderr, "No worker may execute this task\n");
                                 exit(0);
                                 exit(0);
                         }
                         }
@@ -209,7 +221,8 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 	/* schedule the codelet */
 	/* schedule the codelet */
 	int ret = starpu_task_submit(entry_task);
 	int ret = starpu_task_submit(entry_task);
-        if (STARPU_UNLIKELY(ret == -ENODEV)) {
+        if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 FPRINTF(stderr, "No worker may execute this task\n");
                 exit(0);
                 exit(0);
         }
         }
@@ -243,7 +256,8 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 	{
 	{
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 	} 
 	} 
-	else {
+	else
+	{
 		*A = malloc(dim*dim*sizeof(float));
 		*A = malloc(dim*dim*sizeof(float));
 	}
 	}
 }
 }
@@ -258,12 +272,14 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
@@ -311,10 +327,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			}
 			}
 		}
 		}
@@ -332,10 +350,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 				mat[j+i*size] = 0.0f; /* debug */
 				mat[j+i*size] = 0.0f; /* debug */
 			}
 			}
@@ -355,10 +375,12 @@ int main(int argc, char **argv)
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-			if (i <= j) {
+			if (i <= j)
+			{
 				FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
 			}
 			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			}
 			}
 		}
 		}

+ 28 - 14
examples/cholesky/cholesky_tile_tag.c

@@ -73,7 +73,8 @@ static struct starpu_task * create_task_11(unsigned k, unsigned nblocks)
 	task->priority = STARPU_MAX_PRIO;
 	task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
@@ -110,15 +111,18 @@ static void create_task_21(unsigned k, unsigned j)
 	task->buffers[1].handle = A_state[j][k]; 
 	task->buffers[1].handle = A_state[j][k]; 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (j == k+1) {
+	if (j == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 	}
 
 
@@ -159,15 +163,18 @@ static void create_task_22(unsigned k, unsigned i, unsigned j)
 	task->buffers[2].handle = A_state[j][i]; 
 	task->buffers[2].handle = A_state[j][i]; 
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if ( (i == k + 1) && (j == k +1) ) {
+	if ( (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
 	}
 	}
 
 
@@ -195,10 +202,12 @@ static void cholesky_no_stride(void)
 	{
 	{
 		struct starpu_task *task = create_task_11(k, nblocks);
 		struct starpu_task *task = create_task_11(k, nblocks);
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 			starpu_task_submit(task);
 		}
 		}
 		
 		
@@ -251,7 +260,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	for (x = 0; x < nblocks; x++)
 	{
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 			A[y][x] = malloc(BLOCKSIZE*BLOCKSIZE*sizeof(float));
 			A[y][x] = malloc(BLOCKSIZE*BLOCKSIZE*sizeof(float));
 			assert(A[y][x]);
 			assert(A[y][x]);
 		}
 		}
@@ -261,7 +271,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	for (x = 0; x < nblocks; x++)
 	{
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 #ifdef STARPU_HAVE_POSIX_MEMALIGN
 #ifdef STARPU_HAVE_POSIX_MEMALIGN
 			posix_memalign((void **)&A[y][x], 128, BLOCKSIZE*BLOCKSIZE*sizeof(float));
 			posix_memalign((void **)&A[y][x], 128, BLOCKSIZE*BLOCKSIZE*sizeof(float));
 #else
 #else
@@ -277,7 +288,8 @@ int main(int argc, char **argv)
 	 * */
 	 * */
 	for (y = 0; y < nblocks; y++)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	for (x = 0; x < nblocks; x++)
-	if (x <= y) {
+	if (x <= y)
+	{
 		for (i = 0; i < BLOCKSIZE; i++)
 		for (i = 0; i < BLOCKSIZE; i++)
 		for (j = 0; j < BLOCKSIZE; j++)
 		for (j = 0; j < BLOCKSIZE; j++)
 		{
 		{
@@ -293,7 +305,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	for (x = 0; x < nblocks; x++)
 	{
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
 			starpu_matrix_data_register(&A_state[y][x], 0, (uintptr_t)A[y][x], 
 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
 		}
 		}
@@ -304,7 +317,8 @@ int main(int argc, char **argv)
 	for (y = 0; y < nblocks; y++)
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
 	for (x = 0; x < nblocks; x++)
 	{
 	{
-		if (x <= y) {
+		if (x <= y)
+		{
 			starpu_data_unregister(A_state[y][x]);
 			starpu_data_unregister(A_state[y][x]);
 		}
 		}
 	}
 	}

+ 9 - 5
examples/common/blas_model.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,7 +22,8 @@
 
 
 double gemm_cost(struct starpu_buffer_descr *descr);
 double gemm_cost(struct starpu_buffer_descr *descr);
 
 
-static struct starpu_perfmodel starpu_sgemm_model = {
+static struct starpu_perfmodel starpu_sgemm_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = "sgemm_atlas"
 	.symbol = "sgemm_atlas"
@@ -33,12 +34,14 @@ static struct starpu_perfmodel starpu_sgemm_model = {
 #endif
 #endif
 };
 };
 
 
-static struct starpu_perfmodel starpu_sgemm_model_common = {
+static struct starpu_perfmodel starpu_sgemm_model_common =
+{
 	.cost_model = gemm_cost,
 	.cost_model = gemm_cost,
 	.type = STARPU_COMMON,
 	.type = STARPU_COMMON,
 };
 };
 
 
-static struct starpu_perfmodel starpu_dgemm_model = {
+static struct starpu_perfmodel starpu_dgemm_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = "dgemm_atlas"
 	.symbol = "dgemm_atlas"
@@ -49,7 +52,8 @@ static struct starpu_perfmodel starpu_dgemm_model = {
 #endif
 #endif
 };
 };
 
 
-static struct starpu_perfmodel starpu_dgemm_model_common = {
+static struct starpu_perfmodel starpu_dgemm_model_common =
+{
 	.cost_model = gemm_cost,
 	.cost_model = gemm_cost,
 	.type = STARPU_COMMON,
 	.type = STARPU_COMMON,
 };
 };

+ 14 - 7
examples/filters/fblock.c

@@ -39,9 +39,12 @@ void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz)
 {
 {
         int i, j, k;
         int i, j, k;
         FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz);
         FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz);
-        for(k=0 ; k<nz ; k++) {
-                for(j=0 ; j<ny ; j++) {
-                        for(i=0 ; i<nx ; i++) {
+        for(k=0 ; k<nz ; k++)
+	{
+                for(j=0 ; j<ny ; j++)
+		{
+                        for(i=0 ; i<nx ; i++)
+			{
                                 FPRINTF(stderr, "%2d ", block[(k*ldz)+(j*ldy)+i]);
                                 FPRINTF(stderr, "%2d ", block[(k*ldz)+(j*ldy)+i]);
                         }
                         }
                         FPRINTF(stderr,"\n");
                         FPRINTF(stderr,"\n");
@@ -74,9 +77,12 @@ int main(int argc, char **argv)
 
 
         block = (int*)malloc(NX*NY*NZ*sizeof(block[0]));
         block = (int*)malloc(NX*NY*NZ*sizeof(block[0]));
         assert(block);
         assert(block);
-        for(k=0 ; k<NZ ; k++) {
-                for(j=0 ; j<NY ; j++) {
-                        for(i=0 ; i<NX ; i++) {
+        for(k=0 ; k<NZ ; k++)
+	{
+                for(j=0 ; j<NY ; j++)
+		{
+                        for(i=0 ; i<NX ; i++)
+			{
                                 block[(k*NX*NY)+(j*NX)+i] = n++;
                                 block[(k*NX*NY)+(j*NX)+i] = n++;
                         }
                         }
                 }
                 }
@@ -138,7 +144,8 @@ int main(int argc, char **argv)
                 task->cl_arg = &multiplier;
                 task->cl_arg = &multiplier;
 
 
                 ret = starpu_task_submit(task);
                 ret = starpu_task_submit(task);
-                if (ret) {
+                if (ret)
+		{
                         FPRINTF(stderr, "Error when submitting task\n");
                         FPRINTF(stderr, "Error when submitting task\n");
                         exit(ret);
                         exit(ret);
                 }
                 }

+ 5 - 3
examples/filters/fblock_cpu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,8 +27,10 @@ void cpu_func(void *buffers[], void *cl_arg)
         unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
         unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
         unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
         unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
 
 
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] = *factor;
                                 block[(k*ldz)+(j*ldy)+i] = *factor;
                 }
                 }

+ 5 - 3
examples/filters/fblock_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,8 +20,10 @@
 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)
 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)
 {
 {
         int i, j, k;
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] = factor;
                                 block[(k*ldz)+(j*ldy)+i] = factor;
                 }
                 }

+ 3 - 2
examples/filters/fblock_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Université de Bordeaux 1
  * Copyright (C) 2011  Université de Bordeaux 1
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -19,7 +19,8 @@
 #include <starpu_opencl.h>
 #include <starpu_opencl.h>
 
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
-do {                                                        \
+do                                                          \
+{							    \
 	int err;                                            \
 	int err;                                            \
 	err = clSetKernelArg(kernel, n, size, ptr);         \
 	err = clSetKernelArg(kernel, n, size, ptr);         \
 	if (err != CL_SUCCESS)                              \
 	if (err != CL_SUCCESS)                              \

+ 5 - 3
examples/filters/fblock_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,8 +17,10 @@
 __kernel void fblock_opencl(__global int* block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
 __kernel void fblock_opencl(__global int* block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
 {
 {
         int i, j, k;
         int i, j, k;
-        for(k=0; k<nz ; k++) {
-                for(j=0; j<ny ; j++) {
+        for(k=0; k<nz ; k++)
+	{
+                for(j=0; j<ny ; j++)
+		{
                         for(i=0; i<nx ; i++)
                         for(i=0; i<nx ; i++)
                                 block[(k*ldz)+(j*ldy)+i] = factor;
                                 block[(k*ldz)+(j*ldy)+i] = factor;
                 }
                 }

+ 12 - 6
examples/filters/fmatrix.c

@@ -34,7 +34,8 @@ void cpu_func(void *buffers[], void *cl_arg)
         /* local copy of the matrix pointer */
         /* local copy of the matrix pointer */
         int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
         int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
 
 
-        for(j=0; j<ny ; j++) {
+        for(j=0; j<ny ; j++)
+	{
                 for(i=0; i<nx ; i++)
                 for(i=0; i<nx ; i++)
                         val[(j*ld)+i] = *factor;
                         val[(j*ld)+i] = *factor;
         }
         }
@@ -46,8 +47,10 @@ int main(int argc, char **argv)
         int matrix[NX*NY];
         int matrix[NX*NY];
 
 
         FPRINTF(stderr,"IN  Matrix: \n");
         FPRINTF(stderr,"IN  Matrix: \n");
-        for(j=0 ; j<NY ; j++) {
-                for(i=0 ; i<NX ; i++) {
+        for(j=0 ; j<NY ; j++)
+	{
+                for(i=0 ; i<NX ; i++)
+		{
                         matrix[(j*NX)+i] = n++;
                         matrix[(j*NX)+i] = n++;
                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
                 }
                 }
@@ -56,7 +59,8 @@ int main(int argc, char **argv)
         FPRINTF(stderr,"\n");
         FPRINTF(stderr,"\n");
 
 
         starpu_data_handle_t handle;
         starpu_data_handle_t handle;
-        struct starpu_codelet cl = {
+        struct starpu_codelet cl =
+	{
                 .where = STARPU_CPU,
                 .where = STARPU_CPU,
                 .cpu_funcs = {cpu_func, NULL},
                 .cpu_funcs = {cpu_func, NULL},
                 .nbuffers = 1
                 .nbuffers = 1
@@ -96,8 +100,10 @@ int main(int argc, char **argv)
 
 
         /* Print result matrix */
         /* Print result matrix */
         FPRINTF(stderr,"OUT Matrix: \n");
         FPRINTF(stderr,"OUT Matrix: \n");
-        for(j=0 ; j<NY ; j++) {
-                for(i=0 ; i<NX ; i++) {
+        for(j=0 ; j<NY ; j++)
+	{
+                for(i=0 ; i<NX ; i++)
+		{
                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
                         FPRINTF(stderr, "%2d ", matrix[(j*NX)+i]);
                 }
                 }
                 FPRINTF(stderr,"\n");
                 FPRINTF(stderr,"\n");

+ 2 - 1
examples/filters/fvector.c

@@ -42,7 +42,8 @@ int main(int argc, char **argv)
         starpu_data_handle_t handle;
         starpu_data_handle_t handle;
         int factor=1;
         int factor=1;
 
 
-        struct starpu_codelet cl = {
+        struct starpu_codelet cl =
+	{
                 .where = STARPU_CPU,
                 .where = STARPU_CPU,
                 .cpu_funcs = {cpu_func, NULL},
                 .cpu_funcs = {cpu_func, NULL},
                 .nbuffers = 1
                 .nbuffers = 1

+ 42 - 21
examples/heat/dw_factolu.c

@@ -97,7 +97,8 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 	/* we did task 22k,i,j */
 	/* we did task 22k,i,j */
 	advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE;
 	advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE;
 	
 	
-	if ( (i == j) && (i == k+1)) {
+	if ( (i == j) && (i == k+1))
+	{
 		/* we now reduce the LU22 part (recursion appears there) */
 		/* we now reduce the LU22 part (recursion appears there) */
 		cl_args *u11arg = malloc(sizeof(cl_args));
 		cl_args *u11arg = malloc(sizeof(cl_args));
 
 
@@ -123,14 +124,17 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 	}
 	}
 
 
 	/* 11k+1 + 22k,k+1,j => 21 k+1,j */
 	/* 11k+1 + 22k,k+1,j => 21 k+1,j */
-	if ( i == k + 1) {
+	if ( i == k + 1)
+	{
 		uint8_t dep;
 		uint8_t dep;
 		/* 11 k+1*/
 		/* 11 k+1*/
 		dep = advance_11[(k+1)];
 		dep = advance_11[(k+1)];
-		if (dep & DONE) {
+		if (dep & DONE)
+		{
 			/* try to push the task */
 			/* try to push the task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1) + j*nblocks], STARTED);
 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1) + j*nblocks], STARTED);
-				if ((u & STARTED) == 0) {
+				if ((u & STARTED) == 0)
+				{
 					/* we are the only one that should 
 					/* we are the only one that should 
 					 * launch that task */
 					 * launch that task */
 					cl_args *u21a = malloc(sizeof(cl_args));
 					cl_args *u21a = malloc(sizeof(cl_args));
@@ -159,14 +163,17 @@ void dw_callback_v2_codelet_update_u22(void *argcb)
 	}
 	}
 
 
 	/* 11k + 22k-1,i,k => 12 k,i */
 	/* 11k + 22k-1,i,k => 12 k,i */
-	if (j == k + 1) {
+	if (j == k + 1)
+	{
 		uint8_t dep;
 		uint8_t dep;
 		/* 11 k+1*/
 		/* 11 k+1*/
 		dep = advance_11[(k+1)];
 		dep = advance_11[(k+1)];
-		if (dep & DONE) {
+		if (dep & DONE)
+		{
 			/* try to push the task */
 			/* try to push the task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1)*nblocks + i], STARTED);
 			uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1)*nblocks + i], STARTED);
-				 if ((u & STARTED) == 0) {
+				 if ((u & STARTED) == 0)
+				 {
 					/* we are the only one that should launch that task */
 					/* we are the only one that should launch that task */
 					cl_args *u12a = malloc(sizeof(cl_args));
 					cl_args *u12a = malloc(sizeof(cl_args));
 
 
@@ -217,7 +224,8 @@ void dw_callback_v2_codelet_update_u12(void *argcb)
 		{
 		{
 			/* perhaps we may schedule the 22 i,args->k,slicey task */
 			/* perhaps we may schedule the 22 i,args->k,slicey task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + slicey*nblocks + k], STARTED);
 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + slicey*nblocks + k], STARTED);
-                        if ((u & STARTED) == 0) {
+                        if ((u & STARTED) == 0)
+			{
 				/* update that square matrix */
 				/* update that square matrix */
 				cl_args *u22a = malloc(sizeof(cl_args));
 				cl_args *u22a = malloc(sizeof(cl_args));
 
 
@@ -276,7 +284,8 @@ void dw_callback_v2_codelet_update_u21(void *argcb)
 		{
 		{
 			/* perhaps we may schedule the 22 i,args->k,slicey task */
 			/* perhaps we may schedule the 22 i,args->k,slicey task */
 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + k*nblocks + slicex], STARTED);
 			uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + k*nblocks + slicex], STARTED);
-                        if ((u & STARTED) == 0) {
+                        if ((u & STARTED) == 0)
+			{
 				/* update that square matrix */
 				/* update that square matrix */
 				cl_args *u22a = malloc(sizeof(cl_args));
 				cl_args *u22a = malloc(sizeof(cl_args));
 
 
@@ -340,16 +349,20 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 
 
 			/* can we launch 12i,slice ? */
 			/* can we launch 12i,slice ? */
 			uint8_t deps12;
 			uint8_t deps12;
-			if (i == 0) {
+			if (i == 0)
+			{
 				deps12 = DONE;
 				deps12 = DONE;
 			}
 			}
-			else {
+			else
+			{
 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];		
 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks];		
 			}
 			}
-			if (deps12 & DONE) {
+			if (deps12 & DONE)
+			{
 				/* we may perhaps launch the task 12i,slice */
 				/* we may perhaps launch the task 12i,slice */
 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED);
-				 if ((u & STARTED) == 0) {
+				 if ((u & STARTED) == 0)
+				 {
 					/* we are the only one that should launch that task */
 					/* we are the only one that should launch that task */
 					cl_args *u12a = malloc(sizeof(cl_args));
 					cl_args *u12a = malloc(sizeof(cl_args));
 
 
@@ -377,16 +390,20 @@ void dw_callback_v2_codelet_update_u11(void *argcb)
 			}
 			}
 
 
 			/* can we launch 21i,slice ? */
 			/* can we launch 21i,slice ? */
-			if (i == 0) {
+			if (i == 0)
+			{
 				deps12 = DONE;
 				deps12 = DONE;
 			}
 			}
-			else {
+			else
+			{
 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];		
 				deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i];		
 			}
 			}
-			if (deps12 & DONE) {
+			if (deps12 & DONE)
+			{
 				/* we may perhaps launch the task 12i,slice */
 				/* we may perhaps launch the task 12i,slice */
 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
 				 uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED);
-				 if ((u & STARTED) == 0) {
+				 if ((u & STARTED) == 0)
+				 {
 					/* we are the only one that should launch that task */
 					/* we are the only one that should launch that task */
 					cl_args *u21a = malloc(sizeof(cl_args));
 					cl_args *u21a = malloc(sizeof(cl_args));
 
 
@@ -700,7 +717,8 @@ void initialize_system(float **A, float **B, unsigned dim, unsigned pinned)
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 		starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float));
 		starpu_malloc((void **)B, (size_t)dim*sizeof(float));
 		starpu_malloc((void **)B, (size_t)dim*sizeof(float));
 	} 
 	} 
-	else {
+	else
+	{
 		*A = malloc((size_t)dim*dim*sizeof(float));
 		*A = malloc((size_t)dim*dim*sizeof(float));
 		STARPU_ASSERT(*A);
 		STARPU_ASSERT(*A);
 		*B = malloc((size_t)dim*sizeof(float));
 		*B = malloc((size_t)dim*sizeof(float));
@@ -730,19 +748,22 @@ void dw_factoLU(float *matA, unsigned size,
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, 
 			size, size, sizeof(float));
 			size, size, sizeof(float));
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
 
-	switch (version) {
+	switch (version)
+	{
 		case 1:
 		case 1:
 			dw_codelet_facto(dataA, nblocks);
 			dw_codelet_facto(dataA, nblocks);
 			break;
 			break;

+ 22 - 11
examples/heat/dw_factolu.h

@@ -41,7 +41,8 @@
 #define BLAS3_FLOP(n1,n2,n3)    \
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
 
-typedef struct {
+typedef struct
+{
 	starpu_data_handle_t dataA;
 	starpu_data_handle_t dataA;
 	unsigned i;
 	unsigned i;
 	unsigned j;
 	unsigned j;
@@ -89,10 +90,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-/*			if (i <= j) { */
+/*			if (i <= j)
+			{ */
 				FPRINTF(stdout, "%2.2f\t", LU[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", LU[j +i*size]);
 /*			}
 /*			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			} */
 			} */
 		}
 		}
@@ -107,10 +110,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-/*			if (i <= j) { */
+/*			if (i <= j)
+			{ */
 				FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
 /*			}
 /*			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			} */
 			} */
 		}
 		}
@@ -123,10 +128,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-/*			if (i <= j) { */
+/*			if (i <= j)
+			{ */
 				FPRINTF(stdout, "%2.2f\t", U[j +i*size]);
 				FPRINTF(stdout, "%2.2f\t", U[j +i*size]);
 /*			}
 /*			}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			} */
 			} */
 		}
 		}
@@ -155,10 +162,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-	/*		if (i <= j) { */
+	/*		if (i <= j)
+			{ */
 	      			FPRINTF(stdout, "%2.2f\t", A[j +i*size]);
 	      			FPRINTF(stdout, "%2.2f\t", A[j +i*size]);
 	/*		}
 	/*		}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			} */
 			} */
 		}
 		}
@@ -172,10 +181,12 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 	{
 	{
 		for (i = 0; i < size; i++)
 		for (i = 0; i < size; i++)
 		{
 		{
-	/*		if (i <= j) { */
+	/*		if (i <= j)
+			{ */
 	      			FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
 	      			FPRINTF(stdout, "%2.2f\t", L[j +i*size]);
 	/*		}
 	/*		}
-			else {
+			else
+			{
 				FPRINTF(stdout, ".\t");
 				FPRINTF(stdout, ".\t");
 			} */
 			} */
 		}
 		}

+ 40 - 20
examples/heat/dw_factolu_grain.c

@@ -42,7 +42,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 	return task;
 	return task;
 }
 }
 
 
-static struct starpu_codelet cl11 = {
+static struct starpu_codelet cl11 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -68,14 +69,16 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 	task->priority = STARPU_MAX_PRIO;
 	task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k, tag_prefix), 1, TAG22(k-1, k, k, tag_prefix));
 		starpu_tag_declare_deps(TAG11(k, tag_prefix), 1, TAG22(k-1, k, k, tag_prefix));
 	}
 	}
 
 
 	return task;
 	return task;
 }
 }
 
 
-static struct starpu_codelet cl12 = {
+static struct starpu_codelet cl12 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -99,22 +102,26 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (i == k+1) {
+	if (i == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, i, k, tag_prefix));
 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, i, k, tag_prefix));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 1, TAG11(k, tag_prefix));
 		starpu_tag_declare_deps(TAG12(k, i, tag_prefix), 1, TAG11(k, tag_prefix));
 	}
 	}
 
 
 	starpu_task_submit(task);
 	starpu_task_submit(task);
 }
 }
 
 
-static struct starpu_codelet cl21 = {
+static struct starpu_codelet cl21 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -136,22 +143,26 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (j == k+1) {
+	if (j == k+1)
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, k, j, tag_prefix));
 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 2, TAG11(k, tag_prefix), TAG22(k-1, k, j, tag_prefix));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 1, TAG11(k, tag_prefix));
 		starpu_tag_declare_deps(TAG21(k, j, tag_prefix), 1, TAG11(k, tag_prefix));
 	}
 	}
 
 
 	starpu_task_submit(task);
 	starpu_task_submit(task);
 }
 }
 
 
-static struct starpu_codelet cl22 = {
+static struct starpu_codelet cl22 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -177,15 +188,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if ( (i == k + 1) && (j == k +1) ) {
+	if ( (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 3, TAG22(k-1, i, j, tag_prefix), TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 3, TAG22(k-1, i, j, tag_prefix), TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 2, TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
 		starpu_tag_declare_deps(TAG22(k, i, j, tag_prefix), 2, TAG12(k, i, tag_prefix), TAG21(k, j, tag_prefix));
 	}
 	}
 
 
@@ -207,12 +221,14 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 	unsigned nblocks = size / blocksize;
 	unsigned nblocks = size / blocksize;
 	unsigned maxk = inner_size / blocksize;
 	unsigned maxk = inner_size / blocksize;
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
@@ -235,10 +251,12 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 		struct starpu_task *task = create_task_11(dataA, k, tag_prefix);
 		struct starpu_task *task = create_task_11(dataA, k, tag_prefix);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 			starpu_task_submit(task);
 		}
 		}
 		
 		
@@ -272,7 +290,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 		starpu_data_unpartition(dataA, 0);		
 		starpu_data_unpartition(dataA, 0);		
 		return;
 		return;
 	}
 	}
-	else {
+	else
+	{
 		/*
 		/*
 		 * call dw_factoLU_grain_inner recursively in the remaining blocks
 		 * call dw_factoLU_grain_inner recursively in the remaining blocks
 		 */
 		 */
@@ -301,7 +320,8 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 		{
 		{
 			dw_factoLU_grain_inner(newmatA, size-inner_size, (size-inner_size)/2, ld, blocksize/2, tag_prefix+1);
 			dw_factoLU_grain_inner(newmatA, size-inner_size, (size-inner_size)/2, ld, blocksize/2, tag_prefix+1);
 		}
 		}
-		else { */
+		else
+		{ */
 			dw_factoLU_grain_inner(newmatA, size-inner_size, size-inner_size, ld, blocksize/2, tag_prefix+1);
 			dw_factoLU_grain_inner(newmatA, size-inner_size, size-inner_size, ld, blocksize/2, tag_prefix+1);
 /*		} */
 /*		} */
 	}
 	}

+ 12 - 6
examples/heat/dw_factolu_kernels.c

@@ -121,7 +121,8 @@ static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attr
 	cublasStatus status;
 	cublasStatus status;
 #endif
 #endif
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			SGEMM("N", "N",	dy, dx, dz, 
 			SGEMM("N", "N",	dy, dx, dz, 
 				-1.0f, left, ld21, right, ld12,
 				-1.0f, left, ld21, right, ld12,
@@ -168,7 +169,8 @@ void dw_cublas_codelet_update_u22(void *descr[], void *_args)
  * U12
  * U12
  */
  */
 
 
-static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args) {
+static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args)
+{
 	float *sub11;
 	float *sub11;
 	float *sub12;
 	float *sub12;
 
 
@@ -186,7 +188,8 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribut
 #endif
 #endif
 
 
 	/* solve L11 U12 = A12 (find U12) */
 	/* solve L11 U12 = A12 (find U12) */
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			STRSM("L", "L", "N", "N",
 			STRSM("L", "L", "N", "N",
 					 nx12, ny12, 1.0f, sub11, ld11, sub12, ld12);
 					 nx12, ny12, 1.0f, sub11, ld11, sub12, ld12);
@@ -231,7 +234,8 @@ void dw_cublas_codelet_update_u12(void *descr[], void *_args)
  * U21
  * U21
  */
  */
 
 
-static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args) {
+static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args)
+{
 	float *sub11;
 	float *sub11;
 	float *sub21;
 	float *sub21;
 
 
@@ -248,7 +252,8 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribut
 	cublasStatus status;
 	cublasStatus status;
 #endif
 #endif
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
 			STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
 			break;
 			break;
@@ -317,7 +322,8 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribut
 
 
 	unsigned long z;
 	unsigned long z;
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			for (z = 0; z < nx; z++)
 			for (z = 0; z < nx; z++)
 			{
 			{

+ 36 - 18
examples/heat/dw_factolu_tag.c

@@ -44,7 +44,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 	return task;
 	return task;
 }
 }
 
 
-static struct starpu_codelet cl11 = {
+static struct starpu_codelet cl11 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u11, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -71,14 +72,16 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
 	return task;
 	return task;
 }
 }
 
 
-static struct starpu_codelet cl12 = {
+static struct starpu_codelet cl12 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u12, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -102,22 +105,26 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, i, k); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!no_prio && (i == k+1)) {
+	if (!no_prio && (i == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 		starpu_tag_declare_deps(TAG12(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, i), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG12(k, i), 1, TAG11(k));
 	}
 	}
 
 
 	starpu_task_submit(task);
 	starpu_task_submit(task);
 }
 }
 
 
-static struct starpu_codelet cl21 = {
+static struct starpu_codelet cl21 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u21, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -139,22 +146,26 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, j); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!no_prio && (j == k+1)) {
+	if (!no_prio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
 	}
 	}
 
 
 	starpu_task_submit(task);
 	starpu_task_submit(task);
 }
 }
 
 
-static struct starpu_codelet cl22 = {
+static struct starpu_codelet cl22 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
 	.cpu_funcs = {dw_cpu_codelet_update_u22, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -180,15 +191,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, i, j); 
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, i), TAG21(k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, i), TAG21(k, j));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, i), TAG21(k, j));
 	}
 	}
 
 
@@ -214,10 +228,12 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		struct starpu_task *task = create_task_11(dataA, k);
 		struct starpu_task *task = create_task_11(dataA, k);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 			starpu_task_submit(task);
 		}
 		}
 		
 		
@@ -280,12 +296,14 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 	 * one block is now determined by 2 unsigned (i,j) */
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};

+ 6 - 3
examples/heat/dw_sparse_cg.c

@@ -63,7 +63,8 @@ static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t
 	{
 	{
 		rowptr[row] = pos;
 		rowptr[row] = pos;
 
 
-		if (row > 0) {
+		if (row > 0)
+		{
 			nzval[pos] = 1.0f;
 			nzval[pos] = 1.0f;
 			colind[pos] = row-1;
 			colind[pos] = row-1;
 			pos++;
 			pos++;
@@ -73,7 +74,8 @@ static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t
 		colind[pos] = row;
 		colind[pos] = row;
 		pos++;
 		pos++;
 
 
-		if (row < size - 1) {
+		if (row < size - 1)
+		{
 			nzval[pos] = 1.0f;
 			nzval[pos] = 1.0f;
 			colind[pos] = row+1;
 			colind[pos] = row+1;
 			pos++;
 			pos++;
@@ -312,7 +314,8 @@ void iteration_cg(void *problem)
 		/* we did not reach the stop condition yet */
 		/* we did not reach the stop condition yet */
 		launch_new_cg_iteration(problem);
 		launch_new_cg_iteration(problem);
 	}
 	}
-	else {
+	else
+	{
 		/* we may stop */
 		/* we may stop */
 		FPRINTF(stdout, "We are done ... after %d iterations \n", pb->i - 1);
 		FPRINTF(stdout, "We are done ... after %d iterations \n", pb->i - 1);
 		FPRINTF(stdout, "i : %d\n\tdelta_new %2.5f\n", pb->i, pb->delta_new);
 		FPRINTF(stdout, "i : %d\n\tdelta_new %2.5f\n", pb->i, pb->delta_new);

+ 13 - 7
examples/heat/dw_sparse_cg.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -46,7 +46,8 @@ static unsigned usecpu = 0;
 static unsigned blocks = 512;
 static unsigned blocks = 512;
 static unsigned grids  = 8;
 static unsigned grids  = 8;
 
 
-struct cg_problem {
+struct cg_problem
+{
 	starpu_data_handle_t ds_matrixA;
 	starpu_data_handle_t ds_matrixA;
 	starpu_data_handle_t ds_vecx;
 	starpu_data_handle_t ds_vecx;
 	starpu_data_handle_t ds_vecb;
 	starpu_data_handle_t ds_vecb;
@@ -71,23 +72,28 @@ struct cg_problem {
 static void __attribute__((unused)) parse_args(int argc, char **argv)
 static void __attribute__((unused)) parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 			size = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-block") == 0) {
+		if (strcmp(argv[i], "-block") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			blocks = strtol(argv[++i], &argptr, 10);
 			blocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-grid") == 0) {
+		if (strcmp(argv[i], "-grid") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			grids = strtol(argv[++i], &argptr, 10);
 			grids = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-cpu") == 0) {
+		if (strcmp(argv[i], "-cpu") == 0)
+		{
 			usecpu = 1;
 			usecpu = 1;
 		}
 		}
 	}
 	}

+ 72 - 37
examples/heat/heat.c

@@ -36,65 +36,80 @@ extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint3
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-cg") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-cg") == 0)
+		{
 			use_cg = 1;
 			use_cg = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-shape") == 0) {
+		if (strcmp(argv[i], "-shape") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			shape = strtol(argv[++i], &argptr, 10);
 			shape = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nthick") == 0) {
+		if (strcmp(argv[i], "-nthick") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nthick = strtol(argv[++i], &argptr, 10);
 			nthick = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-ntheta") == 0) {
+		if (strcmp(argv[i], "-ntheta") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			ntheta = strtol(argv[++i], &argptr, 10);
 			ntheta = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nbigblocks") == 0) {
+		if (strcmp(argv[i], "-nbigblocks") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-v1") == 0) {
+		if (strcmp(argv[i], "-v1") == 0)
+		{
 			version = 1;
 			version = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-v2") == 0) {
+		if (strcmp(argv[i], "-v2") == 0)
+		{
 			version = 2;
 			version = 2;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-v3") == 0) {
+		if (strcmp(argv[i], "-v3") == 0)
+		{
 			version = 3;
 			version = 3;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-v4") == 0) {
+		if (strcmp(argv[i], "-v4") == 0)
+		{
 			version = 4;
 			version = 4;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-pin") == 0) {
+		if (strcmp(argv[i], "-pin") == 0)
+		{
 			pinned = 1;
 			pinned = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 			check = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-no-prio") == 0) {
+		if (strcmp(argv[i], "-no-prio") == 0)
+		{
 			no_prio = 1;
 			no_prio = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-size") == 0) {
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			unsigned size = strtol(argv[++i], &argptr, 10);
 			unsigned size = strtol(argv[++i], &argptr, 10);
 			nthick = 130;
 			nthick = 130;
@@ -102,7 +117,8 @@ static void parse_args(int argc, char **argv)
 			STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
 			STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
 			printf("usage : %s [-v1|-v2|-v3] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]);
 		}
 		}
 	}
 	}
@@ -136,11 +152,14 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 	ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
 	ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y;
 
 
 	/* B */
 	/* B */
-	if (side_tr) {
+	if (side_tr)
+	{
 		/* lower D is actually B here */
 		/* lower D is actually B here */
 		xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
 		xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
 		yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
 		yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
-	} else {
+	}
+	else
+	{
 		/* upper */
 		/* upper */
 		xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
 		xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
 		yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
 		yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
@@ -150,24 +169,31 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 	yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
 	yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
 
 
 	/* now look for the actual psi node */
 	/* now look for the actual psi node */
-	if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) {
+	if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))
+	{
 		/* A nothing to do */
 		/* A nothing to do */
-	} else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) {
+	}
+	else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))
+	{
 		/* psi matches C */
 		/* psi matches C */
 		/* swap A and C coordinates  */
 		/* swap A and C coordinates  */
 		tmp = xa; xa = xc; xc = tmp;
 		tmp = xa; xa = xc; xc = tmp;
 		tmp = ya; ya = yc; yc = tmp;
 		tmp = ya; ya = yc; yc = tmp;
-	} else if
-		(side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) {
+	}
+	else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)))
+	{
 		/* psi is D (that was stored in C) XXX */
 		/* psi is D (that was stored in C) XXX */
 		tmp = xa; xa = xb; xb = tmp;
 		tmp = xa; xa = xb; xb = tmp;
 		tmp = ya; ya = yb; yb = tmp;
 		tmp = ya; ya = yb; yb = tmp;
-	} else if
-		(!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) {
+	}
+	else if	(!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)))
+	{
 		/* psi is C */
 		/* psi is C */
 		tmp = xa; xa = xb; xb = tmp;
 		tmp = xa; xa = xb; xb = tmp;
 		tmp = ya; ya = yb; yb = tmp;
 		tmp = ya; ya = yb; yb = tmp;
-	} else {
+	}
+	else
+	{
 		/* the psi node is not a node of the current triangle */
 		/* the psi node is not a node of the current triangle */
 		return 0.0f;
 		return 0.0f;
 	}
 	}
@@ -178,7 +204,8 @@ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side
 
 
 	denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
 	denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb);
 
 
-	switch (xy) {
+	switch (xy)
+	{
 		case X:
 		case X:
 			value = (yc - yb)/denom;
 			value = (yc - yb)/denom;
 			break;
 			break;
@@ -220,11 +247,14 @@ static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsig
 	xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
 	xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x;
 	yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
 	yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y;
 
 
-	if (side_tr) {
+	if (side_tr)
+	{
 		/* lower */
 		/* lower */
 		xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
 		xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x;
 		yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
 		yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y;
-	} else {
+	}
+	else
+	{
 		xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
 		xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x;
 		yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
 		yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y;
 	}
 	}
@@ -314,8 +344,6 @@ done:
 
 
 static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
 static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B)
 {
 {
-
-
 	unsigned i;
 	unsigned i;
 
 
 	/* solve the actual problem LU X = B */
 	/* solve the actual problem LU X = B */
@@ -452,7 +480,8 @@ void build_mesh(point *mesh)
 			float r;
 			float r;
 			r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
 			r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN;
 
 
-			switch (shape) {
+			switch (shape)
+			{
 				default:
 				default:
 				case 0:
 				case 0:
 					mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
 					mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle);
@@ -604,11 +633,13 @@ static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uin
 			float val;
 			float val;
 			unsigned nodeneighbour =  neighbours[neighbour];
 			unsigned nodeneighbour =  neighbours[neighbour];
 
 
-			if (nodeneighbour < newsize) {
+			if (nodeneighbour < newsize)
+			{
 
 
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 	
 	
-				if (val != 0.0f) {
+				if (val != 0.0f)
+				{
 					*nzval = realloc(*nzval, (pos+1)*sizeof(float));
 					*nzval = realloc(*nzval, (pos+1)*sizeof(float));
 					*colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
 					*colind = realloc(*colind, (pos+1)*sizeof(uint32_t));
 	
 	
@@ -648,7 +679,8 @@ static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned news
 		{
 		{
 			unsigned long nodeneighbour =  neighbours[neighbour];
 			unsigned long nodeneighbour =  neighbours[neighbour];
 
 
-			if (nodeneighbour < newsize) {
+			if (nodeneighbour < newsize)
+			{
 				float val;
 				float val;
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 				val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh);
 				A[j+ (unsigned long)newsize*nodeneighbour] = val;
 				A[j+ (unsigned long)newsize*nodeneighbour] = val;
@@ -686,7 +718,8 @@ int main(int argc, char **argv)
 
 
 	/* we can either use a direct method (LU decomposition here) or an 
 	/* we can either use a direct method (LU decomposition here) or an 
 	 * iterative method (conjugate gradient here) */
 	 * iterative method (conjugate gradient here) */
-	if (use_cg) {
+	if (use_cg)
+	{
 		unsigned nnz;
 		unsigned nnz;
 		float *nzval;
 		float *nzval;
 		uint32_t *colind;
 		uint32_t *colind;
@@ -718,7 +751,8 @@ int main(int argc, char **argv)
 		}
 		}
 	
 	
 	}
 	}
-	else {
+	else
+	{
 
 
 		/* unfortunately CUDA does not allow late memory registration, 
 		/* unfortunately CUDA does not allow late memory registration, 
 		 * we need to do the malloc using CUDA itself ... */
 		 * we need to do the malloc using CUDA itself ... */
@@ -733,7 +767,8 @@ int main(int argc, char **argv)
 
 
 		STARPU_ASSERT(newsize % nblocks == 0);
 		STARPU_ASSERT(newsize % nblocks == 0);
 
 
-		switch (version) {
+		switch (version)
+		{
 			case 1:
 			case 1:
 			case 2:
 			case 2:
 				dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);
 				dw_factoLU(A, newsize, newsize, nblocks, version, no_prio);

+ 2 - 1
examples/heat/heat.h

@@ -52,7 +52,8 @@
 #define NODE_TO_THICK(n)		((n) % nthick)
 #define NODE_TO_THICK(n)		((n) % nthick)
 #define NODE_TO_THETA(n)		((n) / nthick)
 #define NODE_TO_THETA(n)		((n) / nthick)
 
 
-typedef struct point_t {
+typedef struct point_t
+{
 	float x;
 	float x;
 	float y;
 	float y;
 } point;
 } point;

+ 8 - 4
examples/heat/heat_display.c

@@ -52,13 +52,15 @@ static void generate_graph(void)
 			float colorA_G, colorB_G, colorC_G, colorD_G;
 			float colorA_G, colorB_G, colorC_G, colorD_G;
 			float colorA_B, colorB_B, colorC_B, colorD_B;
 			float colorA_B, colorB_B, colorC_B, colorD_B;
 
 
-			if (maxval == minval) {
+			if (maxval == minval)
+			{
 				colorA_R = 1.0f; colorA_G = 1.0f; colorA_B = 1.0f;
 				colorA_R = 1.0f; colorA_G = 1.0f; colorA_B = 1.0f;
 				colorB_R = 1.0f; colorB_G = 1.0f; colorB_B = 1.0f;
 				colorB_R = 1.0f; colorB_G = 1.0f; colorB_B = 1.0f;
 				colorC_R = 1.0f; colorC_G = 1.0f; colorC_B = 1.0f;
 				colorC_R = 1.0f; colorC_G = 1.0f; colorC_B = 1.0f;
 				colorD_R = 1.0f; colorD_G = 1.0f; colorD_B = 1.0f;
 				colorD_R = 1.0f; colorD_G = 1.0f; colorD_B = 1.0f;
 			}
 			}
-			else {
+			else
+			{
 				float amplitude = maxval - minval;
 				float amplitude = maxval - minval;
 
 
 				float coeffA, coeffB, coeffC, coeffD;
 				float coeffA, coeffB, coeffC, coeffD;
@@ -84,7 +86,8 @@ static void generate_graph(void)
 				colorD_G = coeffD<0.5f?1.0f:2.0*(1 - coeffD)*1.0f;
 				colorD_G = coeffD<0.5f?1.0f:2.0*(1 - coeffD)*1.0f;
 			}
 			}
 
 
-			if (printmesh) {
+			if (printmesh)
+			{
 				glColor3f (0.0f, 0.0f, 0.0f);
 				glColor3f (0.0f, 0.0f, 0.0f);
 				glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
 				glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
 				glLineWidth(3.0f);
 				glLineWidth(3.0f);
@@ -142,7 +145,8 @@ static void display(void)
 
 
 static void pressKey(unsigned char key, int x __attribute__ ((unused)), int y  __attribute__ ((unused)))
 static void pressKey(unsigned char key, int x __attribute__ ((unused)), int y  __attribute__ ((unused)))
 {
 {
-	switch (key) {
+	switch (key)
+	{
 		case 'q':
 		case 'q':
 			exit(0);
 			exit(0);
 		default:
 		default:

+ 17 - 9
examples/heat/lu_kernels_model.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -214,9 +214,11 @@ double task_22_cost_cpu(struct starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 	return PERTURBATE(cost);
 }
 }
 
 
-struct starpu_perfmodel model_11 = {
+struct starpu_perfmodel model_11 =
+{
 	.cost_model = task_11_cost,
 	.cost_model = task_11_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_11_cost_cpu },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_11_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_11_cost_cuda }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_11_cost_cuda }
 	},
 	},
@@ -230,9 +232,11 @@ struct starpu_perfmodel model_11 = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_perfmodel model_12 = {
+struct starpu_perfmodel model_12 =
+{
 	.cost_model = task_12_cost,
 	.cost_model = task_12_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_12_cost_cpu },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_12_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_12_cost_cuda }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_12_cost_cuda }
 	},
 	},
@@ -246,9 +250,11 @@ struct starpu_perfmodel model_12 = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_perfmodel model_21 = {
+struct starpu_perfmodel model_21 =
+{
 	.cost_model = task_21_cost,
 	.cost_model = task_21_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_21_cost_cpu },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_21_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_21_cost_cuda }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_21_cost_cuda }
 	},
 	},
@@ -262,9 +268,11 @@ struct starpu_perfmodel model_21 = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_perfmodel model_22 = {
+struct starpu_perfmodel model_22 =
+{
 	.cost_model = task_22_cost,
 	.cost_model = task_22_cost,
-	.per_arch = {
+	.per_arch =
+	{
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_22_cost_cpu },
 		[STARPU_CPU_DEFAULT][0] = { .cost_model = task_22_cost_cpu },
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_22_cost_cuda }
 		[STARPU_CUDA_DEFAULT][0] = { .cost_model = task_22_cost_cuda }
 	},
 	},

+ 2 - 1
examples/incrementer/incrementer.c

@@ -109,7 +109,8 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0],
 	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0],
                 float_array[1], float_array[2], float_array[3]);
                 float_array[1], float_array[2], float_array[3]);
 
 
-	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) {
+	if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3])
+	{
 		FPRINTF(stderr, "Incorrect result\n");
 		FPRINTF(stderr, "Incorrect result\n");
 		ret = 1;
 		ret = 1;
 	}
 	}

+ 27 - 13
examples/lu/lu_example.c

@@ -46,41 +46,51 @@ TYPE **A_blocks;
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 			size = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 			check = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-piv") == 0) {
+		if (strcmp(argv[i], "-piv") == 0)
+		{
 			pivot = 1;
 			pivot = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-no-stride") == 0) {
+		if (strcmp(argv[i], "-no-stride") == 0)
+		{
 			no_stride = 1;
 			no_stride = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-profile") == 0) {
+		if (strcmp(argv[i], "-profile") == 0)
+		{
 			profile = 1;
 			profile = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-bound") == 0) {
+		if (strcmp(argv[i], "-bound") == 0)
+		{
 			bound = 1;
 			bound = 1;
 		}
 		}
-		if (strcmp(argv[i], "-bounddeps") == 0) {
+		if (strcmp(argv[i], "-bounddeps") == 0)
+		{
 			bound = 1;
 			bound = 1;
 			bounddeps = 1;
 			bounddeps = 1;
 		}
 		}
-		if (strcmp(argv[i], "-bounddepsprio") == 0) {
+		if (strcmp(argv[i], "-bounddepsprio") == 0)
+		{
 			bound = 1;
 			bound = 1;
 			bounddeps = 1;
 			bounddeps = 1;
 			boundprio = 1;
 			boundprio = 1;
@@ -344,14 +354,18 @@ int main(int argc, char **argv)
 		starpu_bus_profiling_helper_display_summary();
 		starpu_bus_profiling_helper_display_summary();
 	}
 	}
 
 
-	if (bound) {
+	if (bound)
+	{
 		double min;
 		double min;
 		starpu_bound_stop();
 		starpu_bound_stop();
-		if (bounddeps) {
+		if (bounddeps)
+		{
 			FILE *f = fopen("lu.pl", "w");
 			FILE *f = fopen("lu.pl", "w");
 			starpu_bound_print_lp(f);
 			starpu_bound_print_lp(f);
 			FPRINTF(stderr,"system printed to lu.pl\n");
 			FPRINTF(stderr,"system printed to lu.pl\n");
-		} else {
+		}
+		else
+		{
 			starpu_bound_compute(&min, NULL, 0);
 			starpu_bound_compute(&min, NULL, 0);
 			if (min != 0.)
 			if (min != 0.)
 				FPRINTF(stderr, "theoretical min: %f ms\n", min);
 				FPRINTF(stderr, "theoretical min: %f ms\n", min);

+ 28 - 14
examples/lu/xlu.c

@@ -65,7 +65,8 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
@@ -86,15 +87,18 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, j, k); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!no_prio && (j == k+1)) {
+	if (!no_prio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
 	}
 	}
 
 
@@ -113,15 +117,18 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
 	task->buffers[1].handle = starpu_data_get_sub_data(dataA, 2, k, i); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!no_prio && (i == k+1)) {
+	if (!no_prio && (i == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG21(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 		starpu_tag_declare_deps(TAG21(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG21(k, i), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG21(k, i), 1, TAG11(k));
 	}
 	}
 
 
@@ -144,15 +151,18 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
 	task->buffers[2].handle = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
 	}
 	}
 
 
@@ -178,10 +188,12 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		struct starpu_task *task = create_task_11(dataA, k);
 		struct starpu_task *task = create_task_11(dataA, k);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 			starpu_task_submit(task);
 		}
 		}
 		
 		
@@ -236,12 +248,14 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 	/* We already enforce deps by hand */
 	/* We already enforce deps by hand */
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};

+ 2 - 1
examples/lu/xlu.h

@@ -106,7 +106,8 @@ extern struct starpu_perfmodel model_12;
 extern struct starpu_perfmodel model_21;
 extern struct starpu_perfmodel model_21;
 extern struct starpu_perfmodel model_22;
 extern struct starpu_perfmodel model_22;
 
 
-struct piv_s {
+struct piv_s
+{
 	unsigned *piv; /* complete pivot array */
 	unsigned *piv; /* complete pivot array */
 	unsigned first; /* first element */
 	unsigned first; /* first element */
 	unsigned last; /* last element */
 	unsigned last; /* last element */

+ 4 - 2
examples/lu/xlu_implicit.c

@@ -143,12 +143,14 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 	 * one block is now determined by 2 unsigned (i,j) */
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 	
 	
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};

+ 4 - 2
examples/lu/xlu_implicit_pivot.c

@@ -189,12 +189,14 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	 * one block is now determined by 2 unsigned (i,j) */
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};

+ 38 - 19
examples/lu/xlu_kernels.c

@@ -51,7 +51,8 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 	cudaError_t cures;
 	cudaError_t cures;
 #endif
 #endif
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			CPU_GEMM("N", "N", dy, dx, dz, 
 			CPU_GEMM("N", "N", dy, dx, dz, 
 				(TYPE)-1.0, right, ld21, left, ld12,
 				(TYPE)-1.0, right, ld21, left, ld12,
@@ -59,7 +60,8 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 			break;
 			break;
 
 
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-		case 1: {
+		case 1:
+		{
 			CUBLAS_GEMM('n', 'n', dx, dy, dz,
 			CUBLAS_GEMM('n', 'n', dx, dy, dz,
 				*(CUBLAS_TYPE*)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12,
 				*(CUBLAS_TYPE*)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12,
 				*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE *)center, ld22);
 				*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE *)center, ld22);
@@ -92,7 +94,8 @@ void STARPU_LU(cublas_u22)(void *descr[], void *_args)
 }
 }
 #endif /* STARPU_USE_CUDA */
 #endif /* STARPU_USE_CUDA */
 
 
-static struct starpu_perfmodel STARPU_LU(model_22) = {
+static struct starpu_perfmodel STARPU_LU(model_22) =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
 	.symbol = STARPU_LU_STR(lu_model_22_atlas)
@@ -103,7 +106,8 @@ static struct starpu_perfmodel STARPU_LU(model_22) = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_codelet cl22 = {
+struct starpu_codelet cl22 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u22), NULL},
 	.cpu_funcs = {STARPU_LU(cpu_u22), NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -138,7 +142,8 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 #endif
 #endif
 
 
 	/* solve L11 U12 = A12 (find U12) */
 	/* solve L11 U12 = A12 (find U12) */
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			CPU_TRSM("L", "L", "N", "N", nx12, ny12,
 			CPU_TRSM("L", "L", "N", "N", nx12, ny12,
 					(TYPE)1.0, sub11, ld11, sub12, ld12);
 					(TYPE)1.0, sub11, ld11, sub12, ld12);
@@ -175,7 +180,8 @@ void STARPU_LU(cublas_u12)(void *descr[], void *_args)
 }
 }
 #endif /* STARPU_USE_CUDA */
 #endif /* STARPU_USE_CUDA */
 
 
-static struct starpu_perfmodel STARPU_LU(model_12) = {
+static struct starpu_perfmodel STARPU_LU(model_12) =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
 	.symbol = STARPU_LU_STR(lu_model_12_atlas)
@@ -186,7 +192,8 @@ static struct starpu_perfmodel STARPU_LU(model_12) = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_codelet cl12 = {
+struct starpu_codelet cl12 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u12), NULL},
 	.cpu_funcs = {STARPU_LU(cpu_u12), NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -219,7 +226,8 @@ static inline void STARPU_LU(common_u21)(void *descr[],
 	cublasStatus status;
 	cublasStatus status;
 #endif
 #endif
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			CPU_TRSM("R", "U", "N", "U", nx21, ny21,
 			CPU_TRSM("R", "U", "N", "U", nx21, ny21,
 					(TYPE)1.0, sub11, ld11, sub21, ld21);
 					(TYPE)1.0, sub11, ld11, sub21, ld21);
@@ -255,7 +263,8 @@ void STARPU_LU(cublas_u21)(void *descr[], void *_args)
 }
 }
 #endif 
 #endif 
 
 
-static struct starpu_perfmodel STARPU_LU(model_21) = {
+static struct starpu_perfmodel STARPU_LU(model_21) =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
 	.symbol = STARPU_LU_STR(lu_model_21_atlas)
@@ -266,7 +275,8 @@ static struct starpu_perfmodel STARPU_LU(model_21) = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_codelet cl21 = {
+struct starpu_codelet cl21 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u21), NULL},
 	.cpu_funcs = {STARPU_LU(cpu_u21), NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -292,7 +302,8 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 
 
 	unsigned long z;
 	unsigned long z;
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			for (z = 0; z < nx; z++)
 			for (z = 0; z < nx; z++)
 			{
 			{
@@ -350,7 +361,8 @@ void STARPU_LU(cublas_u11)(void *descr[], void *_args)
 }
 }
 #endif /* STARPU_USE_CUDA */
 #endif /* STARPU_USE_CUDA */
 
 
-static struct starpu_perfmodel STARPU_LU(model_11) = {
+static struct starpu_perfmodel STARPU_LU(model_11) =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
 	.symbol = STARPU_LU_STR(lu_model_11_atlas)
@@ -361,7 +373,8 @@ static struct starpu_perfmodel STARPU_LU(model_11) = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_codelet cl11 = {
+struct starpu_codelet cl11 =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u11), NULL},
 	.cpu_funcs = {STARPU_LU(cpu_u11), NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -391,7 +404,8 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 	unsigned *ipiv = piv->piv;
 	unsigned *ipiv = piv->piv;
 	unsigned first = piv->first;
 	unsigned first = piv->first;
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			for (z = 0; z < nx; z++)
 			for (z = 0; z < nx; z++)
 			{
 			{
@@ -486,7 +500,8 @@ void STARPU_LU(cublas_u11_pivot)(void *descr[], void *_args)
 }
 }
 #endif /* STARPU_USE_CUDA */
 #endif /* STARPU_USE_CUDA */
 
 
-static struct starpu_perfmodel STARPU_LU(model_11_pivot) = {
+static struct starpu_perfmodel STARPU_LU(model_11_pivot) =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
 	.symbol = STARPU_LU_STR(lu_model_11_pivot_atlas)
@@ -497,7 +512,8 @@ static struct starpu_perfmodel STARPU_LU(model_11_pivot) = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_codelet cl11_pivot = {
+struct starpu_codelet cl11_pivot =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_u11_pivot), NULL},
 	.cpu_funcs = {STARPU_LU(cpu_u11_pivot), NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -526,7 +542,8 @@ static inline void STARPU_LU(common_pivot)(void *descr[],
 	unsigned *ipiv = piv->piv;
 	unsigned *ipiv = piv->piv;
 	unsigned first = piv->first;
 	unsigned first = piv->first;
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			for (row = 0; row < nx; row++)
 			for (row = 0; row < nx; row++)
 			{
 			{
@@ -571,7 +588,8 @@ void STARPU_LU(cublas_pivot)(void *descr[], void *_args)
 
 
 #endif /* STARPU_USE_CUDA */
 #endif /* STARPU_USE_CUDA */
 
 
-static struct starpu_perfmodel STARPU_LU(model_pivot) = {
+static struct starpu_perfmodel STARPU_LU(model_pivot) =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 #ifdef STARPU_ATLAS
 #ifdef STARPU_ATLAS
 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
 	.symbol = STARPU_LU_STR(lu_model_pivot_atlas)
@@ -582,7 +600,8 @@ static struct starpu_perfmodel STARPU_LU(model_pivot) = {
 #endif
 #endif
 };
 };
 
 
-struct starpu_codelet cl_pivot = {
+struct starpu_codelet cl_pivot =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {STARPU_LU(cpu_pivot), NULL},
 	.cpu_funcs = {STARPU_LU(cpu_pivot), NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA

+ 31 - 16
examples/lu/xlu_pivot.c

@@ -66,15 +66,18 @@ static void create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k == 0) {
+	if (k == 0)
+	{
 		starpu_tag_declare_deps(PIVOT(k, i), 1, TAG11(k));
 		starpu_tag_declare_deps(PIVOT(k, i), 1, TAG11(k));
 	}
 	}
-	else 
+	else
 	{
 	{
-		if (i > k) {
+		if (i > k)
+		{
 			starpu_tag_declare_deps(PIVOT(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 			starpu_tag_declare_deps(PIVOT(k, i), 2, TAG11(k), TAG22(k-1, i, k));
 		}
 		}
-		else {
+		else
+		{
 			starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t));
 			starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t));
 			
 			
 			tags[0] = TAG11(k);
 			tags[0] = TAG11(k);
@@ -111,7 +114,8 @@ static struct starpu_task *create_task_11_pivot(starpu_data_handle_t *dataAp, un
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
 	}
 	}
 
 
@@ -135,7 +139,8 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 	task->buffers[1].handle = get_block(dataAp, nblocks, j, k);
 	task->buffers[1].handle = get_block(dataAp, nblocks, j, k);
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!no_prio && (j == k+1)) {
+	if (!no_prio && (j == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
@@ -143,10 +148,12 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 #if 0
 #if 0
 	starpu_tag_declare_deps(TAG12(k, i), 1, PIVOT(k, i));
 	starpu_tag_declare_deps(TAG12(k, i), 1, PIVOT(k, i));
 #endif
 #endif
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 		starpu_tag_declare_deps(TAG12(k, j), 2, TAG11(k), TAG22(k-1, k, j));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
 		starpu_tag_declare_deps(TAG12(k, j), 1, TAG11(k));
 	}
 	}
 
 
@@ -166,7 +173,8 @@ static void create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 	task->buffers[1].handle = get_block(dataAp, nblocks, k, i); 
 	task->buffers[1].handle = get_block(dataAp, nblocks, k, i); 
 	task->buffers[1].mode = STARPU_RW;
 	task->buffers[1].mode = STARPU_RW;
 
 
-	if (!no_prio && (i == k+1)) {
+	if (!no_prio && (i == k+1))
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
@@ -197,15 +205,18 @@ static void create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 	task->buffers[2].handle = get_block(dataAp, nblocks, j, i);  /* produced by TAG22(k-1, i, j) */
 	task->buffers[2].handle = get_block(dataAp, nblocks, j, i);  /* produced by TAG22(k-1, i, j) */
 	task->buffers[2].mode = STARPU_RW;
 	task->buffers[2].mode = STARPU_RW;
 
 
-	if (!no_prio &&  (i == k + 1) && (j == k +1) ) {
+	if (!no_prio &&  (i == k + 1) && (j == k +1) )
+	{
 		task->priority = STARPU_MAX_PRIO;
 		task->priority = STARPU_MAX_PRIO;
 	}
 	}
 
 
 	/* enforce dependencies ... */
 	/* enforce dependencies ... */
-	if (k > 0) {
+	if (k > 0)
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
 		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG12(k, j), TAG21(k, i));
 	}
 	}
-	else {
+	else
+	{
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
 		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG12(k, j), TAG21(k, i));
 	}
 	}
 
 
@@ -234,10 +245,12 @@ static double dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
-		if (k == 0) {
+		if (k == 0)
+		{
 			entry_task = task;
 			entry_task = task;
 		}
 		}
-		else {
+		else
+		{
 			starpu_task_submit(task);
 			starpu_task_submit(task);
 		}
 		}
 
 
@@ -314,12 +327,14 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	/* We already enforce deps by hand */
 	/* We already enforce deps by hand */
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 	starpu_data_set_sequential_consistency_flag(dataA, 0);
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_vertical_block_filter_func,
 		.filter_func = starpu_vertical_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};
 
 
-	struct starpu_data_filter f2 = {
+	struct starpu_data_filter f2 =
+	{
 		.filter_func = starpu_block_filter_func,
 		.filter_func = starpu_block_filter_func,
 		.nchildren = nblocks
 		.nchildren = nblocks
 	};
 	};

+ 35 - 18
examples/mandelbrot/mandelbrot.c

@@ -155,7 +155,8 @@ static int handle_events(void)
 			topY -= 0.25*heightY;
 			topY -= 0.25*heightY;
 			bottomY -= 0.25*heightY;
 			bottomY -= 0.25*heightY;
 		}
 		}
-		else {
+		else
+		{
 			double widthX = rightX - leftX;
 			double widthX = rightX - leftX;
 			double heightY = topY - bottomY;
 			double heightY = topY - bottomY;
 
 
@@ -177,13 +178,15 @@ static int handle_events(void)
 			}
 			}
 		}
 		}
 
 
-		if (text[0]=='q') {
+		if (text[0]=='q')
+		{
 			return -1;
 			return -1;
 		}
 		}
 	}
 	}
 
 
-	if (event.type==ButtonPress) {
-	/* tell where the mouse Button was Pressed */
+	if (event.type==ButtonPress)
+	{
+		/* tell where the mouse Button was Pressed */
 		printf("You pressed a button at (%i,%i)\n",
 		printf("You pressed a button at (%i,%i)\n",
 			event.xbutton.x,event.xbutton.y);
 			event.xbutton.x,event.xbutton.y);
 	}
 	}
@@ -371,7 +374,8 @@ static void compute_block_spmd(void *descr[], void *cl_arg)
 
 
 
 
 
 
-static struct starpu_codelet spmd_mandelbrot_cl = {
+static struct starpu_codelet spmd_mandelbrot_cl =
+{
 	.where = STARPU_CPU|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_OPENCL,
 	.type = STARPU_SPMD,
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
 	.max_parallelism = INT_MAX,
@@ -382,7 +386,8 @@ static struct starpu_codelet spmd_mandelbrot_cl = {
 	.nbuffers = 1
 	.nbuffers = 1
 };
 };
 
 
-static struct starpu_codelet mandelbrot_cl = {
+static struct starpu_codelet mandelbrot_cl =
+{
 	.where = STARPU_CPU|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_OPENCL,
 	.type = STARPU_SEQ,
 	.type = STARPU_SEQ,
 	.cpu_funcs = {compute_block, NULL},
 	.cpu_funcs = {compute_block, NULL},
@@ -395,38 +400,46 @@ static struct starpu_codelet mandelbrot_cl = {
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-h") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
 			fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
 			exit(-1);
 			exit(-1);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-width") == 0) {
+		if (strcmp(argv[i], "-width") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			width = strtol(argv[++i], &argptr, 10);
 			width = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-height") == 0) {
+		if (strcmp(argv[i], "-height") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			height = strtol(argv[++i], &argptr, 10);
 			height = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-niter") == 0) {
+		if (strcmp(argv[i], "-niter") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			niter = strtol(argv[++i], &argptr, 10);
 			niter = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-pos") == 0) {
+		if (strcmp(argv[i], "-pos") == 0)
+		{
 			int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
 			int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
 			assert(ret == 4);
 			assert(ret == 4);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-demo") == 0) {
+		if (strcmp(argv[i], "-demo") == 0)
+		{
 			demo = 1;
 			demo = 1;
 			leftX = -50.22749575062760;
 			leftX = -50.22749575062760;
 			rightX = 48.73874621262927;
 			rightX = 48.73874621262927;
@@ -435,18 +448,21 @@ static void parse_args(int argc, char **argv)
 
 
 		}
 		}
 
 
-		if (strcmp(argv[i], "-demozoom") == 0) {
+		if (strcmp(argv[i], "-demozoom") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			demozoom = strtof(argv[++i], &argptr);
 			demozoom = strtof(argv[++i], &argptr);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-no-x11") == 0) {
+		if (strcmp(argv[i], "-no-x11") == 0)
+		{
 #ifdef STARPU_HAVE_X11
 #ifdef STARPU_HAVE_X11
 			use_x11 = 0;
 			use_x11 = 0;
 #endif
 #endif
 		}
 		}
 
 
-		if (strcmp(argv[i], "-spmd") == 0) {
+		if (strcmp(argv[i], "-spmd") == 0)
+		{
 			use_spmd = 1;
 			use_spmd = 1;
 		}
 		}
 	}
 	}
@@ -567,7 +583,8 @@ int main(int argc, char **argv)
 				iter = 0;
 				iter = 0;
 				gettimeofday(&start, NULL);
 				gettimeofday(&start, NULL);
 			}
 			}
-			else {
+			else
+			{
 				leftX += (zoom_factor/2)*widthX;
 				leftX += (zoom_factor/2)*widthX;
 				rightX -= (zoom_factor/2)*widthX;
 				rightX -= (zoom_factor/2)*widthX;
 				topY -= (zoom_factor/2)*heightY;
 				topY -= (zoom_factor/2)*heightY;

+ 28 - 14
examples/matvecmult/matvecmult.c

@@ -64,27 +64,34 @@ void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 }
 }
 #endif
 #endif
 
 
-void fillArray(float* pfData, int iSize) {
+void fillArray(float* pfData, int iSize)
+{
     int i;
     int i;
     const float fScale = 1.0f / (float)RAND_MAX;
     const float fScale = 1.0f / (float)RAND_MAX;
-    for (i = 0; i < iSize; ++i) {
+    for (i = 0; i < iSize; ++i)
+    {
             pfData[i] = fScale * rand();
             pfData[i] = fScale * rand();
     }
     }
 }
 }
 
 
-void printArray(float* pfData, int iSize) {
+void printArray(float* pfData, int iSize)
+{
     int i;
     int i;
-    for (i = 0; i < iSize; ++i) {
+    for (i = 0; i < iSize; ++i)
+    {
             FPRINTF(stderr, "%f ", pfData[i]);
             FPRINTF(stderr, "%f ", pfData[i]);
     }
     }
     FPRINTF(stderr, "\n");
     FPRINTF(stderr, "\n");
 }
 }
 
 
-void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult) {
+void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult)
+{
     int i, j;
     int i, j;
-    for (i = 0; i < height; ++i) {
+    for (i = 0; i < height; ++i)
+    {
         double sum = 0;
         double sum = 0;
-        for (j = 0; j < width; ++j) {
+        for (j = 0; j < width; ++j)
+	{
             double a = matrix[i * width + j];
             double a = matrix[i * width + j];
             double b = vector[j];
             double b = vector[j];
             sum += a * b;
             sum += a * b;
@@ -93,12 +100,14 @@ void matVecMult(const float *matrix, const float *vector, int width, int height,
     }
     }
 }
 }
 
 
-int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon) {
+int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon)
+{
     float error = 0;
     float error = 0;
     float ref = 0;
     float ref = 0;
     unsigned int i;
     unsigned int i;
 
 
-    for(i = 0; i < len; ++i) {
+    for(i = 0; i < len; ++i)
+    {
         float diff = reference[i] - data[i];
         float diff = reference[i] - data[i];
         error += diff * diff;
         error += diff * diff;
         ref += reference[i] * reference[i];
         ref += reference[i] * reference[i];
@@ -117,7 +126,8 @@ int main(int argc, char **argv)
 {
 {
 	struct starpu_codelet cl = {};
 	struct starpu_codelet cl = {};
 
 
-	struct starpu_conf conf = {
+	struct starpu_conf conf =
+	{
 		.ncpus = 0,
 		.ncpus = 0,
 		.ncuda = 0,
 		.ncuda = 0,
                 .nopencl = 1,
                 .nopencl = 1,
@@ -136,7 +146,8 @@ int main(int argc, char **argv)
 	int ret, submit;
 	int ret, submit;
 
 
         ret = starpu_init(&conf);
         ret = starpu_init(&conf);
-	if (STARPU_UNLIKELY(ret == -ENODEV)) {
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
                 FPRINTF(stderr, "This application requires an OpenCL worker.\n");
                 FPRINTF(stderr, "This application requires an OpenCL worker.\n");
 		starpu_shutdown();
 		starpu_shutdown();
 		return 77;
 		return 77;
@@ -186,10 +197,12 @@ int main(int argc, char **argv)
         task->buffers[2].mode = STARPU_RW;
         task->buffers[2].mode = STARPU_RW;
 
 
         submit = starpu_task_submit(task);
         submit = starpu_task_submit(task);
-        if (STARPU_UNLIKELY(submit == -ENODEV)) {
+        if (STARPU_UNLIKELY(submit == -ENODEV))
+	{
                 FPRINTF(stderr, "No worker may execute this task. This application requires an OpenCL worker.\n");
                 FPRINTF(stderr, "No worker may execute this task. This application requires an OpenCL worker.\n");
 	}
 	}
-	else {
+	else
+	{
 		starpu_task_wait_for_all();
 		starpu_task_wait_for_all();
 	}
 	}
 
 
@@ -197,7 +210,8 @@ int main(int argc, char **argv)
 	starpu_data_unregister(vector_handle);
 	starpu_data_unregister(vector_handle);
 	starpu_data_unregister(mult_handle);
 	starpu_data_unregister(mult_handle);
 
 
-        if (STARPU_LIKELY(submit != -ENODEV)) {
+        if (STARPU_LIKELY(submit != -ENODEV))
+	{
 		int res = compareL2fe(correctResult, mult, height, 1e-6f);
 		int res = compareL2fe(correctResult, mult, height, 1e-6f);
 		FPRINTF(stdout, "TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!");
 		FPRINTF(stdout, "TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!");
 	}
 	}

+ 2 - 1
examples/matvecmult/matvecmult_kernel.cl

@@ -33,7 +33,8 @@ __kernel void matVecMult(
 {
 {
         // Row index
         // Row index
         uint y = get_global_id(0);
         uint y = get_global_id(0);
-        if (y < height) {
+        if (y < height)
+	{
                 // Row pointer
                 // Row pointer
                 const __global float* row = M + y * width;
                 const __global float* row = M + y * width;
 
 

+ 46 - 23
examples/mult/xgemm.c

@@ -53,10 +53,12 @@ static void check_output(void)
 	TYPE err;
 	TYPE err;
 	err = CPU_ASUM(xdim*ydim, C, 1);
 	err = CPU_ASUM(xdim*ydim, C, 1);
 
 
-	if (err < xdim*ydim*0.001) {
+	if (err < xdim*ydim*0.001)
+	{
 		FPRINTF(stderr, "Results are OK\n");
 		FPRINTF(stderr, "Results are OK\n");
 	}
 	}
-	else {
+	else
+	{
 		int max;
 		int max;
 		max = CPU_IAMAX(xdim*ydim, C, 1);
 		max = CPU_IAMAX(xdim*ydim, C, 1);
 
 
@@ -74,20 +76,26 @@ static void init_problem_data(void)
 	starpu_malloc((void **)&C, xdim*ydim*sizeof(TYPE));
 	starpu_malloc((void **)&C, xdim*ydim*sizeof(TYPE));
 
 
 	/* fill the A and B matrices */
 	/* fill the A and B matrices */
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < zdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < zdim; i++)
+		{
 			A[j+i*ydim] = (TYPE)(starpu_drand48());
 			A[j+i*ydim] = (TYPE)(starpu_drand48());
 		}
 		}
 	}
 	}
 
 
-	for (j=0; j < zdim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < zdim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			B[j+i*zdim] = (TYPE)(starpu_drand48());
 			B[j+i*zdim] = (TYPE)(starpu_drand48());
 		}
 		}
 	}
 	}
 
 
-	for (j=0; j < ydim; j++) {
-		for (i=0; i < xdim; i++) {
+	for (j=0; j < ydim; j++)
+	{
+		for (i=0; i < xdim; i++)
+		{
 			C[j+i*ydim] = (TYPE)(0);
 			C[j+i*ydim] = (TYPE)(0);
 		}
 		}
 	}
 	}
@@ -132,7 +140,8 @@ static void mult_kernel_common(void *descr[], int type)
 	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
 	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
 	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);
 	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);
 
 
-	if (type == STARPU_CPU) {
+	if (type == STARPU_CPU)
+	{
 		int worker_size = starpu_combined_worker_get_size();
 		int worker_size = starpu_combined_worker_get_size();
 
 
 		if (worker_size == 1)
 		if (worker_size == 1)
@@ -140,7 +149,8 @@ static void mult_kernel_common(void *descr[], int type)
 			/* Sequential CPU task */
 			/* Sequential CPU task */
 			CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC);
 			CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC);
 		}
 		}
-		else {
+		else
+		{
 			/* Parallel CPU task */
 			/* Parallel CPU task */
 			int rank = starpu_combined_worker_get_rank();
 			int rank = starpu_combined_worker_get_rank();
 		
 		
@@ -156,7 +166,8 @@ static void mult_kernel_common(void *descr[], int type)
 		}
 		}
 	}
 	}
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	else {
+	else
+	{
 		CUBLAS_GEMM('n', 'n', nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB,
 		CUBLAS_GEMM('n', 'n', nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB,
 					     (TYPE)0.0, subC, ldC);
 					     (TYPE)0.0, subC, ldC);
 		cudaStreamSynchronize(starpu_cuda_get_local_stream());
 		cudaStreamSynchronize(starpu_cuda_get_local_stream());
@@ -176,12 +187,14 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 	mult_kernel_common(descr, STARPU_CPU);
 	mult_kernel_common(descr, STARPU_CPU);
 }
 }
 
 
-static struct starpu_perfmodel starpu_gemm_model = {
+static struct starpu_perfmodel starpu_gemm_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = STARPU_GEMM_STR(gemm)
 	.symbol = STARPU_GEMM_STR(gemm)
 };
 };
 
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
 	.type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */
 	.max_parallelism = INT_MAX,
 	.max_parallelism = INT_MAX,
@@ -196,48 +209,58 @@ static struct starpu_codelet cl = {
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-nblocks") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nslicesx = strtol(argv[++i], &argptr, 10);
 			nslicesx = strtol(argv[++i], &argptr, 10);
 			nslicesy = nslicesx;
 			nslicesy = nslicesx;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocksx") == 0) {
+		if (strcmp(argv[i], "-nblocksx") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nslicesx = strtol(argv[++i], &argptr, 10);
 			nslicesx = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocksy") == 0) {
+		if (strcmp(argv[i], "-nblocksy") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nslicesy = strtol(argv[++i], &argptr, 10);
 			nslicesy = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-x") == 0) {
+		if (strcmp(argv[i], "-x") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			xdim = strtol(argv[++i], &argptr, 10);
 			xdim = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-y") == 0) {
+		if (strcmp(argv[i], "-y") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			ydim = strtol(argv[++i], &argptr, 10);
 			ydim = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-z") == 0) {
+		if (strcmp(argv[i], "-z") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			zdim = strtol(argv[++i], &argptr, 10);
 			zdim = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-iter") == 0) {
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			niter = strtol(argv[++i], &argptr, 10);
 			niter = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-check") == 0) {
+		if (strcmp(argv[i], "-check") == 0)
+		{
 			check = 1;
 			check = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-spmd") == 0) {
+		if (strcmp(argv[i], "-spmd") == 0)
+		{
 			cl.type = STARPU_SPMD;
 			cl.type = STARPU_SPMD;
 		}
 		}
 	}
 	}

+ 6 - 3
examples/openmp/vector_scal.c

@@ -28,7 +28,8 @@
 #define	NX	2048
 #define	NX	2048
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
 
-void scal_cpu_func(void *buffers[], void *_args) {
+void scal_cpu_func(void *buffers[], void *_args)
+{
 	unsigned i;
 	unsigned i;
 	float *factor = _args;
 	float *factor = _args;
 	struct starpu_vector_interface *vector = buffers[0];
 	struct starpu_vector_interface *vector = buffers[0];
@@ -42,12 +43,14 @@ void scal_cpu_func(void *buffers[], void *_args) {
 		val[i] *= *factor;
 		val[i] *= *factor;
 }
 }
 
 
-static struct starpu_perfmodel vector_scal_model = {
+static struct starpu_perfmodel vector_scal_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "vector_scale_parallel"
 	.symbol = "vector_scale_parallel"
 };
 };
 
 
-static struct starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.type = STARPU_FORKJOIN,
 	.type = STARPU_FORKJOIN,
 	.max_parallelism = INT_MAX,
 	.max_parallelism = INT_MAX,

+ 10 - 5
examples/opt/pi/pi.c

@@ -64,8 +64,10 @@ static void cpu_kernel(void *descr[], void *cl_arg)
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-ntasks") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-ntasks") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			ntasks = strtol(argv[++i], &argptr, 10);
 			ntasks = strtol(argv[++i], &argptr, 10);
 		}
 		}
@@ -101,19 +103,22 @@ int main(int argc, char **argv)
 	 * accessed by the CPU later on */
 	 * accessed by the CPU later on */
 	starpu_data_set_wt_mask(cnt_array_handle, (1<<0));
 	starpu_data_set_wt_mask(cnt_array_handle, (1<<0));
 
 
-	struct starpu_data_filter f = {
+	struct starpu_data_filter f =
+	{
 		.filter_func = starpu_block_filter_func_vector,
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = ntasks
 		.nchildren = ntasks
 	};
 	};
 	
 	
 	starpu_data_partition(cnt_array_handle, &f);
 	starpu_data_partition(cnt_array_handle, &f);
 
 
-	static struct starpu_perfmodel model = {
+	static struct starpu_perfmodel model =
+	{
 		.type = STARPU_HISTORY_BASED,
 		.type = STARPU_HISTORY_BASED,
 		.symbol = "monte_carlo_pi"
 		.symbol = "monte_carlo_pi"
 	};
 	};
 
 
-	struct starpu_codelet cl = {
+	struct starpu_codelet cl =
+	{
 		.where = STARPU_CPU|STARPU_CUDA,
 		.where = STARPU_CPU|STARPU_CUDA,
 		.cpu_funcs = {cpu_kernel, NULL},
 		.cpu_funcs = {cpu_kernel, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA

+ 18 - 9
examples/opt/pi/pi_redux.c

@@ -64,7 +64,8 @@ static void init_rng(void *arg __attribute__((unused)))
 
 
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();
 
 
-	switch (starpu_worker_get_type(workerid)) {
+	switch (starpu_worker_get_type(workerid))
+	{
 		case STARPU_CPU_WORKER:
 		case STARPU_CPU_WORKER:
 			/* create a seed */
 			/* create a seed */
 			starpu_srand48_r((long int)workerid, &randbuffer[PADDING*workerid]);
 			starpu_srand48_r((long int)workerid, &randbuffer[PADDING*workerid]);
@@ -96,22 +97,27 @@ static void init_rng(void *arg __attribute__((unused)))
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-ntasks") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-ntasks") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			ntasks = strtol(argv[++i], &argptr, 10);
 			ntasks = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-noredux") == 0) {
+		if (strcmp(argv[i], "-noredux") == 0)
+		{
 			use_redux = 0;
 			use_redux = 0;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-warmup") == 0) {
+		if (strcmp(argv[i], "-warmup") == 0)
+		{
 			do_warmup = 1;
 			do_warmup = 1;
 			ntasks_warmup = 8; /* arbitrary number of warmup tasks */
 			ntasks_warmup = 8; /* arbitrary number of warmup tasks */
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			fprintf(stderr, "Usage: %s [-ntasks n] [-noredux] [-warmup] [-h]\n", argv[0]);
 			fprintf(stderr, "Usage: %s [-ntasks n] [-noredux] [-warmup] [-h]\n", argv[0]);
 			exit(-1);
 			exit(-1);
 		}
 		}
@@ -183,7 +189,8 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 }
 }
 #endif
 #endif
 
 
-static struct starpu_codelet pi_cl = {
+static struct starpu_codelet pi_cl =
+{
 	.where =
 	.where =
 #ifdef STARPU_HAVE_CURAND
 #ifdef STARPU_HAVE_CURAND
 		STARPU_CUDA|
 		STARPU_CUDA|
@@ -216,7 +223,8 @@ static void init_cuda_func(void *descr[], void *cl_arg)
 }
 }
 #endif
 #endif
 
 
-static struct starpu_codelet init_codelet = {
+static struct starpu_codelet init_codelet =
+{
 	.where =
 	.where =
 #ifdef STARPU_HAVE_CURAND
 #ifdef STARPU_HAVE_CURAND
 		STARPU_CUDA|
 		STARPU_CUDA|
@@ -255,7 +263,8 @@ static void redux_cpu_func(void *descr[], void *cl_arg)
 	*a = *a + *b;
 	*a = *a + *b;
 };
 };
 
 
-static struct starpu_codelet redux_codelet = {
+static struct starpu_codelet redux_codelet =
+{
 	.where =
 	.where =
 #ifdef STARPU_HAVE_CURAND
 #ifdef STARPU_HAVE_CURAND
 		STARPU_CUDA|
 		STARPU_CUDA|

+ 5 - 3
examples/ppm_downscaler/ppm_downscaler.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -57,7 +57,8 @@ struct ppm_image *file_to_ppm(char *filename)
 
 
 	/* read the file's dimensions */
 	/* read the file's dimensions */
 	ret = fscanf(file, "P6\n%d %d\n%d\n", &ppm->ncols, &ppm->nlines, &ppm->coldepth);
 	ret = fscanf(file, "P6\n%d %d\n%d\n", &ppm->ncols, &ppm->nlines, &ppm->coldepth);
-	if (ret != 3) {
+	if (ret != 3)
+	{
 		fclose(file);
 		fclose(file);
 		fprintf(stderr, "file %s is not valid\n", filename);
 		fprintf(stderr, "file %s is not valid\n", filename);
 		exit(-1);
 		exit(-1);
@@ -105,7 +106,8 @@ char *filename_out = "serpents.small.ppm";
 
 
 void parse_args(int argc, char **argv)
 void parse_args(int argc, char **argv)
 {
 {
-	if (argc == 3) {
+	if (argc == 3)
+	{
 		filename_in = argv[1];
 		filename_in = argv[1];
 		filename_out = argv[2];
 		filename_out = argv[2];
 	}
 	}

+ 5 - 3
examples/ppm_downscaler/ppm_downscaler.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,13 +16,15 @@
  */
  */
 
 
 /* we make the asumption that there are 256 color levels at most */
 /* we make the asumption that there are 256 color levels at most */
-struct ppm_color {
+struct ppm_color
+{
 	unsigned char r;
 	unsigned char r;
 	unsigned char g;
 	unsigned char g;
 	unsigned char b;
 	unsigned char b;
 };
 };
 
 
-struct ppm_image {
+struct ppm_image
+{
 	int nlines;
 	int nlines;
 	int ncols;
 	int ncols;
 	int coldepth;
 	int coldepth;

+ 11 - 6
examples/ppm_downscaler/yuv_downscaler.c

@@ -38,11 +38,13 @@ char filename_out[1024];
 
 
 void parse_args(int argc, char **argv)
 void parse_args(int argc, char **argv)
 {
 {
-	if (argc == 3) {
+	if (argc == 3)
+	{
 		strcpy(filename_in, argv[1]);
 		strcpy(filename_in, argv[1]);
 		strcpy(filename_out, argv[2]);
 		strcpy(filename_out, argv[2]);
 	}
 	}
-	else {
+	else
+	{
 		sprintf(filename_in, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_in_default);
 		sprintf(filename_in, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_in_default);
 		sprintf(filename_out, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_out_default);
 		sprintf(filename_out, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_out_default);
 	}
 	}
@@ -82,7 +84,8 @@ static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 	}
 	}
 }
 }
 
 
-static struct starpu_codelet ds_codelet = {
+static struct starpu_codelet ds_codelet =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.cpu_funcs = {ds_kernel_cpu, NULL},
 	.cpu_funcs = {ds_kernel_cpu, NULL},
 	.nbuffers = 2, /* input -> output */
 	.nbuffers = 2, /* input -> output */
@@ -90,12 +93,14 @@ static struct starpu_codelet ds_codelet = {
 };
 };
 
 
 /* each block contains BLOCK_HEIGHT consecutive lines */
 /* each block contains BLOCK_HEIGHT consecutive lines */
-static struct starpu_data_filter filter_y = {
+static struct starpu_data_filter filter_y =
+{
 	.filter_func = starpu_block_filter_func,
 	.filter_func = starpu_block_filter_func,
 	.nchildren= HEIGHT/BLOCK_HEIGHT
 	.nchildren= HEIGHT/BLOCK_HEIGHT
 };
 };
-	
-static struct starpu_data_filter filter_uv = {
+
+static struct starpu_data_filter filter_uv =
+{
 	.filter_func = starpu_block_filter_func,
 	.filter_func = starpu_block_filter_func,
 	.nchildren = (HEIGHT/2)/BLOCK_HEIGHT
 	.nchildren = (HEIGHT/2)/BLOCK_HEIGHT
 };
 };

+ 5 - 3
examples/ppm_downscaler/yuv_downscaler.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,13 +27,15 @@
 
 
 #include <stdint.h>
 #include <stdint.h>
 
 
-struct yuv_frame {
+struct yuv_frame
+{
 	uint8_t y[WIDTH*HEIGHT];
 	uint8_t y[WIDTH*HEIGHT];
 	uint8_t u[(WIDTH*HEIGHT)/4];
 	uint8_t u[(WIDTH*HEIGHT)/4];
 	uint8_t v[(WIDTH*HEIGHT)/4];
 	uint8_t v[(WIDTH*HEIGHT)/4];
 };
 };
 
 
-struct yuv_new_frame {
+struct yuv_new_frame
+{
 	uint8_t y[NEW_WIDTH*NEW_HEIGHT];
 	uint8_t y[NEW_WIDTH*NEW_HEIGHT];
 	uint8_t u[(NEW_WIDTH*NEW_HEIGHT)/4];
 	uint8_t u[(NEW_WIDTH*NEW_HEIGHT)/4];
 	uint8_t v[(NEW_WIDTH*NEW_HEIGHT)/4];
 	uint8_t v[(NEW_WIDTH*NEW_HEIGHT)/4];

+ 6 - 3
examples/reductions/dot_product.c

@@ -73,7 +73,8 @@ void init_cuda_func(void *descr[], void *cl_arg)
 }
 }
 #endif
 #endif
 
 
-static struct starpu_codelet init_codelet = {
+static struct starpu_codelet init_codelet =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.can_execute = can_execute,
 	.can_execute = can_execute,
 	.cpu_funcs = {init_cpu_func, NULL},
 	.cpu_funcs = {init_cpu_func, NULL},
@@ -99,7 +100,8 @@ void redux_cpu_func(void *descr[], void *cl_arg)
 extern void redux_cuda_func(void *descr[], void *_args);
 extern void redux_cuda_func(void *descr[], void *_args);
 #endif
 #endif
 
 
-static struct starpu_codelet redux_codelet = {
+static struct starpu_codelet redux_codelet =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.can_execute = can_execute,
 	.can_execute = can_execute,
 	.cpu_funcs = {redux_cpu_func, NULL},
 	.cpu_funcs = {redux_cpu_func, NULL},
@@ -161,7 +163,8 @@ void dot_cuda_func(void *descr[], void *cl_arg)
 }
 }
 #endif
 #endif
 
 
-static struct starpu_codelet dot_codelet = {
+static struct starpu_codelet dot_codelet =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.can_execute = can_execute,
 	.can_execute = can_execute,
 	.cpu_funcs = {dot_cpu_func, NULL},
 	.cpu_funcs = {dot_cpu_func, NULL},

+ 6 - 3
examples/reductions/minmax_reduction.c

@@ -50,7 +50,8 @@ static void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
 	array[1] = TYPE_MIN;
 	array[1] = TYPE_MIN;
 }
 }
 
 
-static struct starpu_codelet minmax_init_codelet = {
+static struct starpu_codelet minmax_init_codelet =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.cpu_funcs = {minmax_neutral_cpu_func, NULL},
 	.cpu_funcs = {minmax_neutral_cpu_func, NULL},
 	.nbuffers = 1
 	.nbuffers = 1
@@ -76,7 +77,8 @@ void minmax_redux_cpu_func(void *descr[], void *cl_arg)
 	array_dst[1] = STARPU_MAX(max_dst, max_src);
 	array_dst[1] = STARPU_MAX(max_dst, max_src);
 }
 }
 
 
-static struct starpu_codelet minmax_redux_codelet = {
+static struct starpu_codelet minmax_redux_codelet =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.cpu_funcs = {minmax_redux_cpu_func, NULL},
 	.cpu_funcs = {minmax_redux_cpu_func, NULL},
 	.nbuffers = 2
 	.nbuffers = 2
@@ -110,7 +112,8 @@ void minmax_cpu_func(void *descr[], void *cl_arg)
 	minmax[1] = local_max;
 	minmax[1] = local_max;
 }
 }
 
 
-static struct starpu_codelet minmax_codelet = {
+static struct starpu_codelet minmax_codelet =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
 	.cpu_funcs = {minmax_cpu_func, NULL},
 	.cpu_funcs = {minmax_cpu_func, NULL},
 	.nbuffers = 2
 	.nbuffers = 2

+ 5 - 3
examples/scheduler/dummy_sched.c

@@ -77,7 +77,8 @@ static struct starpu_task *pop_task_dummy(void)
 	return starpu_task_list_pop_back(&sched_list);
 	return starpu_task_list_pop_back(&sched_list);
 }
 }
 
 
-static struct starpu_sched_policy dummy_sched_policy = {
+static struct starpu_sched_policy dummy_sched_policy =
+{
 	.init_sched = init_dummy_sched,
 	.init_sched = init_dummy_sched,
 	.deinit_sched = deinit_dummy_sched,
 	.deinit_sched = deinit_dummy_sched,
 	.push_task = push_task_dummy,
 	.push_task = push_task_dummy,
@@ -88,7 +89,8 @@ static struct starpu_sched_policy dummy_sched_policy = {
 	.policy_description = "dummy scheduling strategy"
 	.policy_description = "dummy scheduling strategy"
 };
 };
 
 
-static struct starpu_conf conf = {
+static struct starpu_conf conf =
+{
 	.sched_policy_name = NULL,
 	.sched_policy_name = NULL,
 	.sched_policy = &dummy_sched_policy,
 	.sched_policy = &dummy_sched_policy,
 	.ncpus = -1,
 	.ncpus = -1,
@@ -105,7 +107,7 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 {
 {
 }
 }
 
 
-static struct starpu_codelet dummy_codelet = 
+static struct starpu_codelet dummy_codelet =
 {
 {
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_funcs = {dummy_func, NULL},
 	.cpu_funcs = {dummy_func, NULL},

+ 6 - 3
examples/spmv/dw_block_spmv.c

@@ -141,7 +141,8 @@ void call_filters(void)
 #define NSPMV	32
 #define NSPMV	32
 unsigned totaltasks;
 unsigned totaltasks;
 
 
-struct starpu_codelet cl = {
+struct starpu_codelet cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = { cpu_block_spmv, NULL},
 	.cpu_funcs = { cpu_block_spmv, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -224,7 +225,8 @@ void launch_spmv_codelets(void)
 
 
 					is_entry_tab[taskid] = 0;
 					is_entry_tab[taskid] = 0;
 				}
 				}
-				else {
+				else
+				{
 					/* this is an entry task */
 					/* this is an entry task */
 					is_entry_tab[taskid] = 1;
 					is_entry_tab[taskid] = 1;
 				}
 				}
@@ -241,7 +243,8 @@ void launch_spmv_codelets(void)
 	unsigned task;
 	unsigned task;
 	for (task = 0; task < totaltasks; task++)
 	for (task = 0; task < totaltasks; task++)
 	{
 	{
-		if (is_entry_tab[task]) {
+		if (is_entry_tab[task])
+		{
 			nchains++;
 			nchains++;
 		}
 		}
 
 

+ 3 - 2
examples/spmv/dw_block_spmv_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -33,7 +33,8 @@ static inline void common_block_spmv(void *descr[], int s, __attribute__((unused
 
 
 	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
 	unsigned ld = STARPU_MATRIX_GET_LD(descr[0]);
 
 
-	switch (s) {
+	switch (s)
+	{
 		case 0:
 		case 0:
 			cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1);
 			cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1);
 			break;
 			break;

+ 28 - 17
examples/spmv/matrix_market/mm_to_bcsr.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,8 +22,10 @@ static void print_block(tmp_block_t *block, unsigned r, unsigned c)
 	printf(" **** block %d %d **** \n", block->i, block->j);
 	printf(" **** block %d %d **** \n", block->i, block->j);
 
 
 	unsigned i, j;
 	unsigned i, j;
-	for (j = 0; j < r; j++) {
-		for (i = 0; i < c; i++) {
+	for (j = 0; j < r; j++)
+	{
+		for (i = 0; i < c; i++)
+		{
 			printf("%2.2f\t", block->val[i + j*c]);
 			printf("%2.2f\t", block->val[i + j*c]);
 		}
 		}
 		printf("\n");
 		printf("\n");
@@ -34,11 +36,12 @@ static void print_all_blocks(tmp_block_t *block_list, unsigned r, unsigned c)
 {
 {
 	tmp_block_t *current_block = block_list;
 	tmp_block_t *current_block = block_list;
 
 
-	while(current_block) {
+	while(current_block)
+	{
 		print_block(current_block, r, c);
 		print_block(current_block, r, c);
 
 
 		current_block = current_block->next;
 		current_block = current_block->next;
-	};
+	}
 }
 }
 
 
 static void print_bcsr(bcsr_t *bcsr)
 static void print_bcsr(bcsr_t *bcsr)
@@ -54,10 +57,11 @@ static unsigned count_blocks(tmp_block_t *block_list)
 	unsigned count = 0;
 	unsigned count = 0;
 	tmp_block_t *current_block = block_list;
 	tmp_block_t *current_block = block_list;
 
 
-	while(current_block) {
+	while(current_block)
+	{
 		count++;
 		count++;
 		current_block = current_block->next;
 		current_block = current_block->next;
-	};
+	}
 
 
 	return count;
 	return count;
 }
 }
@@ -67,12 +71,13 @@ static unsigned count_row_blocks(tmp_block_t *block_list)
 	unsigned maxrow = 0;
 	unsigned maxrow = 0;
 	tmp_block_t *current_block = block_list;
 	tmp_block_t *current_block = block_list;
 
 
-	while(current_block) {
+	while(current_block)
+	{
 		if (current_block->j > maxrow)
 		if (current_block->j > maxrow)
 			maxrow = current_block->j;
 			maxrow = current_block->j;
 
 
 		current_block = current_block->next;
 		current_block = current_block->next;
-	};
+	}
 
 
 	return (maxrow+1);
 	return (maxrow+1);
 }
 }
@@ -86,7 +91,8 @@ static tmp_block_t *search_block(tmp_block_t *block_list, unsigned i, unsigned j
 	tmp_block_t *current_block = block_list;
 	tmp_block_t *current_block = block_list;
 	/* printf("search %d %d\n", i, j); */
 	/* printf("search %d %d\n", i, j); */
 
 
-	while (current_block) {
+	while (current_block)
+	{
 		if ((current_block->i == i) && (current_block->j == j)) 
 		if ((current_block->i == i) && (current_block->j == j)) 
 		{
 		{
 			/* we found the block */
 			/* we found the block */
@@ -143,15 +149,18 @@ static void insert_block(tmp_block_t *block, tmp_block_t **block_list, unsigned
 	/* first find an element that is bigger, then insert the block just before it */
 	/* first find an element that is bigger, then insert the block just before it */
 	tmp_block_t *current_block = *block_list;
 	tmp_block_t *current_block = *block_list;
 
 
-	if (!current_block) {
+	if (!current_block)
+	{
 		/* list was empty */
 		/* list was empty */
 		*block_list = block;
 		*block_list = block;
 		block->next = NULL;
 		block->next = NULL;
 		return;
 		return;
 	}
 	}
 
 
-	while (current_block) {
-		if (next_block_is_bigger(current_block, i, j)) {
+	while (current_block)
+	{
+		if (next_block_is_bigger(current_block, i, j))
+		{
 			/* insert block here */
 			/* insert block here */
 			block->next = current_block->next;
 			block->next = current_block->next;
 			current_block->next = block;
 			current_block->next = block;
@@ -177,7 +186,8 @@ static void insert_elem(tmp_block_t **block_list, unsigned abs_i, unsigned abs_j
 
 
 	block = search_block(*block_list, i, j);
 	block = search_block(*block_list, i, j);
 
 
-	if (!block) {
+	if (!block)
+	{
 		/* the block does not exist yet */
 		/* the block does not exist yet */
 		/* create it */
 		/* create it */
 		block = create_block(c, r);
 		block = create_block(c, r);
@@ -225,7 +235,8 @@ static void fill_bcsr(tmp_block_t *block_list, unsigned c, unsigned r, bcsr_t *b
 
 
 	tmp_block_t *current_block = block_list;
 	tmp_block_t *current_block = block_list;
 
 
-	while(current_block) {
+	while(current_block)
+	{
 		/* copy the val from the block to the contiguous area in the BCSR */
 		/* copy the val from the block to the contiguous area in the BCSR */
 		memcpy(&bcsr->val[current_offset], current_block->val, block_size);
 		memcpy(&bcsr->val[current_offset], current_block->val, block_size);
 
 
@@ -314,9 +325,9 @@ bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r)
 		exit(1);
 		exit(1);
 
 
 	if (mm_read_banner(f, &matcode) != 0)
 	if (mm_read_banner(f, &matcode) != 0)
-	{                                                       	
+	{
 		printf("Could not process Matrix Market banner.\n");
 		printf("Could not process Matrix Market banner.\n");
-		exit(1);                                            	
+		exit(1);
 	}
 	}
 
 
 	/*  This is how one can screen matrix types if their application */
 	/*  This is how one can screen matrix types if their application */

+ 5 - 3
examples/spmv/matrix_market/mm_to_bcsr.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,7 +22,8 @@
 /* convert a matrix stored in a file with the matrix market format into the 
 /* convert a matrix stored in a file with the matrix market format into the 
  * BCSR format */
  * BCSR format */
 
 
-typedef struct tmp_block {
+typedef struct tmp_block
+{
 	/* we have a linked list of blocks */
 	/* we have a linked list of blocks */
 	struct tmp_block *next;
 	struct tmp_block *next;
 
 
@@ -33,7 +34,8 @@ typedef struct tmp_block {
 
 
 } tmp_block_t;
 } tmp_block_t;
 
 
-typedef struct {
+typedef struct
+{
 	unsigned r,c;
 	unsigned r,c;
 	unsigned nnz_blocks;
 	unsigned nnz_blocks;
 	unsigned nrows_blocks;
 	unsigned nrows_blocks;

+ 18 - 9
examples/spmv/spmv.c

@@ -27,13 +27,16 @@ starpu_data_handle_t vector_in, vector_out;
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-size") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-size") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 			size = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nblocks") == 0) {
+		if (strcmp(argv[i], "-nblocks") == 0)
+		{
 			char *argptr;
 			char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 			nblocks = strtol(argv[++i], &argptr, 10);
 		}
 		}
@@ -66,7 +69,8 @@ static void csr_filter_func(void *father_interface, void *child_interface, struc
 	csr_child->firstentry = local_firstentry;
 	csr_child->firstentry = local_firstentry;
 	csr_child->elemsize = elemsize;
 	csr_child->elemsize = elemsize;
 	
 	
-	if (csr_father->nzval) {
+	if (csr_father->nzval)
+	{
 		csr_child->rowptr = &csr_father->rowptr[first_index];
 		csr_child->rowptr = &csr_father->rowptr[first_index];
 		csr_child->colind = &csr_father->colind[local_firstentry];
 		csr_child->colind = &csr_father->colind[local_firstentry];
 		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
 		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
@@ -74,20 +78,23 @@ static void csr_filter_func(void *father_interface, void *child_interface, struc
 }
 }
 
 
 /* partition the CSR matrix along a block distribution */
 /* partition the CSR matrix along a block distribution */
-static struct starpu_data_filter csr_f = {
+static struct starpu_data_filter csr_f =
+{
 	.filter_func = csr_filter_func,
 	.filter_func = csr_filter_func,
 	/* This value is defined later on */
 	/* This value is defined later on */
 	.nchildren = -1,
 	.nchildren = -1,
 	/* the children also use a csr interface */
 	/* the children also use a csr interface */
 };
 };
 
 
-static struct starpu_data_filter vector_f = {
+static struct starpu_data_filter vector_f =
+{
 	.filter_func = starpu_block_filter_func_vector,
 	.filter_func = starpu_block_filter_func_vector,
 	/* This value is defined later on */
 	/* This value is defined later on */
 	.nchildren = -1,
 	.nchildren = -1,
 };
 };
 
 
-static struct starpu_codelet spmv_cl = {
+static struct starpu_codelet spmv_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_funcs = {spmv_kernel_cpu, NULL},
 	.cpu_funcs = {spmv_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -143,7 +150,8 @@ int main(int argc, char **argv)
 	{
 	{
 		rowptr[row] = pos;
 		rowptr[row] = pos;
 
 
-		if (row > 0) {
+		if (row > 0)
+		{
 			nzval[pos] = 1.0f;
 			nzval[pos] = 1.0f;
 			colind[pos] = row-1;
 			colind[pos] = row-1;
 			pos++;
 			pos++;
@@ -153,7 +161,8 @@ int main(int argc, char **argv)
 		colind[pos] = row;
 		colind[pos] = row;
 		pos++;
 		pos++;
 
 
-		if (row < size - 1) {
+		if (row < size - 1)
+		{
 			nzval[pos] = 1.0f;
 			nzval[pos] = 1.0f;
 			colind[pos] = row+1;
 			colind[pos] = row+1;
 			pos++;
 			pos++;

+ 21 - 9
examples/starpufft/cudax_kernels.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,10 +27,13 @@
 #define DISTRIB_1d(n, func,args) \
 #define DISTRIB_1d(n, func,args) \
 	unsigned threads_per_block = 128; \
 	unsigned threads_per_block = 128; \
 \
 \
-	if (n < threads_per_block) { \
+	if (n < threads_per_block) \
+	{			   \
 		dim3 dimGrid(n); \
 		dim3 dimGrid(n); \
 		func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
 		func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
-	} else { \
+	} 					\
+	else 					\
+	{				     \
 		dim3 dimGrid(n / threads_per_block); \
 		dim3 dimGrid(n / threads_per_block); \
 		dim3 dimBlock(threads_per_block); \
 		dim3 dimBlock(threads_per_block); \
 		func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
 		func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
@@ -81,21 +84,30 @@ STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsign
 /* FIXME: introduce threads_per_dim_n / m instead */
 /* FIXME: introduce threads_per_dim_n / m instead */
 #define DISTRIB_2d(n, m, func, args) \
 #define DISTRIB_2d(n, m, func, args) \
 	unsigned threads_per_dim = 16; \
 	unsigned threads_per_dim = 16; \
-	if (n < threads_per_dim) { \
-		if (m < threads_per_dim) { \
+	if (n < threads_per_dim) \
+	{				   \
+		if (m < threads_per_dim) \
+		{			    \
 			dim3 dimGrid(n, m); \
 			dim3 dimGrid(n, m); \
 			func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
 			func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
-		} else { \
+		} \
+		else \
+		{					      \
 			dim3 dimGrid(1, m / threads_per_dim); \
 			dim3 dimGrid(1, m / threads_per_dim); \
 			dim3 dimBlock(n, threads_per_dim); \
 			dim3 dimBlock(n, threads_per_dim); \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
 		} \
 		} \
-	} else {  \
-		if (m < threads_per_dim) { \
+	} \
+	else \
+	{				   \
+		if (m < threads_per_dim) \
+		{					      \
 			dim3 dimGrid(n / threads_per_dim, 1); \
 			dim3 dimGrid(n / threads_per_dim, 1); \
 			dim3 dimBlock(threads_per_dim, m); \
 			dim3 dimBlock(threads_per_dim, m); \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
-		} else { \
+		} \
+		else \
+		{							\
 			dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \
 			dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \
 			dim3 dimBlock(threads_per_dim, threads_per_dim); \
 			dim3 dimBlock(threads_per_dim, threads_per_dim); \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
 			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \

+ 100 - 76
examples/starpufft/starpufftx.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,7 +34,8 @@
 #define _FFTW_FLAGS FFTW_ESTIMATE
 #define _FFTW_FLAGS FFTW_ESTIMATE
 
 
 /* Steps for the parallel variant */
 /* Steps for the parallel variant */
-enum steps {
+enum steps
+{
 	SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END
 	SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END
 };
 };
 
 
@@ -49,7 +50,8 @@ enum steps {
 
 
 #define I_BITS STEP_SHIFT
 #define I_BITS STEP_SHIFT
 
 
-enum type {
+enum type
+{
 	R2C,
 	R2C,
 	C2R,
 	C2R,
 	C2C
 	C2C
@@ -65,7 +67,8 @@ static struct timeval start, submit_tasks, end;
  *
  *
  */
  */
 
 
-struct STARPUFFT(plan) {
+struct STARPUFFT(plan)
+{
 	int number;	/* uniquely identifies the plan, for starpu tags */
 	int number;	/* uniquely identifies the plan, for starpu tags */
 
 
 	int *n;
 	int *n;
@@ -84,7 +87,8 @@ struct STARPUFFT(plan) {
 	starpu_data_handle_t roots_handle[2];
 	starpu_data_handle_t roots_handle[2];
 
 
 	/* For each worker, we need some data */
 	/* For each worker, we need some data */
-	struct {
+	struct
+	{
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 		/* CUFFT plans */
 		/* CUFFT plans */
 		cufftHandle plan1_cuda, plan2_cuda;
 		cufftHandle plan1_cuda, plan2_cuda;
@@ -113,7 +117,8 @@ struct STARPUFFT(plan) {
 	struct STARPUFFT(args) *fft1_args, *fft2_args;
 	struct STARPUFFT(args) *fft1_args, *fft2_args;
 };
 };
 
 
-struct STARPUFFT(args) {
+struct STARPUFFT(args)
+{
 	struct STARPUFFT(plan) *plan;
 	struct STARPUFFT(plan) *plan;
 	int i, j, jj, kk, ll, *iv, *kkv;
 	int i, j, jj, kk, ll, *iv, *kkv;
 };
 };
@@ -123,7 +128,8 @@ check_dims(STARPUFFT(plan) plan)
 {
 {
 	int dim;
 	int dim;
 	for (dim = 0; dim < plan->dim; dim++)
 	for (dim = 0; dim < plan->dim; dim++)
-		if (plan->n[dim] & (plan->n[dim]-1)) {
+		if (plan->n[dim] & (plan->n[dim]-1))
+		{
 			fprintf(stderr,"can't cope with non-power-of-2\n");
 			fprintf(stderr,"can't cope with non-power-of-2\n");
 			STARPU_ABORT();
 			STARPU_ABORT();
 		}
 		}
@@ -135,7 +141,8 @@ compute_roots(STARPUFFT(plan) plan)
 	int dim, k;
 	int dim, k;
 
 
 	/* Compute the n-roots and m-roots of unity for twiddling */
 	/* Compute the n-roots and m-roots of unity for twiddling */
-	for (dim = 0; dim < plan->dim; dim++) {
+	for (dim = 0; dim < plan->dim; dim++)
+	{
 		STARPUFFT(complex) exp = (plan->sign * 2. * 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim];
 		STARPUFFT(complex) exp = (plan->sign * 2. * 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim];
 		plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots));
 		plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots));
 		for (k = 0; k < plan->n[dim]; k++)
 		for (k = 0; k < plan->n[dim]; k++)
@@ -143,7 +150,8 @@ compute_roots(STARPUFFT(plan) plan)
 		starpu_vector_data_register(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
 		starpu_vector_data_register(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
 
 
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-		if (plan->n[dim] > 100000) {
+		if (plan->n[dim] > 100000)
+		{
 			/* prefetch the big root array on GPUs */
 			/* prefetch the big root array on GPUs */
 			unsigned worker;
 			unsigned worker;
 			unsigned nworkers = starpu_worker_get_count();
 			unsigned nworkers = starpu_worker_get_count();
@@ -170,17 +178,21 @@ STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
 	plan->in = _in;
 	plan->in = _in;
 	plan->out = _out;
 	plan->out = _out;
 
 
-	switch (plan->dim) {
-		case 1: {
-			switch (plan->type) {
+	switch (plan->dim)
+	{
+		case 1:
+		{
+			switch (plan->type)
+			{
 			case C2C:
 			case C2C:
 				starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
 				starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
-if (!PARALLEL)
-				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
-if (PARALLEL) {
-				for (z = 0; z < plan->totsize1; z++)
-					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
-}
+				if (!PARALLEL)
+					starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+				if (PARALLEL)
+				{
+					for (z = 0; z < plan->totsize1; z++)
+						plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
+				}
 				task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle);
 				task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle);
 				break;
 				break;
 			default:
 			default:
@@ -191,12 +203,13 @@ if (PARALLEL) {
 		}
 		}
 		case 2:
 		case 2:
 			starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
 			starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
-if (!PARALLEL)
-			starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
-if (PARALLEL) {
-			for (z = 0; z < plan->totsize1; z++)
-				plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
-}
+			if (!PARALLEL)
+				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+			if (PARALLEL)
+			{
+				for (z = 0; z < plan->totsize1; z++)
+					plan->twist1_tasks[z]->buffers[0].handle = plan->in_handle;
+			}
 			task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle);
 			task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle);
 			break;
 			break;
 		default:
 		default:
@@ -211,10 +224,11 @@ STARPUFFT(cleanup)(STARPUFFT(plan) plan)
 {
 {
 	if (plan->in_handle)
 	if (plan->in_handle)
 		starpu_data_unregister(plan->in_handle);
 		starpu_data_unregister(plan->in_handle);
-if (!PARALLEL) {
-	if (plan->out_handle)
-		starpu_data_unregister(plan->out_handle);
-}
+	if (!PARALLEL)
+	{
+		if (plan->out_handle)
+			starpu_data_unregister(plan->out_handle);
+	}
 }
 }
 
 
 struct starpu_task *
 struct starpu_task *
@@ -253,16 +267,21 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 {
 {
 	int workerid, dim, i;
 	int workerid, dim, i;
 
 
-	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
-		switch (starpu_worker_get_type(workerid)) {
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
+	{
+		switch (starpu_worker_get_type(workerid))
+		{
 		case STARPU_CPU_WORKER:
 		case STARPU_CPU_WORKER:
 #ifdef STARPU_HAVE_FFTW
 #ifdef STARPU_HAVE_FFTW
-if (PARALLEL) {
-			_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
-			_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
-} else {
-			_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
-}
+			if (PARALLEL)
+			{
+				_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
+				_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
+			}
+			else
+			{
+				_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
+			}
 #endif
 #endif
 			break;
 			break;
 		case STARPU_CUDA_WORKER:
 		case STARPU_CUDA_WORKER:
@@ -276,45 +295,50 @@ if (PARALLEL) {
 		}
 		}
 	}
 	}
 
 
-if (PARALLEL) {
-	for (i = 0; i < plan->totsize1; i++) {
-		starpu_data_unregister(plan->twisted1_handle[i]);
-		free(plan->twist1_tasks[i]);
-		starpu_data_unregister(plan->fft1_handle[i]);
-		free(plan->fft1_tasks[i]);
-	}
+	if (PARALLEL)
+	{
+		for (i = 0; i < plan->totsize1; i++)
+		{
+			starpu_data_unregister(plan->twisted1_handle[i]);
+			free(plan->twist1_tasks[i]);
+			starpu_data_unregister(plan->fft1_handle[i]);
+			free(plan->fft1_tasks[i]);
+		}
 
 
-	free(plan->twisted1_handle);
-	free(plan->twist1_tasks);
-	free(plan->fft1_handle);
-	free(plan->fft1_tasks);
-	free(plan->fft1_args);
+		free(plan->twisted1_handle);
+		free(plan->twist1_tasks);
+		free(plan->fft1_handle);
+		free(plan->fft1_tasks);
+		free(plan->fft1_args);
 
 
-	free(plan->join_task);
+		free(plan->join_task);
 
 
-	for (i = 0; i < plan->totsize3; i++) {
-		starpu_data_unregister(plan->twisted2_handle[i]);
-		free(plan->twist2_tasks[i]);
-		starpu_data_unregister(plan->fft2_handle[i]);
-		free(plan->fft2_tasks[i]);
-		free(plan->twist3_tasks[i]);
-	}
+		for (i = 0; i < plan->totsize3; i++)
+		{
+			starpu_data_unregister(plan->twisted2_handle[i]);
+			free(plan->twist2_tasks[i]);
+			starpu_data_unregister(plan->fft2_handle[i]);
+			free(plan->fft2_tasks[i]);
+			free(plan->twist3_tasks[i]);
+		}
 
 
-	free(plan->end_task);
+		free(plan->end_task);
 
 
-	free(plan->twisted2_handle);
-	free(plan->twist2_tasks);
-	free(plan->fft2_handle);
-	free(plan->fft2_tasks);
-	free(plan->twist3_tasks);
-	free(plan->fft2_args);
+		free(plan->twisted2_handle);
+		free(plan->twist2_tasks);
+		free(plan->fft2_handle);
+		free(plan->fft2_tasks);
+		free(plan->twist3_tasks);
+		free(plan->fft2_args);
 
 
-	for (dim = 0; dim < plan->dim; dim++) {
-		starpu_data_unregister(plan->roots_handle[dim]);
-		free(plan->roots[dim]);
-	}
+		for (dim = 0; dim < plan->dim; dim++)
+		{
+			starpu_data_unregister(plan->roots_handle[dim]);
+			free(plan->roots[dim]);
+		}
 
 
-	switch (plan->dim) {
+		switch (plan->dim)
+		{
 		case 1:
 		case 1:
 			STARPUFFT(free_1d_tags)(plan);
 			STARPUFFT(free_1d_tags)(plan);
 			break;
 			break;
@@ -324,15 +348,15 @@ if (PARALLEL) {
 		default:
 		default:
 			STARPU_ABORT();
 			STARPU_ABORT();
 			break;
 			break;
-	}
+		}
 
 
-	free(plan->n1);
-	free(plan->n2);
-	STARPUFFT(free)(plan->twisted1);
-	STARPUFFT(free)(plan->fft1);
-	STARPUFFT(free)(plan->twisted2);
-	STARPUFFT(free)(plan->fft2);
-}
+		free(plan->n1);
+		free(plan->n2);
+		STARPUFFT(free)(plan->twisted1);
+		STARPUFFT(free)(plan->fft1);
+		STARPUFFT(free)(plan->twisted2);
+		STARPUFFT(free)(plan->fft2);
+	}
 	free(plan->n);
 	free(plan->n);
 	free(plan);
 	free(plan);
 }
 }

+ 25 - 11
examples/starpufft/testx.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,7 +44,8 @@ static void check_fftw(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 {
 {
 	int i;
 	int i;
 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; i++)
+	{
 		double diff = cabs(out[i]-out_fftw[i]);
 		double diff = cabs(out[i]-out_fftw[i]);
 		double diff2 = diff * diff;
 		double diff2 = diff * diff;
 		double size = cabs(out_fftw[i]);
 		double size = cabs(out_fftw[i]);
@@ -74,7 +75,8 @@ static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 {
 {
 	int i;
 	int i;
 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
 	double max = 0., tot = 0., norm = 0., normdiff = 0.;
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; i++)
+	{
 		double diff = cabs(out_cuda[i]-out_fftw[i]);
 		double diff = cabs(out_cuda[i]-out_fftw[i]);
 		double diff2 = diff * diff;
 		double diff2 = diff * diff;
 		double size = cabs(out_fftw[i]);
 		double size = cabs(out_fftw[i]);
@@ -99,7 +101,8 @@ static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, in
 }
 }
 #endif
 #endif
 
 
-int main(int argc, char *argv[]) {
+int main(int argc, char *argv[])
+{
 	int i;
 	int i;
 	struct timeval begin, end;
 	struct timeval begin, end;
 	int size;
 	int size;
@@ -116,25 +119,31 @@ int main(int argc, char *argv[]) {
 #endif
 #endif
 	double timing;
 	double timing;
 
 
-	if (argc < 2 || argc > 3) {
+	if (argc < 2 || argc > 3)
+	{
 		fprintf(stderr,"need one or two size of vector\n");
 		fprintf(stderr,"need one or two size of vector\n");
 		exit(EXIT_FAILURE);
 		exit(EXIT_FAILURE);
 	}
 	}
 
 
 	starpu_init(NULL);
 	starpu_init(NULL);
 
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		n = atoi(argv[1]);
 		n = atoi(argv[1]);
 
 
 		/* 1D */
 		/* 1D */
 		size = n;
 		size = n;
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		n = atoi(argv[1]);
 		n = atoi(argv[1]);
 		m = atoi(argv[2]);
 		m = atoi(argv[2]);
 
 
 		/* 2D */
 		/* 2D */
 		size = n * m;
 		size = n * m;
-	} else {
+	}
+	else
+	{
 		assert(0);
 		assert(0);
 	}
 	}
 
 
@@ -155,7 +164,8 @@ int main(int argc, char *argv[]) {
 	STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda));
 	STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda));
 #endif
 #endif
 
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0);
 		plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0);
 #ifdef STARPU_HAVE_FFTW
 #ifdef STARPU_HAVE_FFTW
 		fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
 		fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
@@ -165,7 +175,9 @@ int main(int argc, char *argv[]) {
 			printf("erf\n");
 			printf("erf\n");
 #endif
 #endif
 
 
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0);
 		plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0);
 #ifdef STARPU_HAVE_FFTW
 #ifdef STARPU_HAVE_FFTW
 		fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
 		fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
@@ -173,7 +185,9 @@ int main(int argc, char *argv[]) {
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 		STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS);
 		STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS);
 #endif
 #endif
-	} else {
+	}
+	else
+	{
 		assert(0);
 		assert(0);
 	}
 	}
 
 

+ 21 - 9
examples/starpufft/testx_threads.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,7 +32,8 @@
 #define SIGN (-1)
 #define SIGN (-1)
 /* #define SIGN (1) */
 /* #define SIGN (1) */
 
 
-int main(int argc, char *argv[]) {
+int main(int argc, char *argv[])
+{
 	int i;
 	int i;
 	struct timeval begin, end;
 	struct timeval begin, end;
 	int size;
 	int size;
@@ -50,23 +51,29 @@ int main(int argc, char *argv[]) {
 		num_threads = atoi(num);
 		num_threads = atoi(num);
 	_FFTW(plan_with_nthreads)(num_threads);
 	_FFTW(plan_with_nthreads)(num_threads);
 
 
-	if (argc < 2 || argc > 3) {
+	if (argc < 2 || argc > 3)
+	{
 		fprintf(stderr,"need one or two size of vector\n");
 		fprintf(stderr,"need one or two size of vector\n");
 		exit(EXIT_FAILURE);
 		exit(EXIT_FAILURE);
 	}
 	}
 
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		n = atoi(argv[1]);
 		n = atoi(argv[1]);
 
 
 		/* 1D */
 		/* 1D */
 		size = n;
 		size = n;
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		n = atoi(argv[1]);
 		n = atoi(argv[1]);
 		m = atoi(argv[2]);
 		m = atoi(argv[2]);
 
 
 		/* 2D */
 		/* 2D */
 		size = n * m;
 		size = n * m;
-	} else {
+	}
+	else
+	{
 		assert(0);
 		assert(0);
 	}
 	}
 
 
@@ -79,12 +86,17 @@ int main(int argc, char *argv[]) {
 
 
 	_FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw));
 	_FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw));
 
 
-	if (argc == 2) {
+	if (argc == 2)
+	{
 		fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE);
 		fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE);
 
 
-	} else if (argc == 3) {
+	}
+	else if (argc == 3)
+	{
 		fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE);
 		fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE);
-	} else {
+	}
+	else
+	{
 		assert(0);
 		assert(0);
 	}
 	}
 
 

+ 6 - 3
examples/stencil/life.c

@@ -22,9 +22,12 @@ void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 {
 {
 	unsigned x, y, z, num, alive;
 	unsigned x, y, z, num, alive;
 
 
-	for (z = iter; z < nz - iter; z++) {
-		for (y = K; y < ny - K; y++) {
-			for (x = K; x < nx - K; x++) {
+	for (z = iter; z < nz - iter; z++)
+	{
+		for (y = K; y < ny - K; y++)
+		{
+			for (x = K; x < nx - K; x++)
+			{
 				num = 0
 				num = 0
                                         + old[x+(y+1)*ldy+(z+0)*ldz]
                                         + old[x+(y+1)*ldy+(z+0)*ldz]
                                         + old[x+(y+1)*ldy+(z+1)*ldz]
                                         + old[x+(y+1)*ldy+(z+1)*ldz]

+ 4 - 2
examples/stencil/life_cuda.cu

@@ -35,8 +35,10 @@ cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 	unsigned num, alive;
 	unsigned num, alive;
 
 
 	for (z = iter + idz; z < nz - iter; z += stepz)
 	for (z = iter + idz; z < nz - iter; z += stepz)
-		for (y = K + idy; y < ny - K; y += stepy) {
-			for (x = K + idx; x < nx - K; x += stepx) {
+		for (y = K + idy; y < ny - K; y += stepy)
+		{
+			for (x = K + idx; x < nx - K; x += stepx)
+			{
 				unsigned index = x + y*ldy + z*ldz;
 				unsigned index = x + y*ldy + z*ldz;
 				num = 0
 				num = 0
                                         + old[index+1*ldy+0*ldz]
                                         + old[index+1*ldy+0*ldz]

+ 8 - 4
examples/stencil/life_opencl.c

@@ -42,8 +42,10 @@ life_update(int bz, __global const TYPE *old, __global TYPE *newp, int nx, int n
 	unsigned num, alive;\n\
 	unsigned num, alive;\n\
 \n\
 \n\
 	for (z = iter + idz; z < nz - iter; z += stepz)\n\
 	for (z = iter + idz; z < nz - iter; z += stepz)\n\
-		for (y = K + idy; y < ny - K; y += stepy) {\n\
-			for (x = K + idx; x < nx - K; x += stepx) {\n\
+		for (y = K + idy; y < ny - K; y += stepy) \n\
+		{\n									\
+			for (x = K + idx; x < nx - K; x += stepx) \
+			{\n					\
 				unsigned index = x + y*ldy + z*ldz;\n\
 				unsigned index = x + y*ldy + z*ldz;\n\
 				num = 0\n\
 				num = 0\n\
                                         + old[index+1*ldy+0*ldz]\n\
                                         + old[index+1*ldy+0*ldz]\n\
@@ -66,11 +68,13 @@ static const char * src = clsrc(TYPE,K);
 static struct starpu_opencl_program program;
 static struct starpu_opencl_program program;
 
 
 void
 void
-opencl_life_init(void) {
+opencl_life_init(void)
+{
   starpu_opencl_load_opencl_from_string(src, &program, NULL);
   starpu_opencl_load_opencl_from_string(src, &program, NULL);
 }
 }
 
 
-void opencl_life_free(void) {
+void opencl_life_free(void)
+{
   starpu_opencl_unload_opencl(&program);
   starpu_opencl_unload_opencl(&program);
 }
 }
 
 

+ 6 - 3
examples/stencil/shadow.h

@@ -20,7 +20,8 @@
 /* TODO: rather use a dummy for loop, to assign the job to the threads that will work on it? */
 /* TODO: rather use a dummy for loop, to assign the job to the threads that will work on it? */
 	if (idy == 0)
 	if (idy == 0)
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
-			for (x = K + idx; x < nx-K; x += stepx) {
+			for (x = K + idx; x < nx-K; x += stepx)
+			{
 				unsigned index = x+z*ldz;
 				unsigned index = x+z*ldz;
 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];
 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];
 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];
 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];
@@ -28,14 +29,16 @@
 
 
 	if (idx == 0)
 	if (idx == 0)
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
-			for (y = K + idy; y < ny-K; y += stepy) {
+			for (y = K + idy; y < ny-K; y += stepy)
+			{
 				unsigned index = y*ldy+z*ldz;
 				unsigned index = y*ldy+z*ldz;
 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];
 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];
 				ptr[(nx-K)+index] = ptr[K+index];
 				ptr[(nx-K)+index] = ptr[K+index];
 			}
 			}
 
 
 	if (idx == 0 && idy == 0)
 	if (idx == 0 && idy == 0)
-		for (z = i-1 + idz; z < nz-(i-1); z += stepz) {
+		for (z = i-1 + idz; z < nz-(i-1); z += stepz)
+		{
 			unsigned index = z*ldz;
 			unsigned index = z*ldz;
 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];
 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];
 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];
 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];

+ 10 - 5
examples/stencil/shadow_opencl.c

@@ -38,7 +38,8 @@ shadow( int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, in
 	unsigned x, y, z;\n\
 	unsigned x, y, z;\n\
 	if (idy == 0)\n\
 	if (idy == 0)\n\
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
-			for (x = K + idx; x < nx-K; x += stepx) {\n\
+			for (x = K + idx; x < nx-K; x += stepx) \
+			{\n								\
 				unsigned index = x+z*ldz;\n\
 				unsigned index = x+z*ldz;\n\
 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];\n\
 				ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];\n\
 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];\n\
 				ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];\n\
@@ -46,14 +47,16 @@ shadow( int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, in
 \n\
 \n\
 	if (idx == 0)\n\
 	if (idx == 0)\n\
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
 		for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\
-			for (y = K + idy; y < ny-K; y += stepy) {\n\
+			for (y = K + idy; y < ny-K; y += stepy) \
+			{\n					\
 				unsigned index = y*ldy+z*ldz;\n\
 				unsigned index = y*ldy+z*ldz;\n\
 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];\n\
 				ptr[(K-1)+index] = ptr[(nx-K-1)+index];\n\
 				ptr[(nx-K)+index] = ptr[K+index];\n\
 				ptr[(nx-K)+index] = ptr[K+index];\n\
 			}\n\
 			}\n\
 \n\
 \n\
 	if (idx == 0 && idy == 0)\n\
 	if (idx == 0 && idy == 0)\n\
-		for (z = i-1 + idz; z < nz-(i-1); z += stepz) {\n\
+		for (z = i-1 + idz; z < nz-(i-1); z += stepz) \
+		{\n					      \
 			unsigned index = z*ldz;\n\
 			unsigned index = z*ldz;\n\
 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];\n\
 			ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];\n\
 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];\n\
 			ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];\n\
@@ -66,11 +69,13 @@ static const char * src = clsrc(TYPE,K);
 static struct starpu_opencl_program program;
 static struct starpu_opencl_program program;
 
 
 void
 void
-opencl_shadow_init(void) {
+opencl_shadow_init(void)
+{
   starpu_opencl_load_opencl_from_string(src, &program, NULL);
   starpu_opencl_load_opencl_from_string(src, &program, NULL);
 }
 }
 
 
-void opencl_shadow_free(void) {
+void opencl_shadow_free(void)
+{
   starpu_opencl_unload_opencl(&program);
   starpu_opencl_unload_opencl(&program);
 }
 }
 
 

+ 22 - 11
examples/stencil/stencil-kernels.c

@@ -23,10 +23,12 @@
 
 
 #ifndef timersub
 #ifndef timersub
 #define	timersub(x, y, res) \
 #define	timersub(x, y, res) \
-	do { \
+	do \
+	{						   \
 		(res)->tv_sec = (x)->tv_sec - (y)->tv_sec; \
 		(res)->tv_sec = (x)->tv_sec - (y)->tv_sec; \
 		(res)->tv_usec = (x)->tv_usec - (y)->tv_usec; \
 		(res)->tv_usec = (x)->tv_usec - (y)->tv_usec; \
-		if ((res)->tv_usec < 0) { \
+		if ((res)->tv_usec < 0) \
+		{			 \
 			(res)->tv_sec--; \
 			(res)->tv_sec--; \
 			(res)->tv_usec += 1000000; \
 			(res)->tv_usec += 1000000; \
 		} \
 		} \
@@ -34,10 +36,12 @@
 #endif
 #endif
 #ifndef timeradd
 #ifndef timeradd
 #define	timeradd(x, y, res) \
 #define	timeradd(x, y, res) \
-	do { \
+	do \
+	{						   \
 		(res)->tv_sec = (x)->tv_sec + (y)->tv_sec; \
 		(res)->tv_sec = (x)->tv_sec + (y)->tv_sec; \
 		(res)->tv_usec = (x)->tv_usec + (y)->tv_usec; \
 		(res)->tv_usec = (x)->tv_usec + (y)->tv_usec; \
-		if ((res)->tv_usec >= 1000000) { \
+		if ((res)->tv_usec >= 1000000) \
+		{			       \
 			(res)->tv_sec++; \
 			(res)->tv_sec++; \
 			(res)->tv_usec -= 1000000; \
 			(res)->tv_usec -= 1000000; \
 		} \
 		} \
@@ -138,7 +142,8 @@ static void record_who_runs_what(struct block_description *block)
 	gettimeofday(&tv, NULL);
 	gettimeofday(&tv, NULL);
 	timersub(&tv, &start, &tv2);
 	timersub(&tv, &start, &tv2);
 	timersub(&tv2, &last_tick[block->bz], &diff);
 	timersub(&tv2, &last_tick[block->bz], &diff);
-	while (timercmp(&diff, &delta, >=)) {
+	while (timercmp(&diff, &delta, >=))
+	{
 		timeradd(&last_tick[block->bz], &delta, &last_tick[block->bz]);
 		timeradd(&last_tick[block->bz], &delta, &last_tick[block->bz]);
 		timersub(&tv2, &last_tick[block->bz], &diff);
 		timersub(&tv2, &last_tick[block->bz], &diff);
 		if (who_runs_what_index[block->bz] < who_runs_what_len)
 		if (who_runs_what_index[block->bz] < who_runs_what_len)
@@ -439,12 +444,14 @@ fprintf(stderr,"!!! DO update_func_cpu z %d CPU%d !!!\n", block->bz, workerid);
 }
 }
 
 
 /* Performance model and codelet structure */
 /* Performance model and codelet structure */
-static struct starpu_perfmodel cl_update_model = {
+static struct starpu_perfmodel cl_update_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "cl_update" 
 	.symbol = "cl_update" 
 };
 };
 
 
-struct starpu_codelet cl_update = {
+struct starpu_codelet cl_update =
+{
 	.where = 0 |
 	.where = 0 |
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 		STARPU_CUDA|
 		STARPU_CUDA|
@@ -634,17 +641,20 @@ static void dummy_func_bottom_opencl(void *descr[] __attribute__((unused)), void
 #endif /* STARPU_USE_OPENCL */
 #endif /* STARPU_USE_OPENCL */
 
 
 /* Performance models and codelet for save */
 /* Performance models and codelet for save */
-static struct starpu_perfmodel save_cl_bottom_model = {
+static struct starpu_perfmodel save_cl_bottom_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "save_cl_bottom" 
 	.symbol = "save_cl_bottom" 
 };
 };
 
 
-static struct starpu_perfmodel save_cl_top_model = {
+static struct starpu_perfmodel save_cl_top_model =
+{
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
 	.symbol = "save_cl_top" 
 	.symbol = "save_cl_top" 
 };
 };
 
 
-struct starpu_codelet save_cl_bottom = {
+struct starpu_codelet save_cl_bottom =
+{
 	.where = 0 |
 	.where = 0 |
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 		STARPU_CUDA|
 		STARPU_CUDA|
@@ -664,7 +674,8 @@ struct starpu_codelet save_cl_bottom = {
 	.nbuffers = 4
 	.nbuffers = 4
 };
 };
 
 
-struct starpu_codelet save_cl_top = {
+struct starpu_codelet save_cl_top =
+{
 	.where = 0|
 	.where = 0|
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 		STARPU_CUDA|
 		STARPU_CUDA|

+ 16 - 8
examples/stencil/stencil-tasks.c

@@ -76,7 +76,8 @@ static void create_task_save_local(unsigned iter, unsigned z, int dir, unsigned
 /* R(z) = local & R(z+d) != local */
 /* R(z) = local & R(z+d) != local */
 /* We need to send our save over MPI */
 /* We need to send our save over MPI */
 
 
-static void send_done(void *arg) {
+static void send_done(void *arg)
+{
 	uintptr_t z = (uintptr_t) arg;
 	uintptr_t z = (uintptr_t) arg;
 	DEBUG("DO SEND %d\n", (int)z);
 	DEBUG("DO SEND %d\n", (int)z);
 }
 }
@@ -103,7 +104,8 @@ static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, unsign
 /* R(z) != local & R(z+d) = local */
 /* R(z) != local & R(z+d) = local */
 /* We need to receive over MPI */
 /* We need to receive over MPI */
 
 
-static void recv_done(void *arg) {
+static void recv_done(void *arg)
+{
 	uintptr_t z = (uintptr_t) arg;
 	uintptr_t z = (uintptr_t) arg;
 	DEBUG("DO RECV %d\n", (int)z);
 	DEBUG("DO RECV %d\n", (int)z);
 }
 }
@@ -146,12 +148,14 @@ void create_task_save(unsigned iter, unsigned z, int dir, unsigned local_rank)
 		}
 		}
 
 
 	}
 	}
-	else {	/* node_z != local_rank, this MPI node doesn't have the saved data */
+	else
+	{	/* node_z != local_rank, this MPI node doesn't have the saved data */
 		if (node_z_and_d == local_rank)
 		if (node_z_and_d == local_rank)
 		{
 		{
 			create_task_save_mpi_recv(iter, z, dir, local_rank);
 			create_task_save_mpi_recv(iter, z, dir, local_rank);
 		}
 		}
-		else { /* R(z) != local & R(z+d) != local We don't have
+		else
+		{ /* R(z) != local & R(z+d) != local We don't have
 			      the saved data and don't need it, we shouldn't
 			      the saved data and don't need it, we shouldn't
 			      even have been called! */
 			      even have been called! */
 			STARPU_ASSERT(0);
 			STARPU_ASSERT(0);
@@ -176,7 +180,8 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 	unsigned niter = get_niter();
 	unsigned niter = get_niter();
 
 
 	/* We are going to synchronize with the last tasks */
 	/* We are going to synchronize with the last tasks */
-	if (iter == niter) {
+	if (iter == niter)
+	{
 		task->detach = 0;
 		task->detach = 0;
 		task->use_tag = 1;
 		task->use_tag = 1;
 		task->tag_id = TAG_FINISH(z);
 		task->tag_id = TAG_FINISH(z);
@@ -218,7 +223,8 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 
 
 /* Dummy empty codelet taking one buffer */
 /* Dummy empty codelet taking one buffer */
 static void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }
 static void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }
-static struct starpu_codelet null = {
+static struct starpu_codelet null =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_funcs = {null_func, NULL},
 	.cpu_funcs = {null_func, NULL},
 	.cuda_funcs = {null_func, NULL},
 	.cuda_funcs = {null_func, NULL},
@@ -226,7 +232,8 @@ static struct starpu_codelet null = {
 	.nbuffers = 2
 	.nbuffers = 2
 };
 };
 
 
-void create_start_task(int z, int dir) {
+void create_start_task(int z, int dir)
+{
 	/* Dumb task depending on the init task and simulating writing the
 	/* Dumb task depending on the init task and simulating writing the
 	   neighbour buffers, to avoid communications and computation running
 	   neighbour buffers, to avoid communications and computation running
 	   before we start measuring time */
 	   before we start measuring time */
@@ -261,7 +268,8 @@ void create_tasks(int rank)
 	int niter = get_niter();
 	int niter = get_niter();
 	int nbz = get_nbz();
 	int nbz = get_nbz();
 
 
-	for (bz = 0; bz < nbz; bz++) {
+	for (bz = 0; bz < nbz; bz++)
+	{
 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
 			create_start_task(bz, +1);
 			create_start_task(bz, +1);
 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))
 		if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank))

+ 32 - 16
examples/stencil/stencil.c

@@ -67,36 +67,45 @@ unsigned get_ticks(void)
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-b") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-b") == 0)
+		{
 			bind_tasks = 1;
 			bind_tasks = 1;
 		}
 		}
 
 
-		if (strcmp(argv[i], "-nbz") == 0) {
+		if (strcmp(argv[i], "-nbz") == 0)
+		{
 			nbz = atoi(argv[++i]);
 			nbz = atoi(argv[++i]);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-sizex") == 0) {
+		if (strcmp(argv[i], "-sizex") == 0)
+		{
 			sizex = atoi(argv[++i]);
 			sizex = atoi(argv[++i]);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-sizey") == 0) {
+		if (strcmp(argv[i], "-sizey") == 0)
+		{
 			sizey = atoi(argv[++i]);
 			sizey = atoi(argv[++i]);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-sizez") == 0) {
+		if (strcmp(argv[i], "-sizez") == 0)
+		{
 			sizez = atoi(argv[++i]);
 			sizez = atoi(argv[++i]);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-niter") == 0) {
+		if (strcmp(argv[i], "-niter") == 0)
+		{
 			niter = atoi(argv[++i]);
 			niter = atoi(argv[++i]);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-ticks") == 0) {
+		if (strcmp(argv[i], "-ticks") == 0)
+		{
 			ticks = atoi(argv[++i]);
 			ticks = atoi(argv[++i]);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			 fprintf(stderr, "Usage : %s [options...]\n", argv[0]);
 			 fprintf(stderr, "Usage : %s [options...]\n", argv[0]);
 			 fprintf(stderr, "\n");
 			 fprintf(stderr, "\n");
 			 fprintf(stderr, "Options:\n");
 			 fprintf(stderr, "Options:\n");
@@ -114,7 +123,8 @@ static void init_problem(int argc, char **argv, int rank, int world_size)
 {
 {
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
-	if (getenv("STARPU_TOP")) {
+	if (getenv("STARPU_TOP"))
+	{
 		starpu_top_init_loop = starpu_top_add_data_integer("Task creation iter", 0, niter, 1);
 		starpu_top_init_loop = starpu_top_add_data_integer("Task creation iter", 0, niter, 1);
 		starpu_top_achieved_loop = starpu_top_add_data_integer("Task achieved iter", 0, niter, 1);
 		starpu_top_achieved_loop = starpu_top_add_data_integer("Task achieved iter", 0, niter, 1);
 		starpu_top_init_and_wait("stencil_top example");
 		starpu_top_init_and_wait("stencil_top example");
@@ -152,8 +162,10 @@ void f(unsigned task_per_worker[STARPU_NMAXWORKERS])
 
 
 	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
 	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
 		total += task_per_worker[worker];
 		total += task_per_worker[worker];
-	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) {
-		if (task_per_worker[worker]) {
+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+	{
+		if (task_per_worker[worker])
+		{
 			char name[32];
 			char name[32];
 			starpu_worker_get_name(worker, name, sizeof(name));
 			starpu_worker_get_name(worker, name, sizeof(name));
 			fprintf(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
 			fprintf(stderr,"\t%s -> %d (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
@@ -181,7 +193,8 @@ int main(int argc, char **argv)
 
 
 #ifdef STARPU_USE_MPI
 #ifdef STARPU_USE_MPI
 	int thread_support;
 	int thread_support;
-	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) {
+	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support))
+	{
 		fprintf(stderr, "MPI_Init_thread failed\n");
 		fprintf(stderr, "MPI_Init_thread failed\n");
 	}
 	}
 	if (thread_support == MPI_THREAD_FUNNELED)
 	if (thread_support == MPI_THREAD_FUNNELED)
@@ -295,15 +308,18 @@ int main(int argc, char **argv)
 
 
 		unsigned bz, iter;
 		unsigned bz, iter;
 		unsigned last;
 		unsigned last;
-		for (iter = 0; iter < who_runs_what_len; iter++) {
+		for (iter = 0; iter < who_runs_what_len; iter++)
+		{
 			last = 1;
 			last = 1;
-			for (bz = 0; bz < nbz; bz++) {
+			for (bz = 0; bz < nbz; bz++)
+			{
 				if ((bz % nzblocks_per_process) == 0)
 				if ((bz % nzblocks_per_process) == 0)
 					fprintf(stderr, "| ");
 					fprintf(stderr, "| ");
 
 
 				if (who_runs_what_index[bz] <= iter)
 				if (who_runs_what_index[bz] <= iter)
 					fprintf(stderr,"_ ");
 					fprintf(stderr,"_ ");
-				else {
+				else
+				{
 					last = 0;
 					last = 0;
 					if (who_runs_what[bz + iter * nbz] == -1)
 					if (who_runs_what[bz + iter * nbz] == -1)
 						fprintf(stderr,"* ");
 						fprintf(stderr,"* ");

+ 4 - 2
examples/stencil/stencil.h

@@ -50,13 +50,15 @@ extern struct starpu_top_data* starpu_top_achieved_loop;
 
 
 
 
 /* Split only on the z axis to make things simple */
 /* Split only on the z axis to make things simple */
-typedef enum {
+typedef enum
+{
 	B = 0,
 	B = 0,
 	T = 1
 	T = 1
 } direction;
 } direction;
 
 
 /* Description of a domain block */
 /* Description of a domain block */
-struct block_description {
+struct block_description
+{
 	/* Which MPI node should process that block ? */
 	/* Which MPI node should process that block ? */
 	unsigned mpi_node;
 	unsigned mpi_node;
 	
 	

+ 16 - 9
examples/tag_example/tag_example.c

@@ -43,23 +43,28 @@ static unsigned iter = 0;
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-j") == 0) {
+		if (strcmp(argv[i], "-j") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nj = strtol(argv[++i], &argptr, 10);
 			nj = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i] [-j j]\n", argv[0]);
 			printf("usage : %s [-iter iter] [-i i] [-j j]\n", argv[0]);
 		}
 		}
 	}
 	}
@@ -153,27 +158,29 @@ void cpu_codelet(void *descr[] __attribute__((unused)),
 
 
 static void express_deps(unsigned i, unsigned j, unsigned iter)
 static void express_deps(unsigned i, unsigned j, unsigned iter)
 {
 {
-	if (j > 0) {
+	if (j > 0)
+	{
 		/* (i,j-1) exists */
 		/* (i,j-1) exists */
 		if (j < nj - 1)
 		if (j < nj - 1)
 		{
 		{
 			/* (i,j+1) exists */
 			/* (i,j+1) exists */
 			starpu_tag_declare_deps(TAG(i,j,iter), 2, TAG(i-1,j-1,iter), TAG(i-1,j+1,iter));
 			starpu_tag_declare_deps(TAG(i,j,iter), 2, TAG(i-1,j-1,iter), TAG(i-1,j+1,iter));
 		}
 		}
-		else 
+		else
 		{
 		{
 			/* (i,j+1) does not exist */
 			/* (i,j+1) does not exist */
 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j-1,iter));
 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j-1,iter));
 		}
 		}
 	}
 	}
-	else {
+	else
+	{
 		/* (i, (j-1) does not exist */
 		/* (i, (j-1) does not exist */
 		if (j < nj - 1)
 		if (j < nj - 1)
 		{
 		{
 			/* (i,j+1) exists */
 			/* (i,j+1) exists */
 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j+1,iter));
 			starpu_tag_declare_deps(TAG(i,j,iter), 1, TAG(i-1,j+1,iter));
 		}
 		}
-		else 
+		else
 		{
 		{
 			/* (i,j+1) does not exist */
 			/* (i,j+1) does not exist */
 			STARPU_ABORT();
 			STARPU_ABORT();

+ 8 - 4
examples/tag_example/tag_example2.c

@@ -40,18 +40,22 @@ static unsigned ni = Ni, nk = Nk;
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 		}
 		}
 	}
 	}

+ 8 - 4
examples/tag_example/tag_example3.c

@@ -40,18 +40,22 @@ static unsigned ni = Ni, nk = Nk;
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 		}
 		}
 	}
 	}

+ 10 - 5
examples/tag_example/tag_restartable.c

@@ -46,18 +46,22 @@ struct starpu_task **tasks[Nrolls];
 static void parse_args(int argc, char **argv)
 static void parse_args(int argc, char **argv)
 {
 {
 	int i;
 	int i;
-	for (i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-iter") == 0) {
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-iter") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			nk = strtol(argv[++i], &argptr, 10);
 			nk = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-i") == 0) {
+		if (strcmp(argv[i], "-i") == 0)
+		{
 		        char *argptr;
 		        char *argptr;
 			ni = strtol(argv[++i], &argptr, 10);
 			ni = strtol(argv[++i], &argptr, 10);
 		}
 		}
 
 
-		if (strcmp(argv[i], "-h") == 0) {
+		if (strcmp(argv[i], "-h") == 0)
+		{
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 			printf("usage : %s [-iter iter] [-i i]\n", argv[0]);
 		}
 		}
 	}
 	}
@@ -133,7 +137,8 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 
 	FPRINTF(stderr, "ITER : %u\n", nk);
 	FPRINTF(stderr, "ITER : %u\n", nk);
 
 
-	for (i = 0; i < Nrolls; i++) {
+	for (i = 0; i < Nrolls; i++)
+	{
 		tasks[i] = (struct starpu_task **) malloc(ni * sizeof(*tasks[i]));
 		tasks[i] = (struct starpu_task **) malloc(ni * sizeof(*tasks[i]));
 
 
 		create_task_grid(i);
 		create_task_grid(i);

+ 3 - 1
examples/top/hello_world_top.c

@@ -56,10 +56,12 @@ void callback_func(void *callback_arg)
  * DSM; the second arguments references read-only data that is passed as an
  * DSM; the second arguments references read-only data that is passed as an
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
-struct params {
+struct params
+{
 	int i;
 	int i;
 	float f;
 	float f;
 };
 };
+
 void cpu_func(void *buffers[], void *cl_arg)
 void cpu_func(void *buffers[], void *cl_arg)
 {
 {
 	struct params *params = (struct params *) cl_arg;
 	struct params *params = (struct params *) cl_arg;