Browse Source

Add more verbose comments in examples source code

Samuel Thibault 10 years ago
parent
commit
00b1ef6f74
100 changed files with 407 additions and 293 deletions
  1. 0 1
      examples/Makefile.am
  2. 2 5
      examples/README.txt
  3. 7 1
      examples/audio/starpu_audio_processing.c
  4. 6 1
      examples/axpy/axpy.c
  5. 2 0
      examples/axpy/axpy_opencl.c
  6. 2 0
      examples/axpy/axpy_opencl_kernel.cl
  7. 5 0
      examples/binary/binary.c
  8. 6 1
      examples/callback/callback.c
  9. 14 19
      examples/callback/prologue.c
  10. 3 1
      examples/cg/cg.c
  11. 11 1
      examples/cg/cg_dot_kernel.cu
  12. 5 1
      examples/cg/cg_kernels.c
  13. 11 1
      examples/cholesky/cholesky_grain_tag.c
  14. 5 0
      examples/cholesky/cholesky_implicit.c
  15. 7 0
      examples/cholesky/cholesky_kernels.c
  16. 6 1
      examples/cholesky/cholesky_models.c
  17. 7 1
      examples/cholesky/cholesky_tag.c
  18. 7 1
      examples/cholesky/cholesky_tile_tag.c
  19. 6 1
      examples/cpp/incrementer_cpp.cpp
  20. 6 1
      examples/filters/fblock.c
  21. 2 0
      examples/filters/fblock_cpu.c
  22. 2 0
      examples/filters/fblock_cuda.cu
  23. 3 1
      examples/filters/fblock_opencl.c
  24. 2 0
      examples/filters/fblock_opencl_kernel.cl
  25. 5 0
      examples/filters/fmatrix.c
  26. 5 0
      examples/filters/fvector.c
  27. 0 203
      examples/filters/subdata.c
  28. 5 1
      examples/fortran/hello_c.c
  29. 4 0
      examples/fortran90/f90_example.f90
  30. 2 0
      examples/fortran90/marshalling.c
  31. 2 0
      examples/fortran90/mod_compute.f90
  32. 2 0
      examples/fortran90/mod_interface.f90
  33. 7 1
      examples/heat/dw_factolu.c
  34. 10 1
      examples/heat/dw_factolu_grain.c
  35. 4 1
      examples/heat/dw_factolu_kernels.c
  36. 6 1
      examples/heat/dw_factolu_tag.c
  37. 2 1
      examples/heat/dw_sparse_cg.c
  38. 5 1
      examples/heat/heat.c
  39. 4 1
      examples/incrementer/incrementer.c
  40. 3 1
      examples/incrementer/incrementer_kernels.cu
  41. 3 1
      examples/incrementer/incrementer_kernels_opencl.c
  42. 2 0
      examples/incrementer/incrementer_kernels_opencl_kernel.cl
  43. 3 1
      examples/lu/clu.c
  44. 3 1
      examples/lu/clu_implicit.c
  45. 3 1
      examples/lu/clu_implicit_pivot.c
  46. 3 1
      examples/lu/clu_kernels.c
  47. 3 1
      examples/lu/clu_pivot.c
  48. 2 1
      examples/lu/complex_double.h
  49. 2 1
      examples/lu/complex_float.h
  50. 3 1
      examples/lu/dlu.c
  51. 3 1
      examples/lu/dlu_implicit.c
  52. 3 1
      examples/lu/dlu_implicit_pivot.c
  53. 3 1
      examples/lu/dlu_kernels.c
  54. 3 1
      examples/lu/dlu_pivot.c
  55. 3 1
      examples/lu/lu-double.h
  56. 2 1
      examples/lu/lu-float.h
  57. 3 0
      examples/lu/lu_example.c
  58. 3 1
      examples/lu/lu_example_complex_double.c
  59. 3 1
      examples/lu/lu_example_complex_float.c
  60. 3 1
      examples/lu/lu_example_double.c
  61. 3 1
      examples/lu/lu_example_float.c
  62. 3 1
      examples/lu/slu.c
  63. 3 1
      examples/lu/slu_implicit.c
  64. 3 1
      examples/lu/slu_implicit_pivot.c
  65. 3 1
      examples/lu/slu_kernels.c
  66. 3 1
      examples/lu/slu_pivot.c
  67. 2 1
      examples/lu/xlu.c
  68. 2 0
      examples/lu/xlu_implicit.c
  69. 3 0
      examples/lu/xlu_implicit_pivot.c
  70. 3 1
      examples/lu/xlu_kernels.c
  71. 3 1
      examples/lu/xlu_pivot.c
  72. 3 1
      examples/lu/zlu.c
  73. 3 1
      examples/lu/zlu_implicit.c
  74. 3 1
      examples/lu/zlu_implicit_pivot.c
  75. 3 1
      examples/lu/zlu_kernels.c
  76. 3 1
      examples/lu/zlu_pivot.c
  77. 6 0
      examples/mandelbrot/mandelbrot.c
  78. 5 0
      examples/mult/xgemm.c
  79. 7 1
      examples/openmp/vector_scal_omp.c
  80. 10 1
      examples/pi/pi.c
  81. 4 1
      examples/pi/pi_kernel.cu
  82. 10 0
      examples/pi/pi_redux.c
  83. 3 1
      examples/pi/pi_redux_kernel.cu
  84. 4 1
      examples/ppm_downscaler/ppm_downscaler.c
  85. 5 0
      examples/ppm_downscaler/yuv_downscaler.c
  86. 4 0
      examples/profiling/profiling.c
  87. 5 0
      examples/reductions/dot_product.c
  88. 2 0
      examples/reductions/dot_product_kernels.cu
  89. 2 0
      examples/reductions/dot_product_opencl_kernels.cl
  90. 5 0
      examples/reductions/minmax_reduction.c
  91. 7 1
      examples/scheduler/dummy_sched.c
  92. 4 0
      examples/scheduler/heteroprio_test.c
  93. 6 0
      examples/spmd/vector_scal_spmd.c
  94. 5 0
      examples/spmv/dw_block_spmv.c
  95. 4 1
      examples/spmv/dw_block_spmv_kernels.c
  96. 6 2
      examples/spmv/spmv.c
  97. 3 1
      examples/spmv/spmv_cuda.cu
  98. 3 1
      examples/spmv/spmv_kernels.c
  99. 2 0
      examples/spmv/spmv_opencl.cl
  100. 0 0
      examples/tag_example/tag_restartable.c

+ 0 - 1
examples/Makefile.am

@@ -182,7 +182,6 @@ STARPU_EXAMPLES =				\
 	filters/fvector				\
 	filters/fblock				\
 	filters/fmatrix				\
-	filters/subdata				\
 	tag_example/tag_example			\
 	tag_example/tag_example2		\
 	tag_example/tag_example3		\

+ 2 - 5
examples/README.txt

@@ -29,16 +29,13 @@ cpp
 filters
 	This contains several partitioning examples
 
-fortran
-	This shows how to use StarPU from Fortran
+fortran90
+	This shows how to use StarPU from Fortran90
 
 gl_interop
 	This shows how interoperation can be done between StarPU CUDA
 	computations and OpenGL rendering
 
-gordon
-	This was an example for the Cell architecture, now deprecated
-
 heat
         This uses a finite element method to compute heat propagation thanks to
         an LU factorization or a conjugate gradient

+ 7 - 1
examples/audio/starpu_audio_processing.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012, 2014  Université de Bordeaux
+ * Copyright (C) 2010-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
@@ -16,6 +16,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This reads a wave file, splits it into chunks, and on each of them runs a
+ * task which performs an FFT, drops some high and low frequencies, and performs
+ * the inverse FFT.  It then writes the output to a wave file.
+ */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>

+ 6 - 1
examples/axpy/axpy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014  Université de Bordeaux
+ * Copyright (C) 2009-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
  *
@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This creates two dumb vectors, splits them into chunks, and for each pair of
+ * chunks, runs axpy on them.
+ */
+
 #include <starpu.h>
 #include <stdlib.h>
 #include <stdio.h>

+ 2 - 0
examples/axpy/axpy_opencl.c

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* OpenCL codelet for axpy */
+
 #include <starpu.h>
 #include "axpy.h"
 

+ 2 - 0
examples/axpy/axpy_opencl_kernel.cl

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* OpenCL kernel implementing axpy */
+
 #include "axpy.h"
 
 __kernel void _axpy_opencl(__global TYPE *x,

+ 5 - 0
examples/binary/binary.c

@@ -15,6 +15,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This shows how to load OpenCL programs, either from a .cl file, or from a
+ * string containing the program itself.
+ */
+
 #include <starpu.h>
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)

+ 6 - 1
examples/callback/callback.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2013  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2013, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This is an example of using a callback. We submit a task, whose callback
+ * submits another task (without any callback).
+ */
+
 #include <starpu.h>
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)

+ 14 - 19
examples/callback/prologue.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2013-2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This is an example of using a prologue callback. We submit a task, whose
+ * prologue callback (i.e. before task gets scheduled) prints a value, and
+ * whose pop_prologue callback (i.e. after task gets scheduled, but before task
+ * execution) prints another value.
+ */
+
 #include <starpu.h>
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
@@ -37,28 +44,18 @@ struct starpu_codelet cl =
 	.name = "callback"
 };
 
-void callback_func(void *callback_arg)
-{
-	int ret;
-
-	struct starpu_task *task = starpu_task_create();
-	task->cl = &cl;
-	task->handles[0] = handle;
-
-	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-}
-
 void prologue_callback_func(void *callback_arg)
 {
 	double *x = (double*)callback_arg;
 	printf("x = %lf\n", *x);
+	STARPU_ASSERT(*x == -999.0);
 }
 
 void pop_prologue_callback_func(void *args)
 {
 	unsigned val = (uintptr_t) args;
 	printf("pop_prologue_callback val %d \n", val);
+	STARPU_ASSERT(val == 5);
 }
 
 
@@ -73,12 +70,12 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int));
-	double *x = (double*)malloc(sizeof(double));
+	double x = -999.0;
 
 	struct starpu_task *task = starpu_task_create();
 	task->cl = &cl;
-	task->prologue_callback_func = callback_func;
-	task->prologue_callback_arg = NULL;
+	task->prologue_callback_func = prologue_callback_func;
+	task->prologue_callback_arg = &x;
 
 	task->prologue_callback_pop_func = pop_prologue_callback_func;
 	task->prologue_callback_pop_arg = (void*) 5;
@@ -89,11 +86,10 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) goto enodev;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
-	*x = -999.0;
 	ret = starpu_task_insert(&cl,
 				 STARPU_RW, handle,
 				 STARPU_PROLOGUE_CALLBACK, prologue_callback_func,
-				 STARPU_PROLOGUE_CALLBACK_ARG, x,
+				 STARPU_PROLOGUE_CALLBACK_ARG, &x,
 				 STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func,
 				 STARPU_PROLOGUE_CALLBACK_POP_ARG, 5,
 				 0);
@@ -104,7 +100,6 @@ int main(int argc, char **argv)
 
 enodev:
 	starpu_data_unregister(handle);
-	free(x);
 	FPRINTF(stderr, "v -> %d\n", v);
 	starpu_shutdown();
 	return (ret == -ENODEV) ? 77 : 0;

+ 3 - 1
examples/cg/cg.c

@@ -62,7 +62,9 @@
  *			d <- r + beta d
  *			i <- i + 1
  *		}
- *	
+ *
+ *	The dot() operations make use of reduction to optimize parallelism.
+ *
  */
 
 #include "cg.h"

+ 11 - 1
examples/cg/cg_dot_kernel.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -22,6 +22,12 @@
 #define MAXNBLOCKS	128
 #define MAXTHREADSPERBLOCK	256
 
+/*
+ * Dot product kernel
+ * We first perform dot computation in parallel in dot_device, and then we
+ * gather the dot values into one in gather_dot_device.
+ */
+
 static __global__ void dot_device(TYPE *vx, TYPE *vy, unsigned n, TYPE *dot_array)
 {
 	__shared__ TYPE scnt[MAXTHREADSPERBLOCK];
@@ -126,6 +132,10 @@ extern "C" void dot_host(TYPE *x, TYPE *y, unsigned nelems, TYPE *dot)
 	cudaFree(per_block_sum);
 }
 
+/*
+ * Fill a vector with zeroes
+ */
+
 static __global__ void zero_vector_device(TYPE *x, unsigned nelems, unsigned nelems_per_thread)
 {
 	unsigned i;

+ 5 - 1
examples/cg/cg_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012-2014  Université de Bordeaux
+ * Copyright (C) 2010, 2012-2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,6 +14,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * Standard BLAS kernels used by CG
+ */
+
 #include "cg.h"
 #include <math.h>
 #include <limits.h>

+ 11 - 1
examples/cholesky/cholesky_grain_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014  Université de Bordeaux
+ * Copyright (C) 2009-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
@@ -16,6 +16,16 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This version of the Cholesky factorization uses explicit dependency
+ * declaration through dependency tags.
+ * It also uses data partitioning to split the matrix into submatrices.
+ * It also changes the partitioning during execution: when called first,
+ * cholesky_grain_rec splits the matrix with a big granularity (nblocks) and
+ * processes nbigblocks blocks, before calling itself again, to process the
+ * remainder of the matrix with a smaller granularity.
+ */
+
 #include "cholesky.h"
 
 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA)

+ 5 - 0
examples/cholesky/cholesky_implicit.c

@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This version of the Cholesky factorization uses implicit dependency computation.
+ * The whole algorithm thus appears clearly in the task submission loop in _cholesky().
+ */
+
 #include "cholesky.h"
 #include "../sched_ctx_utils/sched_ctx_utils.h"
 

+ 7 - 0
examples/cholesky/cholesky_kernels.c

@@ -15,6 +15,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * Standard kernels for the Cholesky factorization
+ * U22 is the gemm update
+ * U21 is the trsm update
+ * U11 is the cholesky factorization
+ */
+
 #include <starpu.h>
 #include "cholesky.h"
 #include "../common/blas.h"

+ 6 - 1
examples/cholesky/cholesky_models.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  *
@@ -17,6 +17,11 @@
  */
 
 /*
+ * Example of a cost model for BLAS operations.  This is really just an
+ * example!
+ */
+
+/*
  * As a convention, in that file, buffers[0] is represented by A,
  * 				  buffers[1] is B ...
  */

+ 7 - 1
examples/cholesky/cholesky_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014  Université de Bordeaux
+ * Copyright (C) 2009-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
@@ -16,6 +16,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This version of the Cholesky factorization uses explicit dependency
+ * declaration through dependency tags.
+ * It also uses data partitioning to split the matrix into submatrices
+ */
+
 #include "cholesky.h"
 #include <starpu_perfmodel.h>
 

+ 7 - 1
examples/cholesky/cholesky_tile_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014  Université de Bordeaux
+ * Copyright (C) 2009-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This version of the Cholesky factorization uses explicit dependency
+ * declaration through dependency tags.
+ * It also directly registers matrix tiles instead of using partitioning.
+ */
+
 #include "cholesky.h"
 
 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA)

+ 6 - 1
examples/cpp/incrementer_cpp.cpp

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
  * Copyright (C) 2012 INRIA
  *
@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This is a small example of a C++ program using starpu.  We here just
+ * increment two values of a vector several times.
+ */
+
 #include <starpu.h>
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)

+ 6 - 1
examples/filters/fblock.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux
+ * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This exemplifies how to use partitioning filters.  We here just split a 3D
+ * matrix into 3D slices (along the X axis), and run a dumb kernel on them.
+ */
+
 #include <starpu.h>
 
 #define NX    5

+ 2 - 0
examples/filters/fblock_cpu.c

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* dumb kernel to fill a 3D matrix */
+
 #include <starpu.h>
 
 void cpu_func(void *buffers[], void *cl_arg)

+ 2 - 0
examples/filters/fblock_cuda.cu

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* dumb CUDA kernel to fill a 3D matrix */
+
 #include <starpu.h>
 
 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)

+ 3 - 1
examples/filters/fblock_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
- * Copyright (C) 2011, 2014  Université de Bordeaux
+ * Copyright (C) 2011, 2014-2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* dumb OpenCL codelet to fill a 3D matrix */
+
 #include <starpu.h>
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       	\

+ 2 - 0
examples/filters/fblock_opencl_kernel.cl

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* dumb OpenCL kernel to fill a 3D matrix */
+
 __kernel void fblock_opencl(__global int* block, unsigned offset, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
 {
         int i, j, k;

+ 5 - 0
examples/filters/fmatrix.c

@@ -14,6 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This exemplifies how to use partitioning filters.  We here just split a 2D
+ * matrix into 2D slices (along the X axis), and run a dumb kernel on them.
+ */
+
 #include <starpu.h>
 
 #define NX    5

+ 5 - 0
examples/filters/fvector.c

@@ -14,6 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This exemplifies how to use partitioning filters.  We here just split a
+ * vector into slices, and run a dumb kernel on them.
+ */
+
 #include <starpu.h>
 
 #define NX    21

+ 0 - 203
examples/filters/subdata.c

@@ -1,203 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-
-#define NX    6
-#define NY    4
-#define PARTS 2
-
-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
-
-void cpu_func(void *buffers[], void *cl_arg)
-{
-        unsigned i, j;
-        int *factor = (int *) cl_arg;
-
-        /* length of the matrix */
-        unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]);
-        unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]);
-        unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]);
-        /* local copy of the matrix pointer */
-        int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
-
-	FPRINTF(stderr, "applying factor %d\n", *factor);
-        for(j=0; j<ny ; j++)
-	{
-                for(i=0; i<nx ; i++)
-		{
-                        FPRINTF(stderr, "%4d ", val[(j*ld)+i]);
-                        val[(j*ld)+i] *= *factor;
-		}
-		FPRINTF(stderr,"\n");
-        }
-	FPRINTF(stderr,"\n");
-}
-
-struct starpu_codelet cl =
-{
-	.cpu_funcs = {cpu_func},
-	.cpu_funcs_name = {"cpu_func"},
-	.nbuffers = 1,
-	.modes = {STARPU_RW},
-	.name = "matrix_scal"
-};
-
-void split_func(void *buffers[], void *cl_arg)
-{
-        unsigned i, j;
-        int *factor = (int *) cl_arg;
-
-        /* length of the matrix */
-        unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]);
-        unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]);
-        unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]);
-
-        /* local copy of the matrix pointer */
-        int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
-
-	FPRINTF(stderr, "splitting\n");
-        for(j=0; j<ny ; j++)
-	{
-                for(i=0; i<nx ; i++)
-		{
-                        FPRINTF(stderr, "%4d ", val[(j*ld)+i]);
-		}
-                FPRINTF(stderr,"\n");
-        }
-	FPRINTF(stderr,"\n");
-
-	starpu_data_handle_t submatrix = starpu_data_lookup(val);
-        /* Partition the sub-matrix in PARTS sub-sub-matrices */
-	struct starpu_data_filter f =
-	{
-		.filter_func = starpu_matrix_filter_block,
-		.nchildren = PARTS
-	};
-	starpu_data_partition(submatrix, &f);
-
-        /* Submit a task on each sub-vector */
-	for (i=0; i<starpu_data_get_nb_children(submatrix); i++)
-	{
-                struct starpu_task *task = starpu_task_create();
-		task->handles[0] = starpu_data_get_sub_data(submatrix, 1, i);
-                task->cl = &cl;
-                task->cl_arg = factor;
-                task->cl_arg_size = sizeof(*factor);
-
-		int ret = starpu_task_submit(task);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
-	//starpu_data_unpartition(submatrix, STARPU_MAIN_RAM);
-}
-
-int main(int argc, char **argv)
-{
-	unsigned j, n=1;
-        int matrix[NX*NY];
-	int ret, i;
-	int factor = 12;
-
-        FPRINTF(stderr,"IN  Matrix: \n");
-        for(j=0 ; j<NY ; j++)
-	{
-                for(i=0 ; i<NX ; i++)
-		{
-                        matrix[(j*NX)+i] = n++;
-                        FPRINTF(stderr, "%4d ", matrix[(j*NX)+i]);
-                }
-                FPRINTF(stderr,"\n");
-        }
-        FPRINTF(stderr,"\n");
-
-        starpu_data_handle_t handle;
-        struct starpu_codelet split_cl =
-	{
-                .cpu_funcs = {split_func},
-                .cpu_funcs_name = {"split_func"},
-                .nbuffers = 1,
-		.modes = {STARPU_RW},
-		.name = "split_matrix"
-        };
-
-        ret = starpu_init(NULL);
-	if (ret == -ENODEV)
-		return 77;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-
-	/* Declare data to StarPU */
-	starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
-
-        /* Partition the matrix in PARTS sub-matrices */
-	struct starpu_data_filter f =
-	{
-		.filter_func = starpu_matrix_filter_block,
-		.nchildren = PARTS
-	};
-	starpu_data_partition(handle, &f);
-
-        /* Submit a task on each sub-vector */
-	for (i=0; i<starpu_data_get_nb_children(handle); i++)
-	{
-                struct starpu_task *task = starpu_task_create();
-		starpu_data_handle_t subdata = starpu_data_get_sub_data(handle, 1, i);
-		task->handles[0] = subdata;
-                task->cl = &split_cl;
-                task->cl_arg = &factor;
-                task->cl_arg_size = sizeof(factor);
-
-		ret = starpu_task_submit(task);
-		if (ret == -ENODEV) goto enodev;
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
-
-	starpu_task_wait_for_all();
-	for (i=0; i<starpu_data_get_nb_children(handle); i++)
-	{
-		starpu_data_handle_t subdata = starpu_data_get_sub_data(handle, 1, i);
-		starpu_data_unpartition(subdata, STARPU_MAIN_RAM);
-	}
-
-        /* Unpartition the data, unregister it from StarPU and shutdown */
-	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
-        starpu_data_unregister(handle);
-	starpu_shutdown();
-
-        /* Print result matrix */
-	n=1;
-        FPRINTF(stderr,"OUT Matrix: \n");
-        for(j=0 ; j<NY ; j++)
-	{
-                for(i=0 ; i<NX ; i++)
-		{
-                        FPRINTF(stderr, "%4d ", matrix[(j*NX)+i]);
-			if (matrix[(j*NX)+i] != n*12)
-			{
-				FPRINTF(stderr, "Incorrect result %4d != %4d", matrix[(j*NX)+i], n*12);
-				ret=1;
-			}
-			n++;
-                }
-                FPRINTF(stderr,"\n");
-        }
-        FPRINTF(stderr,"\n");
-
-	return ret;
-
-enodev:
-	starpu_shutdown();
-	return 77;
-}

+ 5 - 1
examples/fortran/hello_c.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This shows how to call a fortran function from a C function
+ */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>

+ 4 - 0
examples/fortran90/f90_example.f90

@@ -14,6 +14,10 @@
 !
 ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
+! This is an example of a Fortran90 program making use of StarPU.
+! It registers a few matrices for each element of a domain, performs
+! update computations on them, and checks the result.
+
 PROGRAM f90_example
 
   USE mod_types

+ 2 - 0
examples/fortran90/marshalling.c

@@ -16,6 +16,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Helper functions to initialize StarPU and register element matrices */
+
 #include <starpu.h>
 
 //--------------------------------------------------------------//

+ 2 - 0
examples/fortran90/mod_compute.f90

@@ -14,6 +14,8 @@
 !
 ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
+! Computation kernels for the simulation
+
 MODULE mod_compute
 
   USE mod_types

+ 2 - 0
examples/fortran90/mod_interface.f90

@@ -14,6 +14,8 @@
 !
 ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
+! Fortran module interface for StarPU initialization and element registration
+
 MODULE mod_interface
 
   INTERFACE

+ 7 - 1
examples/heat/dw_factolu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014  Université de Bordeaux
+ * Copyright (C) 2009-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
@@ -16,6 +16,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This implements an LU factorization.
+ * The task graph is submitted through continuation: the rest of the graph is
+ * submitted as appropriate in the tasks' callback.
+ */
+
 #include "dw_factolu.h"
 
 #if 0

+ 10 - 1
examples/heat/dw_factolu_grain.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
@@ -16,6 +16,15 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This implements an LU factorization.
+ * The task graph is submitted through dependency tags.
+ * It also changes the partitioning during execution: when called first,
+ * dw_factoLU_grain_inner splits the matrix with a big granularity (nblocks)
+ * and processes nbigblocks blocks, before calling itself again, to process the
+ * remainder of the matrix with a smaller granularity.
+ */
+
 #include "dw_factolu.h"
 
 #define TAG11(k, prefix)	((starpu_tag_t)( (((unsigned long long)(prefix))<<60)  |  (1ULL<<56) | (unsigned long long)(k)))

+ 4 - 1
examples/heat/dw_factolu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2012, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * These are standard BLAS kernels for the LU factorization
+ */
 #include "dw_factolu.h"
 
 unsigned count_11_per_worker[STARPU_NMAXWORKERS] = {0};

+ 6 - 1
examples/heat/dw_factolu_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This implements an LU factorization.
+ * The task graph is submitted through dependency tags.
+ */
+
 #include "dw_factolu.h"
 
 #define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))

+ 2 - 1
examples/heat/dw_sparse_cg.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2011, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -17,6 +17,7 @@
 
 /*
  * Conjugate gradients for Sparse matrices
+ * The task graph is declared through tag dependencies
  */
 
 #include "dw_sparse_cg.h"

+ 5 - 1
examples/heat/heat.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2012  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2012, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * heat propagation simulation through either direct LU factorization or
+ * iterative conjugate gradient.
+ */
 #include "heat.h"
 
 /* default values */

+ 4 - 1
examples/incrementer/incrementer.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2011, 2013-2014  Université de Bordeaux
+ * Copyright (C) 2009-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This is just a small example which increments two values of a vector several times.
+ */
 #include <starpu.h>
 
 static unsigned niter = 50000;

+ 3 - 1
examples/incrementer/incrementer_kernels.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* CUDA kernel for incrementation */
+
 #include <starpu.h>
 
 static __global__ void cuda_incrementer(float * tab)

+ 3 - 1
examples/incrementer/incrementer_kernels_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2012  CNRS
- * Copyright (C) 2011, 2014  Université de Bordeaux
+ * Copyright (C) 2011, 2014-2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* OpenCL codelet for incrementation */
+
 #include <starpu.h>
 
 extern struct starpu_opencl_program opencl_program;

+ 2 - 0
examples/incrementer/incrementer_kernels_opencl_kernel.cl

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* OpenCL kernel for incrementation */
+
 __kernel void incrementer(__global float* input) 
 {
 	const int i = get_global_id(0);

+ 3 - 1
examples/lu/clu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex float LU version, explicit dependencies */
+
 #include "complex_float.h"
 #include "xlu.c"

+ 3 - 1
examples/lu/clu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex float LU version, implicit dependencies */
+
 #include "complex_float.h"
 #include "xlu_implicit.c"

+ 3 - 1
examples/lu/clu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex float LU version, implicit dependencies, and partial pivoting */
+
 #include "complex_float.h"
 #include "xlu_implicit_pivot.c"

+ 3 - 1
examples/lu/clu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex float LU kernels */
+
 #include "complex_float.h"
 #include "xlu_kernels.c"

+ 3 - 1
examples/lu/clu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex float LU kernels with partial pivoting */
+
 #include "complex_float.h"
 #include "xlu_pivot.c"

+ 2 - 1
examples/lu/complex_double.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex double macros */
 
 #include <complex.h>
 #include "blas_complex.h"

+ 2 - 1
examples/lu/complex_float.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex float macros */
 
 #include <complex.h>
 #include "blas_complex.h"

+ 3 - 1
examples/lu/dlu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real double LU version, explicit dependencies */
+
 #include "lu-double.h"
 #include "xlu.c"

+ 3 - 1
examples/lu/dlu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real double LU version, implicit dependencies */
+
 #include "lu-double.h"
 #include "xlu_implicit.c"

+ 3 - 1
examples/lu/dlu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real double LU version, implicit dependencies with partial pivoting */
+
 #include "lu-double.h"
 #include "xlu_implicit_pivot.c"

+ 3 - 1
examples/lu/dlu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real double LU kernels */
+
 #include "lu-double.h"
 #include "xlu_kernels.c"

+ 3 - 1
examples/lu/dlu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real double LU kernels with partial pivoting */
+
 #include "lu-double.h"
 #include "xlu_pivot.c"

+ 3 - 1
examples/lu/lu-double.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real double macros */
+
 #define TYPE double
 #define CUBLAS_TYPE TYPE
 

+ 2 - 1
examples/lu/lu-float.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real float macros */
 
 #define TYPE float
 #define CUBLAS_TYPE TYPE

+ 3 - 0
examples/lu/lu_example.c

@@ -15,6 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Main body for the LU factorization: matrix initialization and result
+ * checking */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>

+ 3 - 1
examples/lu/lu_example_complex_double.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Main body for the LU factorization, complex double version */
+
 #include "complex_double.h"
 #include "lu_example.c"

+ 3 - 1
examples/lu/lu_example_complex_float.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Main body for the LU factorization, complex float version */
+
 #include "complex_float.h"
 #include "lu_example.c"

+ 3 - 1
examples/lu/lu_example_double.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Main body for the LU factorization, real double version */
+
 #include "lu-double.h"
 #include "lu_example.c"

+ 3 - 1
examples/lu/lu_example_float.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Main body for the LU factorization, real float version */
+
 #include "lu-float.h"
 #include "lu_example.c"

+ 3 - 1
examples/lu/slu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real float LU version, explicit dependencies */
+
 #include "lu-float.h"
 #include "xlu.c"

+ 3 - 1
examples/lu/slu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real float LU version, implicit dependencies */
+
 #include "lu-float.h"
 #include "xlu_implicit.c"

+ 3 - 1
examples/lu/slu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real float LU version, implicit dependencies with partial pivoting */
+
 #include "lu-float.h"
 #include "xlu_implicit_pivot.c"

+ 3 - 1
examples/lu/slu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real float LU kernels */
+
 #include "lu-float.h"
 #include "xlu_kernels.c"

+ 3 - 1
examples/lu/slu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Real float LU kernels with partial pivoting */
+
 #include "lu-float.h"
 #include "xlu_pivot.c"

+ 2 - 1
examples/lu/xlu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
@@ -16,6 +16,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* LU StarPU implementation using explicit tag dependencies */
 #include "xlu.h"
 #include "xlu_kernels.h"
 

+ 2 - 0
examples/lu/xlu_implicit.c

@@ -16,6 +16,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* LU StarPU implementation using implicit task dependencies. */
+
 #include "xlu.h"
 #include "xlu_kernels.h"
 

+ 3 - 0
examples/lu/xlu_implicit_pivot.c

@@ -16,6 +16,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* LU StarPU implementation using implicit task dependencies and partial
+ * pivoting */
+
 #include "xlu.h"
 #include "xlu_kernels.h"
 

+ 3 - 1
examples/lu/xlu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2012, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* LU Kernels */
+
 #include "xlu.h"
 #include <math.h>
 

+ 3 - 1
examples/lu/xlu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012, 2014  Université de Bordeaux
+ * Copyright (C) 2009-2012, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* LU Kernels with partial pivoting */
+
 #include "xlu.h"
 #include "xlu_kernels.h"
 

+ 3 - 1
examples/lu/zlu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex double LU version, explicit dependencies */
+
 #include "complex_double.h"
 #include "xlu.c"

+ 3 - 1
examples/lu/zlu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex double LU version, implicit dependencies */
+
 #include "complex_double.h"
 #include "xlu_implicit.c"

+ 3 - 1
examples/lu/zlu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex double LU version, implicit dependencies with partial pivoting */
+
 #include "complex_double.h"
 #include "xlu_implicit_pivot.c"

+ 3 - 1
examples/lu/zlu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex double LU kernels */
+
 #include "complex_double.h"
 #include "xlu_kernels.c"

+ 3 - 1
examples/lu/zlu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,5 +15,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Complex double LU kernels with partial pivoting */
+
 #include "complex_double.h"
 #include "xlu_pivot.c"

+ 6 - 0
examples/mandelbrot/mandelbrot.c

@@ -15,6 +15,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes the Mandelbrot set: the output image is split in horizontal
+ * stripes, which are computed in parallel.  We also make the same computation
+ * several times, so that OpenGL interaction allows browsing through the set.
+ */
+
 #include <starpu.h>
 #include <math.h>
 #include <limits.h>

+ 5 - 0
examples/mult/xgemm.c

@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * Simple parallel GEMM implementation: partition the output matrix in the two
+ * dimensions, and the input matrices in the corresponding dimension, and
+ * perform the output computations in parallel.
+ */
 #ifndef TYPE
 #error "Do not compile xgemm.c directly, compile sgemm.c or dgemm.c"
 #endif

+ 7 - 1
examples/openmp/vector_scal_omp.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
- * Copyright (C) 2010-2013  Université de Bordeaux
+ * Copyright (C) 2010-2013, 2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,6 +15,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This shows how to use an OpenMP parallel implementation for a StarPU
+ * forkjoin parallel task.
+ * This is just a vector scaling example.
+ */
+
 /* gcc build:
 
    gcc -fopenmp -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)

+ 10 - 1
examples/pi/pi.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux
+ * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
@@ -16,6 +16,15 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes Pi by drawing random coordinates (thanks to the sobol
+ * generator) and checking whether they fall within one quarter of a circle.  The
+ * proportion gives an approximation of Pi. For each task, we draw a number of
+ * coordinates, and we gather the number of successful draws.
+ *
+ * TODO: use curandGenerateUniform instead of the sobol generator, like pi_redux.c does
+ */
+
 #include "SobolQRNG/sobol.h"
 #include "SobolQRNG/sobol_gold.h"
 #include "pi.h"

+ 4 - 1
examples/pi/pi_kernel.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2013  Université de Bordeaux
+ * Copyright (C) 2010, 2013, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2012, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* First draw a series of coordinates, then count how many fall inside the
+ * circle quarter */
+
 #include "SobolQRNG/sobol_gpu.h"
 #include "pi.h"
 

+ 10 - 0
examples/pi/pi_redux.c

@@ -14,6 +14,16 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes Pi by drawing random coordinates (thanks to the sobol
+ * generator) and checking whether they fall within one quarter of a circle.  The
+ * proportion gives an approximation of Pi. For each task, we draw a number of
+ * coordinates, and we gather the number of successful draws.
+ *
+ * This version uses reduction to optimize gathering the number of successful
+ * draws.
+ */
+
 #include <starpu.h>
 #include <stdlib.h>
 

+ 3 - 1
examples/pi/pi_redux_kernel.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* This counts how many drawn coordinates fall inside the circle quarter */
+
 #include <starpu.h>
 
 #define MAXNBLOCKS	128

+ 4 - 1
examples/ppm_downscaler/ppm_downscaler.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* This uses a dummy algorithm to downscale a ppm file. */
+/* TODO: turn this into StarPU. */
+
 #include "ppm_downscaler.h"
 
 #include <starpu.h>

+ 5 - 0
examples/ppm_downscaler/yuv_downscaler.c

@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This uses a dummy algorithm to downscale a 1920x1080 yuv film.
+ * Each frame is split in horizontal stripes which are processed in parallel.
+ */
+
 #include <starpu.h>
 
 #include <sys/types.h>

+ 4 - 0
examples/profiling/profiling.c

@@ -15,6 +15,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This exemplifies how to get task execution profiling from the application.
+ */
+
 #include <starpu.h>
 #include <assert.h>
 #include <unistd.h>

+ 5 - 0
examples/reductions/dot_product.c

@@ -15,6 +15,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes the dot product of a big vector, using data reduction to
+ * optimize the dot reduction.
+ */
+
 #include <starpu.h>
 #include <assert.h>
 #include <math.h>

+ 2 - 0
examples/reductions/dot_product_kernels.cu

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Trivial dot reduction CUDA kernel */
+
 #include <starpu.h>
 
 #define DOT_TYPE double

+ 2 - 0
examples/reductions/dot_product_opencl_kernels.cl

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* Trivial dot reduction OpenCL kernel */
+
 #include "dot_product.h"
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable

+ 5 - 0
examples/reductions/minmax_reduction.c

@@ -14,6 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes the minimum and maximum values of a big vector, using data
+ * reduction to optimize the computation.
+ */
+
 #include <assert.h>
 #include <float.h>
 #include <limits.h>

+ 7 - 1
examples/scheduler/dummy_sched.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2014  Université de Bordeaux
+ * Copyright (C) 2010-2015  Université de Bordeaux
  * Copyright (C) 2010-2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This is an example of an application-defined scheduler.
+ * This is a mere eager scheduler with a centralized list of tasks to schedule:
+ * when a task becomes ready (push) it is put on the list. When a device
+ * becomes ready (pop), a task is taken from the list.
+ */
 #include <starpu.h>
 #include <starpu_scheduler.h>
 

+ 4 - 0
examples/scheduler/heteroprio_test.c

@@ -14,6 +14,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This is an example making use of the heteroprio scheduler; it shows how
+ * priorities are taken into account.
+ */
 #include <starpu.h>
 #include <schedulers/starpu_heteroprio.h>
 #include <unistd.h>

+ 6 - 0
examples/spmd/vector_scal_spmd.c

@@ -15,6 +15,12 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This shows how to implement an SPMD parallel StarPU task: scal_cpu_func is
+ * called in parallel over several cores, and has to split the work accordingly.
+ * This is a mere vector scaling example.
+ */
+
 /* gcc build:
 
    gcc -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)

+ 5 - 0
examples/spmv/dw_block_spmv.c

@@ -16,6 +16,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes an SPMV on a BCSR sparse matrix. It simply splits the matrix
+ * into its blocks, thus turning the problem into mere matrix-vector products
+ * (GEMV) which can be run in parallel.
+ */
 #include "dw_block_spmv.h"
 #include "matrix_market/mm_to_bcsr.h"
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)

+ 4 - 1
examples/spmv/dw_block_spmv_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2015  Université de Bordeaux
  * Copyright (C) 2010, 2011  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * Standard GEMV kernel (on one matrix block of the sparse matrix)
+ */
 #include "dw_block_spmv.h"
 
 /*

+ 6 - 2
examples/spmv/spmv.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011, 2013-2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2011, 2013-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
@@ -16,6 +16,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * This computes an SPMV with a CSR sparse matrix, by splitting it in
+ * horizontal stripes and processing them in parallel.
+ */
 #include "spmv.h"
 
 unsigned nblocks = 4;
@@ -44,7 +48,7 @@ static void parse_args(int argc, char **argv)
 }
 
 /* This filter function takes a CSR matrix, and divides it into nparts with the
- * same number of non-zero entries. */
+ * same number of rows. */
 static void csr_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
 {
 	struct starpu_csr_interface *csr_father = (struct starpu_csr_interface *) father_interface;

+ 3 - 1
examples/spmv/spmv_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -15,6 +15,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* CUDA kernel for SPMV */
+
 #include <starpu.h>
 
 #define MIN(a,b)	((a)<(b)?(a):(b))

+ 3 - 1
examples/spmv/spmv_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011, 2014  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2011, 2014-2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
@@ -16,6 +16,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* OpenCL codelet for SPMV */
+
 #include "spmv.h"
 
 #ifdef STARPU_USE_OPENCL

+ 2 - 0
examples/spmv/spmv_opencl.cl

@@ -14,6 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* OpenCL kernel for SPMV */
+
 __kernel void spmv(int nnz, int nrow,
                    __global float* nzval, __global unsigned* colind,
                    __global unsigned* rowptr, int firstentry,

+ 0 - 0
examples/tag_example/tag_restartable.c


Some files were not shown because too many files changed in this diff