10 years ago · 00b1ef6f74
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -182,7 +182,6 @@ STARPU_EXAMPLES =				\
 
				 	filters/fvector				\
			
 
				 	filters/fblock				\
			
 
				 	filters/fmatrix				\
			
 
				-	filters/subdata				\
			
 
				 	tag_example/tag_example			\
			
 
				 	tag_example/tag_example2		\
			
 
				 	tag_example/tag_example3		\
			
--- a/examples/README.txt
+++ b/examples/README.txt
@@ -29,16 +29,13 @@ cpp
 
				 filters
			
 
				 	This contains several partitioning examples
			
 
				 
			
 
				-fortran
			
 
				-	This shows how to use StarPU from Fortran
			
 
				+fortran90
			
 
				+	This shows how to use StarPU from Fortran90
			
 
				 
			
 
				 gl_interop
			
 
				 	This shows how interoperation can be done between StarPU CUDA
			
 
				 	computations and OpenGL rendering
			
 
				 
			
 
				-gordon
			
 
				-	This was an example for the Cell architecture, now deprecated
			
 
				-
			
 
				 heat
			
 
				         This uses a finite element method to compute heat propagation thanks to
			
 
				         an LU factorization or a conjugate gradient
			
--- a/examples/audio/starpu_audio_processing.c
+++ b/examples/audio/starpu_audio_processing.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2012, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2012, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
@@ -16,6 +16,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This reads a wave file, splits it into chunks, and on each of them runs a
			
 
				+ * task which performs an fft, drops some high and low frequencies, and performs
			
 
				+ * the inverse fft.  It then writes the output to a wave file.
			
 
				+ */
			
 
				+
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <math.h>
			
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
			
 
				  *
			
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This creates two dumb vectors, splits them into chunks, and for each pair of
			
 
				+ * chunks, runs axpy on them.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <stdio.h>
			
--- a/examples/axpy/axpy_opencl.c
+++ b/examples/axpy/axpy_opencl.c
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* OpenCL codelet for axpy */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include "axpy.h"
			
 
				 
			
--- a/examples/axpy/axpy_opencl_kernel.cl
+++ b/examples/axpy/axpy_opencl_kernel.cl
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* OpenCL kernel implementing axpy */
			
 
				+
			
 
				 #include "axpy.h"
			
 
				 
			
 
				 __kernel void _axpy_opencl(__global TYPE *x,
			
--- a/examples/binary/binary.c
+++ b/examples/binary/binary.c
@@ -15,6 +15,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This shows how to load OpenCL programs, either from a .cl file, or from a
			
 
				+ * string containing the program itself.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
--- a/examples/callback/callback.c
+++ b/examples/callback/callback.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2013  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2013, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This is an example of using a callback. We submit a task, whose callback
			
 
				+ * submits another task (without any callback).
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
--- a/examples/callback/prologue.c
+++ b/examples/callback/prologue.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2013-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,13 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This is an example of using a prologue callback. We submit a task, whose
			
 
				+ * prologue callback (i.e. before task gets scheduled) prints a value, and
			
 
				+ * whose pop_prologue callback (i.e. after task gets scheduled, but before task
			
 
				+ * execution) prints another value.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
@@ -37,28 +44,18 @@ struct starpu_codelet cl =
 
				 	.name = "callback"
			
 
				 };
			
 
				 
			
 
				-void callback_func(void *callback_arg)
			
 
				-{
			
 
				-	int ret;
			
 
				-
			
 
				-	struct starpu_task *task = starpu_task_create();
			
 
				-	task->cl = &cl;
			
 
				-	task->handles[0] = handle;
			
 
				-
			
 
				-	ret = starpu_task_submit(task);
			
 
				-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				-}
			
 
				-
			
 
				 void prologue_callback_func(void *callback_arg)
			
 
				 {
			
 
				 	double *x = (double*)callback_arg;
			
 
				 	printf("x = %lf\n", *x);
			
 
				+	STARPU_ASSERT(*x == -999.0);
			
 
				 }
			
 
				 
			
 
				 void pop_prologue_callback_func(void *args)
			
 
				 {
			
 
				 	unsigned val = (uintptr_t) args;
			
 
				 	printf("pop_prologue_callback val %d \n", val);
			
 
				+	STARPU_ASSERT(val == 5);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -73,12 +70,12 @@ int main(int argc, char **argv)
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				 	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int));
			
 
				-	double *x = (double*)malloc(sizeof(double));
			
 
				+	double x = -999.0;
			
 
				 
			
 
				 	struct starpu_task *task = starpu_task_create();
			
 
				 	task->cl = &cl;
			
 
				-	task->prologue_callback_func = callback_func;
			
 
				-	task->prologue_callback_arg = NULL;
			
 
				+	task->prologue_callback_func = prologue_callback_func;
			
 
				+	task->prologue_callback_arg = &x;
			
 
				 
			
 
				 	task->prologue_callback_pop_func = pop_prologue_callback_func;
			
 
				 	task->prologue_callback_pop_arg = (void*) 5;
			
@@ -89,11 +86,10 @@ int main(int argc, char **argv)
 
				 	if (ret == -ENODEV) goto enodev;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				 
			
 
				-	*x = -999.0;
			
 
				 	ret = starpu_task_insert(&cl,
			
 
				 				 STARPU_RW, handle,
			
 
				 				 STARPU_PROLOGUE_CALLBACK, prologue_callback_func,
			
 
				-				 STARPU_PROLOGUE_CALLBACK_ARG, x,
			
 
				+				 STARPU_PROLOGUE_CALLBACK_ARG, &x,
			
 
				 				 STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func,
			
 
				 				 STARPU_PROLOGUE_CALLBACK_POP_ARG, 5,
			
 
				 				 0);
			
@@ -104,7 +100,6 @@ int main(int argc, char **argv)
 
				 
			
 
				 enodev:
			
 
				 	starpu_data_unregister(handle);
			
 
				-	free(x);
			
 
				 	FPRINTF(stderr, "v -> %d\n", v);
			
 
				 	starpu_shutdown();
			
 
				 	return (ret == -ENODEV) ? 77 : 0;
			
--- a/examples/cg/cg.c
+++ b/examples/cg/cg.c
@@ -62,7 +62,9 @@
 
				  *			d <- r + beta d
			
 
				  *			i <- i + 1
			
 
				  *		}
			
 
				- *	
			
 
				+ *
			
 
				+ *	The dot() operations make use of reduction to optimize parallelism.
			
 
				+ *
			
 
				  */
			
 
				 
			
 
				 #include "cg.h"
			
--- a/examples/cg/cg_dot_kernel.cu
+++ b/examples/cg/cg_dot_kernel.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012, 2015  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -22,6 +22,12 @@
 
				 #define MAXNBLOCKS	128
			
 
				 #define MAXTHREADSPERBLOCK	256
			
 
				 
			
 
				+/*
			
 
				+ * Dot product kernel
			
 
				+ * We first perform dot computation in parallel in dot_device, and then we
			
 
				+ * gather the dot values into one in gather_dot_device.
			
 
				+ */
			
 
				+
			
 
				 static __global__ void dot_device(TYPE *vx, TYPE *vy, unsigned n, TYPE *dot_array)
			
 
				 {
			
 
				 	__shared__ TYPE scnt[MAXTHREADSPERBLOCK];
			
@@ -126,6 +132,10 @@ extern "C" void dot_host(TYPE *x, TYPE *y, unsigned nelems, TYPE *dot)
 
				 	cudaFree(per_block_sum);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Fill a vector with zeroes
			
 
				+ */
			
 
				+
			
 
				 static __global__ void zero_vector_device(TYPE *x, unsigned nelems, unsigned nelems_per_thread)
			
 
				 {
			
 
				 	unsigned i;
			
--- a/examples/cg/cg_kernels.c
+++ b/examples/cg/cg_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010, 2012-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2012-2015  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -14,6 +14,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * Standard BLAS kernels used by CG
			
 
				+ */
			
 
				+
			
 
				 #include "cg.h"
			
 
				 #include <math.h>
			
 
				 #include <limits.h>
			
--- a/examples/cholesky/cholesky_grain_tag.c
+++ b/examples/cholesky/cholesky_grain_tag.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
@@ -16,6 +16,16 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This version of the Cholesky factorization uses explicit dependency
			
 
				+ * declaration through dependency tags.
			
 
				+ * It also uses data partitioning to split the matrix into submatrices.
			
 
				+ * It also changes the partitioning during execution: when called first,
			
 
				+ * cholesky_grain_rec splits the matrix with a big granularity (nblocks) and
			
 
				+ * processes nbigblocks blocks, before calling itself again, to process the
			
 
				+ * remainder of the matrix with a smaller granularity.
			
 
				+ */
			
 
				+
			
 
				 #include "cholesky.h"
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA)
			
--- a/examples/cholesky/cholesky_implicit.c
+++ b/examples/cholesky/cholesky_implicit.c
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This version of the Cholesky factorization uses implicit dependency computation.
			
 
				+ * The whole algorithm thus appears clearly in the task submission loop in _cholesky().
			
 
				+ */
			
 
				+
			
 
				 #include "cholesky.h"
			
 
				 #include "../sched_ctx_utils/sched_ctx_utils.h"
			
 
				 
			
--- a/examples/cholesky/cholesky_kernels.c
+++ b/examples/cholesky/cholesky_kernels.c
@@ -15,6 +15,13 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * Standard kernels for the Cholesky factorization
			
 
				+ * U22 is the gemm update
			
 
				+ * U21 is the trsm update
			
 
				+ * U11 is the cholesky factorization
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include "cholesky.h"
			
 
				 #include "../common/blas.h"
			
--- a/examples/cholesky/cholesky_models.c
+++ b/examples/cholesky/cholesky_models.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2011  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2011, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
@@ -17,6 +17,11 @@
 
				  */
			
 
				 
			
 
				 /*
			
 
				+ * Example of a cost model for BLAS operations.  This is really just an
			
 
				+ * example!
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				  * As a convention, in that file, buffers[0] is represented by A,
			
 
				  * 				  buffers[1] is B ...
			
 
				  */
			
--- a/examples/cholesky/cholesky_tag.c
+++ b/examples/cholesky/cholesky_tag.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
@@ -16,6 +16,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This version of the Cholesky factorization uses explicit dependency
			
 
				+ * declaration through dependency tags.
			
 
				+ * It also uses data partitioning to split the matrix into submatrices
			
 
				+ */
			
 
				+
			
 
				 #include "cholesky.h"
			
 
				 #include <starpu_perfmodel.h>
			
 
				 
			
--- a/examples/cholesky/cholesky_tile_tag.c
+++ b/examples/cholesky/cholesky_tile_tag.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This version of the Cholesky factorization uses explicit dependency
			
 
				+ * declaration through dependency tags.
			
 
				+ * It also directly registers matrix tiles instead of using partitioning.
			
 
				+ */
			
 
				+
			
 
				 #include "cholesky.h"
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA)
			
--- a/examples/cpp/incrementer_cpp.cpp
+++ b/examples/cpp/incrementer_cpp.cpp
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  CNRS
			
 
				  * Copyright (C) 2012 INRIA
			
 
				  *
			
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This is a small example of a C++ program using starpu.  We here just
			
 
				+ * increment two values of a vector several times.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
--- a/examples/filters/fblock.c
+++ b/examples/filters/fblock.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This exemplifies how to use partitioning filters.  We here just split a 3D
			
 
				+ * matrix into 3D slices (along the X axis), and run a dumb kernel on them.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define NX    5
			
--- a/examples/filters/fblock_cpu.c
+++ b/examples/filters/fblock_cpu.c
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* dumb kernel to fill a 3D matrix */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 void cpu_func(void *buffers[], void *cl_arg)
			
--- a/examples/filters/fblock_cuda.cu
+++ b/examples/filters/fblock_cuda.cu
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* dumb CUDA kernel to fill a 3D matrix */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)
			
--- a/examples/filters/fblock_opencl.c
+++ b/examples/filters/fblock_opencl.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				- * Copyright (C) 2011, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2011, 2014-2015  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* dumb OpenCL codelet to fill a 3D matrix */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       	\
			
--- a/examples/filters/fblock_opencl_kernel.cl
+++ b/examples/filters/fblock_opencl_kernel.cl
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* dumb OpenCL kernel to fill a 3D matrix */
			
 
				+
			
 
				 __kernel void fblock_opencl(__global int* block, unsigned offset, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
			
 
				 {
			
 
				         int i, j, k;
			
--- a/examples/filters/fmatrix.c
+++ b/examples/filters/fmatrix.c
@@ -14,6 +14,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This exemplifies how to use partitioning filters.  We here just split a 2D
			
 
				+ * matrix into 2D slices (along the X axis), and run a dumb kernel on them.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define NX    5
			
--- a/examples/filters/fvector.c
+++ b/examples/filters/fvector.c
@@ -14,6 +14,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This exemplifies how to use partitioning filters.  We here just split a
			
 
				+ * vector into slices, and run a dumb kernel on them.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define NX    21
			
--- a/examples/filters/subdata.c
+++ b/examples/filters/subdata.c
@@ -1,203 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2015  CNRS
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-#include <starpu.h>
			
 
				-
			
 
				-#define NX    6
			
 
				-#define NY    4
			
 
				-#define PARTS 2
			
 
				-
			
 
				-#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
 
				-
			
 
				-void cpu_func(void *buffers[], void *cl_arg)
			
 
				-{
			
 
				-        unsigned i, j;
			
 
				-        int *factor = (int *) cl_arg;
			
 
				-
			
 
				-        /* length of the matrix */
			
 
				-        unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]);
			
 
				-        unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]);
			
 
				-        unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]);
			
 
				-        /* local copy of the matrix pointer */
			
 
				-        int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
			
 
				-
			
 
				-	FPRINTF(stderr, "applying factor %d\n", *factor);
			
 
				-        for(j=0; j<ny ; j++)
			
 
				-	{
			
 
				-                for(i=0; i<nx ; i++)
			
 
				-		{
			
 
				-                        FPRINTF(stderr, "%4d ", val[(j*ld)+i]);
			
 
				-                        val[(j*ld)+i] *= *factor;
			
 
				-		}
			
 
				-		FPRINTF(stderr,"\n");
			
 
				-        }
			
 
				-	FPRINTF(stderr,"\n");
			
 
				-}
			
 
				-
			
 
				-struct starpu_codelet cl =
			
 
				-{
			
 
				-	.cpu_funcs = {cpu_func},
			
 
				-	.cpu_funcs_name = {"cpu_func"},
			
 
				-	.nbuffers = 1,
			
 
				-	.modes = {STARPU_RW},
			
 
				-	.name = "matrix_scal"
			
 
				-};
			
 
				-
			
 
				-void split_func(void *buffers[], void *cl_arg)
			
 
				-{
			
 
				-        unsigned i, j;
			
 
				-        int *factor = (int *) cl_arg;
			
 
				-
			
 
				-        /* length of the matrix */
			
 
				-        unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]);
			
 
				-        unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]);
			
 
				-        unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]);
			
 
				-
			
 
				-        /* local copy of the matrix pointer */
			
 
				-        int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
			
 
				-
			
 
				-	FPRINTF(stderr, "splitting\n");
			
 
				-        for(j=0; j<ny ; j++)
			
 
				-	{
			
 
				-                for(i=0; i<nx ; i++)
			
 
				-		{
			
 
				-                        FPRINTF(stderr, "%4d ", val[(j*ld)+i]);
			
 
				-		}
			
 
				-                FPRINTF(stderr,"\n");
			
 
				-        }
			
 
				-	FPRINTF(stderr,"\n");
			
 
				-
			
 
				-	starpu_data_handle_t submatrix = starpu_data_lookup(val);
			
 
				-        /* Partition the sub-matrix in PARTS sub-sub-matrices */
			
 
				-	struct starpu_data_filter f =
			
 
				-	{
			
 
				-		.filter_func = starpu_matrix_filter_block,
			
 
				-		.nchildren = PARTS
			
 
				-	};
			
 
				-	starpu_data_partition(submatrix, &f);
			
 
				-
			
 
				-        /* Submit a task on each sub-vector */
			
 
				-	for (i=0; i<starpu_data_get_nb_children(submatrix); i++)
			
 
				-	{
			
 
				-                struct starpu_task *task = starpu_task_create();
			
 
				-		task->handles[0] = starpu_data_get_sub_data(submatrix, 1, i);
			
 
				-                task->cl = &cl;
			
 
				-                task->cl_arg = factor;
			
 
				-                task->cl_arg_size = sizeof(*factor);
			
 
				-
			
 
				-		int ret = starpu_task_submit(task);
			
 
				-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				-	}
			
 
				-	//starpu_data_unpartition(submatrix, STARPU_MAIN_RAM);
			
 
				-}
			
 
				-
			
 
				-int main(int argc, char **argv)
			
 
				-{
			
 
				-	unsigned j, n=1;
			
 
				-        int matrix[NX*NY];
			
 
				-	int ret, i;
			
 
				-	int factor = 12;
			
 
				-
			
 
				-        FPRINTF(stderr,"IN  Matrix: \n");
			
 
				-        for(j=0 ; j<NY ; j++)
			
 
				-	{
			
 
				-                for(i=0 ; i<NX ; i++)
			
 
				-		{
			
 
				-                        matrix[(j*NX)+i] = n++;
			
 
				-                        FPRINTF(stderr, "%4d ", matrix[(j*NX)+i]);
			
 
				-                }
			
 
				-                FPRINTF(stderr,"\n");
			
 
				-        }
			
 
				-        FPRINTF(stderr,"\n");
			
 
				-
			
 
				-        starpu_data_handle_t handle;
			
 
				-        struct starpu_codelet split_cl =
			
 
				-	{
			
 
				-                .cpu_funcs = {split_func},
			
 
				-                .cpu_funcs_name = {"split_func"},
			
 
				-                .nbuffers = 1,
			
 
				-		.modes = {STARPU_RW},
			
 
				-		.name = "split_matrix"
			
 
				-        };
			
 
				-
			
 
				-        ret = starpu_init(NULL);
			
 
				-	if (ret == -ENODEV)
			
 
				-		return 77;
			
 
				-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				-
			
 
				-	/* Declare data to StarPU */
			
 
				-	starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0]));
			
 
				-
			
 
				-        /* Partition the matrix in PARTS sub-matrices */
			
 
				-	struct starpu_data_filter f =
			
 
				-	{
			
 
				-		.filter_func = starpu_matrix_filter_block,
			
 
				-		.nchildren = PARTS
			
 
				-	};
			
 
				-	starpu_data_partition(handle, &f);
			
 
				-
			
 
				-        /* Submit a task on each sub-vector */
			
 
				-	for (i=0; i<starpu_data_get_nb_children(handle); i++)
			
 
				-	{
			
 
				-                struct starpu_task *task = starpu_task_create();
			
 
				-		starpu_data_handle_t subdata = starpu_data_get_sub_data(handle, 1, i);
			
 
				-		task->handles[0] = subdata;
			
 
				-                task->cl = &split_cl;
			
 
				-                task->cl_arg = &factor;
			
 
				-                task->cl_arg_size = sizeof(factor);
			
 
				-
			
 
				-		ret = starpu_task_submit(task);
			
 
				-		if (ret == -ENODEV) goto enodev;
			
 
				-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				-	}
			
 
				-
			
 
				-	starpu_task_wait_for_all();
			
 
				-	for (i=0; i<starpu_data_get_nb_children(handle); i++)
			
 
				-	{
			
 
				-		starpu_data_handle_t subdata = starpu_data_get_sub_data(handle, 1, i);
			
 
				-		starpu_data_unpartition(subdata, STARPU_MAIN_RAM);
			
 
				-	}
			
 
				-
			
 
				-        /* Unpartition the data, unregister it from StarPU and shutdown */
			
 
				-	starpu_data_unpartition(handle, STARPU_MAIN_RAM);
			
 
				-        starpu_data_unregister(handle);
			
 
				-	starpu_shutdown();
			
 
				-
			
 
				-        /* Print result matrix */
			
 
				-	n=1;
			
 
				-        FPRINTF(stderr,"OUT Matrix: \n");
			
 
				-        for(j=0 ; j<NY ; j++)
			
 
				-	{
			
 
				-                for(i=0 ; i<NX ; i++)
			
 
				-		{
			
 
				-                        FPRINTF(stderr, "%4d ", matrix[(j*NX)+i]);
			
 
				-			if (matrix[(j*NX)+i] != n*12)
			
 
				-			{
			
 
				-				FPRINTF(stderr, "Incorrect result %4d != %4d", matrix[(j*NX)+i], n*12);
			
 
				-				ret=1;
			
 
				-			}
			
 
				-			n++;
			
 
				-                }
			
 
				-                FPRINTF(stderr,"\n");
			
 
				-        }
			
 
				-        FPRINTF(stderr,"\n");
			
 
				-
			
 
				-	return ret;
			
 
				-
			
 
				-enodev:
			
 
				-	starpu_shutdown();
			
 
				-	return 77;
			
 
				-}
			
--- a/examples/fortran/hello_c.c
+++ b/examples/fortran/hello_c.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This shows how to call a fortran function from a C function
			
 
				+ */
			
 
				+
			
 
				 #include <stdlib.h>
			
 
				 #include <stdio.h>
			
 
				 #include <stdint.h>
			
--- a/examples/fortran90/f90_example.f90
+++ b/examples/fortran90/f90_example.f90
@@ -14,6 +14,10 @@
 
				 !
			
 
				 ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				 
			
 
				+! This is an example of a Fortran90 program making use of StarPU.
			
 
				+! It registers a few matrices for each element of a domain, performs
			
 
				+! update computations on them, and checks the result.
			
 
				+
			
 
				 PROGRAM f90_example
			
 
				 
			
 
				   USE mod_types
			
--- a/examples/fortran90/marshalling.c
+++ b/examples/fortran90/marshalling.c
@@ -16,6 +16,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Helper functions to initialize StarPU and register element matrices */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 //--------------------------------------------------------------//
			
--- a/examples/fortran90/mod_compute.f90
+++ b/examples/fortran90/mod_compute.f90
@@ -14,6 +14,8 @@
 
				 !
			
 
				 ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				 
			
 
				+! Computation kernels for the simulation
			
 
				+
			
 
				 MODULE mod_compute
			
 
				 
			
 
				   USE mod_types
			
--- a/examples/fortran90/mod_interface.f90
+++ b/examples/fortran90/mod_interface.f90
@@ -14,6 +14,8 @@
 
				 !
			
 
				 ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				 
			
 
				+! Fortran module interface for StarPU initialization and element registration
			
 
				+
			
 
				 MODULE mod_interface
			
 
				 
			
 
				   INTERFACE
			
--- a/examples/heat/dw_factolu.c
+++ b/examples/heat/dw_factolu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
@@ -16,6 +16,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This implements an LU factorization.
			
 
				+ * The task graph is submitted through continuation: the rest of the graph is
			
 
				+ * submitted as appropriate in the tasks' callback.
			
 
				+ */
			
 
				+
			
 
				 #include "dw_factolu.h"
			
 
				 
			
 
				 #if 0
			
--- a/examples/heat/dw_factolu_grain.c
+++ b/examples/heat/dw_factolu_grain.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
@@ -16,6 +16,15 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This implements an LU factorization.
			
 
				+ * The task graph is submitted through dependency tags.
			
 
				+ * It also changes the partitioning during execution: when called first,
			
 
				+ * dw_factoLU_grain_inner splits the matrix with a big granularity (nblocks)
			
 
				+ * and processes nbigblocks blocks, before calling itself again, to process the
			
 
				+ * remainder of the matrix with a smaller granularity.
			
 
				+ */
			
 
				+
			
 
				 #include "dw_factolu.h"
			
 
				 
			
 
				 #define TAG11(k, prefix)	((starpu_tag_t)( (((unsigned long long)(prefix))<<60)  |  (1ULL<<56) | (unsigned long long)(k)))
			
--- a/examples/heat/dw_factolu_kernels.c
+++ b/examples/heat/dw_factolu_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2012, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2012, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * These are standard BLAS kernels for the LU factorization
			
 
				+ */
			
 
				 #include "dw_factolu.h"
			
 
				 
			
 
				 unsigned count_11_per_worker[STARPU_NMAXWORKERS] = {0};
			
--- a/examples/heat/dw_factolu_tag.c
+++ b/examples/heat/dw_factolu_tag.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This implements an LU factorization.
			
 
				+ * The task graph is submitted through dependency tags.
			
 
				+ */
			
 
				+
			
 
				 #include "dw_factolu.h"
			
 
				 
			
 
				 #define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
			
--- a/examples/heat/dw_sparse_cg.c
+++ b/examples/heat/dw_sparse_cg.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2011, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -17,6 +17,7 @@
 
				 
			
 
				 /*
			
 
				  * Conjugate gradients for Sparse matrices
			
 
				+ * The task graph is declared through tag dependencies
			
 
				  */
			
 
				 
			
 
				 #include "dw_sparse_cg.h"
			
--- a/examples/heat/heat.c
+++ b/examples/heat/heat.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2012  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2012, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * Heat propagation simulation through either direct LU factorization or
			
 
				+ * iterative conjugate gradient.
			
 
				+ */
			
 
				 #include "heat.h"
			
 
				 
			
 
				 /* default values */
			
--- a/examples/incrementer/incrementer.c
+++ b/examples/incrementer/incrementer.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2011, 2013-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This is just a small example which increments two values of a vector several times.
			
 
				+ */
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 static unsigned niter = 50000;
			
--- a/examples/incrementer/incrementer_kernels.cu
+++ b/examples/incrementer/incrementer_kernels.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* CUDA kernel for incrementation */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 static __global__ void cuda_incrementer(float * tab)
			
--- a/examples/incrementer/incrementer_kernels_opencl.c
+++ b/examples/incrementer/incrementer_kernels_opencl.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				- * Copyright (C) 2011, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2011, 2014-2015  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* OpenCL codelet for incrementation */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 extern struct starpu_opencl_program opencl_program;
			
--- a/examples/incrementer/incrementer_kernels_opencl_kernel.cl
+++ b/examples/incrementer/incrementer_kernels_opencl_kernel.cl
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* OpenCL kernel for incrementation */
			
 
				+
			
 
				 __kernel void incrementer(__global float* input) 
			
 
				 {
			
 
				 	const int i = get_global_id(0);
			
--- a/examples/lu/clu.c
+++ b/examples/lu/clu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex float LU version, explicit dependencies */
			
 
				+
			
 
				 #include "complex_float.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/clu_implicit.c
+++ b/examples/lu/clu_implicit.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex float LU version, implicit dependencies */
			
 
				+
			
 
				 #include "complex_float.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/clu_implicit_pivot.c
+++ b/examples/lu/clu_implicit_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex float LU version, implicit dependencies, and partial pivoting */
			
 
				+
			
 
				 #include "complex_float.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/clu_kernels.c
+++ b/examples/lu/clu_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex float LU kernels */
			
 
				+
			
 
				 #include "complex_float.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/clu_pivot.c
+++ b/examples/lu/clu_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex float LU kernels with partial pivoting */
			
 
				+
			
 
				 #include "complex_float.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/lu/complex_double.h
+++ b/examples/lu/complex_double.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex double macros */
			
 
				 
			
 
				 #include <complex.h>
			
 
				 #include "blas_complex.h"
			
--- a/examples/lu/complex_float.h
+++ b/examples/lu/complex_float.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex float macros */
			
 
				 
			
 
				 #include <complex.h>
			
 
				 #include "blas_complex.h"
			
--- a/examples/lu/dlu.c
+++ b/examples/lu/dlu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real double LU version, explicit dependencies */
			
 
				+
			
 
				 #include "lu-double.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/dlu_implicit.c
+++ b/examples/lu/dlu_implicit.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real double LU version, implicit dependencies */
			
 
				+
			
 
				 #include "lu-double.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/dlu_implicit_pivot.c
+++ b/examples/lu/dlu_implicit_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real double LU version, implicit dependencies with partial pivoting */
			
 
				+
			
 
				 #include "lu-double.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/dlu_kernels.c
+++ b/examples/lu/dlu_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real double LU kernels */
			
 
				+
			
 
				 #include "lu-double.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/dlu_pivot.c
+++ b/examples/lu/dlu_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real double LU kernels with partial pivoting */
			
 
				+
			
 
				 #include "lu-double.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/lu/lu-double.h
+++ b/examples/lu/lu-double.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real double macros */
			
 
				+
			
 
				 #define TYPE double
			
 
				 #define CUBLAS_TYPE TYPE
			
 
				 
			
--- a/examples/lu/lu-float.h
+++ b/examples/lu/lu-float.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real float macros */
			
 
				 
			
 
				 #define TYPE float
			
 
				 #define CUBLAS_TYPE TYPE
			
--- a/examples/lu/lu_example.c
+++ b/examples/lu/lu_example.c
@@ -15,6 +15,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Main body for the LU factorization: matrix initialization and result
			
 
				+ * checking */
			
 
				+
			
 
				 #include <stdlib.h>
			
 
				 #include <stdio.h>
			
 
				 #include <string.h>
			
--- a/examples/lu/lu_example_complex_double.c
+++ b/examples/lu/lu_example_complex_double.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Main body for the LU factorization, complex double version */
			
 
				+
			
 
				 #include "complex_double.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/lu_example_complex_float.c
+++ b/examples/lu/lu_example_complex_float.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Main body for the LU factorization, complex float version */
			
 
				+
			
 
				 #include "complex_float.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/lu_example_double.c
+++ b/examples/lu/lu_example_double.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Main body for the LU factorization, real double version */
			
 
				+
			
 
				 #include "lu-double.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/lu_example_float.c
+++ b/examples/lu/lu_example_float.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Main body for the LU factorization, real float version */
			
 
				+
			
 
				 #include "lu-float.h"
			
 
				 #include "lu_example.c"
			
--- a/examples/lu/slu.c
+++ b/examples/lu/slu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real float LU version, explicit dependencies */
			
 
				+
			
 
				 #include "lu-float.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/slu_implicit.c
+++ b/examples/lu/slu_implicit.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real float LU version, implicit dependencies */
			
 
				+
			
 
				 #include "lu-float.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/slu_implicit_pivot.c
+++ b/examples/lu/slu_implicit_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real float LU version, implicit dependencies with partial pivoting */
			
 
				+
			
 
				 #include "lu-float.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/slu_kernels.c
+++ b/examples/lu/slu_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real float LU kernels */
			
 
				+
			
 
				 #include "lu-float.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/slu_pivot.c
+++ b/examples/lu/slu_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Real float LU kernels with partial pivoting */
			
 
				+
			
 
				 #include "lu-float.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/lu/xlu.c
+++ b/examples/lu/xlu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
@@ -16,6 +16,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* LU StarPU implementation using explicit tag dependencies */
			
 
				 #include "xlu.h"
			
 
				 #include "xlu_kernels.h"
			
 
				 
			
--- a/examples/lu/xlu_implicit.c
+++ b/examples/lu/xlu_implicit.c
@@ -16,6 +16,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* LU StarPU implementation using implicit task dependencies. */
			
 
				+
			
 
				 #include "xlu.h"
			
 
				 #include "xlu_kernels.h"
			
 
				 
			
--- a/examples/lu/xlu_implicit_pivot.c
+++ b/examples/lu/xlu_implicit_pivot.c
@@ -16,6 +16,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* LU StarPU implementation using implicit task dependencies and partial
			
 
				+ * pivoting */
			
 
				+
			
 
				 #include "xlu.h"
			
 
				 #include "xlu_kernels.h"
			
 
				 
			
--- a/examples/lu/xlu_kernels.c
+++ b/examples/lu/xlu_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2012, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2012, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2015  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* LU Kernels */
			
 
				+
			
 
				 #include "xlu.h"
			
 
				 #include <math.h>
			
 
				 
			
--- a/examples/lu/xlu_pivot.c
+++ b/examples/lu/xlu_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2012, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2012, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* LU Kernels with partial pivoting */
			
 
				+
			
 
				 #include "xlu.h"
			
 
				 #include "xlu_kernels.h"
			
 
				 
			
--- a/examples/lu/zlu.c
+++ b/examples/lu/zlu.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex double LU version, explicit dependencies */
			
 
				+
			
 
				 #include "complex_double.h"
			
 
				 #include "xlu.c"
			
--- a/examples/lu/zlu_implicit.c
+++ b/examples/lu/zlu_implicit.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex double LU version, implicit dependencies */
			
 
				+
			
 
				 #include "complex_double.h"
			
 
				 #include "xlu_implicit.c"
			
--- a/examples/lu/zlu_implicit_pivot.c
+++ b/examples/lu/zlu_implicit_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex double LU version, implicit dependencies with partial pivoting */
			
 
				+
			
 
				 #include "complex_double.h"
			
 
				 #include "xlu_implicit_pivot.c"
			
--- a/examples/lu/zlu_kernels.c
+++ b/examples/lu/zlu_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex double LU kernels */
			
 
				+
			
 
				 #include "complex_double.h"
			
 
				 #include "xlu_kernels.c"
			
--- a/examples/lu/zlu_pivot.c
+++ b/examples/lu/zlu_pivot.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,5 +15,7 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Complex double LU kernels with partial pivoting */
			
 
				+
			
 
				 #include "complex_double.h"
			
 
				 #include "xlu_pivot.c"
			
--- a/examples/mandelbrot/mandelbrot.c
+++ b/examples/mandelbrot/mandelbrot.c
@@ -15,6 +15,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes the Mandelbrot set: the output image is split in horizontal
			
 
				+ * stripes, which are computed in parallel.  We also make the same computation
			
 
				+ * several times, so that OpenGL interaction allows browsing through the set.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <math.h>
			
 
				 #include <limits.h>
			
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * Simple parallel GEMM implementation: partition the output matrix in the two
			
 
				+ * dimensions, and the input matrices in the corresponding dimension, and
			
 
				+ * perform the output computations in parallel.
			
 
				+ */
			
 
				 #ifndef TYPE
			
 
				 #error "Do not compile xgemm.c directly, compile sgemm.c or dgemm.c"
			
 
				 #endif
			
--- a/examples/openmp/vector_scal_omp.c
+++ b/examples/openmp/vector_scal_omp.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
			
 
				- * Copyright (C) 2010-2013  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2013, 2015  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -15,6 +15,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This shows how to use an OpenMP parallel implementation for a StarPU
			
 
				+ * forkjoin parallel task.
			
 
				+ * This is just a vector scaling example.
			
 
				+ */
			
 
				+
			
 
				 /* gcc build:
			
 
				 
			
 
				    gcc -fopenmp -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)
			
--- a/examples/pi/pi.c
+++ b/examples/pi/pi.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2011, 2013-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				  *
			
@@ -16,6 +16,15 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes Pi by drawing random coordinates (thanks to the sobol
			
 
				+ * generator) and checks whether they fall within one quarter of a circle.  The
			
 
				+ * proportion gives an approximation of Pi. For each task, we draw a number of
			
 
				+ * coordinates, and we gather the number of successful draws.
			
 
				+ *
			
 
				+ * TODO: use curandGenerateUniform instead of the sobol generator, like pi_redux.c does
			
 
				+ */
			
 
				+
			
 
				 #include "SobolQRNG/sobol.h"
			
 
				 #include "SobolQRNG/sobol_gold.h"
			
 
				 #include "pi.h"
			
--- a/examples/pi/pi_kernel.cu
+++ b/examples/pi/pi_kernel.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010, 2013  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2013, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012, 2015  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* First draw a series of coordinates, then count how many fall inside the
			
 
				+ * circle quarter */
			
 
				+
			
 
				 #include "SobolQRNG/sobol_gpu.h"
			
 
				 #include "pi.h"
			
 
				 
			
--- a/examples/pi/pi_redux.c
+++ b/examples/pi/pi_redux.c
@@ -14,6 +14,16 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes Pi by drawing random coordinates (thanks to the sobol
			
 
				+ * generator) and checks whether they fall within one quarter of a circle.  The
			
 
				+ * proportion gives an approximation of Pi. For each task, we draw a number of
			
 
				+ * coordinates, and we gather the number of successful draws.
			
 
				+ *
			
 
				+ * This version uses reduction to optimize gathering the number of successful
			
 
				+ * draws.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <stdlib.h>
			
 
				 
			
--- a/examples/pi/pi_redux_kernel.cu
+++ b/examples/pi/pi_redux_kernel.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* This counts how many coordinates fall inside the circle quarter */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define MAXNBLOCKS	128
			
--- a/examples/ppm_downscaler/ppm_downscaler.c
+++ b/examples/ppm_downscaler/ppm_downscaler.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* This uses a dummy algorithm to downscale a ppm file. */
			
 
				+/* TODO: turn this into StarPU. */
			
 
				+
			
 
				 #include "ppm_downscaler.h"
			
 
				 
			
 
				 #include <starpu.h>
			
--- a/examples/ppm_downscaler/yuv_downscaler.c
+++ b/examples/ppm_downscaler/yuv_downscaler.c
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This uses a dummy algorithm to downscale a 1920x1080 yuv film.
			
 
				+ * Each frame is split in horizontal stripes which are processed in parallel.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #include <sys/types.h>
			
--- a/examples/profiling/profiling.c
+++ b/examples/profiling/profiling.c
@@ -15,6 +15,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This exemplifies how to get task execution profiling from the application.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <assert.h>
			
 
				 #include <unistd.h>
			
--- a/examples/reductions/dot_product.c
+++ b/examples/reductions/dot_product.c
@@ -15,6 +15,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes the dot product of a big vector, using data reduction to
			
 
				+ * optimize the dot reduction.
			
 
				+ */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 #include <assert.h>
			
 
				 #include <math.h>
			
--- a/examples/reductions/dot_product_kernels.cu
+++ b/examples/reductions/dot_product_kernels.cu
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Trivial dot reduction CUDA kernel */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define DOT_TYPE double
			
--- a/examples/reductions/dot_product_opencl_kernels.cl
+++ b/examples/reductions/dot_product_opencl_kernels.cl
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* Trivial dot reduction OpenCL kernel */
			
 
				+
			
 
				 #include "dot_product.h"
			
 
				 
			
 
				 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
			
--- a/examples/reductions/minmax_reduction.c
+++ b/examples/reductions/minmax_reduction.c
@@ -14,6 +14,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes the minimum and maximum values of a big vector, using data
			
 
				+ * reduction to optimize the computation.
			
 
				+ */
			
 
				+
			
 
				 #include <assert.h>
			
 
				 #include <float.h>
			
 
				 #include <limits.h>
			
--- a/examples/scheduler/dummy_sched.c
+++ b/examples/scheduler/dummy_sched.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010-2013  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This is an example of an application-defined scheduler.
			
 
				+ * This is a mere eager scheduler with a centralized list of tasks to schedule:
			
 
				+ * when a task becomes ready (push) it is put on the list. When a device
			
 
				+ * becomes ready (pop), a task is taken from the list.
			
 
				+ */
			
 
				 #include <starpu.h>
			
 
				 #include <starpu_scheduler.h>
			
 
				 
			
--- a/examples/scheduler/heteroprio_test.c
+++ b/examples/scheduler/heteroprio_test.c
@@ -14,6 +14,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This is an example making use of the heteroprio scheduler; it shows how
			
 
				+ * priorities are taken into account.
			
 
				+ */
			
 
				 #include <starpu.h>
			
 
				 #include <schedulers/starpu_heteroprio.h>
			
 
				 #include <unistd.h>
			
--- a/examples/spmd/vector_scal_spmd.c
+++ b/examples/spmd/vector_scal_spmd.c
@@ -15,6 +15,12 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This shows how to implement an spmd parallel StarPU task: scal_cpu_func is
			
 
				+ * called in parallel over several cores, and has to split the work accordingly.
			
 
				+ * This is a mere vector scaling example.
			
 
				+ */
			
 
				+
			
 
				 /* gcc build:
			
 
				 
			
 
				    gcc -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)
			
--- a/examples/spmv/dw_block_spmv.c
+++ b/examples/spmv/dw_block_spmv.c
@@ -16,6 +16,11 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes an SPMV on a BCSR sparse matrix. It simply splits the matrix
			
 
				+ * into its blocks, thus turning the problem into mere matrix-vector products
			
 
				+ * (GEMV) which can be run in parallel.
			
 
				+ */
			
 
				 #include "dw_block_spmv.h"
			
 
				 #include "matrix_market/mm_to_bcsr.h"
			
 
				 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
--- a/examples/spmv/dw_block_spmv_kernels.c
+++ b/examples/spmv/dw_block_spmv_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010-2011  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010-2011, 2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,9 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * Standard GEMV kernel (on one matrix block of the sparse matrix)
			
 
				+ */
			
 
				 #include "dw_block_spmv.h"
			
 
				 
			
 
				 /*
			
--- a/examples/spmv/spmv.c
+++ b/examples/spmv/spmv.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2011, 2013-2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2011, 2013-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
@@ -16,6 +16,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * This computes an SPMV with a CSR sparse matrix, by splitting it in
			
 
				+ * horizontal stripes and processing them in parallel.
			
 
				+ */
			
 
				 #include "spmv.h"
			
 
				 
			
 
				 unsigned nblocks = 4;
			
@@ -44,7 +48,7 @@ static void parse_args(int argc, char **argv)
 
				 }
			
 
				 
			
 
				 /* This filter function takes a CSR matrix, and divides it into nparts with the
			
 
				- * same number of non-zero entries. */
			
 
				+ * same number of rows. */
			
 
				 static void csr_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
			
 
				 {
			
 
				 	struct starpu_csr_interface *csr_father = (struct starpu_csr_interface *) father_interface;
			
--- a/examples/spmv/spmv_cuda.cu
+++ b/examples/spmv/spmv_cuda.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -15,6 +15,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* CUDA kernel for SPMV */
			
 
				+
			
 
				 #include <starpu.h>
			
 
				 
			
 
				 #define MIN(a,b)	((a)<(b)?(a):(b))
			
--- a/examples/spmv/spmv_kernels.c
+++ b/examples/spmv/spmv_kernels.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2011, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2011, 2014-2015  Université de Bordeaux
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				  *
			
@@ -16,6 +16,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* OpenCL codelet for SPMV */
			
 
				+
			
 
				 #include "spmv.h"
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
--- a/examples/spmv/spmv_opencl.cl
+++ b/examples/spmv/spmv_opencl.cl
@@ -14,6 +14,8 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/* OpenCL kernel for SPMV */
			
 
				+
			
 
				 __kernel void spmv(int nnz, int nrow,
			
 
				                    __global float* nzval, __global unsigned* colind,
			
 
				                    __global unsigned* rowptr, int firstentry,
			
--- a/examples/tag_example/tag_restartable.c
+++ b/examples/tag_example/tag_restartable.c