Browse Source

merge examples

Andra Hugo 13 years ago
parent
commit
fedc28d7d3
40 changed files with 710 additions and 266 deletions
  1. 62 8
      examples/Makefile.am
  2. 2 0
      examples/audio/starpu_audio_processing.c
  3. 52 10
      examples/axpy/axpy.c
  4. 7 0
      examples/axpy/axpy.h
  5. 73 0
      examples/axpy/axpy_opencl.c
  6. 27 0
      examples/axpy/axpy_opencl_kernel.cl
  7. 25 15
      examples/cholesky/cholesky_grain_tag.c
  8. 12 7
      examples/cholesky/cholesky_implicit.c
  9. 18 11
      examples/cholesky/cholesky_tile_tag.c
  10. 24 0
      examples/interface/complex.c
  11. 95 1
      examples/interface/complex_interface.c
  12. 32 0
      examples/interface/complex_kernels.cl
  13. 80 0
      examples/interface/complex_kernels_opencl.c
  14. 8 8
      examples/lu/lu_example.c
  15. 34 28
      examples/lu/xlu.c
  16. 4 4
      examples/lu/xlu.h
  17. 36 24
      examples/lu/xlu_implicit.c
  18. 57 39
      examples/lu/xlu_implicit_pivot.c
  19. 8 2
      examples/mult/xgemm.c
  20. 0 81
      examples/opt/Makefile.am
  21. 0 0
      examples/pi/SobolQRNG/CforCUDA_SDK_license.txt
  22. 0 0
      examples/pi/SobolQRNG/sobol.h
  23. 0 0
      examples/pi/SobolQRNG/sobol_gold.c
  24. 0 0
      examples/pi/SobolQRNG/sobol_gold.h
  25. 0 0
      examples/pi/SobolQRNG/sobol_gpu.cu
  26. 0 0
      examples/pi/SobolQRNG/sobol_gpu.h
  27. 0 0
      examples/pi/SobolQRNG/sobol_primitives.c
  28. 0 0
      examples/pi/SobolQRNG/sobol_primitives.h
  29. 0 0
      examples/pi/pi.c
  30. 0 0
      examples/pi/pi.h
  31. 0 0
      examples/pi/pi_kernel.cu
  32. 1 1
      examples/opt/pi/pi_redux.c
  33. 0 0
      examples/pi/pi_redux_kernel.cu
  34. 2 0
      examples/spmv/dw_block_spmv.c
  35. 2 0
      examples/stencil/stencil.c
  36. 15 7
      examples/tag_example/tag_example.c
  37. 8 5
      examples/tag_example/tag_example2.c
  38. 8 5
      examples/tag_example/tag_example3.c
  39. 6 3
      examples/tag_example/tag_example4.c
  40. 12 7
      examples/tag_example/tag_restartable.c

+ 62 - 8
examples/Makefile.am

@@ -33,6 +33,7 @@ nobase_STARPU_OPENCL_DATA_DATA =
 endif
 
 EXTRA_DIST = 					\
+	axpy/axpy_opencl_kernel.cl		\
 	basic_examples/vector_scal_opencl_kernel.cl \
 	basic_examples/multiformat_opencl_kernel.cl  \
 	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \
@@ -55,7 +56,8 @@ EXTRA_DIST = 					\
 	openmp/vector_scal.c			\
 	filters/fblock_opencl_kernel.cl		\
 	filters/custom_mf/conversion_opencl.cl  \
-	filters/custom_mf/custom_opencl.cl
+	filters/custom_mf/custom_opencl.cl \
+	interface/complex_kernels.cl
 
 CLEANFILES = 					\
 	gordon/null_kernel_gordon.spuelf
@@ -104,6 +106,7 @@ examplebindir = $(libdir)/starpu/examples/
 examplebin_PROGRAMS =
 
 noinst_HEADERS = 				\
+	axpy/axpy.h                             \
 	cg/cg.h					\
 	heat/lu_kernels_model.h			\
 	heat/dw_sparse_cg.h			\
@@ -133,7 +136,12 @@ noinst_HEADERS = 				\
 	basic_examples/multiformat_types.h      \
 	filters/custom_mf/custom_interface.h    \
 	filters/custom_mf/custom_types.h	\
-	interface/complex_interface.h
+	interface/complex_interface.h		\
+	pi/pi.h					\
+	pi/SobolQRNG/sobol.h			\
+	pi/SobolQRNG/sobol_gold.h		\
+	pi/SobolQRNG/sobol_gpu.h		\
+	pi/SobolQRNG/sobol_primitives.h
 
 #####################################
 # What to install and what to check #
@@ -330,17 +338,17 @@ basic_examples_multiformat_SOURCES =                                    \
 	basic_examples/multiformat_conversion_codelets.c
 
 if STARPU_USE_CUDA
-basic_examples_multiformat_SOURCES+=                                     \
+basic_examples_multiformat_SOURCES +=                                    \
 	basic_examples/multiformat_cuda.cu                               \
 	basic_examples/multiformat_conversion_codelets_cuda.cu
 endif
 
 if STARPU_USE_OPENCL
-basic_examples_multiformat_SOURCES+=                                     \
+basic_examples_multiformat_SOURCES +=                                    \
 	basic_examples/multiformat_opencl.c                              \
-	basic_examples/multiformat_conversion_codelets_opencl.c          
+	basic_examples/multiformat_conversion_codelets_opencl.c
 
-nobase_STARPU_OPENCL_DATA_DATA+=                                         \
+nobase_STARPU_OPENCL_DATA_DATA +=                                        \
 	basic_examples/multiformat_opencl_kernel.cl                      \
 	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
 endif
@@ -414,13 +422,13 @@ filters_custom_mf_custom_mf_filter_SOURCES=\
 	filters/custom_mf/custom_conversion_codelets.c
 
 if STARPU_USE_CUDA
-filters_custom_mf_custom_mf_filter_SOURCES+=\
+filters_custom_mf_custom_mf_filter_SOURCES += \
 	filters/custom_mf/conversion.cu \
 	filters/custom_mf/cuda.cu
 endif
 
 if STARPU_USE_OPENCL
-filters_custom_mf_custom_mf_filter_SOURCES+=\
+filters_custom_mf_custom_mf_filter_SOURCES += \
 	filters/custom_mf/conversion_opencl.c \
 	filters/custom_mf/custom_opencl.c
 nobase_STARPU_OPENCL_DATA_DATA += \
@@ -436,6 +444,12 @@ if !NO_BLAS_LIB
 axpy_axpy_SOURCES =				\
 	axpy/axpy.c				\
 	common/blas.c
+if STARPU_USE_OPENCL
+axpy_axpy_SOURCES += \
+	axpy/axpy_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA +=		\
+	axpy/axpy_opencl_kernel.cl
+endif
 
 axpy_axpy_LDADD =				\
 	$(STARPU_BLAS_LDFLAGS)
@@ -713,6 +727,13 @@ interface_complex_SOURCES	+=	\
 	interface/complex_kernels.cu
 endif
 
+if STARPU_USE_OPENCL
+interface_complex_SOURCES +=\
+	interface/complex_kernels_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	interface/complex_kernels.cl
+endif
+
 ######################
 # matVecMult example #
 ######################
@@ -753,6 +774,39 @@ examplebin_PROGRAMS +=				\
 top_hello_world_top_SOURCES =			\
 	top/hello_world_top.c
 
+######
+# Pi #
+######
+
+check_PROGRAMS +=				\
+	pi/pi					\
+	pi/pi_redux
+
+examplebin_PROGRAMS +=				\
+	pi/pi					\
+	pi/pi_redux
+
+pi_pi_SOURCES =					\
+	pi/pi.c					\
+	pi/SobolQRNG/sobol_gold.c		\
+	pi/SobolQRNG/sobol_primitives.c
+
+if STARPU_USE_CUDA
+pi_pi_SOURCES +=				\
+	pi/pi_kernel.cu				\
+	pi/SobolQRNG/sobol_gpu.cu
+endif
+
+pi_pi_redux_SOURCES =				\
+	pi/pi_redux.c
+
+if STARPU_USE_CUDA
+pi_pi_redux_SOURCES +=				\
+	pi/pi_redux_kernel.cu
+pi_pi_redux_LDADD =				\
+	$(STARPU_CURAND_LDFLAGS)
+endif
+
 showcheck:
 	-cat $(TEST_LOGS) /dev/null
 	for i in $(SUBDIRS) ; do \

+ 2 - 0
examples/audio/starpu_audio_processing.c

@@ -406,6 +406,8 @@ int main(int argc, char **argv)
 
 	/* launch StarPU */
 	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, niter*nsamples, sizeof(float));

+ 52 - 10
examples/axpy/axpy.c

@@ -27,8 +27,12 @@
 #ifdef STARPU_USE_CUDA
 #include <cublas.h>
 #endif
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
+
+#include "axpy.h"
 
-#define TYPE	float
 #define AXPY	SAXPY
 #define CUBLASAXPY	cublasSaxpy
 
@@ -38,7 +42,10 @@
 
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
+#define EPSILON 1e-6
+
 TYPE *vec_x, *vec_y;
+TYPE alpha = 3.41;
 
 /* descriptors for StarPU */
 starpu_data_handle_t handle_y, handle_x;
@@ -70,22 +77,41 @@ void axpy_gpu(void *descr[], __attribute__((unused)) void *arg)
 }
 #endif
 
-static struct starpu_codelet axpy_cl =
-{
-        .where =
-#ifdef STARPU_USE_CUDA
-                STARPU_CUDA|
+#ifdef STARPU_USE_OPENCL
+extern void axpy_opencl(void *buffers[], void *args);
 #endif
-                STARPU_CPU,
 
+static struct starpu_codelet axpy_cl =
+{
 	.cpu_funcs = {axpy_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {axpy_gpu, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {axpy_opencl, NULL},
+#endif
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_RW}
 };
 
+static int
+check(void)
+{
+	int i;
+	for (i = 0; i < N; i++)
+	{
+		TYPE expected_value = alpha * vec_x[i] + 4.0;
+		if (fabs(vec_y[i] - expected_value) > EPSILON)
+			return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret;
@@ -96,6 +122,12 @@ int main(int argc, char **argv)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
 	starpu_helper_cublas_init();
 
 	/* This is equivalent to
@@ -132,8 +164,6 @@ int main(int argc, char **argv)
 	starpu_data_partition(handle_x, &block_filter);
 	starpu_data_partition(handle_y, &block_filter);
 
-	TYPE alpha = 3.41;
-
 	struct timeval start;
 	struct timeval end;
 
@@ -152,11 +182,17 @@ int main(int argc, char **argv)
 		task->handles[1] = starpu_data_get_sub_data(handle_y, 1, b);
 
 		ret = starpu_task_submit(task);
+		if (ret == -ENODEV)
+		{
+		     ret = 77;
+		     goto enodev;
+		}
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
 	starpu_task_wait_for_all();
 
+enodev:
 	starpu_data_unpartition(handle_x, 0);
 	starpu_data_unpartition(handle_y, 0);
 	starpu_data_unregister(handle_x);
@@ -170,11 +206,17 @@ int main(int argc, char **argv)
 
 	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", vec_y[0], alpha);
 
+	if (ret != 77)
+		ret = check();
+
 	starpu_free((void *)vec_x);
 	starpu_free((void *)vec_y);
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	/* Stop StarPU */
 	starpu_shutdown();
 
-	return 0;
+	return ret;
 }

+ 7 - 0
examples/axpy/axpy.h

@@ -0,0 +1,7 @@
+#ifndef AXPY_H__
+#define AXPY_H__
+
+#define TYPE float
+
+#endif /* AXPY_H__ */
+

+ 73 - 0
examples/axpy/axpy_opencl.c

@@ -0,0 +1,73 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+extern struct starpu_opencl_program opencl_program;
+
+void axpy_opencl(void *buffers[], void *_args)
+{
+	float *alpha = _args;
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
+	cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_axpy_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(x), &x);
+	err|= clSetKernelArg(kernel, 1, sizeof(y), &y);
+	err|= clSetKernelArg(kernel, 2, sizeof(n), &n);
+	err|= clSetKernelArg(kernel, 3, sizeof(*alpha), alpha);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}

+ 27 - 0
examples/axpy/axpy_opencl_kernel.cl

@@ -0,0 +1,27 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "axpy.h"
+
+__kernel void _axpy_opencl(__global TYPE *x,
+			   __global TYPE *y,
+			   unsigned nx,
+			   TYPE alpha)
+{
+        const int i = get_global_id(0);
+        if (i < nx)
+                y[i] = alpha * x[i] + y[i];
+}

+ 25 - 15
examples/cholesky/cholesky_grain_tag.c

@@ -83,7 +83,7 @@ static struct starpu_codelet cl21 =
 	.model = &chol_model_21
 };
 
-static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned reclevel)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned reclevel)
 {
 	int ret;
 
@@ -111,7 +111,8 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, u
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
 static struct starpu_codelet cl22 =
@@ -126,7 +127,7 @@ static struct starpu_codelet cl22 =
 	.model = &chol_model_22
 };
 
-static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned reclevel)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned reclevel)
 {
 	int ret;
 
@@ -157,7 +158,8 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
 
@@ -167,7 +169,7 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
  *	and construct the DAG
  */
 
-static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned reclevel)
+static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned reclevel)
 {
 	int ret;
 
@@ -210,17 +212,22 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 		else
 		{
 			ret = starpu_task_submit(task);
+			if (ret == -ENODEV) return 77;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		}
 
 		for (j = k+1; j<nblocks; j++)
 		{
-			create_task_21(dataA, k, j, reclevel);
+		     	ret = create_task_21(dataA, k, j, reclevel);
+			if (ret == -ENODEV) return 77;
 
 			for (i = k+1; i<nblocks; i++)
 			{
 				if (i <= j)
-					create_task_22(dataA, k, i, j, reclevel);
+				{
+				     ret = create_task_22(dataA, k, i, j, reclevel);
+				     if (ret == -ENODEV) return 77;
+				}
 			}
 		}
 	}
@@ -230,7 +237,7 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 	if (STARPU_UNLIKELY(ret == -ENODEV))
 	{
 		FPRINTF(stderr, "No worker may execute this task\n");
-		exit(-1);
+		return 77;
 	}
 
 	if (nblocks == nbigblocks)
@@ -239,7 +246,7 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 		starpu_tag_wait(TAG11_AUX(nblocks-1, reclevel));
 		starpu_data_unpartition(dataA, 0);
 		starpu_data_unregister(dataA);
-		return;
+		return 0;
 	}
 	else
 	{
@@ -266,7 +273,7 @@ static void cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
 		float *newmatA = &matA[nbigblocks*(size/nblocks)*(ld+1)];
 
-		cholesky_grain_rec(newmatA, size/nblocks*(nblocks - nbigblocks), ld, (nblocks - nbigblocks)*2, (nblocks - nbigblocks)*2, reclevel+1);
+		return cholesky_grain_rec(newmatA, size/nblocks*(nblocks - nbigblocks), ld, (nblocks - nbigblocks)*2, (nblocks - nbigblocks)*2, reclevel+1);
 	}
 }
 
@@ -291,14 +298,15 @@ static void initialize_system(float **A, unsigned dim, unsigned pinned)
 	}
 }
 
-void cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned pinned)
+int cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned pinned)
 {
 	struct timeval start;
 	struct timeval end;
+	int ret;
 
 	gettimeofday(&start, NULL);
 
-	cholesky_grain_rec(matA, size, ld, nblocks, nbigblocks, 0);
+	ret = cholesky_grain_rec(matA, size, ld, nblocks, nbigblocks, 0);
 
 	gettimeofday(&end, NULL);
 
@@ -308,7 +316,7 @@ void cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
 	double flop = (1.0f*size*size*size)/3.0f;
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
-
+	return ret;
 }
 
 static void shutdown_system(float **matA, unsigned pinned)
@@ -333,6 +341,8 @@ int main(int argc, char **argv)
 	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
 	 * */
 
+     	int ret;
+
 	parse_args(argc, argv);
 
 	float *mat;
@@ -369,7 +379,7 @@ int main(int argc, char **argv)
 	}
 #endif
 
-	cholesky_grain(mat, size, size, nblocks, nbigblocks, pinned);
+	ret = cholesky_grain(mat, size, size, nblocks, nbigblocks, pinned);
 
 #ifdef CHECK_OUTPUT
 	FPRINTF(stdout, "Results :\n");
@@ -418,5 +428,5 @@ int main(int argc, char **argv)
 #endif
 
 	shutdown_system(&mat, pinned);
-	return 0;
+	return ret;
 }

+ 12 - 7
examples/cholesky/cholesky_implicit.c

@@ -72,7 +72,7 @@ static void callback_turn_spmd_on(void *arg __attribute__ ((unused)))
 	cl22.type = STARPU_SPMD;
 }
 
-static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
+static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 {
 	int ret;
 	struct timeval start;
@@ -96,6 +96,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 					 STARPU_RW, sdatakk,
 					 STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
 					 0);
+		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 
 		for (j = k+1; j<nblocks; j++)
@@ -107,6 +108,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 						 STARPU_R, sdatakk,
 						 STARPU_RW, sdatakj,
 						 0);
+			if (ret == -ENODEV) return 77;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 
 			for (i = k+1; i<nblocks; i++)
@@ -122,6 +124,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 								 STARPU_R, sdatakj,
 								 STARPU_RW, sdataij,
 								 0);
+					if (ret == -ENODEV) return 77;
 					STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
                                 }
 			}
@@ -132,8 +135,6 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	if (bound)
 		starpu_bound_stop();
 
-	starpu_data_unpartition(dataA, 0);
-
 	gettimeofday(&end, NULL);
 
 	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
@@ -157,9 +158,10 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 			FPRINTF(stderr, "Theoretical GFlops: %2.2f\n", (flop/res/1000000.0f));
 		}
 	}
+	return 0;
 }
 
-static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
+static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 {
 	starpu_data_handle_t dataA;
 
@@ -181,9 +183,12 @@ static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
-	_cholesky(dataA, nblocks);
+	int ret = _cholesky(dataA, nblocks);
 
+	starpu_data_unpartition(dataA, 0);
 	starpu_data_unregister(dataA);
+
+	return ret;
 }
 
 static void execute_cholesky(unsigned size, unsigned nblocks)
@@ -222,7 +227,7 @@ static void execute_cholesky(unsigned size, unsigned nblocks)
 	}
 #endif
 
-	cholesky(mat, size, size, nblocks);
+	ret = cholesky(mat, size, size, nblocks);
 
 #ifdef PRINT_OUTPUT
 	FPRINTF(stdout, "Results :\n");
@@ -336,5 +341,5 @@ int main(int argc, char **argv)
 	starpu_helper_cublas_shutdown();
 	starpu_shutdown();
 
-	return 0;
+	return ret;
 }

+ 18 - 11
examples/cholesky/cholesky_tile_tag.c

@@ -100,7 +100,7 @@ static struct starpu_codelet cl21 =
 	.model = &chol_model_21
 };
 
-static void create_task_21(unsigned k, unsigned j)
+static int create_task_21(unsigned k, unsigned j)
 {
 	int ret;
 
@@ -128,7 +128,8 @@ static void create_task_21(unsigned k, unsigned j)
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
 static struct starpu_codelet cl22 =
@@ -150,7 +151,7 @@ static struct starpu_codelet cl22 =
 	.model = &chol_model_22
 };
 
-static void create_task_22(unsigned k, unsigned i, unsigned j)
+static int create_task_22(unsigned k, unsigned i, unsigned j)
 {
 	int ret;
 
@@ -181,17 +182,16 @@ static void create_task_22(unsigned k, unsigned i, unsigned j)
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-
-
 /*
  *	code to bootstrap the factorization 
  *	and construct the DAG
  */
 
-static void cholesky_no_stride(void)
+static int cholesky_no_stride(void)
 {
 	int ret;
 
@@ -219,12 +219,16 @@ static void cholesky_no_stride(void)
 		
 		for (j = k+1; j<nblocks; j++)
 		{
-			create_task_21(k, j);
+			ret = create_task_21(k, j);
+			if (ret == -ENODEV) return 77;
 
 			for (i = k+1; i<nblocks; i++)
 			{
 				if (i <= j)
-					create_task_22(k, i, j);
+				{
+				     ret = create_task_22(k, i, j);
+				     if (ret == -ENODEV) return 77;
+				}
 			}
 		}
 	}
@@ -232,6 +236,7 @@ static void cholesky_no_stride(void)
 	/* schedule the codelet */
 	gettimeofday(&start, NULL);
 	ret = starpu_task_submit(entry_task);
+	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 	/* stall the application until the end of computations */
@@ -245,6 +250,8 @@ static void cholesky_no_stride(void)
 
 	double flop = (1.0f*size*size*size)/3.0f;
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+
+	return 0;
 }
 
 int main(int argc, char **argv)
@@ -312,7 +319,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	cholesky_no_stride();
+	ret = cholesky_no_stride();
 
 	for (y = 0; y < nblocks; y++)
 	for (x = 0; x < nblocks; x++)
@@ -327,7 +334,7 @@ int main(int argc, char **argv)
 	starpu_helper_cublas_shutdown();
 
 	starpu_shutdown();
-	return 0;
+	return ret;
 }
 
 

+ 24 - 0
examples/interface/complex.c

@@ -16,10 +16,16 @@
 
 #include <starpu.h>
 #include "complex_interface.h"
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 
 #ifdef STARPU_USE_CUDA
 extern void copy_complex_codelet_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
+#ifdef STARPU_USE_OPENCL
+extern void copy_complex_codelet_opencl(void *buffers[], void *args);
+#endif
 
 void compare_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
@@ -72,6 +78,9 @@ struct starpu_codelet cl_copy =
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {copy_complex_codelet_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {copy_complex_codelet_opencl, NULL},
+#endif
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_W}
 };
@@ -83,6 +92,10 @@ struct starpu_codelet cl_compare =
 	.modes = {STARPU_R, STARPU_R}
 };
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret = 0;
@@ -98,6 +111,11 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
 	starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle2, 0, &copy_real, &copy_imaginary, 1);
 
@@ -141,10 +159,16 @@ int main(int argc, char **argv)
 
 	starpu_task_wait_for_all();
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 	return 0;
 
 enodev:
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_data_unregister(handle1);
 	starpu_data_unregister(handle2);
 	starpu_shutdown();

+ 95 - 1
examples/interface/complex_interface.c

@@ -16,6 +16,7 @@
 
 #include <starpu.h>
 #include <starpu_cuda.h>
+#include <starpu_opencl.h>
 #include <starpu_hash.h>
 
 #include "complex_interface.h"
@@ -109,7 +110,30 @@ static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, uint32
 #ifdef STARPU_USE_OPENCL
 	        case STARPU_OPENCL_RAM:
 		{
-			STARPU_ASSERT(0);
+			int ret;
+			cl_mem real, imaginary;
+			ret = starpu_opencl_allocate_memory(&real, requested_memory, CL_MEM_READ_WRITE);
+			if (ret != CL_SUCCESS)
+			{
+				fail = 1;
+				break;
+			}
+			else
+			{
+				addr_real = (double *) real;
+			}
+
+			ret = starpu_opencl_allocate_memory(&imaginary, requested_memory, CL_MEM_READ_WRITE);
+			if (ret != CL_SUCCESS)
+			{
+				fail = 1;
+				break;
+			}
+			else
+			{
+				addr_imaginary = (double *) imaginary;
+			}
+			break;
 		}
 #endif
 		default:
@@ -171,12 +195,82 @@ static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_in
 #endif
 
 
+#ifdef STARPU_USE_OPENCL
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node,
+                              void *dst_interface, unsigned dst_node)
+{
+	struct starpu_complex_interface *src_complex = src_interface;
+	struct starpu_complex_interface *dst_complex = dst_interface;
+
+	cl_int err;
+
+	err = starpu_opencl_copy_ram_to_opencl(
+		src_complex->real,
+		src_node,
+		(cl_mem) dst_complex->real,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->real[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = starpu_opencl_copy_ram_to_opencl(
+		src_complex->imaginary,
+		src_node,
+		(cl_mem) dst_complex->imaginary,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->imaginary[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	return 0;
+}
+
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node,
+			      void *dst_interface, unsigned dst_node)
+{
+	struct starpu_complex_interface *src_complex = src_interface;
+	struct starpu_complex_interface *dst_complex = dst_interface;
+
+	cl_int err;
+	err = starpu_opencl_copy_opencl_to_ram(
+		(cl_mem) src_complex->real,
+		src_node,
+		dst_complex->real,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->real[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = starpu_opencl_copy_opencl_to_ram(
+		(cl_mem) src_complex->imaginary,
+		src_node,
+		dst_complex->imaginary,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->imaginary[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	return 0;
+}
+#endif
 static const struct starpu_data_copy_methods complex_copy_methods =
 {
 #ifdef STARPU_USE_CUDA
 	.ram_to_cuda = copy_ram_to_cuda,
 	.cuda_to_ram = copy_cuda_to_ram,
 #endif
+#ifdef STARPU_USE_OPENCL
+	.ram_to_opencl = copy_ram_to_opencl,
+	.opencl_to_ram = copy_opencl_to_ram,
+#endif
 };
 
 static struct starpu_data_interface_ops interface_complex_ops =

+ 32 - 0
examples/interface/complex_kernels.cl

@@ -0,0 +1,32 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Use the "double" type */
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+__kernel void complex_copy_opencl(__global double *o_real,
+				  __global double *o_imaginary,
+				  __global double *i_real,
+				  __global double *i_imaginary,
+				  unsigned nx) 
+{
+        const int i = get_global_id(0);
+        if (i < nx)
+	{
+		o_real[i] = i_real[i];
+		o_imaginary[i] = i_imaginary[i];
+        }
+}

+ 80 - 0
examples/interface/complex_kernels_opencl.c

@@ -0,0 +1,80 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "complex_interface.h"
+
+extern struct starpu_opencl_program opencl_program;
+
+void copy_complex_codelet_opencl(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	/* length of the vector */
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	/* OpenCL copy of the vector pointer */
+	cl_mem *i_real      = (cl_mem *) STARPU_COMPLEX_GET_REAL(buffers[0]);
+	cl_mem *i_imaginary = (cl_mem *) STARPU_COMPLEX_GET_IMAGINARY(buffers[0]);
+	cl_mem *o_real      = (cl_mem *) STARPU_COMPLEX_GET_REAL(buffers[1]);
+	cl_mem *o_imaginary = (cl_mem *) STARPU_COMPLEX_GET_IMAGINARY(buffers[1]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "complex_copy_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(o_real), &o_real);
+	err|= clSetKernelArg(kernel, 1, sizeof(o_imaginary), &o_imaginary);
+	err|= clSetKernelArg(kernel, 2, sizeof(i_real), &i_real);
+	err|= clSetKernelArg(kernel, 3, sizeof(i_imaginary), &i_imaginary);
+	err|= clSetKernelArg(kernel, 4, sizeof(n), &n);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}

+ 8 - 8
examples/lu/lu_example.c

@@ -262,7 +262,7 @@ static void check_result(void)
 	/* compute "LU - A" in L*/
 	CPU_AXPY(size*size, -1.0, A_saved, 1, L, 1);
 	display_matrix(L, size, size, "Residuals");
-	
+
 #ifdef COMPLEX_LU
 	double err = CPU_ASUM(size*size, L, 1);
 	int max = CPU_IAMAX(size*size, L, 1);
@@ -333,24 +333,24 @@ int main(int argc, char **argv)
 			A_blocks = malloc(nblocks*nblocks*sizeof(TYPE **));
 			copy_matrix_into_blocks();
 
-			STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks);
+			ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks);
 
 			copy_blocks_into_matrix();
 			free(A_blocks);
 		}
-		else 
+		else
 		{
 			struct timeval start;
 			struct timeval end;
 
 			gettimeofday(&start, NULL);
 
-			STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks);
-	
+			ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks);
+
 			gettimeofday(&end, NULL);
 
 			double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-			
+
 			unsigned n = size;
 			double flop = (2.0f*n*n*n)/3.0f;
 			FPRINTF(stderr, "Synthetic GFlops (TOTAL) : \n");
@@ -359,7 +359,7 @@ int main(int argc, char **argv)
 	}
 	else
 	{
-		STARPU_LU(lu_decomposition)(A, size, size, nblocks);
+		ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks);
 	}
 
 	if (profile)
@@ -415,5 +415,5 @@ int main(int argc, char **argv)
 
 	starpu_shutdown();
 
-	return 0;
+	if (ret == -ENODEV) return 77; else return 0;
 }

+ 34 - 28
examples/lu/xlu.c

@@ -72,19 +72,19 @@ static struct starpu_task *create_task_11(starpu_data_handle_t dataA, unsigned k
 	return task;
 }
 
-static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
+static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 {
 	int ret;
 
 /*	printf("task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG12(k,i)); */
 
 	struct starpu_task *task = create_task(TAG12(k, j));
-	
+
 	task->cl = &cl12;
 
 	/* which sub-data is manipulated ? */
-	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); 
-	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); 
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);
 
 	if (!no_prio && (j == k+1))
 	{
@@ -102,19 +102,20 @@ static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 {
 	int ret;
 	struct starpu_task *task = create_task(TAG21(k, i));
 
 	task->cl = &cl21;
-	
+
 	/* which sub-data is manipulated ? */
-	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); 
-	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i); 
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i);
 
 	if (!no_prio && (i == k+1))
 	{
@@ -132,10 +133,11 @@ static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
 {
 	int ret;
 
@@ -146,7 +148,7 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	task->cl = &cl22;
 
 	/* which sub-data is manipulated ? */
-	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i); /* produced by TAG21(k, i) */ 
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i); /* produced by TAG21(k, i) */
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); /* produced by TAG12(k, j) */
 	task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG22(k-1, i, j) */
 
@@ -166,14 +168,15 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 	}
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
 /*
- *	code to bootstrap the factorization 
+ *	code to bootstrap the factorization
  */
 
-static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
+static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 {
 	int ret;
 	struct timeval start;
@@ -196,20 +199,24 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 		else
 		{
 			ret = starpu_task_submit(task);
+			if (ret == -ENODEV) return ret;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		}
-		
+
 		for (i = k+1; i<nblocks; i++)
 		{
-			create_task_12(dataA, k, i);
-			create_task_21(dataA, k, i);
+			ret = create_task_12(dataA, k, i);
+			if (ret == -ENODEV) return ret;
+			ret = create_task_21(dataA, k, i);
+			if (ret == -ENODEV) return ret;
 		}
 
 		for (i = k+1; i<nblocks; i++)
 		{
 			for (j = k+1; j<nblocks; j++)
 			{
-				create_task_22(dataA, k, i, j);
+			     ret = create_task_22(dataA, k, i, j);
+			     if (ret == -ENODEV) return ret;
 			}
 		}
 	}
@@ -217,13 +224,8 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	/* schedule the codelet */
 	gettimeofday(&start, NULL);
 	ret = starpu_task_submit(entry_task);
-	if (STARPU_UNLIKELY(ret == -ENODEV))
-	{
-		FPRINTF(stderr, "No worker may execute this task\n");
-		exit(-1);
-	}
-
-
+	if (ret == -ENODEV) return ret;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 	/* stall the application until the end of computations */
 	starpu_tag_wait(TAG11(nblocks-1));
@@ -237,9 +239,11 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+
+	return 0;
 }
 
-void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
 {
 	starpu_data_handle_t dataA;
 
@@ -264,9 +268,11 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
-	dw_codelet_facto_v3(dataA, nblocks);
+	int ret = dw_codelet_facto_v3(dataA, nblocks);
 
 	/* gather all the data */
 	starpu_data_unpartition(dataA, 0);
 	starpu_data_unregister(dataA);
+
+	return ret;
 }

+ 4 - 4
examples/lu/xlu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -113,8 +113,8 @@ struct piv_s
 	unsigned last; /* last element */
 };
 
-void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks);
-void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
-void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks);
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks);
 
 #endif /* __XLU_H__ */

+ 36 - 24
examples/lu/xlu_implicit.c

@@ -21,7 +21,7 @@
 
 static unsigned no_prio = 0;
 
-static void create_task_11(starpu_data_handle_t dataA, unsigned k)
+static int create_task_11(starpu_data_handle_t dataA, unsigned k)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -35,45 +35,48 @@ static void create_task_11(starpu_data_handle_t dataA, unsigned k)
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
+static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 	task->cl = &cl12;
 
 	/* which sub-data is manipulated ? */
-	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); 
-	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); 
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);
 
 	if (!no_prio && (j == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
+static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 
 	task->cl = &cl21;
-	
+
 	/* which sub-data is manipulated ? */
-	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); 
-	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i); 
+	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
+	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i);
 
 	if (!no_prio && (i == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
+static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -89,17 +92,19 @@ static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, u
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
 /*
- *	code to bootstrap the factorization 
+ *	code to bootstrap the factorization
  */
 
-static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
+static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 {
 	struct timeval start;
 	struct timeval end;
+	int ret;
 
 	/* create all the DAG nodes */
 	unsigned i,j,k;
@@ -108,17 +113,22 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 	for (k = 0; k < nblocks; k++)
 	{
-		create_task_11(dataA, k);
-		
+		ret = create_task_11(dataA, k);
+		if (ret == -ENODEV) return ret;
+
 		for (i = k+1; i<nblocks; i++)
 		{
-			create_task_12(dataA, k, i);
-			create_task_21(dataA, k, i);
+		     ret = create_task_12(dataA, k, i);
+		     if (ret == -ENODEV) return ret;
+		     ret = create_task_21(dataA, k, i);
+		     if (ret == -ENODEV) return ret;
 		}
 
 		for (i = k+1; i<nblocks; i++)
-		for (j = k+1; j<nblocks; j++)
-				create_task_22(dataA, k, i, j);
+		     for (j = k+1; j<nblocks; j++) {
+			  ret = create_task_22(dataA, k, i, j);
+			  if (ret == -ENODEV) return ret;
+		     }
 	}
 
 	/* stall the application until the end of computations */
@@ -133,16 +143,17 @@ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	FPRINTF(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	return 0;
 }
 
-void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
 {
 	starpu_data_handle_t dataA;
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
 	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
-	
+
 	struct starpu_data_filter f =
 	{
 		.filter_func = starpu_vertical_block_filter_func,
@@ -157,9 +168,10 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
 	starpu_data_map_filters(dataA, 2, &f, &f2);
 
-	dw_codelet_facto_v3(dataA, nblocks);
+	int ret = dw_codelet_facto_v3(dataA, nblocks);
 
 	/* gather all the data */
 	starpu_data_unpartition(dataA, 0);
 	starpu_data_unregister(dataA);
+	return ret;
 }

+ 57 - 39
examples/lu/xlu_implicit_pivot.c

@@ -25,10 +25,10 @@ static unsigned no_prio = 0;
  *	Construct the DAG
  */
 
-static void create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
-					struct piv_s *piv_description,
-					unsigned k, unsigned i,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
+			     struct piv_s *piv_description,
+			     unsigned k, unsigned i,
+			     starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
 {
 	int ret;
 
@@ -46,12 +46,13 @@ static void create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
-					unsigned k, struct piv_s *piv_description,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
+				unsigned k, struct piv_s *piv_description,
+				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
 {
 	int ret;
 
@@ -69,11 +70,12 @@ static void create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
-		starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j,
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -88,30 +90,32 @@ static void create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
-				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i,
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
 
 	task->cl = &cl21;
-	
+
 	/* which sub-data is manipulated ? */
-	task->handles[0] = get_block(dataAp, nblocks, k, k); 
-	task->handles[1] = get_block(dataAp, nblocks, k, i); 
+	task->handles[0] = get_block(dataAp, nblocks, k, k);
+	task->handles[1] = get_block(dataAp, nblocks, k, i);
 
 	if (!no_prio && (i == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
-static void create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
-				starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j,
+			  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
 {
 	int ret;
 	struct starpu_task *task = starpu_task_create();
@@ -127,20 +131,23 @@ static void create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsig
 		task->priority = STARPU_MAX_PRIO;
 
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return ret;
 }
 
 /*
- *	code to bootstrap the factorization 
+ *	code to bootstrap the factorization
  */
 
-static double dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
-					struct piv_s *piv_description,
-					unsigned nblocks,
-					starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned))
+static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
+				  struct piv_s *piv_description,
+				  unsigned nblocks,
+				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
+				  double *timing)
 {
 	struct timeval start;
 	struct timeval end;
+	int ret;
 
 	gettimeofday(&start, NULL);
 
@@ -148,23 +155,32 @@ static double dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 	unsigned i,j,k;
 	for (k = 0; k < nblocks; k++)
 	{
-		create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+	     ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
+	     if (ret == -ENODEV) return ret;
 
 		for (i = 0; i < nblocks; i++)
 		{
 			if (i != k)
-				create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
+			{
+			     ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
+			     if (ret == -ENODEV) return ret;
+			}
 		}
-	
+
 		for (i = k+1; i<nblocks; i++)
 		{
-			create_task_12(dataAp, nblocks, k, i, get_block);
-			create_task_21(dataAp, nblocks, k, i, get_block);
+		     ret = create_task_12(dataAp, nblocks, k, i, get_block);
+		     if (ret == -ENODEV) return ret;
+		     ret = create_task_21(dataAp, nblocks, k, i, get_block);
+		     if (ret == -ENODEV) return ret;
 		}
 
 		for (i = k+1; i<nblocks; i++)
-		for (j = k+1; j<nblocks; j++)
-			create_task_22(dataAp, nblocks, k, i, j, get_block);
+		     for (j = k+1; j<nblocks; j++)
+		     {
+			  ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
+			  if (ret == -ENODEV) return ret;
+		     }
 	}
 
 	/* stall the application until the end of computations */
@@ -172,8 +188,8 @@ static double dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 	gettimeofday(&end, NULL);
 
-	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	return timing;
+	*timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+	return 0;
 }
 
 starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp,
@@ -184,7 +200,7 @@ starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp,
 }
 
 
-void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
 {
 	starpu_data_handle_t dataA;
 
@@ -220,7 +236,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	}
 
 	double timing;
-	timing = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding);
+	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
 
 	FPRINTF(stderr, "Computation took (in ms)\n");
 	FPRINTF(stderr, "%2.2f\n", timing/1000);
@@ -234,6 +250,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	starpu_data_unregister(dataA);
 
 	free(piv_description);
+	return ret;
 }
 
 
@@ -243,7 +260,7 @@ starpu_data_handle_t get_block_with_no_striding(starpu_data_handle_t *dataAp, un
 	return dataAp[i+j*nblocks];
 }
 
-void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
+int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
 {
 	starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));
 
@@ -272,7 +289,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 	}
 
 	double timing;
-	timing = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding);
+	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
 
 	FPRINTF(stderr, "Computation took (in ms)\n");
 	FPRINTF(stderr, "%2.2f\n", timing/1000);
@@ -287,4 +304,5 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 		starpu_data_unregister(dataAp[bi+nblocks*bj]);
 	}
 	free(dataAp);
+	return ret;
 }

+ 8 - 2
examples/mult/xgemm.c

@@ -306,7 +306,12 @@ int main(int argc, char **argv)
 			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y);
 
 			int ret = starpu_task_submit(task);
-			STARPU_ASSERT(!ret);
+			if (ret == -ENODEV)
+			{
+			     ret = 77;
+			     goto enodev;
+			}
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		}
 
 		starpu_task_wait_for_all();
@@ -322,6 +327,7 @@ int main(int argc, char **argv)
 				*((unsigned long)ydim)*((unsigned long)zdim);
 	FPRINTF(stderr, "GFlop/s: %.2f\n", flops/timing/1000.0);
 
+enodev:
 	starpu_data_unpartition(C_handle, 0);
 	starpu_data_unpartition(B_handle, 0);
 	starpu_data_unpartition(A_handle, 0);
@@ -340,5 +346,5 @@ int main(int argc, char **argv)
 	starpu_helper_cublas_shutdown();
 	starpu_shutdown();
 
-	return 0;
+	return ret;
 }

+ 0 - 81
examples/opt/Makefile.am

@@ -1,81 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
-# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-AM_CFLAGS = $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
-LIBS = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) @LIBS@
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
-AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
-
-if STARPU_USE_CUDA
-
-NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ -I$(top_srcdir)/examples/ $(HWLOC_CFLAGS) -arch sm_13
-
-.cu.o:
-	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
-
-endif
-
-TESTS	=	$(check_PROGRAMS)
-
-check_PROGRAMS =
-
-examplebindir = $(libdir)/starpu/examples/
-
-examplebin_PROGRAMS =
-
-noinst_HEADERS = 				\
-	pi/SobolQRNG/sobol.h			\
-	pi/SobolQRNG/sobol_gold.h		\
-	pi/SobolQRNG/sobol_gpu.h		\
-	pi/SobolQRNG/sobol_primitives.h
-
-######
-# Pi #
-######
-
-check_PROGRAMS +=				\
-	pi/pi					\
-	pi/pi_redux
-
-examplebin_PROGRAMS +=				\
-	pi/pi					\
-	pi/pi_redux
-
-pi_pi_SOURCES =					\
-	pi/pi.c					\
-	pi/SobolQRNG/sobol_gold.c		\
-	pi/SobolQRNG/sobol_primitives.c
-
-if STARPU_USE_CUDA
-pi_pi_SOURCES +=				\
-	pi/pi_kernel.cu				\
-	pi/SobolQRNG/sobol_gpu.cu
-endif
-
-pi_pi_redux_SOURCES =				\
-	pi/pi_redux.c
-
-if STARPU_USE_CUDA
-pi_pi_redux_SOURCES +=				\
-	pi/pi_redux_kernel.cu
-pi_pi_redux_LDADD =				\
-	$(STARPU_CURAND_LDFLAGS)
-endif
-
-
-
-showcheck:
-	-cat $(TEST_LOGS) /dev/null

examples/opt/pi/SobolQRNG/CforCUDA_SDK_license.txt → examples/pi/SobolQRNG/CforCUDA_SDK_license.txt


examples/opt/pi/SobolQRNG/sobol.h → examples/pi/SobolQRNG/sobol.h


examples/opt/pi/SobolQRNG/sobol_gold.c → examples/pi/SobolQRNG/sobol_gold.c


examples/opt/pi/SobolQRNG/sobol_gold.h → examples/pi/SobolQRNG/sobol_gold.h


examples/opt/pi/SobolQRNG/sobol_gpu.cu → examples/pi/SobolQRNG/sobol_gpu.cu


examples/opt/pi/SobolQRNG/sobol_gpu.h → examples/pi/SobolQRNG/sobol_gpu.h


examples/opt/pi/SobolQRNG/sobol_primitives.c → examples/pi/SobolQRNG/sobol_primitives.c


examples/opt/pi/SobolQRNG/sobol_primitives.h → examples/pi/SobolQRNG/sobol_primitives.h


examples/opt/pi/pi.c → examples/pi/pi.c


examples/opt/pi/pi.h → examples/pi/pi.h


examples/opt/pi/pi_kernel.cu → examples/pi/pi_kernel.cu


+ 1 - 1
examples/opt/pi/pi_redux.c

@@ -53,7 +53,7 @@ static curandGenerator_t curandgens[STARPU_NMAXWORKERS];
 /* state for the erand48 function : note the huge padding to avoid false-sharing */
 #define PADDING	1024
 static unsigned short xsubi[STARPU_NMAXWORKERS*PADDING];
-static struct drand48_data randbuffer[STARPU_NMAXWORKERS*PADDING];
+static starpu_drand48_data randbuffer[STARPU_NMAXWORKERS*PADDING];
 
 /* Function to initialize the random number generator in the current worker */
 static void init_rng(void *arg __attribute__((unused)))

examples/opt/pi/pi_redux_kernel.cu → examples/pi/pi_redux_kernel.cu


+ 2 - 0
examples/spmv/dw_block_spmv.c

@@ -301,6 +301,8 @@ int main(__attribute__ ((unused)) int argc,
 
 	/* start the runtime */
 	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	sem_init(&sem, 0, 0U);

+ 2 - 0
examples/stencil/stencil.c

@@ -216,6 +216,8 @@ int main(int argc, char **argv)
 	}
 
 	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 #ifdef STARPU_USE_MPI

+ 15 - 7
examples/tag_example/tag_example.c

@@ -43,9 +43,15 @@
 
 struct starpu_codelet cl = {};
 
+#ifdef STARPU_SLOW_MACHINE
+#define Ni	32
+#define Nj	32
+#define Nk	32
+#else
 #define Ni	64
 #define Nj	32
 #define Nk	128
+#endif
 
 static unsigned ni = Ni, nj = Nj, nk = Nk;
 static unsigned callback_cnt;
@@ -97,7 +103,7 @@ static void tag_cleanup_grid(unsigned ni, unsigned nj, unsigned iter)
 
 } 
 
-static void create_task_grid(unsigned iter)
+static int create_task_grid(unsigned iter)
 {
 	unsigned i, j;
 	int ret;
@@ -123,6 +129,7 @@ static void create_task_grid(unsigned iter)
 		express_deps(i, j, iter);
 
 		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
@@ -140,9 +147,10 @@ static void create_task_grid(unsigned iter)
 		task->tag_id = TAG(0, j, iter);
 
 		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
-
+	return 0;
 }
 
 
@@ -220,17 +228,17 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	FPRINTF(stderr, "ITER: %u\n", nk);
 
-	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.cpu_funcs[0] = cpu_codelet;
 	cl.cuda_funcs[0] = cpu_codelet;
+	cl.opencl_funcs[0] = cpu_codelet;
 #ifdef STARPU_USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
 	cl.nbuffers = 0;
 
-	create_task_grid(0);
-
-	starpu_task_wait_for_all();
+	ret = create_task_grid(0);
+	if (ret == 0)
+	     starpu_task_wait_for_all();
 
 	tag_cleanup_grid(ni, nj, nk-2);
 	tag_cleanup_grid(ni, nj, nk-1);
@@ -239,5 +247,5 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	FPRINTF(stderr, "TEST DONE ...\n");
 
-	return 0;
+	return ret;
 }

+ 8 - 5
examples/tag_example/tag_example2.c

@@ -78,7 +78,7 @@ static void tag_cleanup_grid(unsigned ni, unsigned iter)
 		starpu_tag_remove(TAG(i,iter));
 } 
 
-static void create_task_grid(unsigned iter)
+static int create_task_grid(unsigned iter)
 {
 	int i;
 	int ret;
@@ -100,9 +100,10 @@ static void create_task_grid(unsigned iter)
 			starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter));
 
 		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
-
+	return 0;
 }
 
 void cpu_codelet(void *descr[] __attribute__ ((unused)), void *_args __attribute__ ((unused)))
@@ -133,17 +134,18 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	cl.cpu_funcs[0] = cpu_codelet;
 	cl.cuda_funcs[0] = cpu_codelet;
+	cl.opencl_funcs[0] = cpu_codelet;
 #ifdef STARPU_USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
-	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.nbuffers = 0;
 
 	FPRINTF(stderr, "ITER : %u\n", nk);
 
 	for (i = 0; i < nk; i++)
 	{
-		create_task_grid(i);
+		ret = create_task_grid(i);
+		if (ret == 77) goto enodev;
 
 		starpu_tag_wait(TAG(ni-1, i));
 
@@ -154,11 +156,12 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	starpu_task_wait_for_all();
 
+enodev:
 	tag_cleanup_grid(ni, nk-1);
 
 	starpu_shutdown();
 
 	FPRINTF(stderr, "TEST DONE ...\n");
 
-	return 0;
+	return ret;
 }

+ 8 - 5
examples/tag_example/tag_example3.c

@@ -80,7 +80,7 @@ static void tag_cleanup_grid(unsigned ni, unsigned iter)
 		starpu_tag_remove(TAG(i,iter));
 } 
 
-static void create_task_grid(unsigned iter)
+static int create_task_grid(unsigned iter)
 {
 	int i;
 	int ret;
@@ -102,9 +102,10 @@ static void create_task_grid(unsigned iter)
 			starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter));
 
 		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
-
+	return 0;
 }
 
 void cpu_codelet(void *descr[] __attribute__ ((unused)), void *_args __attribute__ ((unused)))
@@ -135,17 +136,18 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	cl.cpu_funcs[0] = cpu_codelet;
 	cl.cuda_funcs[0] = cpu_codelet;
+	cl.opencl_funcs[0] = cpu_codelet;
 #ifdef STARPU_USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
-	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.nbuffers = 0;
 
 	FPRINTF(stderr, "ITER : %u\n", nk);
 
 	for (i = 0; i < nk; i++)
 	{
-		create_task_grid(i);
+		ret = create_task_grid(i);
+		if (ret == 77) goto enodev;
 
 		starpu_tag_wait(TAG(ni-1, i));
 
@@ -154,9 +156,10 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 			tag_cleanup_grid(ni, i-1);
 	}
 
+enodev:
 	starpu_shutdown();
 
 	FPRINTF(stderr, "TEST DONE ...\n");
 
-	return 0;
+	return ret;
 }

+ 6 - 3
examples/tag_example/tag_example4.c

@@ -52,14 +52,14 @@ void cpu_codelet_B(void *descr[], void *_args)
 struct starpu_codelet cl_A = {
 	.cpu_funcs = { cpu_codelet_A, NULL},
 	.cuda_funcs = { cpu_codelet_A, NULL},
-	.where = STARPU_CPU|STARPU_CUDA,
+	.opencl_funcs = { cpu_codelet_A, NULL},
 	.nbuffers = 0,
 };
 
 struct starpu_codelet cl_B = {
 	.cpu_funcs = { cpu_codelet_B, NULL},
 	.cuda_funcs = { cpu_codelet_B, NULL},
-	.where = STARPU_CPU|STARPU_CUDA,
+	.opencl_funcs = { cpu_codelet_B, NULL},
 	.nbuffers = 0,
 };
 
@@ -114,6 +114,7 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 				task_A->tag_id = TAG(0, i);
 
 				ret = starpu_task_submit(task_A);
+				if (ret == -ENODEV) goto enodev;
 				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 			}
 
@@ -127,6 +128,7 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 				starpu_tag_declare_deps(TAG(j, i), 1, TAG(0, i));
 
 				ret = starpu_task_submit(task_B);
+				if (ret == -ENODEV) goto enodev;
 				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 			}
 		}
@@ -140,9 +142,10 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 			starpu_tag_remove(TAG(j, i));
 	}
 
+enodev:
 	starpu_shutdown();
 
 	FPRINTF(stderr, "TEST DONE ...\n");
 
-	return 0;
+	if (ret == -ENODEV) return 77; else return 0;
 }

+ 12 - 7
examples/tag_example/tag_restartable.c

@@ -95,7 +95,7 @@ static void create_task_grid(unsigned iter)
 
 }
 
-static void start_task_grid(unsigned iter)
+static int start_task_grid(unsigned iter)
 {
 	unsigned i;
 	int ret;
@@ -104,8 +104,10 @@ static void start_task_grid(unsigned iter)
 
 	for (i = 0; i < ni; i++) {
 		ret = starpu_task_submit(tasks[iter][i]);
+		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
+	return 0;
 }
 
 void cpu_codelet(void *descr[], void *_args __attribute__((unused)))
@@ -136,10 +138,10 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	cl.cpu_funcs[0] = cpu_codelet;
 	cl.cuda_funcs[0] = cpu_codelet;
+	cl.opencl_funcs[0] = cpu_codelet;
 #ifdef STARPU_USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
-	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.nbuffers = 0;
 
 	FPRINTF(stderr, "ITER : %u\n", nk);
@@ -153,16 +155,19 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	for (i = 0; i < nk; i++)
 	{
-		start_task_grid(i % Nrolls);
+	     ret = start_task_grid(i % Nrolls);
+	     if (ret == 77) goto enodev;
 
-		if (i+1 >= Nrolls)
+	     if (i+1 >= Nrolls)
 			/* Wait before re-using same tasks & tags */
-			starpu_tag_wait(TAG(ni-1, i + 1));
+		  starpu_tag_wait(TAG(ni-1, i + 1));
 	}
 
 	starpu_shutdown();
-
 	FPRINTF(stderr, "TEST DONE ...\n");
+	return EXIT_SUCCESS;
 
-	return 0;
+enodev:
+	starpu_shutdown();
+	return 77;
 }