소스 검색

- merge trunk

Olivier Aumage 11 년 전
부모
커밋
de8b0ac09b

+ 7 - 0
ChangeLog

@@ -87,6 +87,13 @@ Small changes:
   * Rename function starpu_trace_user_event() as
     starpu_fxt_trace_user_event()
 
+StarPU 1.1.3 (svn revision xxx)
+==============================================
+The scheduling context release
+
+New features:
+  * One can register an existing on-GPU buffer to be used by a handle.
+
 StarPU 1.1.2 (svn revision xxx)
 ==============================================
 The scheduling context release

+ 26 - 0
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -29,6 +29,8 @@ Return a 32bit footprint which characterizes the data size.
 Compare the data size of two interfaces.
 \var starpu_data_interface_ops::display
 Dump the sizes of a handle to a file.
+\var starpu_data_interface_ops::describe
+Describe the data into a string.
 \var starpu_data_interface_ops::interfaceid
 An identifier that is unique to each interface.
 \var starpu_data_interface_ops::interface_size
@@ -241,6 +243,12 @@ starpu_data_handle_t var_handle;
 starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
 \endcode
 
+\fn void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset)
+\ingroup API_Data_Interfaces
+Register into the \p handle that to store data on node \p node it should use the
+buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
+(for OpenCL, notably)
+
 \fn void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize)
 \ingroup API_Data_Interfaces
 Register the \p nx elemsize-byte elements pointed to by \p ptr and initialize \p handle to represent it.
@@ -252,6 +260,12 @@ starpu_data_handle_t vector_handle;
 starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
 \endcode
 
+\fn void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset)
+\ingroup API_Data_Interfaces
+Register into the \p handle that to store data on node \p node it should use the
+buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
+(for OpenCL, notably)
+
 \fn void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize)
 \ingroup API_Data_Interfaces
 Register the \p nx x \p  ny 2D matrix of \p elemsize-byte elements pointed
@@ -267,6 +281,12 @@ matrix = (float*)malloc(width * height * sizeof(float));
 starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
 \endcode
 
+\fn void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld)
+\ingroup API_Data_Interfaces
+Register into the \p handle that to store data on node \p node it should use the
+buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
+(for OpenCL, notably), with \p ld elements between rows.
+
 \fn void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize)
 \ingroup API_Data_Interfaces
 Register the \p nx x \p ny x \p nz 3D matrix of \p elemsize byte elements
@@ -281,6 +301,12 @@ block = (float*)malloc(nx*ny*nz*sizeof(float));
 starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
 \endcode
 
+\fn void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz)
+\ingroup API_Data_Interfaces
+Register into the \p handle that to store data on node \p node it should use the
+buffer located at \p ptr, or device handle \p dev_handle and offset \p offset
+(for OpenCL, notably), with \p ldy elements between rows and \ldz elements between z planes.
+
 \fn void starpu_bcsr_data_register(starpu_data_handle_t *handle, unsigned home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize)
 \ingroup API_Data_Interfaces
 This variant of starpu_data_register() uses the BCSR (Blocked

+ 6 - 0
doc/doxygen/chapters/api/data_management.doxy

@@ -109,6 +109,12 @@ vector or matrix) which can be registered by the means of helper
 functions (e.g. starpu_vector_data_register() or
 starpu_matrix_data_register()).
 
+\fn void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node)
+\ingroup API_Data_Management
+Register that a buffer for \p handle on \p node will be set. This is typically
+used by starpu_*_ptr_register helpers before setting the interface pointers for
+this node, to tell the core that that is now allocated.
+
 \fn void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc)
 \ingroup API_Data_Management
 Register a new piece of data into the handle \p handledst with the

+ 5 - 1
examples/scheduler/schedulers.sh

@@ -23,7 +23,11 @@ check_success()
     fi
 }
 
-[ -x ./cholesky/cholesky_tag ] || exit 77
+if test ! -x ./cholesky/cholesky_tag
+then
+    echo "Application ./cholesky/cholesky_tag unavailable"
+    exit 77
+fi
 
 SCHEDULERS=`STARPU_SCHED="help" ./basic_examples/hello_world 2>&1 | awk '/\t->/ {print $1}'`
 

+ 7 - 1
include/starpu_data_interfaces.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2011-2012  Institut National de Recherche en Informatique et Automatique
  *
@@ -111,6 +111,7 @@ struct starpu_data_interface_ops
 	uint32_t 	 (*footprint)			(starpu_data_handle_t handle);
 	int 		 (*compare)			(void *data_interface_a, void *data_interface_b);
 	void 		 (*display)			(starpu_data_handle_t handle, FILE *f);
+	ssize_t		 (*describe)			(void *interface, char *buf, size_t size);
 	enum starpu_data_interface_id interfaceid;
 	size_t interface_size;
 
@@ -124,6 +125,7 @@ struct starpu_data_interface_ops
 int starpu_data_interface_get_next_id(void);
 
 void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops);
+void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node);
 void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
 
 void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node);
@@ -147,6 +149,7 @@ struct starpu_matrix_interface
 };
 
 void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize);
+void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld);
 uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle);
 uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle);
 uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle);
@@ -215,6 +218,7 @@ struct starpu_block_interface
 };
 
 void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize);
+void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz);
 uint32_t starpu_block_get_nx(starpu_data_handle_t handle);
 uint32_t starpu_block_get_ny(starpu_data_handle_t handle);
 uint32_t starpu_block_get_nz(starpu_data_handle_t handle);
@@ -245,6 +249,7 @@ struct starpu_vector_interface
 };
 
 void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize);
+void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
 uint32_t starpu_vector_get_nx(starpu_data_handle_t handle);
 size_t starpu_vector_get_elemsize(starpu_data_handle_t handle);
 uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
@@ -266,6 +271,7 @@ struct starpu_variable_interface
 };
 
 void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size);
+void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
 size_t starpu_variable_get_elemsize(starpu_data_handle_t handle);
 uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle);
 

+ 2 - 2
mpi/examples/mpi_lu/pxlu_implicit.c

@@ -133,14 +133,14 @@ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
 	{
 		create_task_11(k);
 
-		starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,k));
-
 		for (i = k+1; i<nblocks; i++)
 		{
 			create_task_12(k, i);
 			create_task_21(k, i);
 		}
 
+		starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,k));
+
 		for (i = k+1; i<nblocks; i++)
 		{
 			for (j = k+1; j<nblocks; j++)

+ 17 - 0
src/common/fxt.h

@@ -109,6 +109,7 @@
 #define _STARPU_FUT_THREAD_EVENT	0x513d
 
 #define	_STARPU_FUT_CODELET_DETAILS	0x513e
+#define	_STARPU_FUT_CODELET_DATA	0x513f
 
 #define _STARPU_FUT_LOCKING_MUTEX	0x5140	
 #define _STARPU_FUT_MUTEX_LOCKED	0x5141	
@@ -421,6 +422,22 @@ do {									\
 		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, workerid, 0); \
 	}								\
 	{								\
+		if ((job)->task->cl)					\
+		{							\
+			const int __nbuffers = (job)->task->cl->nbuffers;	\
+			char __buf[FXT_MAX_PARAMS*sizeof(long)];	\
+			int __i;					\
+			for (__i = 0; __i < __nbuffers; __i++)		\
+			{						\
+				starpu_data_handle_t __handle = STARPU_TASK_GET_HANDLE((job)->task, __i);	\
+				void *__interface = _STARPU_TASK_GET_INTERFACES((job)->task)[__i];	\
+				if (__handle->ops->describe)		\
+				{					\
+					__handle->ops->describe(__interface, __buf, sizeof(__buf));	\
+					_STARPU_FUT_DO_PROBE1STR(_STARPU_FUT_CODELET_DATA, workerid, __buf);	\
+				}					\
+			}						\
+		}							\
 		const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
 		FUT_DO_PROBE6(_STARPU_FUT_CODELET_DETAILS, (job), ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->tag_id, workerid);	\

+ 1 - 1
src/datawizard/coherency.c

@@ -725,7 +725,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j)
 	int workerid = starpu_worker_get_id();
 
 #ifdef STARPU_USE_FXT
-	unsigned total_size = 0;
+	unsigned long total_size = 0;
 #endif
 
 	unsigned index;

+ 15 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013  Université de Bordeaux 1
+ * Copyright (C) 2009-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -44,6 +44,7 @@ static void free_bcsr_buffer_on_node(void *data_interface, unsigned node);
 static size_t bcsr_interface_get_size(starpu_data_handle_t handle);
 static int bcsr_compare(void *data_interface_a, void *data_interface_b);
 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 
 struct starpu_data_interface_ops starpu_interface_bcsr_ops =
@@ -56,7 +57,8 @@ struct starpu_data_interface_ops starpu_interface_bcsr_ops =
 	.interfaceid = STARPU_BCSR_INTERFACE_ID,
 	.interface_size = sizeof(struct starpu_bcsr_interface),
 	.footprint = footprint_bcsr_interface_crc32,
-	.compare = bcsr_compare
+	.compare = bcsr_compare,
+	.describe = describe
 };
 
 static void register_bcsr_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
@@ -324,3 +326,14 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	return ret;
 }
+
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) interface;
+	return snprintf(buf, size, "b%ux%ux%ux%ux%u",
+			(unsigned) bcsr->nnz,
+			(unsigned) bcsr->nrow,
+			(unsigned) bcsr->r,
+			(unsigned) bcsr->c,
+			(unsigned) bcsr->elemsize);
+}

+ 27 - 3
src/datawizard/interfaces/block_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -77,6 +77,7 @@ static int block_compare(void *data_interface_a, void *data_interface_b);
 static void display_block_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_block_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_block_ops =
 {
@@ -92,7 +93,8 @@ struct starpu_data_interface_ops starpu_interface_block_ops =
 	.interface_size = sizeof(struct starpu_block_interface),
 	.display = display_block_interface,
 	.pack_data = pack_block_handle,
-	.unpack_data = unpack_block_handle
+	.unpack_data = unpack_block_handle,
+	.describe = describe
 };
 
 static void *block_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
@@ -167,6 +169,18 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, unsigned home_n
 	starpu_data_register(handleptr, home_node, &block_interface, &starpu_interface_block_ops);
 }
 
+void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node,
+			uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz)
+{
+	struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(handle, node);
+	starpu_data_ptr_register(handle, node);
+	block_interface->ptr = ptr;
+	block_interface->dev_handle = dev_handle;
+	block_interface->offset = offset;
+	block_interface->ldy = ldy;
+	block_interface->ldz = ldz;
+}
+
 static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle)
 {
 	uint32_t hash;
@@ -716,3 +730,13 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	return ret;
 }
+
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_block_interface *block = (struct starpu_block_interface *) interface;
+	return snprintf(buf, size, "B%ux%ux%ux%u",
+			(unsigned) block->nx,
+			(unsigned) block->ny,
+			(unsigned) block->nz,
+			(unsigned) block->elemsize);
+}

+ 12 - 1
src/datawizard/interfaces/coo_interface.c

@@ -190,6 +190,16 @@ display_coo_interface(starpu_data_handle_t handle, FILE *f)
 	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
 }
 
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_coo_interface *coo = (struct starpu_coo_interface *) interface;
+	return snprintf(buf, size, "M%ux%ux%ux%u",
+			(unsigned) coo->nx,
+			(unsigned) coo->ny,
+			(unsigned) coo->n_values,
+			(unsigned) coo->elemsize);
+}
+
 struct starpu_data_interface_ops starpu_interface_coo_ops =
 {
 	.register_data_handle  = register_coo_handle,
@@ -202,7 +212,8 @@ struct starpu_data_interface_ops starpu_interface_coo_ops =
 	.compare               = coo_compare,
 	.interfaceid           = STARPU_COO_INTERFACE_ID,
 	.interface_size        = sizeof(struct starpu_coo_interface),
-	.display               = display_coo_interface
+	.display               = display_coo_interface,
+	.describe              = describe
 };
 
 void

+ 12 - 1
src/datawizard/interfaces/csr_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013  Université de Bordeaux 1
+ * Copyright (C) 2009-2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
@@ -43,6 +43,7 @@ static void free_csr_buffer_on_node(void *data_interface, unsigned node);
 static size_t csr_interface_get_size(starpu_data_handle_t handle);
 static int csr_compare(void *data_interface_a, void *data_interface_b);
 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_csr_ops =
 {
@@ -55,6 +56,7 @@ struct starpu_data_interface_ops starpu_interface_csr_ops =
 	.interface_size = sizeof(struct starpu_csr_interface),
 	.footprint = footprint_csr_interface_crc32,
 	.compare = csr_compare,
+	.describe = describe
 };
 
 static void register_csr_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
@@ -290,3 +292,12 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	return ret;
 }
+
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_csr_interface *csr = (struct starpu_csr_interface *) interface;
+	return snprintf(buf, size, "C%ux%ux%u",
+			(unsigned) csr->nnz,
+			(unsigned) csr->nrow,
+			(unsigned) csr->elemsize);
+}

+ 11 - 0
src/datawizard/interfaces/data_interface.c

@@ -373,6 +373,17 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 	}
 }
 
+void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node)
+{
+	struct _starpu_data_replicate *replicate = &handle->per_node[node];
+
+	_starpu_spin_lock(&handle->header_lock);
+	STARPU_ASSERT_MSG(replicate->allocated == 0, "starpu_data_ptr_register must be called right after starpu_data_register");
+	replicate->allocated = 1;
+	replicate->automatically_allocated = 0;
+	_starpu_spin_unlock(&handle->header_lock);
+}
+
 int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_interface_ops *interface_ops, unsigned int mf_node)
 {
 	unsigned node;

+ 25 - 3
src/datawizard/interfaces/matrix_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -90,6 +90,7 @@ static int matrix_compare(void *data_interface_a, void *data_interface_b);
 static void display_matrix_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_matrix_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_matrix_ops =
 {
@@ -105,7 +106,8 @@ struct starpu_data_interface_ops starpu_interface_matrix_ops =
 	.interface_size = sizeof(struct starpu_matrix_interface),
 	.display = display_matrix_interface,
 	.pack_data = pack_matrix_handle,
-	.unpack_data = unpack_matrix_handle
+	.unpack_data = unpack_matrix_handle,
+	.describe = describe
 };
 
 static void register_matrix_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
@@ -176,6 +178,17 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, unsigned home_
 	starpu_data_register(handleptr, home_node, &matrix_interface, &starpu_interface_matrix_ops);
 }
 
+void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node,
+			uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld)
+{
+	struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(handle, node);
+	starpu_data_ptr_register(handle, node);
+	matrix_interface->ptr = ptr;
+	matrix_interface->dev_handle = dev_handle;
+	matrix_interface->offset = offset;
+	matrix_interface->ld = ld;
+}
+
 static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle)
 {
 	return starpu_hash_crc32c_be(starpu_matrix_get_nx(handle), starpu_matrix_get_ny(handle));
@@ -665,3 +678,12 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	return ret;
 }
+
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) interface;
+	return snprintf(buf, size, "M%ux%ux%u",
+			(unsigned) matrix->nx,
+			(unsigned) matrix->ny,
+			(unsigned) matrix->elemsize);
+}

+ 21 - 3
src/datawizard/interfaces/variable_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,6 +44,7 @@ static int variable_compare(void *data_interface_a, void *data_interface_b);
 static void display_variable_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_variable_ops =
 {
@@ -59,7 +60,8 @@ struct starpu_data_interface_ops starpu_interface_variable_ops =
 	.interface_size = sizeof(struct starpu_variable_interface),
 	.display = display_variable_interface,
 	.pack_data = pack_variable_handle,
-	.unpack_data = unpack_variable_handle
+	.unpack_data = unpack_variable_handle,
+	.describe = describe
 };
 
 static void *variable_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
@@ -117,6 +119,16 @@ void starpu_variable_data_register(starpu_data_handle_t *handleptr, unsigned hom
 	starpu_data_register(handleptr, home_node, &variable, &starpu_interface_variable_ops);
 }
 
+void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node,
+			uintptr_t ptr, uintptr_t dev_handle, size_t offset)
+{
+	struct starpu_variable_interface *variable_interface = starpu_data_get_interface_on_node(handle, node);
+	starpu_data_ptr_register(handle, node);
+	variable_interface->ptr = ptr;
+	variable_interface->dev_handle = dev_handle;
+	variable_interface->offset = offset;
+}
+
 
 static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle)
 {
@@ -235,3 +247,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	return ret;
 }
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_variable_interface *variable = (struct starpu_variable_interface *) interface;
+	return snprintf(buf, size, "v%u",
+			(unsigned) variable->elemsize);
+}

+ 22 - 3
src/datawizard/interfaces/vector_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,6 +44,7 @@ static int vector_compare(void *data_interface_a, void *data_interface_b);
 static void display_vector_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_vector_ops =
 {
@@ -59,7 +60,8 @@ struct starpu_data_interface_ops starpu_interface_vector_ops =
 	.interface_size = sizeof(struct starpu_vector_interface),
 	.display = display_vector_interface,
 	.pack_data = pack_vector_handle,
-	.unpack_data = unpack_vector_handle
+	.unpack_data = unpack_vector_handle,
+	.describe = describe
 };
 
 static void *vector_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
@@ -122,6 +124,16 @@ void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_
 	starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops);
 }
 
+void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node,
+			uintptr_t ptr, uintptr_t dev_handle, size_t offset)
+{
+	struct starpu_vector_interface *vector_interface = starpu_data_get_interface_on_node(handle, node);
+	starpu_data_ptr_register(handle, node);
+	vector_interface->ptr = ptr;
+	vector_interface->dev_handle = dev_handle;
+	vector_interface->offset = offset;
+}
+
 
 static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle)
 {
@@ -273,3 +285,10 @@ static int copy_any_to_any(void *src_interface, unsigned src_node,
 	return ret;
 }
 
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	struct starpu_vector_interface *vector = (struct starpu_vector_interface *) interface;
+	return snprintf(buf, size, "V%ux%u",
+			(unsigned) vector->nx,
+			(unsigned) vector->elemsize);
+}

+ 9 - 2
src/datawizard/interfaces/void_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012-2013  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012-2014  Université de Bordeaux 1
  * Copyright (C) 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -41,6 +41,7 @@ static int void_compare(void *data_interface_a, void *data_interface_b);
 static void display_void_interface(starpu_data_handle_t handle, FILE *f);
 static int pack_void_handle(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count);
 static int unpack_void_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count);
+static ssize_t describe(void *interface, char *buf, size_t size);
 
 struct starpu_data_interface_ops starpu_interface_void_ops =
 {
@@ -55,7 +56,8 @@ struct starpu_data_interface_ops starpu_interface_void_ops =
 	.interface_size = 0,
 	.display = display_void_interface,
 	.pack_data = pack_void_handle,
-	.unpack_data = unpack_void_handle
+	.unpack_data = unpack_void_handle,
+	.describe = describe
 };
 
 static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED,
@@ -137,3 +139,8 @@ static int dummy_copy(void *src_interface STARPU_ATTRIBUTE_UNUSED,
 {
 	return 0;
 }
+
+static ssize_t describe(void *interface, char *buf, size_t size)
+{
+	return snprintf(buf, size, "0");
+}

+ 33 - 3
src/debug/traces/starpu_fxt.c

@@ -113,6 +113,9 @@ static unsigned get_colour_symbol_blue(char *name)
 static double last_codelet_start[STARPU_NMAXWORKERS];
 /* _STARPU_FUT_DO_PROBE4STR records only 4 longs */
 static char last_codelet_symbol[STARPU_NMAXWORKERS][4*sizeof(unsigned long)];
+static int last_codelet_parameter[STARPU_NMAXWORKERS];
+#define MAX_PARAMETERS 8
+static char last_codelet_parameter_description[STARPU_NMAXWORKERS][MAX_PARAMETERS][FXT_MAX_PARAMS*sizeof(unsigned long)];
 
 /* If more than a period of time has elapsed, we flush the profiling info,
  * otherwise they are accumulated everytime there is a new relevant event. */
@@ -318,7 +321,7 @@ static void thread_pop_state(double time, const char *prefix, long unsigned int
 #endif
 }
 
-static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, unsigned long footprint, unsigned long long tag)
+static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, const char *parameters, unsigned long footprint, unsigned long long tag)
 {
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
@@ -326,7 +329,7 @@ static void worker_set_detailed_state(double time, const char *prefix, long unsi
 	/* TODO: set detailed state */
 	poti_SetState(time, container, "WS", name);
 #else
-	fprintf(out_paje_file, "20	%.9f	%sw%lu	WS	%s	%lu	%08lx	%016llx\n", time, prefix, workerid, name, size, footprint, tag);
+	fprintf(out_paje_file, "20	%.9f	%sw%lu	WS	%s	%lu	%s	%08lx	%016llx\n", time, prefix, workerid, name, size, parameters, footprint, tag);
 #endif
 }
 
@@ -675,6 +678,7 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 	char *name = has_name?(char *)&ev->param[4]:"unknown";
 
 	snprintf(last_codelet_symbol[worker], sizeof(last_codelet_symbol[worker]), "%s", name);
+	last_codelet_parameter[worker] = 0;
 
 	double start_codelet_time = get_event_time_stamp(ev, options);
 	last_codelet_start[worker] = start_codelet_time;
@@ -705,6 +709,21 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 
 }
 
+static void handle_codelet_data(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+#ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
+	int worker = ev->param[0];
+	if (worker < 0) return;
+	if (out_paje_file)
+	{
+		int num = last_codelet_parameter[worker]++;
+		if (num >= MAX_PARAMETERS)
+			return;
+		snprintf(last_codelet_parameter_description[worker][num], sizeof(last_codelet_parameter_description[worker][num]), "%s", (char*) &ev->param[1]);
+	}
+#endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
+}
+
 static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 #ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
@@ -717,7 +736,15 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
 	if (out_paje_file)
 	{
-		worker_set_detailed_state(last_codelet_start[worker], prefix, ev->param[5], last_codelet_symbol[worker], ev->param[2], ev->param[3], ev->param[4]);
+		int i;
+		char parameters[256];
+		size_t eaten = 0;
+		for (i = 0; i < last_codelet_parameter[worker] && i < MAX_PARAMETERS; i++)
+		{
+			eaten += snprintf(parameters + eaten, sizeof(parameters) - eaten, "%s%s", i?"_":"", last_codelet_parameter_description[worker][i]);
+		}
+
+		worker_set_detailed_state(last_codelet_start[worker], prefix, ev->param[5], last_codelet_symbol[worker], ev->param[2], parameters, ev->param[3], ev->param[4]);
 		if (sched_ctx != 0)
 		{
 #ifdef STARPU_HAVE_POTI
@@ -1642,6 +1669,9 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 			case _STARPU_FUT_START_CODELET_BODY:
 				handle_start_codelet_body(&ev, options);
 				break;
+			case _STARPU_FUT_CODELET_DATA:
+				handle_codelet_data(&ev, options);
+				break;
 			case _STARPU_FUT_CODELET_DETAILS:
 				handle_codelet_details(&ev, options);
 				break;

+ 1 - 0
src/debug/traces/starpu_paje.c

@@ -137,6 +137,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	fprintf(file, "%%	Type	string\n");
 	fprintf(file, "%%	Value	string\n");
 	fprintf(file, "%%	Size	string\n");
+	fprintf(file, "%%	Params	string\n");
 	fprintf(file, "%%	Footprint	string\n");
 	fprintf(file, "%%	Tag	string\n");
 	fprintf(file, "%%EndEventDef\n");

+ 13 - 0
tests/Makefile.am

@@ -198,6 +198,7 @@ noinst_PROGRAMS =				\
 	datawizard/in_place_partition   	\
 	datawizard/partition_lazy		\
 	datawizard/gpu_register   		\
+	datawizard/gpu_ptr_register   		\
 	datawizard/wt_host			\
 	datawizard/wt_broadcast			\
 	datawizard/readonly			\
@@ -413,6 +414,18 @@ datawizard_gpu_register_SOURCES +=	\
 	datawizard/scal_opencl.cl
 endif
 
+datawizard_gpu_ptr_register_SOURCES =	\
+	datawizard/gpu_ptr_register.c	\
+	datawizard/scal.c
+if STARPU_USE_CUDA
+datawizard_gpu_ptr_register_SOURCES +=	\
+	datawizard/scal_cuda.cu
+endif
+if STARPU_USE_OPENCL
+datawizard_gpu_ptr_register_SOURCES +=	\
+	datawizard/scal_opencl.cl
+endif
+
 datawizard_wt_host_SOURCES =			\
 	datawizard/wt_host.c
 datawizard_wt_broadcast_SOURCES =		\

+ 293 - 0
tests/datawizard/gpu_ptr_register.c

@@ -0,0 +1,293 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2012, 2014  Université de Bordeaux 1
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+#include "scal.h"
+
+#if ! (defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA))
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
+static int
+submit_tasks(starpu_data_handle_t handle, int pieces, int n)
+{
+	int i, ret;
+
+	for (i = 0; i < pieces; i++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
+		task->cl = &scal_codelet;
+		task->execute_on_a_specific_worker = 1;
+		task->workerid = i%n;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV)
+			return -ENODEV;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	return 0;
+}
+
+static int
+find_a_worker(enum starpu_worker_archtype type)
+{
+	int worker;
+	int ret = starpu_worker_get_ids_by_type(type, &worker, 1);
+	if (ret == 0)
+		return -ENODEV;
+	return worker;
+}
+
+static int
+check_result(unsigned *t, size_t size)
+{
+	unsigned i;
+	for (i = 0; i < size; i++)
+	{
+		if (t[i] != i*2)
+		{
+			FPRINTF(stderr,"t[%d] is %u instead of %u\n", i, t[i], 2*i);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#ifdef STARPU_USE_CUDA
+#if CUDART_VERSION >= 4000
+static int
+test_cuda(void)
+{
+	int ret;
+	unsigned *foo_gpu;
+	unsigned *foo;
+	int n, i, size, pieces;
+	int devid;
+	int chosen;
+	cudaError_t cures;
+	starpu_data_handle_t handle;
+
+	/* Find a CUDA worker */
+	chosen = find_a_worker(STARPU_CUDA_WORKER);
+	if (chosen == -ENODEV)
+		return -ENODEV;
+
+	n = starpu_worker_get_count();
+	size = 10 * n;
+
+	devid = starpu_worker_get_devid(chosen);
+	starpu_cuda_set_device(devid);
+	cudaMalloc((void**)&foo_gpu, size * sizeof(*foo_gpu));
+
+	foo = calloc(size, sizeof(*foo));
+	for (i = 0; i < size; i++)
+		foo[i] = i;
+
+	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo));
+	starpu_vector_ptr_register(handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, (uintptr_t)foo_gpu, 0);
+
+	/* Broadcast the data to force in-place partitioning */
+	for (i = 0; i < n; i++)
+		starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);
+
+	/* Even with just one worker, split in at least two */
+	if (n == 1)
+		pieces = 2;
+	else
+		pieces = n;
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_vector_filter_block,
+		.nchildren = pieces,
+	};
+
+	starpu_data_partition(handle, &f);
+
+	ret = submit_tasks(handle, pieces, n);
+	if (ret == -ENODEV)
+		return -ENODEV;
+
+	starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
+	starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0);
+	starpu_data_unregister(handle);
+
+	starpu_cuda_set_device(devid);
+	cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	return check_result(foo, size);
+}
+#endif
+#endif
+
+#ifdef STARPU_USE_OPENCL
+static int
+test_opencl(void)
+{
+	int i;
+	int ret;
+	int chosen;
+	int n;
+	int size;
+	int pieces;
+	cl_mem foo_gpu;
+	starpu_data_handle_t handle;
+
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	/* Find an OpenCL worker */
+	chosen = find_a_worker(STARPU_OPENCL_WORKER);
+	if (chosen == -ENODEV)
+		return -ENODEV;
+
+	n = starpu_worker_get_count();
+	size = 10 * n;
+
+	int devid;
+	cl_int err;
+	cl_context context;
+	cl_command_queue queue;
+
+	devid = starpu_worker_get_devid(chosen);
+
+	starpu_opencl_get_context(devid, &context);
+	starpu_opencl_get_queue(devid, &queue);
+
+	foo_gpu = clCreateBuffer(context, CL_MEM_READ_WRITE, size*sizeof(int), NULL, &err);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	unsigned int *foo = malloc(size*sizeof(*foo));
+	for (i = 0; i < size; i++)
+		foo[i] = i;
+
+	starpu_vector_data_register(&handle,
+				    STARPU_MAIN_RAM,
+				    (uintptr_t)foo,
+				    size,
+				    sizeof(int));
+
+	starpu_vector_ptr_register(handle,
+				    starpu_worker_get_memory_node(chosen),
+				    (uintptr_t)foo_gpu,
+				    (uintptr_t)foo_gpu,
+				    0);
+
+	/* Broadcast the data to force in-place partitioning */
+	for (i = 0; i < n; i++)
+		starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);
+
+	/* Even with just one worker, split in at least two */
+	if (n == 1)
+		pieces = 2;
+	else
+		pieces = n;
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_vector_filter_block,
+		.nchildren = pieces,
+	};
+
+	starpu_data_partition(handle, &f);
+
+	ret = submit_tasks(handle, pieces, n);
+	if (ret == -ENODEV)
+		return -ENODEV;
+
+	starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
+	starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0);
+	starpu_data_unregister(handle);
+
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	err = clEnqueueReadBuffer(queue,
+				  foo_gpu,
+				  CL_FALSE,
+				  0,
+				  size*sizeof(*foo),
+				  foo,
+				  0,
+				  NULL,
+				  NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	clFinish(queue);
+	return check_result(foo, size);
+}
+#endif /* !STARPU_USE_OPENCL */
+
+int main(int argc, char **argv)
+{
+	int skipped_cuda = 1, skipped_opencl = 1;
+	int ret;
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
+#ifdef STARPU_USE_CUDA
+#if CUDART_VERSION >= 4000 /* We need thread-safety of CUDA */
+	ret = test_cuda();
+	if (ret == 1)
+		goto fail;
+	else if (ret == 0)
+		skipped_cuda = 0;
+#endif
+#endif
+
+#ifdef STARPU_USE_OPENCL
+	ret = test_opencl();
+	if (ret == 1)
+		goto fail;
+	else if (ret == 0)
+		skipped_opencl = 0;
+#endif
+
+	starpu_shutdown();
+
+	if (skipped_cuda == 1 && skipped_opencl == 1)
+		return STARPU_TEST_SKIPPED;
+
+	return EXIT_SUCCESS;
+
+fail:
+	starpu_shutdown();
+	return EXIT_FAILURE;
+}
+
+#endif /* defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) */