13 years ago · 260aae8a33
--- a/examples/basic_examples/multiformat.c
+++ b/examples/basic_examples/multiformat.c
@@ -74,7 +74,7 @@ static struct starpu_perfmodel conversion_model = {
 
				 };
			
 
				 
			
 
				 static struct starpu_codelet  cl = {
			
 
				-	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
			
 
				+	.where = STARPU_CUDA | STARPU_OPENCL,
			
 
				 	.cpu_func = multiformat_scal_cpu_func,
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	.cuda_func = multiformat_scal_cuda_func,
			
@@ -112,24 +112,40 @@ register_data(void)
 
				 static void
			
 
				 create_and_submit_tasks(void)
			
 
				 {
			
 
				-	struct starpu_task *task = starpu_task_create();
			
 
				+	int err;
			
 
				 
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	struct starpu_task *task = starpu_task_create();
			
 
				+	cl.where = STARPU_CUDA;
			
 
				 	task->cl = &cl;
			
 
				 	task->synchronous = 1;
			
 
				 	task->buffers[0].handle = array_of_structs_handle;
			
 
				 	task->buffers[0].mode = STARPU_RW;
			
 
				 	task->cl_arg = NULL;
			
 
				 	task->cl_arg_size = 0;
			
 
				-	starpu_task_submit(task);
			
 
				+	err = starpu_task_submit(task);
			
 
				+	if (err != 0)
			
 
				+	{
			
 
				+		fprintf(stderr, "Err : %s\n", strerror(-err));
			
 
				+		return;
			
 
				+	}
			
 
				+#endif
			
 
				 
			
 
				 	struct starpu_task *task2 = starpu_task_create();
			
 
				+	cl.where = STARPU_CPU;
			
 
				 	task2->cl = &cl;
			
 
				 	task2->synchronous = 1;
			
 
				 	task2->buffers[0].handle = array_of_structs_handle;
			
 
				 	task2->buffers[0].mode = STARPU_RW;
			
 
				 	task2->cl_arg = NULL;
			
 
				+	cl.where = STARPU_CPU;
			
 
				 	task2->cl_arg_size = 0;
			
 
				-	starpu_task_submit(task2);
			
 
				+	err = starpu_task_submit(task2);
			
 
				+	if (err != 0)
			
 
				+	{
			
 
				+		fprintf(stderr, "Err : %s\n", strerror(-err));
			
 
				+		return;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -156,7 +172,9 @@ check_it(void)
 
				 	int i;
			
 
				 	for (i = 0; i < N_ELEMENTS; i++) {
			
 
				 		float expected_value = i + 1.0;
			
 
				+#if STARPU_USE_CUDA
			
 
				 		expected_value *= array_of_structs[i].y;
			
 
				+#endif
			
 
				 		expected_value *= array_of_structs[i].y;
			
 
				 		if (array_of_structs[i].x != expected_value)
			
 
				 			return EXIT_FAILURE;
			
@@ -172,6 +190,7 @@ struct starpu_opencl_program opencl_conversion_program;
 
				 int
			
 
				 main(void)
			
 
				 {
			
 
				+#ifdef STARPU_USE_CPU
			
 
				 	starpu_init(NULL);
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
@@ -200,4 +219,9 @@ main(void)
 
				 
			
 
				 
			
 
				 	return check_it();
			
 
				+#else
			
 
				+	/* Without the CPU, there is no point in using the multiformat
			
 
				+	 * interface, so this test is pointless. */
			
 
				+	return EXIT_SUCCESS;
			
 
				+#endif
			
 
				 }
			
--- a/examples/basic_examples/multiformat_conversion_codelets.c
+++ b/examples/basic_examples/multiformat_conversion_codelets.c
@@ -17,18 +17,6 @@
 
				 #include "multiformat_types.h"
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-void cpu_to_cuda(void *buffers[], void *arg)
			
 
				-{
			
 
				-	struct point *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				-	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
			
 
				-	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				-	int i;
			
 
				-	for (i = 0; i < n; i++) {
			
 
				-		dst->x[i] = src[i].x;
			
 
				-		dst->y[i] = src[i].y;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 void cuda_to_cpu(void *buffers[], void *arg)
			
 
				 {
			
 
				 	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
			
@@ -44,7 +32,6 @@ void cuda_to_cpu(void *buffers[], void *arg)
 
				 extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
			
 
				 struct starpu_codelet cpu_to_cuda_cl = {
			
 
				 	.where = STARPU_CUDA,
			
 
				-	.cpu_func = cpu_to_cuda,
			
 
				 	.cuda_func = cpu_to_cuda_cuda_func,
			
 
				 	.nbuffers = 1
			
 
				 };
			
@@ -57,19 +44,6 @@ struct starpu_codelet cuda_to_cpu_cl = {
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-void cpu_to_opencl(void *buffers[], void *arg)
			
 
				-{
			
 
				-	fprintf(stderr, "User Entering %s\n", __func__);
			
 
				-	struct point *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				-	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
			
 
				-	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				-	int i;
			
 
				-	for (i = 0; i < n; i++) {
			
 
				-		dst->x[i] = src[i].x;
			
 
				-		dst->y[i] = src[i].y;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 void opencl_to_cpu(void *buffers[], void *arg)
			
 
				 {
			
 
				 	fprintf(stderr, "User Entering %s\n", __func__);
			
@@ -86,7 +60,6 @@ void opencl_to_cpu(void *buffers[], void *arg)
 
				 extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
			
 
				 struct starpu_codelet cpu_to_opencl_cl = {
			
 
				 	.where = STARPU_OPENCL,
			
 
				-	.cpu_func = cpu_to_opencl,
			
 
				 	.opencl_func = cpu_to_opencl_opencl_func,
			
 
				 	.nbuffers = 1
			
 
				 };
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -242,6 +242,27 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
				 
			
 
				 	if (is_basic_worker)
			
 
				 	{
			
 
				+		unsigned node = starpu_worker_get_memory_node(workerid);
			
 
				+		if (_starpu_task_uses_multiformat_handles(task))
			
 
				+		{
			
 
				+			int i;
			
 
				+			for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+			{
			
 
				+				struct starpu_task *conversion_task;
			
 
				+				starpu_data_handle_t handle;
			
 
				+
			
 
				+				handle = task->buffers[i].handle;
			
 
				+				if (!_starpu_handle_needs_conversion_task(handle, node))
			
 
				+					continue;
			
 
				+
			
 
				+				conversion_task = _starpu_create_conversion_task(handle, node);
			
 
				+				_starpu_push_local_task(worker, conversion_task, 0);
			
 
				+				//_STARPU_DEBUG("Pushing a conversion task\n");
			
 
				+			}
			
 
				+
			
 
				+			for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+				task->buffers[i].handle->mf_node = node;
			
 
				+		}
			
 
				 		return _starpu_push_local_task(worker, task, 0);
			
 
				 	}
			
 
				 	else
			
@@ -309,8 +330,74 @@ int _starpu_push_task(struct _starpu_job *j, unsigned job_is_already_locked)
 
				         return ret;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Given a handle that needs to be converted in order to be used on the given
			
 
				+ * node, returns a task that takes care of the conversion.
			
 
				+ */
			
 
				+struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
			
 
				+						   unsigned int node)
			
 
				+{
			
 
				+	struct starpu_task *conversion_task;
			
 
				+	struct starpu_multiformat_interface *interface;
			
 
				+	enum _starpu_node_kind node_kind;
			
 
				+
			
 
				+	conversion_task = starpu_task_create();
			
 
				+	conversion_task->synchronous = 0;
			
 
				+	conversion_task->buffers[0].handle = handle;
			
 
				+	conversion_task->buffers[0].mode = STARPU_RW;
			
 
				+
			
 
				+	/* The node does not really matter here */
			
 
				+	interface = (struct starpu_multiformat_interface *)
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				+	node_kind = _starpu_get_node_kind(node);
			
 
				+	
			
 
				+	handle->refcnt++;
			
 
				+	handle->busy_count++;
			
 
				+
			
 
				+	switch(node_kind)
			
 
				+	{
			
 
				+	case STARPU_CPU_RAM:
			
 
				+		switch (_starpu_get_node_kind(handle->mf_node))
			
 
				+		{
			
 
				+		case STARPU_CPU_RAM:
			
 
				+			STARPU_ASSERT(0);
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		case STARPU_CUDA_RAM:
			
 
				+			conversion_task->cl = interface->ops->cuda_to_cpu_cl;
			
 
				+			break;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+		case STARPU_OPENCL_RAM:
			
 
				+			conversion_task->cl = interface->ops->opencl_to_cpu_cl;
			
 
				+			break;
			
 
				+#endif
			
 
				+		default:
			
 
				+			fprintf(stderr, "Oops : %d\n", handle->mf_node);
			
 
				+			STARPU_ASSERT(0);
			
 
				+		}
			
 
				+		break;
			
 
				+#if STARPU_USE_CUDA
			
 
				+	case STARPU_CUDA_RAM:
			
 
				+		conversion_task->cl = interface->ops->cpu_to_cuda_cl;
			
 
				+		break;
			
 
				+#endif
			
 
				+#if STARPU_USE_OPENCL
			
 
				+	case STARPU_OPENCL_RAM:
			
 
				+		conversion_task->cl = interface->ops->cpu_to_opencl_cl;
			
 
				+		break;
			
 
				+#endif
			
 
				+	case STARPU_SPU_LS: /* Not supported */
			
 
				+	default:
			
 
				+		STARPU_ASSERT(0);
			
 
				+	}
			
 
				+
			
 
				+	return conversion_task;
			
 
				+}
			
 
				+
			
 
				+
			
 
				 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
			
 
				 {
			
 
				+	int i;
			
 
				 	struct starpu_task *task;
			
 
				 
			
 
				 	/* We can't tell in advance which task will be picked up, so we measure
			
@@ -322,10 +409,81 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 
				 
			
 
				 	/* perhaps there is some local task to be executed first */
			
 
				 	task = _starpu_pop_local_task(worker);
			
 
				-
			
 
				+	if (task)
			
 
				+		goto profiling;
			
 
				+
			
 
				+	/*
			
 
				+	 * The first STARPU_NMAXBUFS elements of queued_tasks[i] are conversion
			
 
				+	 * tasks for multiformat handles. The last element is the "real" task.
			
 
				+	 */
			
 
				+	int worker_id = starpu_worker_get_id();
			
 
				+	static struct starpu_task *queued_tasks[STARPU_NMAXWORKERS][STARPU_NMAXBUFS+1] = { NULL };
			
 
				+
			
 
				+	/* Maybe there is a queued task for this worker */
			
 
				+pick_from_queued_tasks:
			
 
				+	for (i = 0; i < STARPU_NMAXBUFS+1; i++)
			
 
				+	{
			
 
				+		if (queued_tasks[worker_id][i])
			
 
				+		{
			
 
				+			task = queued_tasks[worker_id][i];
			
 
				+			queued_tasks[worker_id][i] = NULL;
			
 
				+			goto profiling;
			
 
				+		}
			
 
				+	}
			
 
				+	
			
 
				 	if (!task && policy.pop_task)
			
 
				 		task = policy.pop_task();
			
 
				 
			
 
				+	if (!task)
			
 
				+		return NULL;
			
 
				+
			
 
				+	/* Make sure we do not bother with all the multiformat-specific code if 
			
 
				+	 * it is not necessary. */
			
 
				+	if (!_starpu_task_uses_multiformat_handles(task))
			
 
				+		goto profiling;
			
 
				+
			
 
				+	/*
			
 
				+	 * This worker may not be able to execute this task. In this case, we
			
 
				+	 * should return the task anyway. It will be pushed back almost immediatly.
			
 
				+	 * This way, we avoid computing and executing the conversions tasks.
			
 
				+	 * Here, we do not care about what implementation is used.
			
 
				+	 */
			
 
				+	if (!starpu_worker_can_execute_task(worker_id, task, 0))
			
 
				+		return task;
			
 
				+
			
 
				+	unsigned node = starpu_worker_get_memory_node(worker_id);
			
 
				+
			
 
				+	/*
			
 
				+	 * We do have a task that uses multiformat handles. Let's create the 
			
 
				+	 * required conversion tasks.
			
 
				+	 */
			
 
				+	for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+	{
			
 
				+		struct starpu_task *conversion_task;
			
 
				+		starpu_data_handle_t handle;
			
 
				+
			
 
				+		handle = task->buffers[i].handle;
			
 
				+		if (!_starpu_handle_needs_conversion_task(handle, node))
			
 
				+			continue;
			
 
				+
			
 
				+		conversion_task = _starpu_create_conversion_task(handle, node);
			
 
				+		queued_tasks[worker_id][i] = conversion_task;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Next tasks will need to know where these handles have gone.
			
 
				+	 */
			
 
				+	for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+		task->buffers[i].handle->mf_node = node;
			
 
				+
			
 
				+	queued_tasks[worker_id][STARPU_NMAXBUFS] = task;
			
 
				+
			
 
				+	/* We know there is at least one task in queued_tasks[worker_id]. */
			
 
				+	goto pick_from_queued_tasks;
			
 
				+	
			
 
				+
			
 
				+	/* We finally got our task */
			
 
				+profiling:
			
 
				 	/* Note that we may get a NULL task in case the scheduler was unlocked
			
 
				 	 * for some reason. */
			
 
				 	if (profiling && task)
			
--- a/src/core/sched_policy.h
+++ b/src/core/sched_policy.h
@@ -37,4 +37,7 @@ void _starpu_sched_post_exec_hook(struct starpu_task *task);
 
				 
			
 
				 void _starpu_wait_on_sched_event(void);
			
 
				 
			
 
				+struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
			
 
				+						   unsigned int node);
			
 
				+
			
 
				 #endif // __SCHED_POLICY_H__
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -461,3 +461,36 @@ double _starpu_task_get_conversion_time(struct starpu_task *task)
 
				 
			
 
				 	return conversion_time;
			
 
				 }
			
 
				+
			
 
				+/*
			
 
				+ * Returns 0 if tasks does not use any multiformat handle, 1 otherwise.
			
 
				+ */
			
 
				+int
			
 
				+_starpu_task_uses_multiformat_handles(struct starpu_task *task)
			
 
				+{
			
 
				+	int i;
			
 
				+	for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+	{
			
 
				+		unsigned int id;
			
 
				+		id = starpu_get_handle_interface_id(task->buffers[i].handle);
			
 
				+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID)
			
 
				+			return 1;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Checks whether the given handle needs to be converted in order to be used on
			
 
				+ * the node given as the second argument.
			
 
				+ */
			
 
				+int
			
 
				+_starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
			
 
				+				     unsigned int node)
			
 
				+{
			
 
				+	enum _starpu_node_kind node_kind;
			
 
				+
			
 
				+	node_kind = _starpu_get_node_kind(node);
			
 
				+
			
 
				+	return !!(node_kind != _starpu_get_node_kind(handle->mf_node));
			
 
				+}
			
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -46,4 +46,10 @@ struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
 
				 
			
 
				 struct starpu_task *_starpu_create_task_alias(struct starpu_task *task);
			
 
				 
			
 
				+int _task_needs_conversion(struct starpu_task *task, int worker_id);
			
 
				+int _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
			
 
				+					 unsigned int node);
			
 
				+
			
 
				+int _starpu_task_uses_multiformat_handles(struct starpu_task *task);
			
 
				+
			
 
				 #endif // __CORE_TASK_H__
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -217,6 +217,8 @@ struct _starpu_data_state
 
				 	unsigned stats_shared_to_owner[STARPU_MAXNODES];
			
 
				 	unsigned stats_invalidated[STARPU_MAXNODES];
			
 
				 #endif
			
 
				+
			
 
				+	unsigned int mf_node; //XXX
			
 
				 };
			
 
				 
			
 
				 void _starpu_display_msi_stats(void);
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -277,7 +277,7 @@ void starpu_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 
				 
			
 
				 	STARPU_ASSERT(handleptr);
			
 
				 	*handleptr = handle;
			
 
				-
			
 
				+	handle->mf_node = home_node;
			
 
				 
			
 
				 	/* fill the interface fields with the appropriate method */
			
 
				 	ops->register_data_handle(handle, home_node, data_interface);
			
@@ -412,6 +412,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 {
			
 
				 	STARPU_ASSERT(handle);
			
 
				 
			
 
				+
			
 
				 	if (coherent)
			
 
				 	{
			
 
				 		/* If sequential consistency is enabled, wait until data is available */
			
@@ -446,6 +447,42 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 			}
			
 
				 			_starpu_release_data_on_node(handle, 0, &handle->per_node[home_node]);
			
 
				 		}
			
 
				+
			
 
				+		/* If this handle uses the multiformat interface, we may have to convert
			
 
				+		 * this piece of data back into the CPU format.
			
 
				+		 * XXX : This is quite hacky, could we submit a task instead ?
			
 
				+		 */
			
 
				+		unsigned int id = starpu_get_handle_interface_id(handle);
			
 
				+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID &&
			
 
				+			_starpu_get_node_kind(handle->mf_node) != STARPU_CPU_RAM)
			
 
				+		{
			
 
				+			_STARPU_DEBUG("Conversion needed\n");
			
 
				+			void *buffers[1];
			
 
				+			struct starpu_multiformat_interface *interface;
			
 
				+			interface = starpu_data_get_interface_on_node(handle, 0);
			
 
				+			struct starpu_codelet *cl;
			
 
				+			enum _starpu_node_kind node_kind = _starpu_get_node_kind(handle->mf_node);
			
 
				+			
			
 
				+			switch (node_kind)
			
 
				+			{
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+				case STARPU_CUDA_RAM:
			
 
				+					cl = interface->ops->cuda_to_cpu_cl;
			
 
				+					break;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+				case STARPU_OPENCL_RAM:
			
 
				+					cl = interface->ops->opencl_to_cpu_cl;
			
 
				+					break;
			
 
				+#endif
			
 
				+				case STARPU_CPU_RAM:      /* Impossible ! */
			
 
				+				case STARPU_SPU_LS:       /* Not supported */
			
 
				+				default:
			
 
				+					STARPU_ASSERT(0);
			
 
				+			}
			
 
				+			buffers[0] = interface;
			
 
				+			cl->cpu_func(buffers, NULL);
			
 
				+		}
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -246,8 +246,16 @@ static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case STARPU_CUDA_RAM:
			
 
				-			cudaFree(multiformat_interface->cuda_ptr);
			
 
				-			multiformat_interface->cuda_ptr = NULL;
			
 
				+			if (multiformat_interface->cpu_ptr)
			
 
				+			{
			
 
				+				cudaFree(multiformat_interface->cpu_ptr);
			
 
				+				multiformat_interface->cpu_ptr = NULL;
			
 
				+			}
			
 
				+			if (multiformat_interface->cuda_ptr)
			
 
				+			{
			
 
				+				cudaFree(multiformat_interface->cuda_ptr);
			
 
				+				multiformat_interface->cuda_ptr = NULL;
			
 
				+			}
			
 
				 			break;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
@@ -283,6 +291,15 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 				multiformat_interface->cpu_ptr = (void *) addr;
			
 
				 				multiformat_interface->dev_handle = addr;
			
 
				 			}
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+			multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
			
 
				+			STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+			multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
			
 
				+			STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
			
 
				+#endif
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case STARPU_CUDA_RAM:
			
@@ -298,6 +315,9 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 					multiformat_interface->cuda_ptr = (void *)addr;
			
 
				 					multiformat_interface->dev_handle = addr;
			
 
				 				}
			
 
				+
			
 
				+				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
			
 
				+				status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
			
 
				 				break;
			
 
				 			}
			
 
				 #endif
			
@@ -319,6 +339,11 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 					multiformat_interface->dev_handle = addr;
			
 
				 
			
 
				 				}
			
 
				+
			
 
				+				_starpu_opencl_allocate_memory(&multiformat_interface->cpu_ptr,
			
 
				+							multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
			
 
				+							CL_MEM_READ_WRITE);
			
 
				+				
			
 
				 				break;
			
 
				 			}
			
 
				 #endif
			
@@ -384,15 +409,7 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 
				 				if (src_multiformat->cuda_ptr == NULL)
			
 
				 					return -ENOMEM;
			
 
				 			}
			
 
				-			/* Converting data , from host to host */
			
 
				-			double tmp = starpu_timing_now();
			
 
				-			void *buffers[1];
			
 
				-			buffers[0] = src_interface;
			
 
				-			struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
			
 
				-			cl->cpu_func(buffers, NULL);
			
 
				-			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				-
			
 
				-			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
			
 
				+			status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 			{
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
@@ -406,11 +423,6 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
 
				-			void *buffers[1];
			
 
				-			struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
			
 
				-			buffers[0] = dst_interface;
			
 
				-			cl->cpu_func(buffers, NULL);
			
 
				-
			
 
				 			break;
			
 
				 		}
			
 
				 		case cudaMemcpyDeviceToDevice:
			
@@ -461,16 +473,7 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 
				 					return -ENOMEM;
			
 
				 			}
			
 
				 
			
 
				-			/* Converting data , from host to host */
			
 
				-			double tmp = starpu_timing_now();
			
 
				-			void *buffers[1]; // XXX
			
 
				-			buffers[0] = src_interface;
			
 
				-			struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
			
 
				-			cl->cpu_func(buffers, NULL);
			
 
				-			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				-
			
 
				-			/* Actual copy from host to device */
			
 
				-			status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
			
 
				+			status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 			{
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
@@ -484,12 +487,6 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
 
				-			/* Converting data */
			
 
				-			void *buffers[1];
			
 
				-			struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
			
 
				-			buffers[0] = dst_interface;
			
 
				-			cl->cpu_func(buffers, NULL);
			
 
				-
			
 
				 			break;
			
 
				 		}
			
 
				 		case cudaMemcpyDeviceToDevice:
			
@@ -623,26 +620,10 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
 
				 
			
 
				 	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
			
 
				 
			
 
				-	if (src_multiformat->opencl_ptr == NULL)
			
 
				-	{
			
 
				-		src_multiformat->opencl_ptr = malloc(src_multiformat->nx * src_multiformat->ops->opencl_elemsize);
			
 
				-		if (src_multiformat->opencl_ptr == NULL)
			
 
				-		{
			
 
				-			return -ENOMEM;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	double tmp = starpu_timing_now();
			
 
				-	void *buffers[1];
			
 
				-	struct starpu_codelet *cl = src_multiformat->ops->cpu_to_opencl_cl;
			
 
				-	buffers[0] = src_interface;
			
 
				-	cl->cpu_func(buffers, NULL);
			
 
				-	dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				 
			
 
				-
			
 
				-	err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->opencl_ptr,
			
 
				+	err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->cpu_ptr,
			
 
				 							   src_node,
			
 
				-							   (cl_mem) dst_multiformat->dev_handle,
			
 
				+							   (cl_mem) dst_multiformat->cpu_ptr,
			
 
				 							   dst_node,
			
 
				 							   size,
			
 
				 							   dst_multiformat->offset,
			
@@ -672,9 +653,13 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
 
				 	STARPU_ASSERT(src_multiformat->ops != NULL);
			
 
				 	STARPU_ASSERT(dst_multiformat->ops != NULL);
			
 
				 
			
 
				-	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize,
			
 
				+	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
			
 
				 
			
 
				-	err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->dev_handle,
			
 
				+	if (dst_multiformat->opencl_ptr == NULL) {
			
 
				+		/* XXX : it is weird that we might have to allocate memory here... */
			
 
				+		dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
			
 
				+	}
			
 
				+	err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->opencl_ptr,
			
 
				 							   src_node,
			
 
				 							   dst_multiformat->opencl_ptr,
			
 
				 							   dst_node,
			
@@ -687,12 +672,6 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
 
				 
			
 
				 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
			
 
				 
			
 
				-	/* XXX So much for asynchronicity */
			
 
				-	clWaitForEvents(1, _event);
			
 
				-	void *buffers[1];
			
 
				-	struct starpu_codelet *cl = src_multiformat->ops->opencl_to_cpu_cl;
			
 
				-	buffers[0] = dst_interface;
			
 
				-	cl->cpu_func(buffers, NULL);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -22,6 +22,7 @@
 
				 #include <datawizard/copy_driver.h>
			
 
				 #include <datawizard/write_back.h>
			
 
				 #include <core/dependencies/data_concurrency.h>
			
 
				+#include <core/sched_policy.h>
			
 
				 
			
 
				 /* Explicitly ask StarPU to allocate room for a piece of data on the specified
			
 
				  * memory node. */
			
@@ -206,6 +207,18 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 
				 		return -EDEADLK;
			
 
				         }
			
 
				 
			
 
				+	unsigned int id = starpu_get_handle_interface_id(handle);
			
 
				+	if (id == STARPU_MULTIFORMAT_INTERFACE_ID &&
			
 
				+	    _starpu_handle_needs_conversion_task(handle, 0))
			
 
				+	{
			
 
				+		struct starpu_task *task = _starpu_create_conversion_task(handle, 0);
			
 
				+		handle->refcnt--;
			
 
				+		handle->busy_count--;
			
 
				+		handle->mf_node = 0;
			
 
				+		task->synchronous = 1;
			
 
				+		starpu_task_submit(task);
			
 
				+	}
			
 
				+
			
 
				 	struct user_interaction_wrapper wrapper =
			
 
				 	{
			
 
				 		.handle = handle,
			
--- a/src/sched_policies/heft.c
+++ b/src/sched_policies/heft.c
@@ -212,6 +212,33 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 		starpu_prefetch_task_input_on_node(task, memory_node);
			
 
				 	}
			
 
				 
			
 
				+	if (!_starpu_task_uses_multiformat_handles(task))
			
 
				+		goto push_task;
			
 
				+
			
 
				+	/*
			
 
				+	 * Our task uses multiformat handles, which may need to be converted.
			
 
				+	 */
			
 
				+	int i;
			
 
				+	for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+	{
			
 
				+		struct starpu_task *conversion_task;
			
 
				+		starpu_data_handle_t handle;
			
 
				+
			
 
				+		handle = task->buffers[i].handle;
			
 
				+		unsigned int node = starpu_worker_get_memory_node(best_workerid);
			
 
				+		if (!_starpu_handle_needs_conversion_task(handle, node))
			
 
				+			continue;
			
 
				+
			
 
				+		conversion_task = _starpu_create_conversion_task(handle, node);
			
 
				+		starpu_push_local_task(best_workerid, conversion_task, prio);
			
 
				+	}
			
 
				+
			
 
				+	unsigned node = starpu_worker_get_memory_node(best_workerid);
			
 
				+	for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+		task->buffers[i].handle->mf_node = node;
			
 
				+
			
 
				+push_task:
			
 
				+	//_STARPU_DEBUG("Heft : pushing local task\n");
			
 
				 	return starpu_push_local_task(best_workerid, task, prio);
			
 
				 }
			
 
				 
			
@@ -275,7 +302,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 
			
 
				 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
			
 
				 				if (conversion_time > 0.0)
			
 
				-					local_data_penalty[worker][nimpl] += conversion_time;
			
 
				+					local_task_length[worker][nimpl] += conversion_time;
			
 
				 				//_STARPU_DEBUG("Scheduler heft: task length (%lf) local power (%lf) worker (%u) kernel (%u) \n", local_task_length[worker],local_power[worker],worker,nimpl);
			
 
				 
			
 
				 			}
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -102,6 +102,8 @@ noinst_PROGRAMS =				\
 
				 	core/restart				\
			
 
				 	core/execute_on_a_specific_worker	\
			
 
				 	core/insert_task			\
			
 
				+	core/multiformat_data_release           \
			
 
				+	core/multiformat_handle_conversion      \
			
 
				 	core/multithreaded			\
			
 
				 	core/multithreaded_init			\
			
 
				 	core/starpu_task_wait_for_all		\
			
@@ -152,6 +154,7 @@ noinst_PROGRAMS =				\
 
				 	datawizard/increment_redux_v2		\
			
 
				 	datawizard/handle_to_pointer		\
			
 
				 	datawizard/lazy_allocation		\
			
 
				+	datawizard/interfaces/multiformat/multiformat_interface \
			
 
				 	datawizard/interfaces/vector/test_vector_interface   \
			
 
				 	errorcheck/starpu_init_noworker		\
			
 
				 	errorcheck/invalid_blocking_calls	\
			
@@ -253,6 +256,11 @@ BUILT_SOURCES += 						\
 
				 	microbenchs/null_kernel_gordon.spuelf
			
 
				 endif
			
 
				 
			
 
				+core_multiformat_handle_conversion_SOURCES = \
			
 
				+	core/multiformat_handle_conversion.c
			
 
				+
			
 
				+core_multiformat_data_release_SOURCES = \
			
 
				+	core/multiformat_data_release.c
			
 
				 
			
 
				 ##############
			
 
				 # Interfaces #
			
@@ -272,3 +280,23 @@ datawizard_interfaces_vector_test_vector_interface_SOURCES +=               \
 
				 nobase_STARPU_OPENCL_DATA_DATA += \
			
 
				 	datawizard/interfaces/vector/test_vector_opencl_kernel.cl
			
 
				 endif
			
 
				+
			
 
				+datawizard_interfaces_multiformat_multiformat_interface_SOURCES =           \
			
 
				+	datawizard/interfaces/test_interfaces.c                             \
			
 
				+	datawizard/interfaces/multiformat/multiformat_interface.c           \
			
 
				+	datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
			
 
				+
			
 
				+if STARPU_USE_CUDA
			
 
				+datawizard_interfaces_multiformat_multiformat_interface_SOURCES+=                  \
			
 
				+	datawizard/interfaces/multiformat/multiformat_cuda.cu                      \
			
 
				+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
			
 
				+endif
			
 
				+
			
 
				+if STARPU_USE_OPENCL
			
 
				+datawizard_interfaces_multiformat_multiformat_interface_SOURCES+=                  \
			
 
				+	datawizard/interfaces/multiformat/multiformat_opencl.c                     \
			
 
				+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
			
 
				+nobase_STARPU_OPENCL_DATA_DATA +=                                                          \
			
 
				+	datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl                     \
			
 
				+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl
			
 
				+endif
			
--- a/tests/core/multiformat_data_release.c
+++ b/tests/core/multiformat_data_release.c
@@ -0,0 +1,270 @@
 
				+#include <starpu.h>
			
 
				+
			
 
				+#define NX 16
			
 
				+
			
 
				+static int vector[NX];
			
 
				+static starpu_data_handle_t handle;
			
 
				+
			
 
				+#define ENTER() do { fprintf(stderr, "Entering %s\n", __func__); } while (0)
			
 
				+
			
 
				+/* Counting the calls to the codelets */
			
 
				+struct stats {
			
 
				+	unsigned int cpu;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	unsigned int cuda;
			
 
				+	unsigned int cpu_to_cuda;
			
 
				+	unsigned int cuda_to_cpu;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	unsigned int opencl;
			
 
				+	unsigned int cpu_to_opencl;
			
 
				+	unsigned int opencl_to_cpu;
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+static struct stats global_stats;
			
 
				+
			
 
				+/* "Fake" conversion codelets */
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void cpu_to_cuda_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cpu_to_cuda++;
			
 
				+}
			
 
				+
			
 
				+static void cuda_to_cpu_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cuda_to_cpu++;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_codelet cpu_to_cuda_cl = {
			
 
				+	.where = STARPU_CUDA,
			
 
				+	.cuda_func = cpu_to_cuda_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cuda_to_cpu_cl = {
			
 
				+	.where = STARPU_CPU,
			
 
				+	.cpu_func = cuda_to_cpu_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void cpu_to_opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cpu_to_opencl++;
			
 
				+}
			
 
				+
			
 
				+static void opencl_to_cpu_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.opencl_to_cpu++;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_codelet cpu_to_opencl_cl = {
			
 
				+	.where = STARPU_OPENCL,
			
 
				+	.opencl_func = cpu_to_opencl_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet opencl_to_cpu_cl = {
			
 
				+	.where = STARPU_CPU,
			
 
				+	.cpu_func = opencl_to_cpu_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+static struct starpu_multiformat_data_interface_ops ops = {
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_elemsize = sizeof(int),
			
 
				+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
			
 
				+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_elemsize = sizeof(int),
			
 
				+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
			
 
				+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
			
 
				+#endif
			
 
				+	.cpu_elemsize = sizeof(int)
			
 
				+};
			
 
				+
			
 
				+
			
 
				+static void
			
 
				+register_handle(void)
			
 
				+{
			
 
				+	int i;
			
 
				+	for (i = 0; i < NX; i++)
			
 
				+		vector[i] = i;
			
 
				+	starpu_multiformat_data_register(&handle, 0, vector, NX, &ops);
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+unregister_handle(void)
			
 
				+{
			
 
				+	starpu_data_unregister(handle);
			
 
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void cuda_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cuda++;
			
 
				+}
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.opencl++;
			
 
				+}
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+static void
			
 
				+create_and_submit(int where)
			
 
				+{
			
 
				+	static struct starpu_codelet cl = {
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		.cuda_func   = cuda_func,
			
 
				+#endif
			
 
				+#if STARPU_USE_OPENCL
			
 
				+		.opencl_func = opencl_func,
			
 
				+#endif
			
 
				+		.nbuffers    = 1
			
 
				+	};
			
 
				+	cl.where = where;
			
 
				+
			
 
				+	struct starpu_task *task = starpu_task_create();
			
 
				+	task->cl = &cl;
			
 
				+	task->buffers[0].handle = handle;
			
 
				+	task->buffers[0].mode = STARPU_RW;
			
 
				+
			
 
				+	/* We need to be sure the data has been copied to the GPU at the end 
			
 
				+	 * of this function */
			
 
				+	task->synchronous = 1;
			
 
				+	starpu_task_submit(task);
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+print_stats(struct stats *s)
			
 
				+{
			
 
				+	fprintf(stderr, "cpu         : %d\n", s->cpu);
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	fprintf(stderr, "cuda        : %d\n" 
			
 
				+			"cpu->cuda   : %d\n"
			
 
				+			"cuda->cpu   : %d\n",
			
 
				+			s->cuda,
			
 
				+			s->cpu_to_cuda,
			
 
				+			s->cuda_to_cpu);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	fprintf(stderr, "opencl      : %d\n" 
			
 
				+			"cpu->opencl : %d\n"
			
 
				+			"opencl->cpu : %d\n",
			
 
				+			s->opencl,
			
 
				+			s->cpu_to_opencl,
			
 
				+			s->opencl_to_cpu);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+compare(struct stats *s1, struct stats *s2)
			
 
				+{
			
 
				+	if (
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+	    s1->cpu == s2->cpu &&
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	    s1->cuda == s2->cuda &&
			
 
				+	    s1->cpu_to_cuda == s2->cpu_to_cuda &&
			
 
				+	    s1->cuda_to_cpu == s2->cuda_to_cpu &&
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	    s1->opencl == s2->opencl &&
			
 
				+	    s1->cpu_to_opencl == s2->cpu_to_opencl &&
			
 
				+	    s1->opencl_to_cpu == s2->opencl_to_cpu &&
			
 
				+#endif
			
 
				+	    1 /* Just so the build does not fail if we disable EVERYTHING */
			
 
				+	)
			
 
				+		return 0;
			
 
				+	else
			
 
				+		return 1;
			
 
				+
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+test(void)
			
 
				+{
			
 
				+	struct stats expected_stats;
			
 
				+	memset(&expected_stats, 0, sizeof(expected_stats));
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	create_and_submit(STARPU_CUDA);
			
 
				+	starpu_data_acquire(handle, STARPU_RW);
			
 
				+
			
 
				+	expected_stats.cuda = 1;
			
 
				+	expected_stats.cpu_to_cuda = 1;
			
 
				+	expected_stats.cuda_to_cpu = 1;
			
 
				+
			
 
				+	starpu_data_release(handle);
			
 
				+	if (compare(&global_stats, &expected_stats) != 0)
			
 
				+	{
			
 
				+		fprintf(stderr, "CUDA failed\n");
			
 
				+		print_stats(&global_stats);
			
 
				+		fprintf(stderr ,"\n");
			
 
				+		print_stats(&expected_stats);
			
 
				+		return 1;
			
 
				+	}
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	create_and_submit(STARPU_OPENCL);
			
 
				+	starpu_data_acquire(handle, STARPU_RW);
			
 
				+	expected_stats.opencl = 1;
			
 
				+	expected_stats.cpu_to_opencl = 1;
			
 
				+	expected_stats.opencl_to_cpu = 1;
			
 
				+
			
 
				+	starpu_data_release(handle);
			
 
				+	if (compare(&global_stats, &expected_stats) != 0)
			
 
				+	{
			
 
				+		fprintf(stderr, "OPENCL failed\n");
			
 
				+		print_stats(&global_stats);
			
 
				+		fprintf(stderr ,"\n");
			
 
				+		print_stats(&expected_stats);
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main(void)
			
 
				+{
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+	struct starpu_conf conf = {
			
 
				+		.ncpus = -1,
			
 
				+		.ncuda = 1,
			
 
				+		.nopencl = 1
			
 
				+	};
			
 
				+	memset(&global_stats, 0, sizeof(global_stats));
			
 
				+	starpu_init(&conf);
			
 
				+
			
 
				+	register_handle();
			
 
				+
			
 
				+	int err = test();
			
 
				+
			
 
				+	unregister_handle();
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	return err?EXIT_FAILURE:EXIT_SUCCESS;
			
 
				+#else /* ! STARPU_USE_CPU */
			
 
				+	/* Without the CPU, there is no point in using the multiformat
			
 
				+	 * interface, so this test is pointless. */
			
 
				+	return EXIT_SUCCESS;
			
 
				+#endif
			
 
				+}
			
--- a/tests/core/multiformat_handle_conversion.c
+++ b/tests/core/multiformat_handle_conversion.c
@@ -0,0 +1,317 @@
 
				+#include <starpu.h>
			
 
				+
			
 
				+#define NX 4
			
 
				+
			
 
				+#define DEBUG 0
			
 
				+
			
 
				+#if DEBUG
			
 
				+#define SYNCHRONOUS 1 /* Easier to debug with synchronous tasks */
			
 
				+#define ENTER() do { fprintf(stderr, "Entering %s\n", __func__); } while (0)
			
 
				+#else
			
 
				+#define SYNCHRONOUS 0 
			
 
				+#define ENTER()
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+/* Counting the calls to the codelets */
			
 
				+struct stats {
			
 
				+	unsigned int cpu;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	unsigned int cuda;
			
 
				+	unsigned int cpu_to_cuda;
			
 
				+	unsigned int cuda_to_cpu;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	unsigned int opencl;
			
 
				+	unsigned int cpu_to_opencl;
			
 
				+	unsigned int opencl_to_cpu;
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+struct stats global_stats;
			
 
				+
			
 
				+/* "Fake" conversion codelets */
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void cpu_to_cuda_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cpu_to_cuda++;
			
 
				+}
			
 
				+
			
 
				+static void cuda_to_cpu_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cuda_to_cpu++;
			
 
				+}
			
 
				+
			
 
				+struct starpu_codelet cpu_to_cuda_cl = {
			
 
				+	.where = STARPU_CUDA,
			
 
				+	.cuda_func = cpu_to_cuda_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet cuda_to_cpu_cl = {
			
 
				+	.where = STARPU_CPU,
			
 
				+	.cpu_func = cuda_to_cpu_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void cpu_to_opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cpu_to_opencl++;
			
 
				+}
			
 
				+
			
 
				+static void opencl_to_cpu_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.opencl_to_cpu++;
			
 
				+}
			
 
				+
			
 
				+struct starpu_codelet cpu_to_opencl_cl = {
			
 
				+	.where = STARPU_OPENCL,
			
 
				+	.opencl_func = cpu_to_opencl_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet opencl_to_cpu_cl = {
			
 
				+	.where = STARPU_CPU,
			
 
				+	.cpu_func = opencl_to_cpu_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+static struct starpu_multiformat_data_interface_ops ops = {
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_elemsize = sizeof(int),
			
 
				+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
			
 
				+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_elemsize = sizeof(int),
			
 
				+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
			
 
				+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
			
 
				+#endif
			
 
				+	.cpu_elemsize = sizeof(int)
			
 
				+};
			
 
				+
			
 
				+static void cpu_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cpu++;
			
 
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void cuda_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.cuda++;
			
 
				+}
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	ENTER();
			
 
				+	global_stats.opencl++;
			
 
				+}
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+
			
 
				+static void
			
 
				+create_and_submit_tasks(int where, starpu_data_handle_t handles[])
			
 
				+{
			
 
				+	fprintf(stderr, "***** Starting Task 1\n");
			
 
				+	static struct starpu_codelet cl = {
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		.cuda_func   = cuda_func,
			
 
				+#endif
			
 
				+#if STARPU_USE_OPENCL
			
 
				+		.opencl_func = opencl_func,
			
 
				+#endif
			
 
				+		.nbuffers    = 1
			
 
				+	};
			
 
				+	cl.where = where;
			
 
				+
			
 
				+	struct starpu_task *task = starpu_task_create();
			
 
				+	task->synchronous = SYNCHRONOUS;
			
 
				+	task->cl = &cl;
			
 
				+	task->buffers[0].handle = handles[0];
			
 
				+	task->buffers[0].mode = STARPU_RW;
			
 
				+	starpu_task_submit(task);
			
 
				+
			
 
				+	fprintf(stderr, "***** Starting Task 2\n");
			
 
				+	static struct starpu_codelet cl2 = {
			
 
				+		.where = STARPU_CPU,
			
 
				+		.cpu_func = cpu_func,
			
 
				+		.nbuffers = 1
			
 
				+	};
			
 
				+
			
 
				+	struct starpu_task *task2 = starpu_task_create();
			
 
				+	task2->synchronous = SYNCHRONOUS;
			
 
				+	task2->cl = &cl2;
			
 
				+	task2->buffers[0].handle = handles[1];
			
 
				+	task2->buffers[0].mode = STARPU_RW;
			
 
				+	starpu_task_submit(task2);
			
 
				+
			
 
				+
			
 
				+	fprintf(stderr, "***** Starting Task 3\n");
			
 
				+	static struct starpu_codelet cl3 = {
			
 
				+		.cpu_func    = cpu_func,
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		.cuda_func   = cuda_func,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+		.opencl_func = opencl_func,
			
 
				+#endif
			
 
				+		.nbuffers    = 2
			
 
				+	};
			
 
				+	cl3.where = where;
			
 
				+
			
 
				+	struct starpu_task *task3 = starpu_task_create();
			
 
				+	task3->synchronous = SYNCHRONOUS;
			
 
				+	task3->cl = &cl3;
			
 
				+	task3->buffers[0].handle = handles[0];
			
 
				+	task3->buffers[0].mode = STARPU_RW;
			
 
				+	task3->buffers[1].handle = handles[1];
			
 
				+	task3->buffers[1].mode = STARPU_RW;
			
 
				+	starpu_task_submit(task3);
			
 
				+
			
 
				+	starpu_task_wait_for_all();
			
 
				+	fprintf(stderr, "***** End of all tasks\n");
			
 
				+	return;
			
 
				+}
			
 
				+
			
 
				+#if DEBUG
			
 
				+static void
			
 
				+print_stats(struct stats *s)
			
 
				+{
			
 
				+	fprintf(stderr, "cpu         : %d\n", s->cpu);
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	fprintf(stderr, "cuda        : %d\n" 
			
 
				+			"cpu->cuda   : %d\n"
			
 
				+			"cuda->cpu   : %d\n",
			
 
				+			s->cuda,
			
 
				+			s->cpu_to_cuda,
			
 
				+			s->cuda_to_cpu);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	fprintf(stderr, "opencl      : %d\n" 
			
 
				+			"cpu->opencl : %d\n"
			
 
				+			"opencl->cpu : %d\n",
			
 
				+			s->opencl,
			
 
				+			s->cpu_to_opencl,
			
 
				+			s->opencl_to_cpu);
			
 
				+#endif
			
 
				+}
			
 
				+#endif /* !DEBUG */
			
 
				+
			
 
				+/* XXX Just a little bit of copy/pasta here... */
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static int
			
 
				+test_cuda(void)
			
 
				+{
			
 
				+	int i;
			
 
				+	int vector1[NX];
			
 
				+	int vector2[NX];
			
 
				+	starpu_data_handle_t handles[2];
			
 
				+
			
 
				+	for (i = 0; i < NX; i++)
			
 
				+	{
			
 
				+		vector1[i] = i;
			
 
				+		vector2[i] = i;
			
 
				+	}
			
 
				+
			
 
				+	starpu_multiformat_data_register(handles, 0, vector1, NX, &ops);
			
 
				+	starpu_multiformat_data_register(handles+1, 0, vector2, NX, &ops);
			
 
				+
			
 
				+	memset(&global_stats, 0, sizeof(global_stats));
			
 
				+	create_and_submit_tasks(STARPU_CUDA, handles);
			
 
				+
			
 
				+	starpu_data_unregister(handles[0]);
			
 
				+	starpu_data_unregister(handles[1]);
			
 
				+
			
 
				+#if DEBUG
			
 
				+	print_stats(&global_stats);
			
 
				+#endif
			
 
				+
			
 
				+	return !(global_stats.cpu == 1 &&
			
 
				+		 global_stats.cpu_to_cuda == 2 &&
			
 
				+		 global_stats.cuda_to_cpu == 2 &&
			
 
				+		 global_stats.cuda == 2);
			
 
				+}
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static int
			
 
				+test_opencl(void)
			
 
				+{
			
 
				+	int i;
			
 
				+	int vector1[NX];
			
 
				+	int vector2[NX];
			
 
				+	starpu_data_handle_t handles[2];
			
 
				+
			
 
				+	for (i = 0; i < NX; i++)
			
 
				+	{
			
 
				+		vector1[i] = i;
			
 
				+		vector2[i] = i;
			
 
				+	}
			
 
				+
			
 
				+	starpu_multiformat_data_register(handles, 0, vector1, NX, &ops);
			
 
				+	starpu_multiformat_data_register(handles+1, 0, vector2, NX, &ops);
			
 
				+
			
 
				+	memset(&global_stats, 0, sizeof(global_stats));
			
 
				+	create_and_submit_tasks(STARPU_OPENCL, handles);
			
 
				+
			
 
				+	starpu_data_unregister(handles[0]);
			
 
				+	starpu_data_unregister(handles[1]);
			
 
				+
			
 
				+#if DEBUG
			
 
				+	print_stats(&global_stats);
			
 
				+#endif
			
 
				+
			
 
				+	return !(global_stats.cpu == 1 &&
			
 
				+		 global_stats.cpu_to_opencl == 2 &&
			
 
				+		 global_stats.opencl_to_cpu == 2 &&
			
 
				+		 global_stats.opencl == 2);
			
 
				+	
			
 
				+}
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+int
			
 
				+main(void)
			
 
				+{
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+	struct starpu_conf conf = {
			
 
				+		.ncpus   = -1,
			
 
				+		.ncuda   = 2,
			
 
				+		.nopencl = 1
			
 
				+	};
			
 
				+
			
 
				+	starpu_init(&conf);
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	if (test_opencl() != 0)
			
 
				+	{
			
 
				+		fprintf(stderr, "OPENCL FAILED\n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	if (test_cuda() != 0)
			
 
				+	{
			
 
				+		fprintf(stderr, "CUDA FAILED \n");
			
 
				+		exit(1);
			
 
				+	}
			
 
				+#endif
			
 
				+	
			
 
				+	starpu_shutdown();
			
 
				+#endif
			
 
				+	/* Without the CPU, there is no point in using the multiformat
			
 
				+	 * interface, so this test is pointless. */
			
 
				+
			
 
				+	return EXIT_SUCCESS;
			
 
				+}
			
 
				+
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
@@ -0,0 +1,73 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+void cuda_to_cpu(void *buffers[], void *arg)
			
 
				+{
			
 
				+	fprintf(stderr, "Entering %s\n", __func__);
			
 
				+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
			
 
				+	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+	int i;
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	{
			
 
				+		dst[i].x = src->x[i];
			
 
				+		dst[i].y = src->y[i];
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); struct starpu_codelet cpu_to_cuda_cl = {
			
 
				+	.where = STARPU_CUDA,
			
 
				+	.cuda_func = cpu_to_cuda_cuda_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet cuda_to_cpu_cl = {
			
 
				+	.where = STARPU_CPU,
			
 
				+	.cpu_func = cuda_to_cpu,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+void opencl_to_cpu(void *buffers[], void *arg)
			
 
				+{
			
 
				+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
			
 
				+	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+	int i;
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	{
			
 
				+		dst[i].x = src->x[i];
			
 
				+		dst[i].y = src->y[i];
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
			
 
				+struct starpu_codelet cpu_to_opencl_cl = {
			
 
				+	.where = STARPU_OPENCL,
			
 
				+	.opencl_func = cpu_to_opencl_opencl_func,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet opencl_to_cpu_cl = {
			
 
				+	.where = STARPU_CPU,
			
 
				+	.cpu_func = opencl_to_cpu,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+#endif
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
@@ -0,0 +1,50 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+static __global__ void cpu_to_cuda_cuda(struct point *src,
			
 
				+	struct struct_of_arrays *dst, unsigned n)
			
 
				+{
			
 
				+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
			
 
				+
			
 
				+	if (i < n)
			
 
				+	{
			
 
				+		dst->x[i] = src[i].x;
			
 
				+		dst->y[i] = src[i].y;
			
 
				+	}
			
 
				+
			
 
				+}
			
 
				+
			
 
				+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
			
 
				+{
			
 
				+	fprintf(stderr, "Entering %s\n", __func__);
			
 
				+	struct point *src;
			
 
				+	struct struct_of_arrays *dst;
			
 
				+
			
 
				+	src = (struct point *) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				+	dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
			
 
				+
			
 
				+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+
			
 
				+	unsigned threads_per_block = 64;
			
 
				+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
			
 
				+
			
 
				+        cpu_to_cuda_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(src, dst, n);
			
 
				+
			
 
				+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
@@ -0,0 +1,104 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+
			
 
				+#define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl"
			
 
				+static struct starpu_opencl_program opencl_conversion_program;
			
 
				+
			
 
				+void cpu_to_opencl_opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) args;
			
 
				+	int id, devid;
			
 
				+        cl_int err;
			
 
				+	cl_kernel kernel;
			
 
				+	cl_command_queue queue;
			
 
				+	cl_event event;
			
 
				+
			
 
				+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				+	cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
			
 
				+
			
 
				+	id = starpu_worker_get_id();
			
 
				+	devid = starpu_worker_get_devid(id);
			
 
				+
			
 
				+	starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
			
 
				+					    &opencl_conversion_program,
			
 
				+					    NULL);
			
 
				+
			
 
				+	err = starpu_opencl_load_kernel(&kernel,
			
 
				+					&queue,
			
 
				+					&opencl_conversion_program,
			
 
				+					"cpu_to_opencl_opencl",
			
 
				+					devid);
			
 
				+
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 0, sizeof(src), &src);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 1, sizeof(dst), &dst);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+
			
 
				+	{
			
 
				+		size_t global=n;
			
 
				+		size_t local;
			
 
				+                size_t s;
			
 
				+                cl_device_id device;
			
 
				+
			
 
				+                starpu_opencl_get_device(devid, &device);
			
 
				+
			
 
				+                err = clGetKernelWorkGroupInfo (kernel,
			
 
				+						device,
			
 
				+						CL_KERNEL_WORK_GROUP_SIZE,
			
 
				+						sizeof(local),
			
 
				+						&local,
			
 
				+						&s);
			
 
				+                if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+                if (local > global)
			
 
				+			local = global;
			
 
				+
			
 
				+		err = clEnqueueNDRangeKernel(queue,
			
 
				+					kernel,
			
 
				+					1,
			
 
				+					NULL,
			
 
				+					&global,
			
 
				+					&local,
			
 
				+					0,
			
 
				+					NULL,
			
 
				+					&event);
			
 
				+
			
 
				+		if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+	}
			
 
				+
			
 
				+	clFinish(queue);
			
 
				+	starpu_opencl_collect_stats(event);
			
 
				+	clReleaseEvent(event);
			
 
				+
			
 
				+	starpu_opencl_release_kernel(kernel);
			
 
				+        starpu_opencl_unload_opencl(&opencl_conversion_program);
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl
+++ b/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl
@@ -0,0 +1,27 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include "multiformat_types.h"
			
 
				+__kernel void cpu_to_opencl_opencl(__global struct point *src,
			
 
				+				   __global struct struct_of_arrays *dst,
			
 
				+				   unsigned int n)
			
 
				+{
			
 
				+	const unsigned int i = get_global_id(0);
			
 
				+	if (i < n)
			
 
				+	{
			
 
				+		dst->x[i] = src[i].x;
			
 
				+		dst->y[i] = src[i].y;
			
 
				+	}
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_cuda.cu
+++ b/tests/datawizard/interfaces/multiformat/multiformat_cuda.cu
@@ -0,0 +1,76 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "multiformat_types.h"
			
 
				+#include "../test_interfaces.h"
			
 
				+
			
 
				+extern struct test_config multiformat_config;
			
 
				+
			
 
				+static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n,
			
 
				+					int *err, int factor)
			
 
				+{
			
 
				+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
			
 
				+
			
 
				+	if (i >= n)
			
 
				+		return;
			
 
				+
			
 
				+	if (soa->x[i] != i * factor || soa->y[i] != i * factor)
			
 
				+	{
			
 
				+		*err = 1;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		soa->x[i] = -soa->x[i];
			
 
				+		soa->y[i] = -soa->y[i];
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+extern "C" void test_multiformat_cuda_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	fprintf(stderr, "Entering %s\n", __func__);
			
 
				+	int factor;
			
 
				+	int *ret;
			
 
				+	cudaError_t error;
			
 
				+	unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+	struct struct_of_arrays *soa;
			
 
				+
			
 
				+	soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
			
 
				+	unsigned threads_per_block = 64;
			
 
				+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
			
 
				+	factor = *(int *) args;
			
 
				+
			
 
				+	error = cudaMalloc(&ret, sizeof(int));
			
 
				+	if (error != cudaSuccess)
			
 
				+		STARPU_CUDA_REPORT_ERROR(error);
			
 
				+
			
 
				+	error = cudaMemcpy(ret,
			
 
				+			   &multiformat_config.copy_failed,
			
 
				+			   sizeof(int),
			
 
				+			   cudaMemcpyHostToDevice);
			
 
				+	if (error != cudaSuccess)
			
 
				+		STARPU_CUDA_REPORT_ERROR(error);
			
 
				+
			
 
				+        multiformat_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(soa, n, ret, factor);
			
 
				+
			
 
				+	error = cudaMemcpy(&multiformat_config.copy_failed,
			
 
				+			   ret,
			
 
				+			   sizeof(int),
			
 
				+			   cudaMemcpyDeviceToHost);
			
 
				+	
			
 
				+	cudaFree(ret);
			
 
				+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_interface.c
+++ b/tests/datawizard/interfaces/multiformat/multiformat_interface.c
@@ -0,0 +1,148 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include "multiformat_types.h"
			
 
				+#include "../test_interfaces.h"
			
 
				+
			
 
				+static void test_multiformat_cpu_func(void *buffers[], void *args);
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern void test_multiformat_cuda_func(void *buffers[], void *args);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void test_multiformat_opencl_func(void *buffers[], void *args);
			
 
				+#endif
			
 
				+
			
 
				+static struct point array_of_structs[N_ELEMENTS];
			
 
				+static starpu_data_handle_t multiformat_handle;
			
 
				+
			
 
				+struct test_config multiformat_config = {
			
 
				+	.cpu_func      = test_multiformat_cpu_func,
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_func     = test_multiformat_cuda_func,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_func   = test_multiformat_opencl_func,
			
 
				+#endif
			
 
				+	.handle        = &multiformat_handle,
			
 
				+	.copy_failed   = 0,
			
 
				+	.name          = "multiformat_interface"
			
 
				+};
			
 
				+
			
 
				+static void
			
 
				+test_multiformat_cpu_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	struct point *aos;
			
 
				+	unsigned int n, i;
			
 
				+	int factor;
			
 
				+
			
 
				+	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
			
 
				+	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+	factor = *(int *) args;
			
 
				+
			
 
				+	for (i = 0; i < n; i++)
			
 
				+	{
			
 
				+			fprintf(stderr, "(%d %d) [%d]", aos[i].x, aos[i].y, factor);
			
 
				+		if (aos[i].x != i * factor || aos[i].y != i * factor)
			
 
				+		{
			
 
				+			multiformat_config.copy_failed = 1;
			
 
				+		}
			
 
				+		aos[i].x = -aos[i].x;
			
 
				+		aos[i].y = -aos[i].y;
			
 
				+	}
			
 
				+	fprintf(stderr, "\n");
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern struct starpu_codelet cpu_to_cuda_cl;
			
 
				+extern struct starpu_codelet cuda_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern struct starpu_codelet cpu_to_opencl_cl;
			
 
				+extern struct starpu_codelet opencl_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+struct starpu_multiformat_data_interface_ops format_ops = {
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_elemsize = 2* sizeof(float),
			
 
				+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
			
 
				+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_elemsize = 2 * sizeof(float),
			
 
				+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
			
 
				+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
			
 
				+#endif
			
 
				+	.cpu_elemsize = sizeof(struct point),
			
 
				+};
			
 
				+
			
 
				+static void
			
 
				+register_data(void)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < N_ELEMENTS; i++)
			
 
				+	{
			
 
				+		array_of_structs[i].x = i;
			
 
				+		array_of_structs[i].y = i;
			
 
				+	}
			
 
				+	starpu_multiformat_data_register(&multiformat_handle,
			
 
				+					 0,
			
 
				+					 &array_of_structs,
			
 
				+					 N_ELEMENTS,
			
 
				+					 &format_ops);
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+unregister_data(void)
			
 
				+{
			
 
				+	starpu_data_unregister(multiformat_handle);
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+main(void)
			
 
				+{
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+	data_interface_test_summary *summary;
			
 
				+	struct starpu_conf conf = {
			
 
				+		.ncpus   = -1,
			
 
				+		.ncuda   = 2,
			
 
				+		.nopencl = 1
			
 
				+	};
			
 
				+
			
 
				+	starpu_init(&conf);
			
 
				+
			
 
				+	register_data();
			
 
				+
			
 
				+	summary = run_tests(&multiformat_config);
			
 
				+	if (!summary)
			
 
				+		exit(EXIT_FAILURE);
			
 
				+
			
 
				+	data_interface_test_summary_print(stderr, summary);
			
 
				+
			
 
				+	unregister_data();
			
 
				+
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	return data_interface_test_summary_success(summary);
			
 
				+#else
			
 
				+	/* Without the CPU, there is no point in using the multiformat
			
 
				+	 * interface, so this test is pointless. */
			
 
				+	return EXIT_SUCCESS;
			
 
				+#endif
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_opencl.c
+++ b/tests/datawizard/interfaces/multiformat/multiformat_opencl.c
@@ -0,0 +1,130 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+#include "../test_interfaces.h"
			
 
				+
			
 
				+#define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl"
			
 
				+
			
 
				+extern struct test_config multiformat_config;
			
 
				+static struct starpu_opencl_program multiformat_program;
			
 
				+
			
 
				+void test_multiformat_opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	int id, devid, factor;
			
 
				+	unsigned int n;
			
 
				+
			
 
				+        cl_int             err;
			
 
				+	cl_kernel          kernel;
			
 
				+	cl_command_queue   queue;
			
 
				+	cl_event           event;
			
 
				+	cl_context         context;
			
 
				+	cl_mem             val, fail;
			
 
				+
			
 
				+	starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
			
 
				+					    &multiformat_program,
			
 
				+					    NULL);
			
 
				+
			
 
				+	factor = *(int *)args;
			
 
				+	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+	val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
			
 
				+
			
 
				+	id = starpu_worker_get_id();
			
 
				+	devid = starpu_worker_get_devid(id);
			
 
				+	starpu_opencl_get_context(devid, &context);
			
 
				+
			
 
				+	err = starpu_opencl_load_kernel(&kernel,
			
 
				+					&queue,
			
 
				+					&multiformat_program,
			
 
				+					"multiformat_opencl",
			
 
				+					devid);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
			
 
				+		sizeof(int), &multiformat_config.copy_failed, &err);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	/* Setting args */
			
 
				+	err  = clSetKernelArg(kernel, 0, sizeof(val), &val);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 1, sizeof(n), &n);
			
 
				+	if (err)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 2, sizeof(fail), &fail);
			
 
				+	if (err)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 3, sizeof(factor), &factor);
			
 
				+	if (err)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	{
			
 
				+		size_t global=n;
			
 
				+		size_t local;
			
 
				+                size_t s;
			
 
				+                cl_device_id device;
			
 
				+
			
 
				+                starpu_opencl_get_device(devid, &device);
			
 
				+
			
 
				+                err = clGetKernelWorkGroupInfo (kernel,
			
 
				+						device,
			
 
				+						CL_KERNEL_WORK_GROUP_SIZE,
			
 
				+						sizeof(local),
			
 
				+						&local,
			
 
				+						&s);
			
 
				+                if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+                if (local > global)
			
 
				+			local = global;
			
 
				+
			
 
				+		err = clEnqueueNDRangeKernel(queue,
			
 
				+					kernel,
			
 
				+					1,
			
 
				+					NULL,
			
 
				+					&global,
			
 
				+					&local,
			
 
				+					0,
			
 
				+					NULL,
			
 
				+					&event);
			
 
				+
			
 
				+		if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+	}
			
 
				+
			
 
				+	err = clEnqueueReadBuffer(queue,
			
 
				+				  fail,
			
 
				+				  CL_TRUE,
			
 
				+				  0, 
			
 
				+				  sizeof(int),
			
 
				+				  &multiformat_config.copy_failed,
			
 
				+				  0,
			
 
				+				  NULL,
			
 
				+				  NULL);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	clFinish(queue);
			
 
				+	starpu_opencl_collect_stats(event);
			
 
				+	clReleaseEvent(event);
			
 
				+
			
 
				+	starpu_opencl_release_kernel(kernel);
			
 
				+        starpu_opencl_unload_opencl(&multiformat_program);
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl
+++ b/tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl
@@ -0,0 +1,36 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "multiformat_types.h"
			
 
				+__kernel void multiformat_opencl(__global struct struct_of_arrays *soa,
			
 
				+				 unsigned int nx,
			
 
				+				 __global int *err,
			
 
				+				 int factor)
			
 
				+{
			
 
				+        const int i = get_global_id(0);
			
 
				+	if (i >= nx)
			
 
				+		return;
			
 
				+
			
 
				+	if (soa->x[i] != i * factor || soa->y[i] != i * factor)
			
 
				+	{
			
 
				+		*err = i;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		soa->x[i] = -soa->x[i];
			
 
				+		soa->y[i] = -soa->y[i];
			
 
				+	}
			
 
				+}
			
--- a/tests/datawizard/interfaces/multiformat/multiformat_types.h
+++ b/tests/datawizard/interfaces/multiformat/multiformat_types.h
@@ -0,0 +1,31 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#ifndef TEST_MULTIFORMAT_TYPES_H
			
 
				+#define TEST_MULTIFORMAT_TYPES_H
			
 
				+
			
 
				+#define N_ELEMENTS 2
			
 
				+
			
 
				+struct struct_of_arrays{
			
 
				+	int x[N_ELEMENTS];
			
 
				+	int y[N_ELEMENTS];
			
 
				+};
			
 
				+
			
 
				+struct point {
			
 
				+	int x, y;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif