Forráskód Böngészése

Merging branches/multi-format-2

* Conversions are no longer triggered in the copy functions.
* The need for a conversion is automatically detected when scheduling the task.
* Support for both CUDA and OpenCL.
* Add a standard interface test.
* Add a test for starpu_data_release() when used with the multiformat interface.
* Add a test for handle conversion.
Cyril Roelandt 13 éve
szülő
commit
260aae8a33

+ 28 - 4
examples/basic_examples/multiformat.c

@@ -74,7 +74,7 @@ static struct starpu_perfmodel conversion_model = {
 };
 
 static struct starpu_codelet  cl = {
-	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
+	.where = STARPU_CUDA | STARPU_OPENCL,
 	.cpu_func = multiformat_scal_cpu_func,
 #ifdef STARPU_USE_CUDA
 	.cuda_func = multiformat_scal_cuda_func,
@@ -112,24 +112,40 @@ register_data(void)
 static void
 create_and_submit_tasks(void)
 {
-	struct starpu_task *task = starpu_task_create();
+	int err;
 
+#ifdef STARPU_USE_CUDA
+	struct starpu_task *task = starpu_task_create();
+	cl.where = STARPU_CUDA;
 	task->cl = &cl;
 	task->synchronous = 1;
 	task->buffers[0].handle = array_of_structs_handle;
 	task->buffers[0].mode = STARPU_RW;
 	task->cl_arg = NULL;
 	task->cl_arg_size = 0;
-	starpu_task_submit(task);
+	err = starpu_task_submit(task);
+	if (err != 0)
+	{
+		fprintf(stderr, "Err : %s\n", strerror(-err));
+		return;
+	}
+#endif
 
 	struct starpu_task *task2 = starpu_task_create();
+	cl.where = STARPU_CPU;
 	task2->cl = &cl;
 	task2->synchronous = 1;
 	task2->buffers[0].handle = array_of_structs_handle;
 	task2->buffers[0].mode = STARPU_RW;
 	task2->cl_arg = NULL;
+	cl.where = STARPU_CPU;
 	task2->cl_arg_size = 0;
-	starpu_task_submit(task2);
+	err = starpu_task_submit(task2);
+	if (err != 0)
+	{
+		fprintf(stderr, "Err : %s\n", strerror(-err));
+		return;
+	}
 }
 
 static void
@@ -156,7 +172,9 @@ check_it(void)
 	int i;
 	for (i = 0; i < N_ELEMENTS; i++) {
 		float expected_value = i + 1.0;
+#if STARPU_USE_CUDA
 		expected_value *= array_of_structs[i].y;
+#endif
 		expected_value *= array_of_structs[i].y;
 		if (array_of_structs[i].x != expected_value)
 			return EXIT_FAILURE;
@@ -172,6 +190,7 @@ struct starpu_opencl_program opencl_conversion_program;
 int
 main(void)
 {
+#ifdef STARPU_USE_CPU
 	starpu_init(NULL);
 
 #ifdef STARPU_USE_OPENCL
@@ -200,4 +219,9 @@ main(void)
 
 
 	return check_it();
+#else
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+	return EXIT_SUCCESS;
+#endif
 }

+ 0 - 27
examples/basic_examples/multiformat_conversion_codelets.c

@@ -17,18 +17,6 @@
 #include "multiformat_types.h"
 
 #ifdef STARPU_USE_CUDA
-void cpu_to_cuda(void *buffers[], void *arg)
-{
-	struct point *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
-	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
-	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
-	int i;
-	for (i = 0; i < n; i++) {
-		dst->x[i] = src[i].x;
-		dst->y[i] = src[i].y;
-	}
-}
-
 void cuda_to_cpu(void *buffers[], void *arg)
 {
 	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
@@ -44,7 +32,6 @@ void cuda_to_cpu(void *buffers[], void *arg)
 extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
 struct starpu_codelet cpu_to_cuda_cl = {
 	.where = STARPU_CUDA,
-	.cpu_func = cpu_to_cuda,
 	.cuda_func = cpu_to_cuda_cuda_func,
 	.nbuffers = 1
 };
@@ -57,19 +44,6 @@ struct starpu_codelet cuda_to_cpu_cl = {
 #endif
 
 #ifdef STARPU_USE_OPENCL
-void cpu_to_opencl(void *buffers[], void *arg)
-{
-	fprintf(stderr, "User Entering %s\n", __func__);
-	struct point *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
-	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
-	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
-	int i;
-	for (i = 0; i < n; i++) {
-		dst->x[i] = src[i].x;
-		dst->y[i] = src[i].y;
-	}
-}
-
 void opencl_to_cpu(void *buffers[], void *arg)
 {
 	fprintf(stderr, "User Entering %s\n", __func__);
@@ -86,7 +60,6 @@ void opencl_to_cpu(void *buffers[], void *arg)
 extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
 struct starpu_codelet cpu_to_opencl_cl = {
 	.where = STARPU_OPENCL,
-	.cpu_func = cpu_to_opencl,
 	.opencl_func = cpu_to_opencl_opencl_func,
 	.nbuffers = 1
 };

+ 159 - 1
src/core/sched_policy.c

@@ -242,6 +242,27 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
 	if (is_basic_worker)
 	{
+		unsigned node = starpu_worker_get_memory_node(workerid);
+		if (_starpu_task_uses_multiformat_handles(task))
+		{
+			int i;
+			for (i = 0; i < task->cl->nbuffers; i++)
+			{
+				struct starpu_task *conversion_task;
+				starpu_data_handle_t handle;
+
+				handle = task->buffers[i].handle;
+				if (!_starpu_handle_needs_conversion_task(handle, node))
+					continue;
+
+				conversion_task = _starpu_create_conversion_task(handle, node);
+				_starpu_push_local_task(worker, conversion_task, 0);
+				//_STARPU_DEBUG("Pushing a conversion task\n");
+			}
+
+			for (i = 0; i < task->cl->nbuffers; i++)
+				task->buffers[i].handle->mf_node = node;
+		}
 		return _starpu_push_local_task(worker, task, 0);
 	}
 	else
@@ -309,8 +330,74 @@ int _starpu_push_task(struct _starpu_job *j, unsigned job_is_already_locked)
         return ret;
 }
 
+/*
+ * Given a handle that needs to be converted in order to be used on the given
+ * node, returns a task that takes care of the conversion.
+ */
+struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
+						   unsigned int node)
+{
+	struct starpu_task *conversion_task;
+	struct starpu_multiformat_interface *interface;
+	enum _starpu_node_kind node_kind;
+
+	conversion_task = starpu_task_create();
+	conversion_task->synchronous = 0;
+	conversion_task->buffers[0].handle = handle;
+	conversion_task->buffers[0].mode = STARPU_RW;
+
+	/* The node does not really matter here */
+	interface = (struct starpu_multiformat_interface *)
+		starpu_data_get_interface_on_node(handle, 0);
+	node_kind = _starpu_get_node_kind(node);
+	
+	handle->refcnt++;
+	handle->busy_count++;
+
+	switch(node_kind)
+	{
+	case STARPU_CPU_RAM:
+		switch (_starpu_get_node_kind(handle->mf_node))
+		{
+		case STARPU_CPU_RAM:
+			STARPU_ASSERT(0);
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			conversion_task->cl = interface->ops->cuda_to_cpu_cl;
+			break;
+#endif
+#ifdef STARPU_USE_OPENCL
+		case STARPU_OPENCL_RAM:
+			conversion_task->cl = interface->ops->opencl_to_cpu_cl;
+			break;
+#endif
+		default:
+			fprintf(stderr, "Oops : %d\n", handle->mf_node);
+			STARPU_ASSERT(0);
+		}
+		break;
+#if STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+		conversion_task->cl = interface->ops->cpu_to_cuda_cl;
+		break;
+#endif
+#if STARPU_USE_OPENCL
+	case STARPU_OPENCL_RAM:
+		conversion_task->cl = interface->ops->cpu_to_opencl_cl;
+		break;
+#endif
+	case STARPU_SPU_LS: /* Not supported */
+	default:
+		STARPU_ASSERT(0);
+	}
+
+	return conversion_task;
+}
+
+
 struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 {
+	int i;
 	struct starpu_task *task;
 
 	/* We can't tell in advance which task will be picked up, so we measure
@@ -322,10 +409,81 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 
 	/* perhaps there is some local task to be executed first */
 	task = _starpu_pop_local_task(worker);
-
+	if (task)
+		goto profiling;
+
+	/*
+	 * The first STARPU_NMAXBUFS elements of queued_tasks[i] are conversion
+	 * tasks for multiformat handles. The last element is the "real" task.
+	 */
+	int worker_id = starpu_worker_get_id();
+	static struct starpu_task *queued_tasks[STARPU_NMAXWORKERS][STARPU_NMAXBUFS+1] = { NULL };
+
+	/* Maybe there is a queued task for this worker */
+pick_from_queued_tasks:
+	for (i = 0; i < STARPU_NMAXBUFS+1; i++)
+	{
+		if (queued_tasks[worker_id][i])
+		{
+			task = queued_tasks[worker_id][i];
+			queued_tasks[worker_id][i] = NULL;
+			goto profiling;
+		}
+	}
+	
 	if (!task && policy.pop_task)
 		task = policy.pop_task();
 
+	if (!task)
+		return NULL;
+
+	/* Make sure we do not bother with all the multiformat-specific code if 
+	 * it is not necessary. */
+	if (!_starpu_task_uses_multiformat_handles(task))
+		goto profiling;
+
+	/*
+	 * This worker may not be able to execute this task. In this case, we
+	 * should return the task anyway. It will be pushed back almost immediatly.
+	 * This way, we avoid computing and executing the conversions tasks.
+	 * Here, we do not care about what implementation is used.
+	 */
+	if (!starpu_worker_can_execute_task(worker_id, task, 0))
+		return task;
+
+	unsigned node = starpu_worker_get_memory_node(worker_id);
+
+	/*
+	 * We do have a task that uses multiformat handles. Let's create the 
+	 * required conversion tasks.
+	 */
+	for (i = 0; i < task->cl->nbuffers; i++)
+	{
+		struct starpu_task *conversion_task;
+		starpu_data_handle_t handle;
+
+		handle = task->buffers[i].handle;
+		if (!_starpu_handle_needs_conversion_task(handle, node))
+			continue;
+
+		conversion_task = _starpu_create_conversion_task(handle, node);
+		queued_tasks[worker_id][i] = conversion_task;
+	}
+
+	/*
+	 * Next tasks will need to know where these handles have gone.
+	 */
+	for (i = 0; i < task->cl->nbuffers; i++)
+		task->buffers[i].handle->mf_node = node;
+
+	queued_tasks[worker_id][STARPU_NMAXBUFS] = task;
+
+	/* We know there is at least one task in queued_tasks[worker_id]. */
+	goto pick_from_queued_tasks;
+	
+
+	/* We finally got our task */
+profiling:
 	/* Note that we may get a NULL task in case the scheduler was unlocked
 	 * for some reason. */
 	if (profiling && task)

+ 3 - 0
src/core/sched_policy.h

@@ -37,4 +37,7 @@ void _starpu_sched_post_exec_hook(struct starpu_task *task);
 
 void _starpu_wait_on_sched_event(void);
 
+struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
+						   unsigned int node);
+
 #endif // __SCHED_POLICY_H__

+ 33 - 0
src/core/task.c

@@ -461,3 +461,36 @@ double _starpu_task_get_conversion_time(struct starpu_task *task)
 
 	return conversion_time;
 }
+
+/*
+ * Returns 0 if tasks does not use any multiformat handle, 1 otherwise.
+ */
+int
+_starpu_task_uses_multiformat_handles(struct starpu_task *task)
+{
+	int i;
+	for (i = 0; i < task->cl->nbuffers; i++)
+	{
+		unsigned int id;
+		id = starpu_get_handle_interface_id(task->buffers[i].handle);
+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Checks whether the given handle needs to be converted in order to be used on
+ * the node given as the second argument.
+ */
+int
+_starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
+				     unsigned int node)
+{
+	enum _starpu_node_kind node_kind;
+
+	node_kind = _starpu_get_node_kind(node);
+
+	return !!(node_kind != _starpu_get_node_kind(handle->mf_node));
+}

+ 6 - 0
src/core/task.h

@@ -46,4 +46,10 @@ struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
 
 struct starpu_task *_starpu_create_task_alias(struct starpu_task *task);
 
+int _task_needs_conversion(struct starpu_task *task, int worker_id);
+int _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
+					 unsigned int node);
+
+int _starpu_task_uses_multiformat_handles(struct starpu_task *task);
+
 #endif // __CORE_TASK_H__

+ 2 - 0
src/datawizard/coherency.h

@@ -217,6 +217,8 @@ struct _starpu_data_state
 	unsigned stats_shared_to_owner[STARPU_MAXNODES];
 	unsigned stats_invalidated[STARPU_MAXNODES];
 #endif
+
+	unsigned int mf_node; //XXX
 };
 
 void _starpu_display_msi_stats(void);

+ 38 - 1
src/datawizard/interfaces/data_interface.c

@@ -277,7 +277,7 @@ void starpu_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 
 	STARPU_ASSERT(handleptr);
 	*handleptr = handle;
-
+	handle->mf_node = home_node;
 
 	/* fill the interface fields with the appropriate method */
 	ops->register_data_handle(handle, home_node, data_interface);
@@ -412,6 +412,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 {
 	STARPU_ASSERT(handle);
 
+
 	if (coherent)
 	{
 		/* If sequential consistency is enabled, wait until data is available */
@@ -446,6 +447,42 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 			}
 			_starpu_release_data_on_node(handle, 0, &handle->per_node[home_node]);
 		}
+
+		/* If this handle uses the multiformat interface, we may have to convert
+		 * this piece of data back into the CPU format.
+		 * XXX : This is quite hacky, could we submit a task instead ?
+		 */
+		unsigned int id = starpu_get_handle_interface_id(handle);
+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID &&
+			_starpu_get_node_kind(handle->mf_node) != STARPU_CPU_RAM)
+		{
+			_STARPU_DEBUG("Conversion needed\n");
+			void *buffers[1];
+			struct starpu_multiformat_interface *interface;
+			interface = starpu_data_get_interface_on_node(handle, 0);
+			struct starpu_codelet *cl;
+			enum _starpu_node_kind node_kind = _starpu_get_node_kind(handle->mf_node);
+			
+			switch (node_kind)
+			{
+#ifdef STARPU_USE_CUDA
+				case STARPU_CUDA_RAM:
+					cl = interface->ops->cuda_to_cpu_cl;
+					break;
+#endif
+#ifdef STARPU_USE_OPENCL
+				case STARPU_OPENCL_RAM:
+					cl = interface->ops->opencl_to_cpu_cl;
+					break;
+#endif
+				case STARPU_CPU_RAM:      /* Impossible ! */
+				case STARPU_SPU_LS:       /* Not supported */
+				default:
+					STARPU_ASSERT(0);
+			}
+			buffers[0] = interface;
+			cl->cpu_func(buffers, NULL);
+		}
 	}
 	else
 	{

+ 37 - 58
src/datawizard/interfaces/multiformat_interface.c

@@ -246,8 +246,16 @@ static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
 			break;
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
-			cudaFree(multiformat_interface->cuda_ptr);
-			multiformat_interface->cuda_ptr = NULL;
+			if (multiformat_interface->cpu_ptr)
+			{
+				cudaFree(multiformat_interface->cpu_ptr);
+				multiformat_interface->cpu_ptr = NULL;
+			}
+			if (multiformat_interface->cuda_ptr)
+			{
+				cudaFree(multiformat_interface->cuda_ptr);
+				multiformat_interface->cuda_ptr = NULL;
+			}
 			break;
 #endif
 #ifdef STARPU_USE_OPENCL
@@ -283,6 +291,15 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 				multiformat_interface->cpu_ptr = (void *) addr;
 				multiformat_interface->dev_handle = addr;
 			}
+
+#ifdef STARPU_USE_CUDA
+			multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
+			STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
+#endif
+#ifdef STARPU_USE_OPENCL
+			multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
+			STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
+#endif
 			break;
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
@@ -298,6 +315,9 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 					multiformat_interface->cuda_ptr = (void *)addr;
 					multiformat_interface->dev_handle = addr;
 				}
+
+				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
+				status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
 				break;
 			}
 #endif
@@ -319,6 +339,11 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 					multiformat_interface->dev_handle = addr;
 
 				}
+
+				_starpu_opencl_allocate_memory(&multiformat_interface->cpu_ptr,
+							multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
+							CL_MEM_READ_WRITE);
+				
 				break;
 			}
 #endif
@@ -384,15 +409,7 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 				if (src_multiformat->cuda_ptr == NULL)
 					return -ENOMEM;
 			}
-			/* Converting data , from host to host */
-			double tmp = starpu_timing_now();
-			void *buffers[1];
-			buffers[0] = src_interface;
-			struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
-			cl->cpu_func(buffers, NULL);
-			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
-
-			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
+			status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
 			if (STARPU_UNLIKELY(status))
 			{
 				STARPU_CUDA_REPORT_ERROR(status);
@@ -406,11 +423,6 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 			if (STARPU_UNLIKELY(status))
 				STARPU_CUDA_REPORT_ERROR(status);
 
-			void *buffers[1];
-			struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
-			buffers[0] = dst_interface;
-			cl->cpu_func(buffers, NULL);
-
 			break;
 		}
 		case cudaMemcpyDeviceToDevice:
@@ -461,16 +473,7 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 					return -ENOMEM;
 			}
 
-			/* Converting data , from host to host */
-			double tmp = starpu_timing_now();
-			void *buffers[1]; // XXX
-			buffers[0] = src_interface;
-			struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
-			cl->cpu_func(buffers, NULL);
-			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
-
-			/* Actual copy from host to device */
-			status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
+			status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
 			if (STARPU_UNLIKELY(status))
 			{
 				STARPU_CUDA_REPORT_ERROR(status);
@@ -484,12 +487,6 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 			if (STARPU_UNLIKELY(status))
 				STARPU_CUDA_REPORT_ERROR(status);
 
-			/* Converting data */
-			void *buffers[1];
-			struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
-			buffers[0] = dst_interface;
-			cl->cpu_func(buffers, NULL);
-
 			break;
 		}
 		case cudaMemcpyDeviceToDevice:
@@ -623,26 +620,10 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
 
 	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
 
-	if (src_multiformat->opencl_ptr == NULL)
-	{
-		src_multiformat->opencl_ptr = malloc(src_multiformat->nx * src_multiformat->ops->opencl_elemsize);
-		if (src_multiformat->opencl_ptr == NULL)
-		{
-			return -ENOMEM;
-		}
-	}
-
-	double tmp = starpu_timing_now();
-	void *buffers[1];
-	struct starpu_codelet *cl = src_multiformat->ops->cpu_to_opencl_cl;
-	buffers[0] = src_interface;
-	cl->cpu_func(buffers, NULL);
-	dst_multiformat->conversion_time = starpu_timing_now() - tmp;
 
-
-	err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->opencl_ptr,
+	err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->cpu_ptr,
 							   src_node,
-							   (cl_mem) dst_multiformat->dev_handle,
+							   (cl_mem) dst_multiformat->cpu_ptr,
 							   dst_node,
 							   size,
 							   dst_multiformat->offset,
@@ -672,9 +653,13 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
 	STARPU_ASSERT(src_multiformat->ops != NULL);
 	STARPU_ASSERT(dst_multiformat->ops != NULL);
 
-	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize,
+	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
 
-	err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->dev_handle,
+	if (dst_multiformat->opencl_ptr == NULL) {
+		/* XXX : it is weird that we might have to allocate memory here... */
+		dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
+	}
+	err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->opencl_ptr,
 							   src_node,
 							   dst_multiformat->opencl_ptr,
 							   dst_node,
@@ -687,12 +672,6 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
 
 	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
 
-	/* XXX So much for asynchronicity */
-	clWaitForEvents(1, _event);
-	void *buffers[1];
-	struct starpu_codelet *cl = src_multiformat->ops->opencl_to_cpu_cl;
-	buffers[0] = dst_interface;
-	cl->cpu_func(buffers, NULL);
 
 	return ret;
 }

+ 13 - 0
src/datawizard/user_interactions.c

@@ -22,6 +22,7 @@
 #include <datawizard/copy_driver.h>
 #include <datawizard/write_back.h>
 #include <core/dependencies/data_concurrency.h>
+#include <core/sched_policy.h>
 
 /* Explicitly ask StarPU to allocate room for a piece of data on the specified
  * memory node. */
@@ -206,6 +207,18 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 		return -EDEADLK;
         }
 
+	unsigned int id = starpu_get_handle_interface_id(handle);
+	if (id == STARPU_MULTIFORMAT_INTERFACE_ID &&
+	    _starpu_handle_needs_conversion_task(handle, 0))
+	{
+		struct starpu_task *task = _starpu_create_conversion_task(handle, 0);
+		handle->refcnt--;
+		handle->busy_count--;
+		handle->mf_node = 0;
+		task->synchronous = 1;
+		starpu_task_submit(task);
+	}
+
 	struct user_interaction_wrapper wrapper =
 	{
 		.handle = handle,

+ 28 - 1
src/sched_policies/heft.c

@@ -212,6 +212,33 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 		starpu_prefetch_task_input_on_node(task, memory_node);
 	}
 
+	if (!_starpu_task_uses_multiformat_handles(task))
+		goto push_task;
+
+	/*
+	 * Our task uses multiformat handles, which may need to be converted.
+	 */
+	int i;
+	for (i = 0; i < task->cl->nbuffers; i++)
+	{
+		struct starpu_task *conversion_task;
+		starpu_data_handle_t handle;
+
+		handle = task->buffers[i].handle;
+		unsigned int node = starpu_worker_get_memory_node(best_workerid);
+		if (!_starpu_handle_needs_conversion_task(handle, node))
+			continue;
+
+		conversion_task = _starpu_create_conversion_task(handle, node);
+		starpu_push_local_task(best_workerid, conversion_task, prio);
+	}
+
+	unsigned node = starpu_worker_get_memory_node(best_workerid);
+	for (i = 0; i < task->cl->nbuffers; i++)
+		task->buffers[i].handle->mf_node = node;
+
+push_task:
+	//_STARPU_DEBUG("Heft : pushing local task\n");
 	return starpu_push_local_task(best_workerid, task, prio);
 }
 
@@ -275,7 +302,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
 				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
 				if (conversion_time > 0.0)
-					local_data_penalty[worker][nimpl] += conversion_time;
+					local_task_length[worker][nimpl] += conversion_time;
 				//_STARPU_DEBUG("Scheduler heft: task length (%lf) local power (%lf) worker (%u) kernel (%u) \n", local_task_length[worker],local_power[worker],worker,nimpl);
 
 			}

+ 28 - 0
tests/Makefile.am

@@ -102,6 +102,8 @@ noinst_PROGRAMS =				\
 	core/restart				\
 	core/execute_on_a_specific_worker	\
 	core/insert_task			\
+	core/multiformat_data_release           \
+	core/multiformat_handle_conversion      \
 	core/multithreaded			\
 	core/multithreaded_init			\
 	core/starpu_task_wait_for_all		\
@@ -152,6 +154,7 @@ noinst_PROGRAMS =				\
 	datawizard/increment_redux_v2		\
 	datawizard/handle_to_pointer		\
 	datawizard/lazy_allocation		\
+	datawizard/interfaces/multiformat/multiformat_interface \
 	datawizard/interfaces/vector/test_vector_interface   \
 	errorcheck/starpu_init_noworker		\
 	errorcheck/invalid_blocking_calls	\
@@ -253,6 +256,11 @@ BUILT_SOURCES += 						\
 	microbenchs/null_kernel_gordon.spuelf
 endif
 
+core_multiformat_handle_conversion_SOURCES = \
+	core/multiformat_handle_conversion.c
+
+core_multiformat_data_release_SOURCES = \
+	core/multiformat_data_release.c
 
 ##############
 # Interfaces #
@@ -272,3 +280,23 @@ datawizard_interfaces_vector_test_vector_interface_SOURCES +=               \
 nobase_STARPU_OPENCL_DATA_DATA += \
 	datawizard/interfaces/vector/test_vector_opencl_kernel.cl
 endif
+
+datawizard_interfaces_multiformat_multiformat_interface_SOURCES =           \
+	datawizard/interfaces/test_interfaces.c                             \
+	datawizard/interfaces/multiformat/multiformat_interface.c           \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_multiformat_multiformat_interface_SOURCES+=                  \
+	datawizard/interfaces/multiformat/multiformat_cuda.cu                      \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_multiformat_multiformat_interface_SOURCES+=                  \
+	datawizard/interfaces/multiformat/multiformat_opencl.c                     \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA +=                                                          \
+	datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl                     \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl
+endif

+ 270 - 0
tests/core/multiformat_data_release.c

@@ -0,0 +1,270 @@
+#include <starpu.h>
+
+#define NX 16
+
+static int vector[NX];
+static starpu_data_handle_t handle;
+
+#define ENTER() do { fprintf(stderr, "Entering %s\n", __func__); } while (0)
+
+/* Counting the calls to the codelets */
+struct stats {
+	unsigned int cpu;
+#ifdef STARPU_USE_CUDA
+	unsigned int cuda;
+	unsigned int cpu_to_cuda;
+	unsigned int cuda_to_cpu;
+#endif
+#ifdef STARPU_USE_OPENCL
+	unsigned int opencl;
+	unsigned int cpu_to_opencl;
+	unsigned int opencl_to_cpu;
+#endif
+};
+
+static struct stats global_stats;
+
+/* "Fake" conversion codelets */
+#ifdef STARPU_USE_CUDA
+static void cpu_to_cuda_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cpu_to_cuda++;
+}
+
+static void cuda_to_cpu_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cuda_to_cpu++;
+}
+
+static struct starpu_codelet cpu_to_cuda_cl = {
+	.where = STARPU_CUDA,
+	.cuda_func = cpu_to_cuda_func,
+	.nbuffers = 1
+};
+
+static struct starpu_codelet cuda_to_cpu_cl = {
+	.where = STARPU_CPU,
+	.cpu_func = cuda_to_cpu_func,
+	.nbuffers = 1
+};
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static void cpu_to_opencl_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cpu_to_opencl++;
+}
+
+static void opencl_to_cpu_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.opencl_to_cpu++;
+}
+
+static struct starpu_codelet cpu_to_opencl_cl = {
+	.where = STARPU_OPENCL,
+	.opencl_func = cpu_to_opencl_func,
+	.nbuffers = 1
+};
+
+static struct starpu_codelet opencl_to_cpu_cl = {
+	.where = STARPU_CPU,
+	.cpu_func = opencl_to_cpu_func,
+	.nbuffers = 1
+};
+#endif /* !STARPU_USE_OPENCL */
+
+static struct starpu_multiformat_data_interface_ops ops = {
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize = sizeof(int),
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_elemsize = sizeof(int),
+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
+#endif
+	.cpu_elemsize = sizeof(int)
+};
+
+
+static void
+register_handle(void)
+{
+	int i;
+	for (i = 0; i < NX; i++)
+		vector[i] = i;
+	starpu_multiformat_data_register(&handle, 0, vector, NX, &ops);
+}
+
+static void
+unregister_handle(void)
+{
+	starpu_data_unregister(handle);
+}
+
+#ifdef STARPU_USE_CUDA
+static void cuda_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cuda++;
+}
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static void opencl_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.opencl++;
+}
+#endif /* !STARPU_USE_OPENCL */
+
+static void
+create_and_submit(int where)
+{
+	static struct starpu_codelet cl = {
+#ifdef STARPU_USE_CUDA
+		.cuda_func   = cuda_func,
+#endif
+#if STARPU_USE_OPENCL
+		.opencl_func = opencl_func,
+#endif
+		.nbuffers    = 1
+	};
+	cl.where = where;
+
+	struct starpu_task *task = starpu_task_create();
+	task->cl = &cl;
+	task->buffers[0].handle = handle;
+	task->buffers[0].mode = STARPU_RW;
+
+	/* We need to be sure the data has been copied to the GPU at the end 
+	 * of this function */
+	task->synchronous = 1;
+	starpu_task_submit(task);
+}
+
+static void
+print_stats(struct stats *s)
+{
+	fprintf(stderr, "cpu         : %d\n", s->cpu);
+#ifdef STARPU_USE_CUDA
+	fprintf(stderr, "cuda        : %d\n" 
+			"cpu->cuda   : %d\n"
+			"cuda->cpu   : %d\n",
+			s->cuda,
+			s->cpu_to_cuda,
+			s->cuda_to_cpu);
+#endif
+#ifdef STARPU_USE_OPENCL
+	fprintf(stderr, "opencl      : %d\n" 
+			"cpu->opencl : %d\n"
+			"opencl->cpu : %d\n",
+			s->opencl,
+			s->cpu_to_opencl,
+			s->opencl_to_cpu);
+#endif
+}
+
+static int
+compare(struct stats *s1, struct stats *s2)
+{
+	if (
+#ifdef STARPU_USE_CPU
+	    s1->cpu == s2->cpu &&
+#endif
+#ifdef STARPU_USE_CUDA
+	    s1->cuda == s2->cuda &&
+	    s1->cpu_to_cuda == s2->cpu_to_cuda &&
+	    s1->cuda_to_cpu == s2->cuda_to_cpu &&
+#endif
+#ifdef STARPU_USE_OPENCL
+	    s1->opencl == s2->opencl &&
+	    s1->cpu_to_opencl == s2->cpu_to_opencl &&
+	    s1->opencl_to_cpu == s2->opencl_to_cpu &&
+#endif
+	    1 /* Just so the build does not fail if we disable EVERYTHING */
+	)
+		return 0;
+	else
+		return 1;
+
+}
+
+static int
+test(void)
+{
+	struct stats expected_stats;
+	memset(&expected_stats, 0, sizeof(expected_stats));
+
+#ifdef STARPU_USE_CUDA
+	create_and_submit(STARPU_CUDA);
+	starpu_data_acquire(handle, STARPU_RW);
+
+	expected_stats.cuda = 1;
+	expected_stats.cpu_to_cuda = 1;
+	expected_stats.cuda_to_cpu = 1;
+
+	starpu_data_release(handle);
+	if (compare(&global_stats, &expected_stats) != 0)
+	{
+		fprintf(stderr, "CUDA failed\n");
+		print_stats(&global_stats);
+		fprintf(stderr ,"\n");
+		print_stats(&expected_stats);
+		return 1;
+	}
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+	create_and_submit(STARPU_OPENCL);
+	starpu_data_acquire(handle, STARPU_RW);
+	expected_stats.opencl = 1;
+	expected_stats.cpu_to_opencl = 1;
+	expected_stats.opencl_to_cpu = 1;
+
+	starpu_data_release(handle);
+	if (compare(&global_stats, &expected_stats) != 0)
+	{
+		fprintf(stderr, "OPENCL failed\n");
+		print_stats(&global_stats);
+		fprintf(stderr ,"\n");
+		print_stats(&expected_stats);
+		return 1;
+	}
+
+#endif /* !STARPU_USE_OPENCL */
+
+	return 0;
+}
+
+int
+main(void)
+{
+#ifdef STARPU_USE_CPU
+	struct starpu_conf conf = {
+		.ncpus = -1,
+		.ncuda = 1,
+		.nopencl = 1
+	};
+	memset(&global_stats, 0, sizeof(global_stats));
+	starpu_init(&conf);
+
+	register_handle();
+
+	int err = test();
+
+	unregister_handle();
+	starpu_shutdown();
+
+	return err?EXIT_FAILURE:EXIT_SUCCESS;
+#else /* ! STARPU_USE_CPU */
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+	return EXIT_SUCCESS;
+#endif
+}

+ 317 - 0
tests/core/multiformat_handle_conversion.c

@@ -0,0 +1,317 @@
+#include <starpu.h>
+
+#define NX 4
+
+#define DEBUG 0
+
+#if DEBUG
+#define SYNCHRONOUS 1 /* Easier to debug with synchronous tasks */
+#define ENTER() do { fprintf(stderr, "Entering %s\n", __func__); } while (0)
+#else
+#define SYNCHRONOUS 0 
+#define ENTER()
+#endif
+
+
+/* Counting the calls to the codelets */
+struct stats {
+	unsigned int cpu;
+#ifdef STARPU_USE_CUDA
+	unsigned int cuda;
+	unsigned int cpu_to_cuda;
+	unsigned int cuda_to_cpu;
+#endif
+#ifdef STARPU_USE_OPENCL
+	unsigned int opencl;
+	unsigned int cpu_to_opencl;
+	unsigned int opencl_to_cpu;
+#endif
+};
+
+struct stats global_stats;
+
+/* "Fake" conversion codelets */
+#ifdef STARPU_USE_CUDA
+static void cpu_to_cuda_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cpu_to_cuda++;
+}
+
+static void cuda_to_cpu_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cuda_to_cpu++;
+}
+
+struct starpu_codelet cpu_to_cuda_cl = {
+	.where = STARPU_CUDA,
+	.cuda_func = cpu_to_cuda_func,
+	.nbuffers = 1
+};
+
+struct starpu_codelet cuda_to_cpu_cl = {
+	.where = STARPU_CPU,
+	.cpu_func = cuda_to_cpu_func,
+	.nbuffers = 1
+};
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static void cpu_to_opencl_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cpu_to_opencl++;
+}
+
+static void opencl_to_cpu_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.opencl_to_cpu++;
+}
+
+struct starpu_codelet cpu_to_opencl_cl = {
+	.where = STARPU_OPENCL,
+	.opencl_func = cpu_to_opencl_func,
+	.nbuffers = 1
+};
+
+struct starpu_codelet opencl_to_cpu_cl = {
+	.where = STARPU_CPU,
+	.cpu_func = opencl_to_cpu_func,
+	.nbuffers = 1
+};
+#endif /* !STARPU_USE_OPENCL */
+
+static struct starpu_multiformat_data_interface_ops ops = {
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize = sizeof(int),
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_elemsize = sizeof(int),
+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
+#endif
+	.cpu_elemsize = sizeof(int)
+};
+
+static void cpu_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cpu++;
+}
+
+#ifdef STARPU_USE_CUDA
+static void cuda_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.cuda++;
+}
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static void opencl_func(void *buffers[], void *args)
+{
+	ENTER();
+	global_stats.opencl++;
+}
+#endif /* !STARPU_USE_OPENCL */
+
+
+static void
+create_and_submit_tasks(int where, starpu_data_handle_t handles[])
+{
+	fprintf(stderr, "***** Starting Task 1\n");
+	static struct starpu_codelet cl = {
+#ifdef STARPU_USE_CUDA
+		.cuda_func   = cuda_func,
+#endif
+#if STARPU_USE_OPENCL
+		.opencl_func = opencl_func,
+#endif
+		.nbuffers    = 1
+	};
+	cl.where = where;
+
+	struct starpu_task *task = starpu_task_create();
+	task->synchronous = SYNCHRONOUS;
+	task->cl = &cl;
+	task->buffers[0].handle = handles[0];
+	task->buffers[0].mode = STARPU_RW;
+	starpu_task_submit(task);
+
+	fprintf(stderr, "***** Starting Task 2\n");
+	static struct starpu_codelet cl2 = {
+		.where = STARPU_CPU,
+		.cpu_func = cpu_func,
+		.nbuffers = 1
+	};
+
+	struct starpu_task *task2 = starpu_task_create();
+	task2->synchronous = SYNCHRONOUS;
+	task2->cl = &cl2;
+	task2->buffers[0].handle = handles[1];
+	task2->buffers[0].mode = STARPU_RW;
+	starpu_task_submit(task2);
+
+
+	fprintf(stderr, "***** Starting Task 3\n");
+	static struct starpu_codelet cl3 = {
+		.cpu_func    = cpu_func,
+#ifdef STARPU_USE_CUDA
+		.cuda_func   = cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+		.opencl_func = opencl_func,
+#endif
+		.nbuffers    = 2
+	};
+	cl3.where = where;
+
+	struct starpu_task *task3 = starpu_task_create();
+	task3->synchronous = SYNCHRONOUS;
+	task3->cl = &cl3;
+	task3->buffers[0].handle = handles[0];
+	task3->buffers[0].mode = STARPU_RW;
+	task3->buffers[1].handle = handles[1];
+	task3->buffers[1].mode = STARPU_RW;
+	starpu_task_submit(task3);
+
+	starpu_task_wait_for_all();
+	fprintf(stderr, "***** End of all tasks\n");
+	return;
+}
+
+#if DEBUG
+static void
+print_stats(struct stats *s)
+{
+	fprintf(stderr, "cpu         : %d\n", s->cpu);
+#ifdef STARPU_USE_CUDA
+	fprintf(stderr, "cuda        : %d\n" 
+			"cpu->cuda   : %d\n"
+			"cuda->cpu   : %d\n",
+			s->cuda,
+			s->cpu_to_cuda,
+			s->cuda_to_cpu);
+#endif
+#ifdef STARPU_USE_OPENCL
+	fprintf(stderr, "opencl      : %d\n" 
+			"cpu->opencl : %d\n"
+			"opencl->cpu : %d\n",
+			s->opencl,
+			s->cpu_to_opencl,
+			s->opencl_to_cpu);
+#endif
+}
+#endif /* !DEBUG */
+
+/* XXX Just a little bit of copy/pasta here... */
+#ifdef STARPU_USE_CUDA
+static int
+test_cuda(void)
+{
+	int i;
+	int vector1[NX];
+	int vector2[NX];
+	starpu_data_handle_t handles[2];
+
+	for (i = 0; i < NX; i++)
+	{
+		vector1[i] = i;
+		vector2[i] = i;
+	}
+
+	starpu_multiformat_data_register(handles, 0, vector1, NX, &ops);
+	starpu_multiformat_data_register(handles+1, 0, vector2, NX, &ops);
+
+	memset(&global_stats, 0, sizeof(global_stats));
+	create_and_submit_tasks(STARPU_CUDA, handles);
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+
+#if DEBUG
+	print_stats(&global_stats);
+#endif
+
+	return !(global_stats.cpu == 1 &&
+		 global_stats.cpu_to_cuda == 2 &&
+		 global_stats.cuda_to_cpu == 2 &&
+		 global_stats.cuda == 2);
+}
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static int
+test_opencl(void)
+{
+	int i;
+	int vector1[NX];
+	int vector2[NX];
+	starpu_data_handle_t handles[2];
+
+	for (i = 0; i < NX; i++)
+	{
+		vector1[i] = i;
+		vector2[i] = i;
+	}
+
+	starpu_multiformat_data_register(handles, 0, vector1, NX, &ops);
+	starpu_multiformat_data_register(handles+1, 0, vector2, NX, &ops);
+
+	memset(&global_stats, 0, sizeof(global_stats));
+	create_and_submit_tasks(STARPU_OPENCL, handles);
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+
+#if DEBUG
+	print_stats(&global_stats);
+#endif
+
+	return !(global_stats.cpu == 1 &&
+		 global_stats.cpu_to_opencl == 2 &&
+		 global_stats.opencl_to_cpu == 2 &&
+		 global_stats.opencl == 2);
+	
+}
+#endif /* !STARPU_USE_OPENCL */
+
+int
+main(void)
+{
+#ifdef STARPU_USE_CPU
+	struct starpu_conf conf = {
+		.ncpus   = -1,
+		.ncuda   = 2,
+		.nopencl = 1
+	};
+
+	starpu_init(&conf);
+
+#ifdef STARPU_USE_OPENCL
+	if (test_opencl() != 0)
+	{
+		fprintf(stderr, "OPENCL FAILED\n");
+		exit(1);
+	}
+#endif
+#ifdef STARPU_USE_CUDA
+	if (test_cuda() != 0)
+	{
+		fprintf(stderr, "CUDA FAILED \n");
+		exit(1);
+	}
+#endif
+	
+	starpu_shutdown();
+#endif
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+
+	return EXIT_SUCCESS;
+}
+

+ 73 - 0
tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c

@@ -0,0 +1,73 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "multiformat_types.h"
+
+#ifdef STARPU_USE_CUDA
+void cuda_to_cpu(void *buffers[], void *arg)
+{
+	fprintf(stderr, "Entering %s\n", __func__);
+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	int i;
+	for (i = 0; i < n; i++)
+	{
+		dst[i].x = src->x[i];
+		dst[i].y = src->y[i];
+	}
+}
+
+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); struct starpu_codelet cpu_to_cuda_cl = {
+	.where = STARPU_CUDA,
+	.cuda_func = cpu_to_cuda_cuda_func,
+	.nbuffers = 1
+};
+
+struct starpu_codelet cuda_to_cpu_cl = {
+	.where = STARPU_CPU,
+	.cpu_func = cuda_to_cpu,
+	.nbuffers = 1
+};
+#endif
+
+#ifdef STARPU_USE_OPENCL
+void opencl_to_cpu(void *buffers[], void *arg)
+{
+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	int i;
+	for (i = 0; i < n; i++)
+	{
+		dst[i].x = src->x[i];
+		dst[i].y = src->y[i];
+	}
+}
+
+extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
+struct starpu_codelet cpu_to_opencl_cl = {
+	.where = STARPU_OPENCL,
+	.opencl_func = cpu_to_opencl_opencl_func,
+	.nbuffers = 1
+};
+
+struct starpu_codelet opencl_to_cpu_cl = {
+	.where = STARPU_CPU,
+	.cpu_func = opencl_to_cpu,
+	.nbuffers = 1
+};
+#endif

+ 50 - 0
tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu

@@ -0,0 +1,50 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "multiformat_types.h"
+
+static __global__ void cpu_to_cuda_cuda(struct point *src,
+	struct struct_of_arrays *dst, unsigned n)
+{
+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i < n)
+	{
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
+	}
+
+}
+
+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
+{
+	fprintf(stderr, "Entering %s\n", __func__);
+	struct point *src;
+	struct struct_of_arrays *dst;
+
+	src = (struct point *) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+
+	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+
+        cpu_to_cuda_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(src, dst, n);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 104 - 0
tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c

@@ -0,0 +1,104 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl"
+static struct starpu_opencl_program opencl_conversion_program;
+
+void cpu_to_opencl_opencl_func(void *buffers[], void *args)
+{
+	(void) args;
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+					    &opencl_conversion_program,
+					    NULL);
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_conversion_program,
+					"cpu_to_opencl_opencl",
+					devid);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(src), &src);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 1, sizeof(dst), &dst);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+                if (local > global)
+			local = global;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        starpu_opencl_unload_opencl(&opencl_conversion_program);
+}

+ 27 - 0
tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl_kernel.cl

@@ -0,0 +1,27 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include "multiformat_types.h"
+__kernel void cpu_to_opencl_opencl(__global struct point *src,
+				   __global struct struct_of_arrays *dst,
+				   unsigned int n)
+{
+	const unsigned int i = get_global_id(0);
+	if (i < n)
+	{
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
+	}
+}

+ 76 - 0
tests/datawizard/interfaces/multiformat/multiformat_cuda.cu

@@ -0,0 +1,76 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "multiformat_types.h"
+#include "../test_interfaces.h"
+
+extern struct test_config multiformat_config;
+
+static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n,
+					int *err, int factor)
+{
+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= n)
+		return;
+
+	if (soa->x[i] != i * factor || soa->y[i] != i * factor)
+	{
+		*err = 1;
+	}
+	else
+	{
+		soa->x[i] = -soa->x[i];
+		soa->y[i] = -soa->y[i];
+	}
+}
+
+extern "C" void test_multiformat_cuda_func(void *buffers[], void *args)
+{
+	fprintf(stderr, "Entering %s\n", __func__);
+	int factor;
+	int *ret;
+	cudaError_t error;
+	unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	struct struct_of_arrays *soa;
+
+	soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+	factor = *(int *) args;
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpy(ret,
+			   &multiformat_config.copy_failed,
+			   sizeof(int),
+			   cudaMemcpyHostToDevice);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+        multiformat_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(soa, n, ret, factor);
+
+	error = cudaMemcpy(&multiformat_config.copy_failed,
+			   ret,
+			   sizeof(int),
+			   cudaMemcpyDeviceToHost);
+	
+	cudaFree(ret);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 148 - 0
tests/datawizard/interfaces/multiformat/multiformat_interface.c

@@ -0,0 +1,148 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "multiformat_types.h"
+#include "../test_interfaces.h"
+
+static void test_multiformat_cpu_func(void *buffers[], void *args);
+#ifdef STARPU_USE_CUDA
+extern void test_multiformat_cuda_func(void *buffers[], void *args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_multiformat_opencl_func(void *buffers[], void *args);
+#endif
+
+static struct point array_of_structs[N_ELEMENTS];
+static starpu_data_handle_t multiformat_handle;
+
+struct test_config multiformat_config = {
+	.cpu_func      = test_multiformat_cpu_func,
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_multiformat_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_multiformat_opencl_func,
+#endif
+	.handle        = &multiformat_handle,
+	.copy_failed   = 0,
+	.name          = "multiformat_interface"
+};
+
+static void
+test_multiformat_cpu_func(void *buffers[], void *args)
+{
+	struct point *aos;
+	unsigned int n, i;
+	int factor;
+
+	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	factor = *(int *) args;
+
+	for (i = 0; i < n; i++)
+	{
+			fprintf(stderr, "(%d %d) [%d]", aos[i].x, aos[i].y, factor);
+		if (aos[i].x != i * factor || aos[i].y != i * factor)
+		{
+			multiformat_config.copy_failed = 1;
+		}
+		aos[i].x = -aos[i].x;
+		aos[i].y = -aos[i].y;
+	}
+	fprintf(stderr, "\n");
+}
+
+
+
+#ifdef STARPU_USE_CUDA
+extern struct starpu_codelet cpu_to_cuda_cl;
+extern struct starpu_codelet cuda_to_cpu_cl;
+#endif
+
+#ifdef STARPU_USE_OPENCL
+extern struct starpu_codelet cpu_to_opencl_cl;
+extern struct starpu_codelet opencl_to_cpu_cl;
+#endif
+
+struct starpu_multiformat_data_interface_ops format_ops = {
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize = 2* sizeof(float),
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_elemsize = 2 * sizeof(float),
+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
+#endif
+	.cpu_elemsize = sizeof(struct point),
+};
+
+static void
+register_data(void)
+{
+	int i;
+
+	for (i = 0; i < N_ELEMENTS; i++)
+	{
+		array_of_structs[i].x = i;
+		array_of_structs[i].y = i;
+	}
+	starpu_multiformat_data_register(&multiformat_handle,
+					 0,
+					 &array_of_structs,
+					 N_ELEMENTS,
+					 &format_ops);
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(multiformat_handle);
+}
+
+int
+main(void)
+{
+#ifdef STARPU_USE_CPU
+	data_interface_test_summary *summary;
+	struct starpu_conf conf = {
+		.ncpus   = -1,
+		.ncuda   = 2,
+		.nopencl = 1
+	};
+
+	starpu_init(&conf);
+
+	register_data();
+
+	summary = run_tests(&multiformat_config);
+	if (!summary)
+		exit(EXIT_FAILURE);
+
+	data_interface_test_summary_print(stderr, summary);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	return data_interface_test_summary_success(summary);
+#else
+	/* Without the CPU, there is no point in using the multiformat
+	 * interface, so this test is pointless. */
+	return EXIT_SUCCESS;
+#endif
+}

+ 130 - 0
tests/datawizard/interfaces/multiformat/multiformat_opencl.c

@@ -0,0 +1,130 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl"
+
+extern struct test_config multiformat_config;
+static struct starpu_opencl_program multiformat_program;
+
+void test_multiformat_opencl_func(void *buffers[], void *args)
+{
+	int id, devid, factor;
+	unsigned int n;
+
+        cl_int             err;
+	cl_kernel          kernel;
+	cl_command_queue   queue;
+	cl_event           event;
+	cl_context         context;
+	cl_mem             val, fail;
+
+	starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+					    &multiformat_program,
+					    NULL);
+
+	factor = *(int *)args;
+	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
+	val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&multiformat_program,
+					"multiformat_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &multiformat_config.copy_failed, &err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	/* Setting args */
+	err  = clSetKernelArg(kernel, 0, sizeof(val), &val);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 1, sizeof(n), &n);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 2, sizeof(fail), &fail);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 3, sizeof(factor), &factor);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+                if (local > global)
+			local = global;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0, 
+				  sizeof(int),
+				  &multiformat_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        starpu_opencl_unload_opencl(&multiformat_program);
+}

+ 36 - 0
tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl

@@ -0,0 +1,36 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "multiformat_types.h"
+__kernel void multiformat_opencl(__global struct struct_of_arrays *soa,
+				 unsigned int nx,
+				 __global int *err,
+				 int factor)
+{
+        const int i = get_global_id(0);
+	if (i >= nx)
+		return;
+
+	if (soa->x[i] != i * factor || soa->y[i] != i * factor)
+	{
+		*err = i;
+	}
+	else
+	{
+		soa->x[i] = -soa->x[i];
+		soa->y[i] = -soa->y[i];
+	}
+}

+ 31 - 0
tests/datawizard/interfaces/multiformat/multiformat_types.h

@@ -0,0 +1,31 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef TEST_MULTIFORMAT_TYPES_H
+#define TEST_MULTIFORMAT_TYPES_H
+
+#define N_ELEMENTS 2
+
+struct struct_of_arrays{
+	int x[N_ELEMENTS];
+	int y[N_ELEMENTS];
+};
+
+struct point {
+	int x, y;
+};
+
+
+#endif