13 years ago · ac859c1e11
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -3,6 +3,7 @@
 
				 # Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				 # Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				 # Copyright (C) 2011  Télécom-SudParis
			
 
				+# Copyright (C) 2012 INRIA
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
 
				 # it under the terms of the GNU Lesser General Public License as published by
			
@@ -156,6 +157,7 @@ examplebin_PROGRAMS +=				\
 
				 	filters/fvector				\
			
 
				 	filters/fblock				\
			
 
				 	filters/fmatrix				\
			
 
				+	filters/multiformat/multiformat_filter  \
			
 
				 	tag_example/tag_example			\
			
 
				 	tag_example/tag_example3		\
			
 
				 	tag_example/tag_example2		\
			
@@ -387,6 +389,26 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 
				 	filters/fblock_opencl_kernel.cl
			
 
				 endif
			
 
				 
			
 
				+##############################
				+# Multiformat filter example #
				+##############################
			
 
				+filters_multiformat_multiformat_filter_SOURCES=                \
			
 
				+	filters/multiformat/multiformat_filter.c               \
			
 
				+	filters/multiformat/multiformat_ops.c                  \
			
 
				+	filters/multiformat/conversion_codelets.c
			
 
				+
			
 
				+if STARPU_USE_CUDA
			
 
				+filters_multiformat_multiformat_filter_SOURCES+=               \
			
 
				+	filters/multiformat/cuda.cu
			
 
				+endif
			
 
				+
			
 
				+if STARPU_USE_OPENCL
			
 
				+filters_multiformat_multiformat_filter_SOURCES+=\
			
 
				+	filters/multiformat/opencl.c
			
 
				+nobase_STARPU_OPENCL_DATA_DATA += \
			
 
				+	filters/multiformat/opencl.cl
			
 
				+endif
			
 
				+
			
 
				 ################
			
 
				 # AXPY example #
			
 
				 ################
			
--- a/examples/filters/multiformat/conversion_codelets.c
+++ b/examples/filters/multiformat/conversion_codelets.c
@@ -0,0 +1,93 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/*
				+ * Converts buffers[0] from the struct-of-arrays layout reached through its
				+ * CUDA pointer into the array of struct point reached through its CPU
				+ * pointer. `arg` is unused.
				+ */
				+void cuda_to_cpu(void *buffers[], void *arg)
				+{
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
				+	struct point *aos_out = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	struct struct_of_arrays *soa_in = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
				+	int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	int k = 0;
				+
				+	while (k < count)
				+	{
				+		aos_out[k].x = soa_in->x[k];
				+		aos_out[k].y = soa_in->y[k];
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+/* CUDA-side body of the cpu_to_cuda codelet: performs no work. */
				+void cpu_to_cuda_cuda_func(void *buffers[], void *args)
				+{
				+	(void) buffers;
				+	(void) args;
				+}
			
 
				+/* Codelet "codelet_cpu_to_cuda": CUDA-only, one read-write buffer. */
				+struct starpu_codelet cpu_to_cuda_cl =
				+{
				+	.name = "codelet_cpu_to_cuda",
				+	.where = STARPU_CUDA,
				+	.nbuffers = 1,
				+	.modes = { STARPU_RW },
				+	.cuda_funcs = { cpu_to_cuda_cuda_func, NULL }
				+};
			
 
				+
			
 
				+/* Codelet "codelet_cuda_to_cpu": runs cuda_to_cpu() on a CPU worker, one read-write buffer. */
				+struct starpu_codelet cuda_to_cpu_cl =
				+{
				+	.name = "codelet_cuda_to_cpu",
				+	.where = STARPU_CPU,
				+	.nbuffers = 1,
				+	.modes = { STARPU_RW },
				+	.cpu_funcs = { cuda_to_cpu, NULL }
				+};
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+/*
				+ * Converts buffers[0] from the struct-of-arrays layout reached through its
				+ * OpenCL pointer into the array of struct point reached through its CPU
				+ * pointer. `arg` is unused.
				+ */
				+void opencl_to_cpu(void *buffers[], void *arg)
				+{
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
				+	struct point *aos_out = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	struct struct_of_arrays *soa_in = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
				+	int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	int k = 0;
				+
				+	while (k < count)
				+	{
				+		aos_out[k].x = soa_in->x[k];
				+		aos_out[k].y = soa_in->y[k];
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+/* OpenCL-side body of the cpu_to_opencl codelet: performs no work. */
				+void cpu_to_opencl_opencl_func(void *buffers[], void *args)
				+{
				+	(void) buffers;
				+	(void) args;
				+}
			
 
				+/* Codelet "codelet_cpu_to_opencl": OpenCL-only, one read-write buffer. */
				+struct starpu_codelet cpu_to_opencl_cl =
				+{
				+	.name = "codelet_cpu_to_opencl",
				+	.where = STARPU_OPENCL,
				+	.nbuffers = 1,
				+	.modes = { STARPU_RW },
				+	.opencl_funcs = { cpu_to_opencl_opencl_func, NULL }
				+};
			
 
				+
			
 
				+/* Codelet "codelet_opencl_to_cpu": runs opencl_to_cpu() on a CPU worker, one read-write buffer. */
				+struct starpu_codelet opencl_to_cpu_cl =
				+{
				+	.name = "codelet_opencl_to_cpu",
				+	.where = STARPU_CPU,
				+	.nbuffers = 1,
				+	.modes = { STARPU_RW },
				+	.cpu_funcs = { opencl_to_cpu, NULL }
				+};
			
 
				+#endif
			
--- a/examples/filters/multiformat/cuda.cu
+++ b/examples/filters/multiformat/cuda.cu
@@ -0,0 +1,43 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+/* Kernel: the thread with global index i multiplies x[i] by y[i] when i < n. */
				+static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n)
				+{
				+	const unsigned tid = threadIdx.x + blockDim.x * blockIdx.x;
				+
				+	if (tid >= n)
				+		return;
				+
				+	soa->x[tid] *= soa->y[tid];
				+}
			
 
				+
			
 
				+/*
				+ * CUDA implementation of the scaling codelet: launches multiformat_cuda over
				+ * the elements of buffers[0] on StarPU's local stream and waits for it.
				+ * Launch and synchronization failures are reported through
				+ * STARPU_CUDA_REPORT_ERROR instead of being silently dropped.
				+ */
				+extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args)
				+{
				+	(void) _args;
				+
				+	FPRINTF(stderr, "Running the cuda kernel (%s)\n", __func__);
				+	unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	struct struct_of_arrays *soa;
				+
				+	soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
				+
				+	/* Nothing to scale; a zero-block launch would be an invalid configuration. */
				+	if (n == 0)
				+		return;
				+
				+	unsigned threads_per_block = 64;
				+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
				+	cudaError_t status;
				+
				+	/* The kernel uses no dynamic shared memory, so none is requested. */
				+	multiformat_cuda<<<nblocks,threads_per_block,0,starpu_cuda_get_local_stream()>>>(soa, n);
				+
				+	/* A failed launch is only visible through cudaGetLastError(). */
				+	status = cudaGetLastError();
				+	if (status != cudaSuccess)
				+		STARPU_CUDA_REPORT_ERROR(status);
				+
				+	status = cudaStreamSynchronize(starpu_cuda_get_local_stream());
				+	if (status != cudaSuccess)
				+		STARPU_CUDA_REPORT_ERROR(status);
				+}
			
--- a/examples/filters/multiformat/multiformat_filter.c
+++ b/examples/filters/multiformat/multiformat_filter.c
@@ -0,0 +1,371 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This is a really simple example intended to show how to use filters with the
			
 
				+ * multiformat interface. It does not do anything really useful. Since the
			
 
				+ * memory is not contiguous (cf. struct struct_of_arrays), the user must write
			
 
				+ * its own copy functions. Some of them have not been implemented here
			
 
				+ * (synchronous functions, for example).
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+#include <starpu_opencl.h>
			
 
				+#endif
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+static int ncpu = 0;
			
 
				+static int ncuda = 0;
			
 
				+static int nopencl = 0;
			
 
				+static unsigned int nchunks = 1;
			
 
				+
			
 
				+static struct point array_of_structs[N_ELEMENTS];
			
 
				+static starpu_data_handle_t array_of_structs_handle;
			
 
				+
			
 
				+#if STARPU_USE_CPU
			
 
				+/* CPU implementation of the scaling codelet: x *= y for every point of buffers[0]. */
				+static void
				+multiformat_scal_cpu_func(void *buffers[], void *args)
				+{
				+	unsigned int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	struct point *points = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	unsigned int k;
				+
				+	for (k = 0; k != count; ++k)
				+	{
				+		points[k].x *= points[k].y;
				+	}
				+}
			
 
				+#endif /* STARPU_USE_CPU */
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern struct starpu_codelet cpu_to_cuda_cl;
			
 
				+extern struct starpu_codelet cuda_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern struct starpu_codelet cpu_to_opencl_cl;
			
 
				+extern struct starpu_codelet opencl_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+extern struct starpu_data_copy_methods my_multiformat_copy_data_methods_s;
			
 
				+/*
				+ * Per-format description handed to starpu_multiformat_data_register():
				+ * element sizes, conversion codelets for each enabled accelerator, and the
				+ * user-provided copy methods.
				+ */
				+static struct starpu_multiformat_data_interface_ops format_ops =
				+{
				+	.cpu_elemsize = sizeof(struct point),
				+	.copy = &my_multiformat_copy_data_methods_s,
				+#ifdef STARPU_USE_CUDA
				+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
				+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
				+	.cuda_elemsize = sizeof(struct struct_of_arrays),
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
				+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
				+	.opencl_elemsize = sizeof(struct struct_of_arrays),
				+#endif
				+};
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern void multiformat_scal_cuda_func(void *buffers[], void *arg);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void multiformat_scal_opencl_func(void *buffers[], void *arg);
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_CPU
			
 
				+/* Scaling codelet for CPU workers ("codelet_real"), one read-write buffer. */
				+static struct starpu_codelet cpu_cl =
				+{
				+	.name = "codelet_real",
				+	.where = STARPU_CPU,
				+	.modes = { STARPU_RW },
				+	.nbuffers = 1,
				+	.cpu_funcs = { multiformat_scal_cpu_func, NULL }
				+};
			
 
				+#endif /* !STARPU_USE_CPU */
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/* Scaling codelet for CUDA workers ("cuda_codelet"), one read-write buffer. */
				+static struct starpu_codelet cuda_cl =
				+{
				+	.name = "cuda_codelet",
				+	.where = STARPU_CUDA,
				+	.modes = { STARPU_RW },
				+	.nbuffers = 1,
				+	.cuda_funcs = { multiformat_scal_cuda_func, NULL }
				+};
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+/* Scaling codelet for OpenCL workers ("opencl_codelet"), one read-write buffer. */
				+static struct starpu_codelet opencl_cl =
				+{
				+	.name = "opencl_codelet",
				+	.where = STARPU_OPENCL,
				+	.modes = { STARPU_RW },
				+	.nbuffers = 1,
				+	.opencl_funcs = { multiformat_scal_opencl_func, NULL }
				+};
			
 
				+#endif /* !STARPU_USE_OPENCL */
			
 
				+
			
 
				+/*
			
 
				+ * Main functions 
			
 
				+ */
			
 
				+/* Fills array_of_structs: element i gets x = i + 1 and y = 42. */
				+static void
				+init_problem_data(void)
				+{
				+	struct point *p = array_of_structs;
				+	int rank;
				+
				+	for (rank = 0; rank < N_ELEMENTS; rank++, p++)
				+	{
				+		p->x = 1.0 + rank;
				+		p->y = 42.0;
				+	}
				+}
			
 
				+
			
 
				+/* Registers array_of_structs (N_ELEMENTS points, node 0) as a multiformat handle. */
				+static void
				+register_data(void)
				+{
				+	starpu_data_handle_t *handle_out = &array_of_structs_handle;
				+
				+	starpu_multiformat_data_register(handle_out, 0, &array_of_structs,
				+					 N_ELEMENTS, &format_ops);
				+}
			
 
				+
			
 
				+/* Unregisters the handle created by register_data(). */
				+static void
				+unregister_data(void)
				+{
				+	starpu_data_handle_t registered = array_of_structs_handle;
				+
				+	starpu_data_unregister(registered);
				+}
			
 
				+
			
 
				+/*
				+ * Filter function: describes child number `id` as a window of
				+ * f->filter_arg elements of the father multiformat interface.
				+ * Only the CPU pointer of the child is set up here.
				+ *
				+ * father  : interface of the data being partitioned
				+ * child   : interface to fill in for chunk `id`
				+ * f       : filter descriptor; filter_arg holds the length of each chunk
				+ * id      : index of the chunk, must be < nchunks
				+ * nchunks : number of chunks, must be 2 or 3
				+ */
				+static void
				+multiformat_divide_in_equal_chunks_filter_func(void *father,
				+					       void *child,
				+					       struct starpu_data_filter *f,
				+					       unsigned id,
				+					       unsigned nchunks)
				+{
				+	/*
				+	 * One chunk for a CPU device.
				+	 * At least one for a GPU (CUDA or OpenCL).
				+	 * If possible, a third chunk for another kind of GPU.
				+	 */
				+	assert(nchunks == 2 || nchunks == 3);
				+	assert (id < nchunks);
				+
				+	struct starpu_multiformat_interface *mf_father, *mf_child;
				+
				+	mf_father = (struct starpu_multiformat_interface *) father;
				+	mf_child = (struct starpu_multiformat_interface *) child;
				+
				+	uint32_t length_first = f->filter_arg;
				+	uint32_t nx = mf_father->nx;
				+
				+	assert(length_first < nx);
				+
				+	/*
				+	 * The child shares the father's ops table. The memcpy() that used to
				+	 * follow copied that table onto itself (identical source and
				+	 * destination, sized by sizeof(pointer)), which memcpy() does not
				+	 * allow and which could not change anything; it has been removed.
				+	 */
				+	mf_child->ops = mf_father->ops;
				+
				+	/* The actual partitioning */
				+	mf_child->nx = length_first;
				+
				+#ifdef STARPU_USE_CPU
				+	if (mf_father->cpu_ptr)
				+	{
				+		struct point *tmp = (struct point *) mf_father->cpu_ptr;
				+		tmp += id * length_first;
				+		mf_child->cpu_ptr = tmp;
				+	}
				+#endif
				+}
			
 
				+
			
 
				+/* Splits the registered handle into nchunks children of N_ELEMENTS/nchunks elements. */
				+static void
				+partition_data(void)
				+{
				+	struct starpu_data_filter chunk_filter =
				+	{
				+		.filter_arg = N_ELEMENTS/nchunks,
				+		.nchildren = nchunks,
				+		.filter_func = multiformat_divide_in_equal_chunks_filter_func,
				+		.get_child_ops = NULL,
				+		.get_nchildren = NULL
				+	};
				+
				+	starpu_data_partition(array_of_structs_handle, &chunk_filter);
				+}
			
 
				+
			
 
				+/*
				+ * Creates and submits one task per chunk, then waits for all of them.
				+ * Chunk 0 uses the CPU codelet; chunk 1 the CUDA codelet when a CUDA worker
				+ * exists, otherwise the OpenCL one; chunk 2 the OpenCL codelet.
				+ * Returns 0 on success, or the non-zero value returned by StarPU.
				+ */
				+static int
				+create_and_submit_tasks(void)
				+{
				+	int err;
				+	unsigned int i;
				+	for (i = 0; i < nchunks; i++)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+		if (i == 0)
				+		{
				+			task->cl = &cpu_cl;
				+		}
				+		else if (i == 1)
				+		{
				+#ifdef STARPU_USE_CUDA
				+			if (ncuda > 0)
				+				task->cl = &cuda_cl;
				+#endif /* !STARPU_USE_CUDA */
				+#ifdef STARPU_USE_OPENCL
				+			if (ncuda == 0 && nopencl > 0)
				+				task->cl = &opencl_cl;
				+#endif /* !STARPU_USE_OPENCL */
				+		}
				+#ifdef STARPU_USE_OPENCL
				+		else /* i == 2 */
				+		{
				+			task->cl = &opencl_cl;
				+		}
				+#endif /* !STARPU_USE_OPENCL */
				+
				+		task->handles[0] = starpu_data_get_sub_data(array_of_structs_handle, 1, i);
				+
				+		err = starpu_task_submit(task);
				+		if (err != 0)
				+		{
				+			/*
				+			 * A rejected task is never executed, so it is never
				+			 * released automatically: free it here.
				+			 */
				+			task->destroy = 0;
				+			starpu_task_destroy(task);
				+			return err;
				+		}
				+	}
				+
				+	return starpu_task_wait_for_all();
				+}
			
 
				+
			
 
				+
			
 
				+/* Prints every (x y) pair of array_of_structs on one line of stderr. */
				+static void
				+print_it(void)
				+{
				+	const struct point *p = array_of_structs;
				+	const struct point *const end = array_of_structs + N_ELEMENTS;
				+
				+	while (p != end)
				+	{
				+		FPRINTF(stderr, "(%.2f %.2f) ", p->x, p->y);
				+		p++;
				+	}
				+	FPRINTF(stderr, "\n");
				+}
			
 
				+
			
 
				+/*
				+ * Verifies that every x equals (index + 1) * y.
				+ * Returns EXIT_SUCCESS when all elements match, EXIT_FAILURE otherwise.
				+ */
				+static int
				+check_it(void)
				+{
				+	int rank = 0;
				+
				+	while (rank < N_ELEMENTS)
				+	{
				+		const struct point *p = &array_of_structs[rank];
				+		float wanted = rank + 1.0;
				+
				+		wanted *= p->y;
				+		if (p->x != wanted)
				+			return EXIT_FAILURE;
				+		rank++;
				+	}
				+
				+	return EXIT_SUCCESS;
				+}
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program;
			
 
				+struct starpu_opencl_program opencl_conversion_program;
			
 
				+#endif
			
 
				+
			
 
				+/* Returns 1 when at least one CUDA or OpenCL worker was counted, 0 otherwise. */
				+static int
				+gpus_available()
				+{
				+	int found = 0;
				+
				+#ifdef STARPU_USE_CUDA
				+	found = found || (ncuda > 0);
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	found = found || (nopencl > 0);
				+#endif
				+
				+	return found;
				+}
			
 
				+
			
 
				+/*
				+ * Entry point: initializes StarPU, partitions the array between a CPU worker
				+ * and the available GPU kinds, scales each chunk, then checks the result.
				+ * Returns 77 when StarPU cannot start or when no CPU or no GPU worker is
				+ * available, otherwise the value of check_it().
				+ */
				+int
				+main(void)
				+{
				+#ifdef STARPU_USE_CPU
				+	int err;
				+	struct starpu_conf conf =
				+	{
				+		.ncpus = -1,
				+		.ncuda = 1,
				+		.nopencl = 1
				+	};
				+	err = starpu_init(&conf);
				+	if (err != 0)
				+	{
				+		/* Without a running StarPU nothing below is usable. */
				+		FPRINTF(stderr, "starpu_init : %s\n", strerror(-err));
				+		return 77;
				+	}
				+
				+	ncpu = starpu_cpu_worker_get_count();
				+#ifdef STARPU_USE_CUDA
				+	ncuda = starpu_cuda_worker_get_count();
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	nopencl = starpu_opencl_worker_get_count();
				+#endif
				+
				+	if (ncpu == 0 || !gpus_available())
				+	{
				+		/* StarPU was started above: shut it down before skipping. */
				+		starpu_shutdown();
				+		return 77;
				+	}
				+
				+	if (ncuda > 0)
				+		nchunks++;
				+	if (nopencl > 0)
				+		nchunks++;
				+
				+	/* For the sake of simplicity. */
				+	assert(N_ELEMENTS % nchunks == 0);
				+
				+#ifdef STARPU_USE_OPENCL
				+	err = starpu_opencl_load_opencl_from_file("examples/filters/multiformat/opencl.cl",
				+					    &opencl_program, NULL);
				+	assert(err == 0);
				+#endif
				+	init_problem_data();
				+
				+	print_it();
				+
				+	register_data();
				+	partition_data();
				+
				+	err = create_and_submit_tasks();
				+	if (err != 0)
				+	{
				+		FPRINTF(stderr, "create_submit_task : %s\n",
				+			strerror(-err));
				+	}
				+
				+	starpu_data_unpartition(array_of_structs_handle, 0);
				+
				+	unregister_data();
				+
				+	print_it();
				+
				+#ifdef STARPU_USE_OPENCL
				+	/* Call outside assert(): with NDEBUG the unload would be compiled out. */
				+	err = starpu_opencl_unload_opencl(&opencl_program);
				+	assert(err == CL_SUCCESS);
				+#endif
				+	(void) err;
				+	starpu_shutdown();
				+
				+
				+	return check_it();
				+#else
				+	/* Without the CPU, there is no point in using the multiformat
				+	 * interface, so this test is pointless. */
				+	return EXIT_SUCCESS;
				+#endif
				+}
			
--- a/examples/filters/multiformat/multiformat_ops.c
+++ b/examples/filters/multiformat/multiformat_ops.c
@@ -0,0 +1,432 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+#include <starpu_opencl.h>
			
 
				+#endif
			
 
				+
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+#if STARPU_USE_CUDA
			
 
				+/*
				+ * Shared body of the asynchronous RAM<->CUDA copies, selected by `kind`.
				+ *
				+ *  - cudaMemcpyHostToDevice: copies the array of struct point, then builds
				+ *    two device arrays holding the x and y fields and stores their two
				+ *    addresses at the destination cuda_ptr.
				+ *  - cudaMemcpyDeviceToHost: reads those two addresses back, copies both
				+ *    arrays into host buffers attached to the destination cuda_ptr
				+ *    (allocating them on first use), then frees the device arrays.
				+ *
				+ * Any other kind aborts. Every CUDA call and allocation is checked with
				+ * assert(). src_node and dst_node are unused. Returns 0.
				+ */
				+static int copy_cuda_common_async(void *src_interface, unsigned src_node,
				+				  void *dst_interface, unsigned dst_node,
				+				  cudaStream_t stream, enum cudaMemcpyKind kind)
				+{
				+	struct starpu_multiformat_interface *src_multiformat;
				+	struct starpu_multiformat_interface *dst_multiformat;
				+
				+	src_multiformat = (struct starpu_multiformat_interface *) src_interface;
				+	dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
				+
				+	size_t size;
				+	cudaError_t status;
				+
				+	switch (kind)
				+	{
				+	case cudaMemcpyHostToDevice:
				+	{
				+		/*
				+		 * XXX : Should we do that ? It is a mix between copy and conversion...
				+		 */
				+		/*
				+		 * Copying the data to the CUDA device.
				+		 */
				+		status = cudaMemcpyAsync(dst_multiformat->cpu_ptr,
				+					 src_multiformat->cpu_ptr,
				+					 src_multiformat->nx * src_multiformat->ops->cpu_elemsize,
				+					 kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		/*
				+		 * Copying the real data (that is pointed to).
				+		 */
				+		size = src_multiformat->nx * sizeof(float);
				+		float *x = malloc(size);
				+		float *y = malloc(size);
				+		assert(x && y);
				+
				+		struct point *p = (struct point *) src_multiformat->cpu_ptr;
				+		uint32_t i;
				+		for (i = 0; i < src_multiformat->nx; i++)
				+		{
				+			x[i] = p[i].x;
				+			y[i] = p[i].y;
				+		}
				+
				+		void *rets[2];
				+
				+		status = cudaMalloc(&rets[0], size);
				+		assert(status == cudaSuccess);
				+		status = cudaMemcpyAsync(rets[0], x, size, kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		status = cudaMalloc(&rets[1], size);
				+		assert(status == cudaSuccess);
				+		status = cudaMemcpyAsync(rets[1], y, size, kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, rets,
				+				2*sizeof(void*), kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		free(x);
				+		free(y);
				+		break;
				+	}
				+	case cudaMemcpyDeviceToHost:
				+	{
				+		/*
				+		 * Copying the cuda_ptr from the cuda device to the RAM.
				+		 */
				+		size = sizeof(struct struct_of_arrays);
				+		if (!dst_multiformat->cuda_ptr)
				+		{
				+			dst_multiformat->cuda_ptr = calloc(1, size);
				+			assert(dst_multiformat->cuda_ptr != NULL);
				+		}
				+
				+		/* Getting the addresses of our data on the CUDA device. */
				+		void *addrs[2];
				+		status = cudaMemcpyAsync(addrs, src_multiformat->cuda_ptr,
				+					 2 * sizeof(void*), kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		/*
				+		 * Getting the real data.
				+		 */
				+		struct struct_of_arrays *soa;
				+		soa = (struct struct_of_arrays *) dst_multiformat->cuda_ptr;
				+		size = src_multiformat->nx * sizeof(float);
				+
				+		if (!soa->x)
				+		{
				+			soa->x = malloc(size);
				+			assert(soa->x != NULL);
				+		}
				+		status = cudaMemcpyAsync(soa->x, addrs[0], size, kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		if (!soa->y)
				+		{
				+			soa->y = malloc(size);
				+			assert(soa->y != NULL);
				+		}
				+		status = cudaMemcpyAsync(soa->y, addrs[1], size, kind, stream);
				+		assert(status == cudaSuccess);
				+
				+		/* Let's free this. */
				+		status = cudaFree(addrs[0]);
				+		assert(status == cudaSuccess);
				+		status = cudaFree(addrs[1]);
				+		assert(status == cudaSuccess);
				+		break;
				+	}
				+	default:
				+		assert(0);
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Asynchronous RAM-to-CUDA copy method: delegates to
				+ * copy_cuda_common_async() with cudaMemcpyHostToDevice and returns its
				+ * result. The trace goes through FPRINTF so that it honours STARPU_SSILENT
				+ * like the other traces of this example.
				+ */
				+static int
				+copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
				+		       void *dst_interface, unsigned dst_node,
				+		       cudaStream_t stream)
				+{
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
				+	return copy_cuda_common_async(src_interface, src_node,
				+				      dst_interface, dst_node,
				+				      stream, cudaMemcpyHostToDevice);
				+}
			
 
				+
			
 
				+/*
				+ * Asynchronous CUDA-to-RAM copy method: delegates to
				+ * copy_cuda_common_async() with cudaMemcpyDeviceToHost and returns its
				+ * result. The trace goes through FPRINTF so that it honours STARPU_SSILENT
				+ * like the other traces of this example.
				+ */
				+static int
				+copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
				+		       void *dst_interface, unsigned dst_node,
				+		       cudaStream_t stream)
				+{
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
				+	return copy_cuda_common_async(src_interface, src_node,
				+				      dst_interface, dst_node,
				+				      stream, cudaMemcpyDeviceToHost);
				+}
			
 
				+
			
 
				+/*
				+ * Synchronous RAM-to-CUDA copy method: not implemented, always returns 1.
				+ * The trace goes through FPRINTF so that it honours STARPU_SSILENT.
				+ */
				+static int
				+copy_ram_to_cuda(void *src_interface, unsigned src_node,
				+		 void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	/* TODO */
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
				+	return 1;
				+}
			
 
				+
			
 
				+/*
				+ * Synchronous CUDA-to-RAM copy method: not implemented, always returns 1.
				+ * The trace goes through FPRINTF so that it honours STARPU_SSILENT.
				+ */
				+static int
				+copy_cuda_to_ram(void *src_interface, unsigned src_node,
				+		 void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	/* TODO */
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
				+	return 1;
				+}
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+/*
				+ * Creates an OpenCL buffer of `size` bytes with the given flags.
				+ * On success stores it in *mem and returns CL_SUCCESS; on failure returns
				+ * the OpenCL error and leaves *mem untouched.
				+ */
				+static cl_int
				+_opencl_malloc(cl_context context, cl_mem *mem, size_t size, cl_mem_flags flags)
				+{
				+	cl_int status;
				+	cl_mem created = clCreateBuffer(context, flags, size, NULL, &status);
				+
				+	if (status == CL_SUCCESS)
				+		*mem = created;
				+
				+	return status;
				+}
			
 
				+
			
 
				+/*
				+ * Enqueues a write of `size` bytes from `ptr` into `buffer` at `offset`.
				+ * The write is blocking when `event` is NULL, non-blocking otherwise.
				+ * On success *ret is set to 0 (blocking) or -EAGAIN (non-blocking).
				+ * Returns the OpenCL status of the enqueue; src_node and dst_node are unused.
				+ */
				+static cl_int
				+_opencl_copy_ram_to_opencl_async_sync(void *ptr, unsigned src_node,
				+				      cl_mem buffer, unsigned dst_node,
				+				      size_t size, size_t offset,
				+				      cl_event *event, int *ret,
				+				      cl_command_queue queue)
				+{
				+	const int synchronous = (event == NULL);
				+	cl_int status;
				+
				+	status = clEnqueueWriteBuffer(queue, buffer,
				+				      synchronous ? CL_TRUE : CL_FALSE,
				+				      offset, size, ptr, 0, NULL, event);
				+	if (status != CL_SUCCESS)
				+		return status;
				+
				+	*ret = synchronous ? 0 : -EAGAIN;
				+	return status;
				+}
			
 
				+
			
 
				+/*
				+ * Enqueues a read of `size` bytes from `buffer` at `offset` into `ptr`.
				+ * The read is blocking when `event` is NULL, non-blocking otherwise.
				+ * Returns the OpenCL status of the enqueue; src_node and dst_node are unused.
				+ */
				+static cl_int
				+_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node,
				+			   void *ptr, unsigned dst_node,
				+			   size_t size, size_t offset, cl_event *event,
				+			   cl_command_queue queue)
				+
				+{
				+	const cl_bool wait_for_completion = (event == NULL) ? CL_TRUE : CL_FALSE;
				+
				+	return clEnqueueReadBuffer(queue, buffer, wait_for_completion,
				+				   offset, size, ptr, 0, NULL, event);
				+}
			
 
				+
			
 
				+/* Synchronous RAM-to-OpenCL copy method: does nothing and returns 1. */
				+static int
				+copy_ram_to_opencl(void *src_interface, unsigned src_node,
				+		   void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	return 1;
				+}
			
 
				+
			
 
				+/* Synchronous OpenCL-to-RAM copy method: does nothing and returns 1. */
				+static int
				+copy_opencl_to_ram(void *src_interface, unsigned src_node,
				+		   void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	return 1;
				+}
			
 
				+
			
 
				+cl_mem xy[2];
			
 
				+/*
				+ * RAM-to-OpenCL copy method. Writes the array of struct point to the
				+ * destination cpu_ptr buffer (non-blocking when `event` is set), then creates
				+ * two OpenCL buffers in the file-level xy[] array, fills them with the x and
				+ * y fields using blocking writes, and stores both buffers in the
				+ * struct_of_arrays at the destination opencl_ptr. Always returns 0.
				+ */
				+static int
				+copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
				+			 void *dst_interface, unsigned dst_node,
				+			 void *event)
				+{
				+	(void) event;
				+	FPRINTF(stderr, "Enter %s\n", __func__);
				+	struct starpu_multiformat_interface *src_mf = src_interface;
				+	struct starpu_multiformat_interface *dst_mf = dst_interface;
				+
				+	/* Queue and context of the device driven by the calling worker. */
				+	cl_context context;
				+	cl_command_queue queue;
				+	int devid = starpu_worker_get_devid(starpu_worker_get_id());
				+	starpu_opencl_get_queue(devid, &queue);
				+	starpu_opencl_get_context(devid, &context);
				+
				+	/* First the array of points itself. */
				+	int err;
				+	cl_int ret;
				+	err = _opencl_copy_ram_to_opencl_async_sync(src_mf->cpu_ptr, src_node,
				+						    dst_mf->cpu_ptr, dst_node,
				+						    src_mf->nx * src_mf->ops->cpu_elemsize,
				+						    0, (cl_event *) event, &ret, queue);
				+	assert(err == 0);
				+
				+	/* Then the two field arrays, gathered in temporary host buffers. */
				+	const size_t field_bytes = src_mf->nx * sizeof(float);
				+	float *x = malloc(field_bytes);
				+	float *y = malloc(field_bytes);
				+	assert(x && y);
				+
				+	const struct point *points = (struct point *) src_mf->cpu_ptr;
				+	int i;
				+	for (i = 0; i < src_mf->nx; i++)
				+	{
				+		x[i] = points[i].x;
				+		y[i] = points[i].y;
				+	}
				+
				+	ret = _opencl_malloc(context, xy, field_bytes, CL_MEM_READ_WRITE);
				+	assert(ret == CL_SUCCESS);
				+	ret = _opencl_malloc(context, xy+1, field_bytes, CL_MEM_READ_ONLY);
				+	assert(ret == CL_SUCCESS);
				+
				+	err = _opencl_copy_ram_to_opencl_async_sync(x, src_node, xy[0], dst_node,
				+						    field_bytes, 0, NULL, &ret, queue);
				+	assert(err == CL_SUCCESS);
				+	err = _opencl_copy_ram_to_opencl_async_sync(y, src_node, xy[1], dst_node,
				+						    field_bytes, 0, NULL, &ret, queue);
				+	assert(err == CL_SUCCESS);
				+
				+	struct struct_of_arrays *soa = (struct struct_of_arrays *) dst_mf->opencl_ptr;
				+	soa->x = (void *) xy[0];
				+	soa->y = (void *) xy[1];
				+
				+	/* Not needed anymore */
				+	free(x);
				+	free(y);
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * OpenCL-to-RAM copy method. Reads the two OpenCL buffers of the file-level
				+ * xy[] array into freshly allocated host arrays with blocking reads and
				+ * stores them in the struct_of_arrays at the destination opencl_ptr, which
				+ * is allocated first when it is NULL. `event` is not used. Always returns 0.
				+ */
				+static int
				+copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
				+			 void *dst_interface, unsigned dst_node,
				+			 void *event)
				+{
				+	FPRINTF(stderr, "Enter %s\n", __func__);
				+	struct starpu_multiformat_interface *src_mf = src_interface;
				+	struct starpu_multiformat_interface *dst_mf = dst_interface;
				+
				+	/* Queue of the device driven by the calling worker. */
				+	int devid = starpu_worker_get_devid(starpu_worker_get_id());
				+	cl_command_queue queue;
				+	starpu_opencl_get_queue(devid, &queue);
				+
				+	cl_int ret;
				+	if (!dst_mf->opencl_ptr)
				+	{
				+		dst_mf->opencl_ptr = malloc(sizeof(struct struct_of_arrays));
				+		assert(dst_mf->opencl_ptr);
				+	}
				+
				+	const size_t field_bytes = src_mf->nx * sizeof(float);
				+	float *x = malloc(field_bytes);
				+	float *y = malloc(field_bytes);
				+	assert(x && y);
				+
				+	struct struct_of_arrays *soa = (struct struct_of_arrays *) dst_mf->opencl_ptr;
				+
				+	ret = _opencl_copy_opencl_to_ram(xy[0], src_node, x, dst_node,
				+					 field_bytes, 0, NULL, queue);
				+	assert(ret == CL_SUCCESS);
				+
				+	ret = _opencl_copy_opencl_to_ram(xy[1], src_node, y, dst_node,
				+					 field_bytes, 0, NULL, queue);
				+	assert(ret == CL_SUCCESS);
				+
				+	soa->x = x;
				+	soa->y = y;
				+	return 0;
				+}
			
 
				+#endif /* STARPU_USE_OPENCL */
			
 
				+
			
 
				+/*
				+ * Copy methods of the example's multiformat data. Only RAM<->CUDA and
				+ * RAM<->OpenCL transfers are provided; every other method is NULL.
				+ * The OpenCL entries are guarded with #ifdef, like the functions they
				+ * point to.
				+ */
				+const struct starpu_data_copy_methods my_multiformat_copy_data_methods_s =
				+{
				+	.ram_to_ram = NULL,
				+	.ram_to_spu = NULL,
				+#ifdef STARPU_USE_CUDA
				+	.ram_to_cuda        = copy_ram_to_cuda,
				+	.cuda_to_ram        = copy_cuda_to_ram,
				+	.ram_to_cuda_async  = copy_ram_to_cuda_async,
				+	.cuda_to_ram_async  = copy_cuda_to_ram_async,
				+	.cuda_to_cuda       = NULL,
				+	.cuda_to_cuda_async = NULL,
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	.ram_to_opencl       = copy_ram_to_opencl,
				+	.opencl_to_ram       = copy_opencl_to_ram,
				+	.opencl_to_opencl    = NULL,
				+	.ram_to_opencl_async = copy_ram_to_opencl_async,
				+	.opencl_to_ram_async = copy_opencl_to_ram_async,
				+#endif
				+	.cuda_to_spu = NULL,
				+	.spu_to_ram  = NULL,
				+	.spu_to_cuda = NULL,
				+	.spu_to_spu  = NULL
				+};
			
--- a/examples/filters/multiformat/multiformat_types.h
+++ b/examples/filters/multiformat/multiformat_types.h
@@ -0,0 +1,37 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				#ifndef MULTIFORMAT_TYPES_H
				#define MULTIFORMAT_TYPES_H

				/* FPRINTF() expands to fprintf() and getenv(): include their declarations
				 * here so that this header can be included on its own. */
				#include <stdio.h>
				#include <stdlib.h>

				/* Element count used by the example (its users are outside this header). */
				#define N_ELEMENTS 6

				/* Structure-of-arrays layout: one array holding every x, one holding every y. */
				struct struct_of_arrays
				{
					float *x, *y;
				};

				/* Array-of-structures layout: a single (x, y) pair. */
				struct point
				{
					float x, y;
				};

				/*
				 * fprintf() wrapper that prints nothing when the STARPU_SSILENT environment
				 * variable is set.  Wrapped in do { } while(0) so that it behaves as a single
				 * statement.
				 */
				#define FPRINTF(ofile, fmt, ...) \
				do {                                  \
					if (!getenv("STARPU_SSILENT"))        \
						fprintf(ofile, fmt, ## __VA_ARGS__);  \
				} while(0)

				#endif
			
--- a/examples/filters/multiformat/opencl.c
+++ b/examples/filters/multiformat/opencl.c
@@ -0,0 +1,96 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+extern struct starpu_opencl_program opencl_program;
			
 
				+
			
 
				+void multiformat_scal_opencl_func(void *buffers[], void *args)
			
 
				+{
			
 
				+	FPRINTF(stderr, "ENTER %s\n", __func__);
			
 
				+	(void) args;
			
 
				+	int id, devid;
			
 
				+        cl_int err;
			
 
				+	cl_kernel kernel;
			
 
				+	cl_command_queue queue;
			
 
				+	cl_event event;
			
 
				+
			
 
				+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
			
 
				+
			
 
				+	id = starpu_worker_get_id();
			
 
				+	devid = starpu_worker_get_devid(id);
			
 
				+
			
 
				+	err = starpu_opencl_load_kernel(&kernel,
			
 
				+					&queue,
			
 
				+					&opencl_program,
			
 
				+					"multiformat_opencl",
			
 
				+					devid);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	struct struct_of_arrays *soa = (struct struct_of_arrays *)
			
 
				+			STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
			
 
				+	cl_mem x = (cl_mem) soa->x;
			
 
				+	cl_mem y = (cl_mem) soa->y;
			
 
				+	err = clSetKernelArg(kernel, 0, sizeof(x), &x);
			
 
				+	if (err) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+	err = clSetKernelArg(kernel, 1, sizeof(y), &y);
			
 
				+	if (err) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
			
 
				+	if (err) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	{
			
 
				+		size_t global=n;
			
 
				+		size_t local;
			
 
				+                size_t s;
			
 
				+                cl_device_id device;
			
 
				+
			
 
				+                starpu_opencl_get_device(devid, &device);
			
 
				+
			
 
				+                err = clGetKernelWorkGroupInfo (kernel,
			
 
				+						device,
			
 
				+						CL_KERNEL_WORK_GROUP_SIZE,
			
 
				+						sizeof(local),
			
 
				+						&local,
			
 
				+						&s);
			
 
				+                if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+                if (local > global)
			
 
				+			local = global;
			
 
				+
			
 
				+		err = clEnqueueNDRangeKernel(queue,
			
 
				+					kernel,
			
 
				+					1,
			
 
				+					NULL,
			
 
				+					&global,
			
 
				+					&local,
			
 
				+					0,
			
 
				+					NULL,
			
 
				+					&event);
			
 
				+
			
 
				+		if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+	}
			
 
				+
			
 
				+	clFinish(queue);
			
 
				+	starpu_opencl_collect_stats(event);
			
 
				+	clReleaseEvent(event);
			
 
				+
			
 
				+	starpu_opencl_release_kernel(kernel);
			
 
				+}
			
--- a/examples/filters/multiformat/opencl.cl
+++ b/examples/filters/multiformat/opencl.cl
@@ -0,0 +1,24 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				/*
				 * Element-wise product: each work-item whose global index is below nx
				 * multiplies x[index] by y[index] in place.  Work-items at or beyond nx
				 * do nothing.
				 */
				__kernel void multiformat_opencl(__global float *x,
								 __global float *y,
								 int nx)
				{
					const int gid = get_global_id(0);

					if (gid >= nx)
						return;

					x[gid] = x[gid] * y[gid];
				}
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				- * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ * Copyright (C) 2011-2012  Institut National de Recherche en Informatique et Automatique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -364,6 +364,7 @@ struct starpu_multiformat_data_interface_ops
 
				 	struct starpu_codelet *cpu_to_cuda_cl;
			
 
				 	struct starpu_codelet *cuda_to_cpu_cl;
			
 
				 #endif
			
 
				+	struct starpu_data_copy_methods *copy;
			
 
				 };
			
 
				 
			
 
				 struct starpu_multiformat_interface
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -3,6 +3,7 @@
 
				  * Copyright (C) 2010-2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -263,6 +264,29 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
				 		if (child_handle->nchildren > 0)
			
 
				 			starpu_data_unpartition(child_handle, gathering_node);
			
 
				 
			
 
				+		/* If this is a multiformat handle, we must convert the data now */
			
 
				+		unsigned int id = starpu_get_handle_interface_id(child_handle);
			
 
				+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID &&
			
 
				+			_starpu_get_node_kind(child_handle->mf_node) != STARPU_CPU_RAM)
			
 
				+		{
			
 
				+			void fake(void *buffers[], void *args) {}
			
 
				+			struct starpu_codelet cl =
			
 
				+			{
			
 
				+				.where = STARPU_CPU,
			
 
				+				.cpu_funcs = { fake, NULL },
			
 
				+				.modes = { STARPU_RW },
			
 
				+				.nbuffers = 1
			
 
				+			};
			
 
				+			struct starpu_multiformat_interface *format_interface;
			
 
				+			format_interface = starpu_data_get_interface_on_node(child_handle, 0);
			
 
				+			struct starpu_task *task = starpu_task_create();
			
 
				+			task->handles[0] = child_handle;
			
 
				+			task->cl = &cl;
			
 
				+			task->synchronous = 1;
			
 
				+			if (starpu_task_submit(task) != 0)
			
 
				+				_STARPU_ERROR("Could not submit the conversion task while unpartitionning\n");
			
 
				+		}
			
 
				+
			
 
				 		int ret;
			
 
				 		ret = _starpu_fetch_data_on_node(child_handle, &child_handle->per_node[gathering_node], STARPU_R, 0, NULL, NULL);
			
 
				 		/* for now we pretend that the RAM is almost unlimited and that gathering
			
@@ -390,6 +414,8 @@ static void starpu_data_create_children(starpu_data_handle_t handle, unsigned nc
 
				 			handle_child->per_worker[worker].data_interface = calloc(1, interfacesize);
			
 
				 			STARPU_ASSERT(handle_child->per_worker[worker].data_interface);
			
 
				 		}
			
 
				+
			
 
				+		handle_child->mf_node = handle->mf_node;
			
 
				 	}
			
 
				 
			
 
				 	/* this handle now has children */
			
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ * Copyright (C) 2011-2012  Institut National de Recherche en Informatique et Automatique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -184,6 +184,9 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 
				 		.nx         = nobjects,
			
 
				 		.ops        = format_ops
			
 
				 	};
			
 
				+
			
 
				+	if (format_ops->copy)
			
 
				+		interface_multiformat_ops.copy_methods = format_ops->copy;
			
 
				 	starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
			
 
				 }