лет назад: 14 · 6884c732c5
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -40,6 +40,8 @@ endif
 
				 
			
 
				 EXTRA_DIST = 					\
			
 
				 	basic_examples/vector_scal_opencl_kernel.cl \
			
 
				+	basic_examples/multiformat_opencl_kernel.cl  \
			
 
				+	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \
			
 
				 	common/blas_model.c			\
			
 
				 	spmv/spmv_cuda.cu			\
			
 
				 	spmv/spmv_opencl.cl			\
			
@@ -123,7 +125,8 @@ noinst_HEADERS = 				\
 
				 	spmv/matrix_market/mmio.h		\
			
 
				 	spmv/matrix_market/mm_to_bcsr.h		\
			
 
				 	spmv/spmv.h				\
			
 
				-	spmv/dw_block_spmv.h
			
 
				+	spmv/dw_block_spmv.h                    \
			
 
				+	basic_examples/multiformat_types.h
			
 
				 
			
 
				 #####################################
			
 
				 # What to install and what to check #
			
@@ -152,6 +155,7 @@ examplebin_PROGRAMS +=				\
 
				 	basic_examples/mult			\
			
 
				 	basic_examples/block			\
			
 
				 	basic_examples/variable			\
			
 
				+	basic_examples/multiformat              \
			
 
				 	filters/fvector				\
			
 
				 	filters/fblock				\
			
 
				 	filters/fmatrix				\
			
@@ -212,6 +216,7 @@ STARPU_EXAMPLES +=				\
 
				 	basic_examples/mult			\
			
 
				 	basic_examples/block			\
			
 
				 	basic_examples/variable			\
			
 
				+	basic_examples/multiformat              \
			
 
				 	filters/fvector				\
			
 
				 	filters/fblock				\
			
 
				 	filters/fmatrix				\
			
@@ -297,6 +302,29 @@ basic_examples_vector_scal_fortran_LDADD =	\
 
				 endif
			
 
				 endif
			
 
				 
			
 
				+#######################
			
 
				+# Multiformat example #
			
 
				+#######################
			
 
				+basic_examples_multiformat_SOURCES =                                    \
			
 
				+	basic_examples/multiformat.c                                    \
			
 
				+	basic_examples/multiformat_conversion_codelets.c
			
 
				+
			
 
				+if STARPU_USE_CUDA
			
 
				+basic_examples_multiformat_SOURCES+=                                     \
			
 
				+	basic_examples/multiformat_cuda.cu                               \
			
 
				+	basic_examples/multiformat_conversion_codelets_cuda.cu
			
 
				+endif
			
 
				+
			
 
				+if STARPU_USE_OPENCL
			
 
				+basic_examples_multiformat_SOURCES+=                                     \
			
 
				+	basic_examples/multiformat_opencl.c                              \
			
 
				+	basic_examples/multiformat_conversion_codelets_opencl.c          
			
 
				+
			
 
				+nobase_STARPU_OPENCL_DATA_DATA+=                                         \
			
 
				+	basic_examples/multiformat_opencl_kernel.cl                      \
			
 
				+	basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
			
 
				+endif
			
 
				+
			
 
				 #################
			
 
				 # block example #
			
 
				 #################
			
--- a/examples/basic_examples/multiformat.c
+++ b/examples/basic_examples/multiformat.c
@@ -0,0 +1,189 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+#include <starpu_opencl.h>
			
 
				+#endif
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+static struct struct_of_arrays global_struct_of_arrays;
			
 
				+static  starpu_data_handle global_struct_of_arrays_handle;
			
 
				+
			
 
				+/*
				+ * CPU implementation of the scaling kernel: for each of the nx elements of
				+ * the struct-of-arrays behind buffers[0], multiplies x[i] by y[i] in place.
				+ */
				+static void
				+multiformat_scal_cpu_func(void *buffers[], void *args)
				+{
				+	struct struct_of_arrays *soa = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	unsigned int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	unsigned int idx = 0;
				+
				+	while (idx < count) {
				+		soa->x[idx] *= soa->y[idx];
				+		idx++;
				+	}
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern starpu_codelet cpu_to_cuda_cl;
			
 
				+extern starpu_codelet cuda_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern starpu_codelet cpu_to_opencl_cl;
			
 
				+extern starpu_codelet opencl_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Per-device description of the data: the size of one element in each
				+ * format, and the codelets that convert between the CPU format and the
				+ * device formats.
				+ */
				+static struct starpu_multiformat_data_interface_ops format_ops = {
				+#ifdef STARPU_USE_CUDA
				+	.cuda_elemsize = sizeof(struct point),
				+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
				+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	.opencl_elemsize = sizeof(struct point),
				+	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
				+	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
				+#endif
				+	/*
				+	 * Size of ONE element on the CPU side: one float of x[] plus one
				+	 * float of y[]. The handle is registered with N_ELEMENTS objects, so
				+	 * using sizeof(global_struct_of_arrays) here would describe the
				+	 * whole array as a single element.
				+	 * NOTE(review): assumes the interface sizes buffers as
				+	 * nx * elemsize -- confirm against multiformat_interface.c.
				+	 */
				+	.cpu_elemsize = 2 * sizeof(float),
				+};
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern void multiformat_scal_cuda_func(void *buffers[], void *arg);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void multiformat_scal_opencl_func(void *buffers[], void *arg);
			
 
				+#endif
			
 
				+
			
 
				+/* History-based performance model attached to the codelet's conversions. */
				+static struct starpu_perfmodel_t conversion_model = {
				+	.symbol = "multiformat_conversion_model",
				+	.type = STARPU_HISTORY_BASED
				+};
				+
				+/*
				+ * Scaling codelet working on a single buffer.
				+ * Note that .where lists only the CUDA and OpenCL devices even though a
				+ * CPU implementation is also provided.
				+ */
				+static starpu_codelet  cl = {
				+	.nbuffers = 1,
				+	.where = STARPU_OPENCL | STARPU_CUDA,
				+	.conversion_model = &conversion_model,
				+	.cpu_func = multiformat_scal_cpu_func,
				+#ifdef STARPU_USE_OPENCL
				+	.opencl_func = multiformat_scal_opencl_func,
				+#endif
				+#ifdef STARPU_USE_CUDA
				+	.cuda_func = multiformat_scal_cuda_func,
				+#endif
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Main functions 
			
 
				+ */
			
 
				+/* Fills the global struct-of-arrays: x[i] = 1 + i and y[i] = 42. */
				+static void
				+init_problem_data(void)
				+{
				+	int idx = 0;
				+
				+	while (idx < N_ELEMENTS) {
				+		global_struct_of_arrays.y[idx] = 42.0;
				+		global_struct_of_arrays.x[idx] = 1.0f + idx;
				+		idx++;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Registers the global struct-of-arrays as a multiformat handle made of
				+ * N_ELEMENTS objects, with memory node 0 as its home node.
				+ */
				+static void
				+register_data(void)
				+{
				+	starpu_data_handle *handle = &global_struct_of_arrays_handle;
				+	void *data = &global_struct_of_arrays;
				+
				+	starpu_multiformat_data_register(handle, 0, data, N_ELEMENTS, &format_ops);
				+}
			
 
				+
			
 
				+/*
				+ * Creates one synchronous task running the scaling codelet on the global
				+ * handle in read-write mode and submits it.
				+ * Returns the value returned by starpu_task_submit().
				+ */
				+static int
				+submit_scal_task(void)
				+{
				+	struct starpu_task *task = starpu_task_create();
				+
				+	task->cl = &cl;
				+	task->synchronous = 1;
				+	task->buffers[0].handle = global_struct_of_arrays_handle;
				+	task->buffers[0].mode = STARPU_RW;
				+	task->cl_arg = NULL;
				+	task->cl_arg_size = 0;
				+
				+	return starpu_task_submit(task);
				+}
				+
				+/*
				+ * Submits the scaling task twice, one after the other. A failed submission
				+ * is reported on stderr instead of being silently ignored, and the
				+ * remaining task is not submitted.
				+ */
				+static void
				+create_and_submit_tasks(void)
				+{
				+	unsigned int i;
				+
				+	for (i = 0; i < 2; i++) {
				+		int ret = submit_scal_task();
				+
				+		if (ret != 0) {
				+			fprintf(stderr, "starpu_task_submit failed (error %d)\n", ret);
				+			return;
				+		}
				+	}
				+}
			
 
				+
			
 
				+/* Unregisters the handle created by register_data(). */
				+static void
				+unregister_data(void)
				+{
				+	starpu_data_handle handle = global_struct_of_arrays_handle;
				+
				+	starpu_data_unregister(handle);
				+}
			
 
				+
			
 
				+/* Dumps every (x, y) pair of the global struct-of-arrays on one stderr line. */
				+static void
				+print_it(void)
				+{
				+	const struct struct_of_arrays *soa = &global_struct_of_arrays;
				+	int idx = 0;
				+
				+	while (idx < N_ELEMENTS) {
				+		fprintf(stderr, "(%.2f %.2f) ", soa->x[idx], soa->y[idx]);
				+		idx++;
				+	}
				+	fprintf(stderr, "\n");
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program;
			
 
				+struct starpu_opencl_program opencl_conversion_program;
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Entry point of the example: initializes StarPU, loads the OpenCL programs
				+ * when OpenCL is enabled, prints the data, runs the scaling tasks on it and
				+ * prints the data again once the handle has been unregistered.
				+ * Returns 0 on success, 1 if StarPU could not be initialized.
				+ */
				+int
				+main(void)
				+{
				+	/* Do not touch the runtime if it failed to start. */
				+	int ret = starpu_init(NULL);
				+	if (ret != 0) {
				+		fprintf(stderr, "starpu_init failed (error %d)\n", ret);
				+		return 1;
				+	}
				+
				+#ifdef STARPU_USE_OPENCL
				+	/* Paths are relative to the current working directory. */
				+	starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_opencl_kernel.cl",
				+					    &opencl_program, NULL);
				+	starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl",
				+					    &opencl_conversion_program, NULL);
				+#endif
				+	init_problem_data();
				+
				+	print_it();
				+
				+	register_data();
				+
				+	create_and_submit_tasks();
				+
				+	unregister_data();
				+
				+	print_it();
				+
				+#ifdef STARPU_USE_OPENCL
				+	starpu_opencl_unload_opencl(&opencl_program);
				+	starpu_opencl_unload_opencl(&opencl_conversion_program);
				+#endif
				+	starpu_shutdown();
				+
				+	return 0;
				+}
			
--- a/examples/basic_examples/multiformat_conversion_codelets.c
+++ b/examples/basic_examples/multiformat_conversion_codelets.c
@@ -0,0 +1,99 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/*
				+ * Copies the nx elements of the CPU struct-of-arrays of buffers[0] into the
				+ * array of points referenced by its CUDA pointer.
				+ */
				+void cpu_to_cuda(void *buffers[], void *arg)
				+{
				+	struct point *points = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
				+	struct struct_of_arrays *soa = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	int k = 0;
				+
				+	while (k < count) {
				+		points[k].x = soa->x[k];
				+		points[k].y = soa->y[k];
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Copies the nx points referenced by the CUDA pointer of buffers[0] back
				+ * into its CPU struct-of-arrays.
				+ */
				+void cuda_to_cpu(void *buffers[], void *arg)
				+{
				+	struct struct_of_arrays *soa = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	struct point *points = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
				+	int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	int k = 0;
				+
				+	while (k < count) {
				+		soa->x[k] = points[k].x;
				+		soa->y[k] = points[k].y;
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
			
 
				+/* Codelet converting the CPU format to the CUDA format; .where is CUDA only. */
				+starpu_codelet cpu_to_cuda_cl = {
				+	.nbuffers = 1,
				+	.where = STARPU_CUDA,
				+	.cuda_func = cpu_to_cuda_cuda_func,
				+	.cpu_func = cpu_to_cuda
				+};
				+
				+/* Codelet converting the CUDA format back to the CPU format; .where is CPU only. */
				+starpu_codelet cuda_to_cpu_cl = {
				+	.nbuffers = 1,
				+	.where = STARPU_CPU,
				+	.cpu_func = cuda_to_cpu
				+};
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+/*
				+ * Logs its own name on stderr, then copies the nx elements of the CPU
				+ * struct-of-arrays of buffers[0] into the array of points referenced by its
				+ * OpenCL pointer.
				+ */
				+void cpu_to_opencl(void *buffers[], void *arg)
				+{
				+	fprintf(stderr, "User Entering %s\n", __func__);
				+	struct point *points = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
				+	struct struct_of_arrays *soa = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	int k = 0;
				+
				+	while (k < count) {
				+		points[k].x = soa->x[k];
				+		points[k].y = soa->y[k];
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Logs its own name on stderr, then copies the nx points referenced by the
				+ * OpenCL pointer of buffers[0] back into its CPU struct-of-arrays.
				+ */
				+void opencl_to_cpu(void *buffers[], void *arg)
				+{
				+	fprintf(stderr, "User Entering %s\n", __func__);
				+	struct struct_of_arrays *soa = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	struct point *points = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
				+	int count = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	int k = 0;
				+
				+	while (k < count) {
				+		soa->x[k] = points[k].x;
				+		soa->y[k] = points[k].y;
				+		k++;
				+	}
				+}
			
 
				+
			
 
				+extern void cpu_to_opencl_opencl_func(void *buffers[], void *args);
			
 
				+/* Codelet converting the CPU format to the OpenCL format; .where is OpenCL only. */
				+starpu_codelet cpu_to_opencl_cl = {
				+	.nbuffers = 1,
				+	.where = STARPU_OPENCL,
				+	.opencl_func = cpu_to_opencl_opencl_func,
				+	.cpu_func = cpu_to_opencl
				+};
				+
				+/* Codelet converting the OpenCL format back to the CPU format; .where is CPU only. */
				+starpu_codelet opencl_to_cpu_cl = {
				+	.nbuffers = 1,
				+	.where = STARPU_CPU,
				+	.cpu_func = opencl_to_cpu
				+};
			
 
				+#endif
			
--- a/examples/basic_examples/multiformat_conversion_codelets_cuda.cu
+++ b/examples/basic_examples/multiformat_conversion_codelets_cuda.cu
@@ -0,0 +1,46 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "multiformat_types.h"
			
 
				+
			
 
				+/*
				+ * Conversion kernel: the thread with global index idx copies element idx
				+ * of the struct-of-arrays `src` into dst[idx]. Threads whose index is not
				+ * below n do nothing.
				+ */
				+static __global__ void cpu_to_cuda_cuda(struct struct_of_arrays *src,
				+	struct point *dst, unsigned n)
				+{
				+	const unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
				+
				+	if (idx < n) {
				+		dst[idx].x = src->x[idx];
				+		dst[idx].y = src->y[idx];
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * CUDA implementation of the CPU-to-CUDA conversion codelet: launches
				+ * cpu_to_cuda_cuda on StarPU's local stream with one thread per element,
				+ * then waits for that stream to complete.
				+ *
				+ * buffers[0] is the multiformat interface of the data; _args is unused.
				+ */
				+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
				+{
				+	(void) _args;
				+
				+	struct struct_of_arrays *src;
				+	src = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	struct point *dst;
				+	dst = (struct point *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
				+	/* nx is unsigned and the kernel takes an unsigned count. */
				+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+
				+	/* A grid of zero blocks is not a valid launch configuration. */
				+	if (n == 0)
				+		return;
				+
				+	unsigned threads_per_block = 64;
				+	/* Round up so that a partially filled last block is still launched. */
				+	unsigned nblocks = (n + threads_per_block - 1) / threads_per_block;
				+
				+	/* The kernel uses no dynamic shared memory, so request 0 bytes. */
				+	cpu_to_cuda_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(src, dst, n);
				+
				+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
				+}
			
--- a/examples/basic_examples/multiformat_conversion_codelets_opencl.c
+++ b/examples/basic_examples/multiformat_conversion_codelets_opencl.c
@@ -0,0 +1,98 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+
			
 
				+extern struct starpu_opencl_program opencl_conversion_program;
			
 
				+
			
 
				+/*
				+ * OpenCL implementation of the CPU-to-OpenCL conversion codelet: loads the
				+ * "cpu_to_opencl_opencl" kernel from opencl_conversion_program for the
				+ * current worker's device, runs it over the nx elements of buffers[0] and
				+ * waits for the queue to finish.
				+ *
				+ * buffers[0] is the multiformat interface of the data; args is unused.
				+ */
				+void cpu_to_opencl_opencl_func(void *buffers[], void *args)
				+{
				+	(void) args;
				+	int id, devid;
				+	cl_int err;
				+	cl_kernel kernel;
				+	cl_command_queue queue;
				+	cl_event event;
				+
				+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
				+	cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
				+
				+	/* A zero-sized NDRange cannot be enqueued; there is nothing to convert. */
				+	if (n == 0)
				+		return;
				+
				+	id = starpu_worker_get_id();
				+	devid = starpu_worker_get_devid(id);
				+
				+	err = starpu_opencl_load_kernel(&kernel,
				+					&queue,
				+					&opencl_conversion_program,
				+					"cpu_to_opencl_opencl",
				+					devid);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	err = clSetKernelArg(kernel, 0, sizeof(src), &src);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	err = clSetKernelArg(kernel, 1, sizeof(dst), &dst);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	{
				+		size_t global = n;
				+		size_t local;
				+		cl_device_id device;
				+
				+		starpu_opencl_get_device(devid, &device);
				+
				+		/* The returned size is not needed, hence the NULL last argument. */
				+		err = clGetKernelWorkGroupInfo(kernel,
				+					       device,
				+					       CL_KERNEL_WORK_GROUP_SIZE,
				+					       sizeof(local),
				+					       &local,
				+					       NULL);
				+		if (err != CL_SUCCESS)
				+			STARPU_OPENCL_REPORT_ERROR(err);
				+
				+		if (local > global)
				+			local = global;
				+		else if (global % local != 0)
				+			/*
				+			 * The global size must be a multiple of the local
				+			 * size; round it up. The kernel ignores work-items
				+			 * whose index is >= n.
				+			 */
				+			global += local - (global % local);
				+
				+		err = clEnqueueNDRangeKernel(queue,
				+					     kernel,
				+					     1,
				+					     NULL,
				+					     &global,
				+					     &local,
				+					     0,
				+					     NULL,
				+					     &event);
				+		if (err != CL_SUCCESS)
				+			STARPU_OPENCL_REPORT_ERROR(err);
				+	}
				+
				+	clFinish(queue);
				+	starpu_opencl_collect_stats(event);
				+	clReleaseEvent(event);
				+
				+	starpu_opencl_release_kernel(kernel);
				+}
			
--- a/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
+++ b/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl
@@ -0,0 +1,26 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include "examples/basic_examples/multiformat_types.h"
			
 
				+/*
				+ * Conversion kernel: the work-item with global id gid copies element gid of
				+ * the struct-of-arrays `src` into dst[gid]. Work-items whose id is not
				+ * below n do nothing.
				+ */
				+__kernel void cpu_to_opencl_opencl(__global struct struct_of_arrays *src,
				+				__global struct point *dst,
				+				unsigned int n)
				+{
				+	const unsigned int gid = get_global_id(0);
				+
				+	if (gid >= n)
				+		return;
				+
				+	dst[gid].x = src->x[gid];
				+	dst[gid].y = src->y[gid];
				+}
			
--- a/examples/basic_examples/multiformat_cuda.cu
+++ b/examples/basic_examples/multiformat_cuda.cu
@@ -0,0 +1,41 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "multiformat_types.h"
			
 
				+/*
				+ * Scaling kernel: the thread with global index idx multiplies val[idx].x
				+ * by val[idx].y in place. Threads whose index is not below n do nothing.
				+ */
				+static __global__ void multiformat_cuda(struct point *val, unsigned n)
				+{
				+	const unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
				+
				+	if (idx < n)
				+		val[idx].x *= val[idx].y;
				+}
			
 
				+
			
 
				+/*
				+ * CUDA implementation of the scaling codelet: logs its own name on stderr,
				+ * launches multiformat_cuda on StarPU's local stream with one thread per
				+ * element, then waits for that stream to complete.
				+ *
				+ * buffers[0] is the multiformat interface of the data; _args is unused.
				+ */
				+extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args)
				+{
				+	(void) _args;
				+
				+	fprintf(stderr, "Running the cuda kernel (%s)\n", __func__);
				+	unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	struct point *val = (struct point *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
				+
				+	/* A grid of zero blocks is not a valid launch configuration. */
				+	if (n == 0)
				+		return;
				+
				+	unsigned threads_per_block = 64;
				+	/* Round up so that a partially filled last block is still launched. */
				+	unsigned nblocks = (n + threads_per_block - 1) / threads_per_block;
				+
				+	/* The kernel uses no dynamic shared memory, so request 0 bytes. */
				+	multiformat_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(val, n);
				+
				+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
				+}
			
--- a/examples/basic_examples/multiformat_opencl.c
+++ b/examples/basic_examples/multiformat_opencl.c
@@ -0,0 +1,91 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+
			
 
				+extern struct starpu_opencl_program opencl_program;
			
 
				+
			
 
				+/*
				+ * OpenCL implementation of the scaling codelet: loads the
				+ * "multiformat_opencl" kernel from opencl_program for the current worker's
				+ * device, runs it over the nx elements of buffers[0] and waits for the
				+ * queue to finish.
				+ *
				+ * buffers[0] is the multiformat interface of the data; args is unused.
				+ */
				+void multiformat_scal_opencl_func(void *buffers[], void *args)
				+{
				+	(void) args;
				+	int id, devid;
				+	cl_int err;
				+	cl_kernel kernel;
				+	cl_command_queue queue;
				+	cl_event event;
				+
				+	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
				+	cl_mem val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
				+
				+	/* A zero-sized NDRange cannot be enqueued; there is nothing to scale. */
				+	if (n == 0)
				+		return;
				+
				+	id = starpu_worker_get_id();
				+	devid = starpu_worker_get_devid(id);
				+
				+	err = starpu_opencl_load_kernel(&kernel,
				+					&queue,
				+					&opencl_program,
				+					"multiformat_opencl",
				+					devid);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	err = clSetKernelArg(kernel, 1, sizeof(n), &n);
				+	if (err != CL_SUCCESS)
				+		STARPU_OPENCL_REPORT_ERROR(err);
				+
				+	{
				+		size_t global = n;
				+		size_t local;
				+		cl_device_id device;
				+
				+		starpu_opencl_get_device(devid, &device);
				+
				+		/* The returned size is not needed, hence the NULL last argument. */
				+		err = clGetKernelWorkGroupInfo(kernel,
				+					       device,
				+					       CL_KERNEL_WORK_GROUP_SIZE,
				+					       sizeof(local),
				+					       &local,
				+					       NULL);
				+		if (err != CL_SUCCESS)
				+			STARPU_OPENCL_REPORT_ERROR(err);
				+
				+		if (local > global)
				+			local = global;
				+		else if (global % local != 0)
				+			/*
				+			 * The global size must be a multiple of the local
				+			 * size; round it up. The kernel ignores work-items
				+			 * whose index is >= its element count.
				+			 */
				+			global += local - (global % local);
				+
				+		err = clEnqueueNDRangeKernel(queue,
				+					     kernel,
				+					     1,
				+					     NULL,
				+					     &global,
				+					     &local,
				+					     0,
				+					     NULL,
				+					     &event);
				+		if (err != CL_SUCCESS)
				+			STARPU_OPENCL_REPORT_ERROR(err);
				+	}
				+
				+	clFinish(queue);
				+	starpu_opencl_collect_stats(event);
				+	clReleaseEvent(event);
				+
				+	starpu_opencl_release_kernel(kernel);
				+}
			
--- a/examples/basic_examples/multiformat_opencl_kernel.cl
+++ b/examples/basic_examples/multiformat_opencl_kernel.cl
@@ -0,0 +1,24 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "examples/basic_examples/multiformat_types.h"
			
 
				+/*
				+ * Scaling kernel: the work-item with global id gid multiplies val[gid].x by
				+ * val[gid].y in place. Work-items whose id is not below nx do nothing.
				+ */
				+__kernel void multiformat_opencl(__global struct point* val, int nx)
				+{
				+	const int gid = get_global_id(0);
				+
				+	if (gid >= nx)
				+		return;
				+
				+	val[gid].x *= val[gid].y;
				+}
			
--- a/examples/basic_examples/multiformat_types.h
+++ b/examples/basic_examples/multiformat_types.h
@@ -0,0 +1,30 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#ifndef MULTIFORMAT_TYPES_H
			
 
				+#define MULTIFORMAT_TYPES_H
			
 
				+
			
 
				+#define N_ELEMENTS 10
			
 
				+
			
 
				+/* Struct-of-arrays layout: all x values, followed by all y values. */
				+struct struct_of_arrays {
				+	float x[N_ELEMENTS];
				+	float y[N_ELEMENTS];
				+};
				+
				+/* Array-of-structs layout: one (x, y) pair per element. */
				+struct point {
				+	float x;
				+	float y;
				+};
			
 
				+
			
 
				+
			
 
				+#endif
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -314,6 +314,56 @@ uint32_t starpu_bcsr_get_r(starpu_data_handle);
 
				 uint32_t starpu_bcsr_get_c(starpu_data_handle);
			
 
				 size_t starpu_bcsr_get_elemsize(starpu_data_handle);
			
 
				 
			
 
				+/*
			
 
				+ * Multiformat interface
			
 
				+ */
			
 
				+/*
				+ * Per-format description supplied by the application when registering
				+ * multiformat data: an element size for each kind of device, and pointers
				+ * to the conversion codelets between the CPU format and each device format.
				+ * The device-specific members only exist when the matching STARPU_USE_*
				+ * macro is defined.
				+ */
				+struct starpu_multiformat_data_interface_ops {
				+	/* element size in the CPU format */
				+	size_t cpu_elemsize;
				+#ifdef STARPU_USE_OPENCL
				+	/* element size in the OpenCL format */
				+	size_t opencl_elemsize;
				+	/* conversion codelets, stored as untyped pointers */
				+	void *cpu_to_opencl_cl;
				+	void *opencl_to_cpu_cl;
				+#endif
				+#ifdef STARPU_USE_CUDA
				+	/* element size in the CUDA format */
				+	size_t cuda_elemsize;
				+	/* conversion codelets, stored as untyped pointers */
				+	void *cpu_to_cuda_cl;
				+	void *cuda_to_cpu_cl;
				+#endif
				+};
			
 
				+
			
 
				+/*
				+ * Multiformat data interface: one pointer per supported format, the number
				+ * of elements, and the format description given at registration time.
				+ * The STARPU_MULTIFORMAT_GET_* macros read these members.
				+ */
				+typedef struct starpu_multiformat_interface_s {
				+	/* data in the CPU format (STARPU_MULTIFORMAT_GET_PTR) */
				+	void *cpu_ptr;
				+#ifdef STARPU_USE_CUDA
				+	/* data in the CUDA format (STARPU_MULTIFORMAT_GET_CUDA_PTR) */
				+	void *cuda_ptr;
				+#endif
				+#ifdef STARPU_USE_OPENCL
				+	/* data in the OpenCL format (STARPU_MULTIFORMAT_GET_OPENCL_PTR) */
				+	void *opencl_ptr;
				+#endif
				+	/* NOTE(review): presumably a device-side handle and an offset into it, as in other StarPU interfaces -- confirm */
				+	uintptr_t dev_handle;
				+	size_t offset;
				+	/* number of elements (STARPU_MULTIFORMAT_GET_NX) */
				+	uint32_t nx;
				+	/* element sizes and conversion codelets */
				+	struct starpu_multiformat_data_interface_ops *ops;
				+	/* NOTE(review): unit and writer of this value are not visible here -- confirm */
				+	double conversion_time;
				+} starpu_multiformat_interface_t;
			
 
				+
			
 
				+/*
				+ * Registers a piece of multiformat data.
				+ *   handle:     receives the newly created data handle
				+ *   home_node:  memory node on which `ptr` lives
				+ *   ptr:        the data, in the CPU format
				+ *   nobjects:   number of elements in the data
				+ *   format_ops: element sizes and conversion codelets for each format
				+ * NOTE(review): parameter meanings are inferred from their names and from
				+ * the multiformat example; confirm against multiformat_interface.c.
				+ */
				+void starpu_multiformat_data_register(starpu_data_handle *handle,
				+				      uint32_t home_node,
				+				      void *ptr,
				+				      uint32_t nobjects,
				+				      struct starpu_multiformat_data_interface_ops *format_ops);
			
 
				+
			
 
				+/*
				+ * Accessors for codelet implementations: each one casts `interface` to a
				+ * starpu_multiformat_interface_t pointer and reads one member.
				+ */
				+
				+/* Pointer to the data in the CPU format. */
				+#define STARPU_MULTIFORMAT_GET_PTR(interface)  (((starpu_multiformat_interface_t *)(interface))->cpu_ptr)
				+
				+#ifdef STARPU_USE_CUDA
				+/* Pointer to the data in the CUDA format. */
				+#define STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) (((starpu_multiformat_interface_t *)(interface))->cuda_ptr)
				+#endif
				+
				+#ifdef STARPU_USE_OPENCL
				+/* Pointer to the data in the OpenCL format. */
				+#define STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) (((starpu_multiformat_interface_t *)(interface))->opencl_ptr)
				+#endif
				+
				+/* Number of elements. */
				+#define STARPU_MULTIFORMAT_GET_NX(interface)  (((starpu_multiformat_interface_t *)(interface))->nx)
			
 
				+
			
 
				 #define STARPU_MATRIX_INTERFACE_ID	0
			
 
				 #define STARPU_BLOCK_INTERFACE_ID	1
			
 
				 #define STARPU_VECTOR_INTERFACE_ID	2
			
@@ -321,7 +371,8 @@ size_t starpu_bcsr_get_elemsize(starpu_data_handle);
 
				 #define STARPU_BCSR_INTERFACE_ID	4
			
 
				 #define STARPU_VARIABLE_INTERFACE_ID	5
			
 
				 #define STARPU_VOID_INTERFACE_ID	6
			
 
				-#define STARPU_NINTERFACES_ID		7 /* number of data interfaces */
			
 
				+#define STARPU_MULTIFORMAT_INTERFACE_ID 7
			
 
				+#define STARPU_NINTERFACES_ID		8 /* number of data interfaces */
			
 
				 
			
 
				 unsigned starpu_get_handle_interface_id(starpu_data_handle);
			
 
				 
			
--- a/include/starpu_scheduler.h
+++ b/include/starpu_scheduler.h
@@ -183,6 +183,8 @@ double starpu_task_expected_data_transfer_time(uint32_t memory_node, struct star
 
				 double starpu_data_expected_transfer_time(starpu_data_handle handle, unsigned memory_node, starpu_access_mode mode);
			
 
				 /* Returns expected power consumption in J */
			
 
				 double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
			
 
				+/* Returns expected conversion time in µs (multiformat interface only) */
			
 
				+double starpu_task_expected_conversion_time(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -99,6 +99,9 @@ typedef struct starpu_codelet_t {
 
				 	 * In the case of parallel codelets, accounts for all units. */
			
 
				 	struct starpu_perfmodel_t *power_model;
			
 
				 
			
 
				+	/* Conversion model of the codelet */
			
 
				+	struct starpu_perfmodel_t *conversion_model;
			
 
				+
			
 
				 	/* statistics collected at runtime: this is filled by StarPU and should
			
 
				 	 * not be accessed directly (use the starpu_display_codelet_stats
			
 
				 	 * function instead for instance). */
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -182,6 +182,7 @@ libstarpu_la_SOURCES = 						\
 
				 	datawizard/interfaces/vector_filters.c			\
			
 
				 	datawizard/interfaces/variable_interface.c		\
			
 
				 	datawizard/interfaces/void_interface.c			\
			
 
				+	datawizard/interfaces/multiformat_interface.c           \
			
 
				 	util/malloc.c						\
			
 
				 	util/execute_on_all.c					\
			
 
				 	util/starpu_create_sync_task.c				\
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -196,6 +196,11 @@ double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_arc
 
				 	return starpu_model_expected_perf(task, task->cl->power_model, arch, nimpl);
			
 
				 }
			
 
				 
			
 
				+double starpu_task_expected_conversion_time(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)
			
 
				+{
			
 
				+	return starpu_model_expected_perf(task, task->cl->conversion_model, arch, nimpl);
			
 
				+}
			
 
				+
			
 
				 /* Predict the transfer time (in µs) to move a handle to a memory node */
			
 
				 double starpu_data_expected_transfer_time(starpu_data_handle handle, unsigned memory_node, starpu_access_mode mode)
			
 
				 {
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -387,3 +387,26 @@ void _starpu_set_current_task(struct starpu_task *task)
 
				 {
			
 
				 	pthread_setspecific(current_task_key, task);
			
 
				 }
			
 
				+
			
 
				+/* Sum the conversion time recorded on every multiformat buffer of the task,
			
 
				+ * reading the replicate located on the memory node of the task's worker.
			
 
				+ * Each counter is cleared once it has been read. */
			
 
				+double _starpu_task_get_conversion_time(struct starpu_task *task)
			
 
				+{
			
 
				+	double total = 0.0;
			
 
				+	int buf;
			
 
				+
			
 
				+	for (buf = 0; buf < task->cl->nbuffers; buf++) {
			
 
				+		starpu_data_handle handle = task->buffers[buf].handle;
			
 
				+
			
 
				+		/* Only multiformat handles carry a conversion time */
			
 
				+		if (starpu_get_handle_interface_id(handle) != STARPU_MULTIFORMAT_INTERFACE_ID)
			
 
				+			continue;
			
 
				+
			
 
				+		uint32_t node = starpu_worker_get_memory_node(task->workerid);
			
 
				+		starpu_multiformat_interface_t *replicate =
			
 
				+			starpu_data_get_interface_on_node(handle, node);
			
 
				+
			
 
				+		total += replicate->conversion_time;
			
 
				+		/* XXX : this may not be the right place to reset this field,
			
 
				+		 * but the conversion time must not be counted twice */
			
 
				+		replicate->conversion_time = 0;
			
 
				+	}
			
 
				+
			
 
				+	return total;
			
 
				+}
			
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -0,0 +1,605 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <common/config.h>
			
 
				+#include <datawizard/coherency.h>
			
 
				+#include <datawizard/copy_driver.h>
			
 
				+#include <datawizard/filters.h>
			
 
				+#include <common/hash.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+#include <drivers/opencl/driver_opencl.h>
			
 
				+
			
 
				+static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
			
 
				+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
			
 
				+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					void *dst_interface, unsigned dst_node, cudaStream_t stream);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
			
 
				+#endif
			
 
				+
			
 
				+static const struct starpu_data_copy_methods multiformat_copy_data_methods_s = {
			
 
				+	.ram_to_ram = copy_ram_to_ram,
			
 
				+	.ram_to_spu = NULL,
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.ram_to_cuda = copy_ram_to_cuda,
			
 
				+	.cuda_to_ram = copy_cuda_to_ram,
			
 
				+	.ram_to_cuda_async = copy_ram_to_cuda_async,
			
 
				+	.cuda_to_ram_async = copy_cuda_to_ram_async,
			
 
				+	.cuda_to_cuda = copy_cuda_to_cuda,
			
 
				+	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.ram_to_opencl = copy_ram_to_opencl,
			
 
				+	.opencl_to_ram = copy_opencl_to_ram,
			
 
				+	.opencl_to_opencl = copy_opencl_to_opencl,
			
 
				+        .ram_to_opencl_async = copy_ram_to_opencl_async,
			
 
				+	.opencl_to_ram_async = copy_opencl_to_ram_async,
			
 
				+#endif
			
 
				+	.cuda_to_spu = NULL,
			
 
				+	.spu_to_ram = NULL,
			
 
				+	.spu_to_cuda = NULL,
			
 
				+	.spu_to_spu = NULL
			
 
				+};
			
 
				+
			
 
				+static void register_multiformat_handle(starpu_data_handle handle, uint32_t home_node, void *data_interface);
			
 
				+static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
			
 
				+static void *multiformat_handle_to_pointer(starpu_data_handle data_handle, uint32_t node);
			
 
				+static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
			
 
				+static size_t multiformat_interface_get_size(starpu_data_handle handle);
			
 
				+static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle handle);
			
 
				+static int multiformat_compare(void *data_interface_a, void *data_interface_b);
			
 
				+static void display_multiformat_interface(starpu_data_handle handle, FILE *f);
			
 
				+static uint32_t starpu_multiformat_get_nx(starpu_data_handle handle);
			
 
				+#ifdef STARPU_USE_GORDON
			
 
				+static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+static struct starpu_data_interface_ops_t interface_multiformat_ops = {
			
 
				+	.register_data_handle  = register_multiformat_handle,
			
 
				+	.allocate_data_on_node = allocate_multiformat_buffer_on_node,
			
 
				+	.handle_to_pointer     = multiformat_handle_to_pointer,
			
 
				+	.free_data_on_node     = free_multiformat_buffer_on_node,
			
 
				+	.copy_methods          = &multiformat_copy_data_methods_s,
			
 
				+	.get_size              = multiformat_interface_get_size,
			
 
				+	.footprint             = footprint_multiformat_interface_crc32,
			
 
				+	.compare               = multiformat_compare,
			
 
				+#ifdef STARPU_USE_GORDON
			
 
				+	.convert_to_gordon     = NULL,
			
 
				+#endif
			
 
				+	.interfaceid           = STARPU_MULTIFORMAT_INTERFACE_ID,
			
 
				+	.interface_size        = sizeof(starpu_multiformat_interface_t),
			
 
				+	.display               = display_multiformat_interface
			
 
				+};
			
 
				+
			
 
				+/* Return the buffer of the handle's replicate on `node`, picking the field
			
 
				+ * that matches the kind of the memory node (cpu_ptr, cuda_ptr or opencl_ptr).
			
 
				+ * The data is asserted to be allocated on that node. */
			
 
				+static void *multiformat_handle_to_pointer(starpu_data_handle handle, uint32_t node)
			
 
				+{
			
 
				+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				+	starpu_multiformat_interface_t *multiformat_interface =
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				+
			
 
				+	switch(_starpu_get_node_kind(node)) {
			
 
				+		case STARPU_CPU_RAM:
			
 
				+			return multiformat_interface->cpu_ptr;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		case STARPU_CUDA_RAM:
			
 
				+			return multiformat_interface->cuda_ptr;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+		case STARPU_OPENCL_RAM:
			
 
				+			return multiformat_interface->opencl_ptr;
			
 
				+#endif
			
 
				+		default:
			
 
				+			STARPU_ASSERT(0);
			
 
				+	}
			
 
				+
			
 
				+	/* Unsupported node kind: only reached if the assertion above does not
			
 
				+	 * stop the program. Never fall off the end of a non-void function. */
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+static void register_multiformat_handle(starpu_data_handle handle, uint32_t home_node, void *data_interface)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *multiformat_interface;
			
 
				+	multiformat_interface = (starpu_multiformat_interface_t *) data_interface;
			
 
				+
			
 
				+	unsigned node;
			
 
				+	for (node = 0; node < STARPU_MAXNODES; node++) {
			
 
				+		starpu_multiformat_interface_t *local_interface =
			
 
				+			starpu_data_get_interface_on_node(handle, node);
			
 
				+
			
 
				+		if (node == home_node) {
			
 
				+			local_interface->cpu_ptr    = multiformat_interface->cpu_ptr;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+			local_interface->cuda_ptr   = multiformat_interface->cuda_ptr;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+			local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
			
 
				+#endif
			
 
				+			local_interface->dev_handle = multiformat_interface->dev_handle;
			
 
				+			local_interface->offset     = multiformat_interface->offset;
			
 
				+		}
			
 
				+		else {
			
 
				+			local_interface->cpu_ptr    = NULL;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+			local_interface->cuda_ptr   = NULL;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+			local_interface->opencl_ptr = NULL;
			
 
				+#endif
			
 
				+			local_interface->dev_handle = 0;
			
 
				+			local_interface->offset     = 0;
			
 
				+		}
			
 
				+		local_interface->nx = multiformat_interface->nx;
			
 
				+		local_interface->ops = multiformat_interface->ops;
			
 
				+		local_interface->conversion_time = 0;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* Register the `nobjects` elements located at `ptr` on `home_node` as a
			
 
				+ * multiformat piece of data. `ptr` is stored as the CPU-format buffer and
			
 
				+ * `format_ops` is kept in the interface; the CUDA and OpenCL buffers start
			
 
				+ * out as NULL. The new handle is returned through `handleptr`. */
			
 
				+void starpu_multiformat_data_register(starpu_data_handle *handleptr,
			
 
				+				      uint32_t home_node,
			
 
				+				      void *ptr,
			
 
				+				      uint32_t nobjects,
			
 
				+				      struct starpu_multiformat_data_interface_ops *format_ops)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t multiformat = {
			
 
				+		.cpu_ptr    = ptr,
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		.cuda_ptr   = NULL,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+		.opencl_ptr = NULL,
			
 
				+#endif
			
 
				+		.nx         = nobjects,
			
 
				+		.dev_handle = (uintptr_t) ptr,
			
 
				+		.offset     = 0,
			
 
				+		.ops        = format_ops
			
 
				+	};
			
 
				+	starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
			
 
				+}
			
 
				+
			
 
				+static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle handle)
			
 
				+{
			
 
				+	return _starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
			
 
				+}
			
 
				+
			
 
				+/* Return non-zero when the two interfaces have the same number of elements
			
 
				+ * and the same element size in every format that is compiled in. */
			
 
				+static int multiformat_compare(void *data_interface_a, void *data_interface_b)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *multiformat_a = data_interface_a;
			
 
				+	starpu_multiformat_interface_t *multiformat_b = data_interface_b;
			
 
				+
			
 
				+	return ((multiformat_a->nx == multiformat_b->nx)
			
 
				+			&& (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+			&& (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+			&& (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
			
 
				+#endif
			
 
				+		);
			
 
				+}
			
 
				+
			
 
				+static void display_multiformat_interface(starpu_data_handle handle, FILE *f)
			
 
				+{
			
 
				+	/* TODO */
			
 
				+	STARPU_ASSERT(0);
			
 
				+}
			
 
				+
			
 
				+/* XXX : returns CPU size */
			
 
				+static size_t multiformat_interface_get_size(starpu_data_handle handle)
			
 
				+{
			
 
				+	size_t size;
			
 
				+	starpu_multiformat_interface_t *multiformat_interface;
			
 
				+	multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
			
 
				+	size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				+uint32_t starpu_multiformat_get_nx(starpu_data_handle handle)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *multiformat_interface;
			
 
				+	multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
			
 
				+	return multiformat_interface->nx;
			
 
				+}
			
 
				+
			
 
				+static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *multiformat_interface;
			
 
				+	multiformat_interface = (starpu_multiformat_interface_t *) data_interface;
			
 
				+	starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				+
			
 
				+	switch(kind) {
			
 
				+		case STARPU_CPU_RAM:
			
 
				+			free(multiformat_interface->cpu_ptr);
			
 
				+			multiformat_interface->cpu_ptr = NULL;
			
 
				+			break;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		case STARPU_CUDA_RAM:
			
 
				+			cudaFree(multiformat_interface->cuda_ptr);
			
 
				+			multiformat_interface->cuda_ptr = NULL;
			
 
				+			break;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+		case STARPU_OPENCL_RAM:
			
 
				+			/* TODO */
			
 
				+			break;
			
 
				+#endif
			
 
				+		default:
			
 
				+			STARPU_ABORT();
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* Allocate on memory node `dst_node` a buffer large enough for the nx
			
 
				+ * elements of the interface, using the element size of that node's format
			
 
				+ * (cpu_elemsize, cuda_elemsize or opencl_elemsize), and record the buffer in
			
 
				+ * the interface (format-specific pointer and dev_handle; offset is reset).
			
 
				+ * Returns the number of bytes allocated, or -ENOMEM if the allocation failed. */
			
 
				+static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *multiformat_interface;
			
 
				+	multiformat_interface = (starpu_multiformat_interface_t *) data_interface_;
			
 
				+	unsigned fail = 0;
			
 
				+	uintptr_t addr = 0;
			
 
				+	/* Initialized so that the unsupported-node path never returns garbage */
			
 
				+	ssize_t allocated_memory = 0;
			
 
				+
			
 
				+	starpu_node_kind kind = _starpu_get_node_kind(dst_node);
			
 
				+	switch(kind) {
			
 
				+		case STARPU_CPU_RAM:
			
 
				+			allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
			
 
				+			addr = (uintptr_t)malloc(allocated_memory);
			
 
				+			if (!addr) {
			
 
				+				fail = 1;
			
 
				+			}
			
 
				+			else {
			
 
				+				multiformat_interface->cpu_ptr = (void *) addr;
			
 
				+				multiformat_interface->dev_handle = addr;
			
 
				+			}
			
 
				+			break;
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+		case STARPU_CUDA_RAM:
			
 
				+			{
			
 
				+				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
			
 
				+				cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
			
 
				+				if (STARPU_UNLIKELY(status)) {
			
 
				+					/* Running out of device memory is reported to the
			
 
				+					 * caller as -ENOMEM, like for the other node kinds;
			
 
				+					 * any other status is a genuine CUDA error. */
			
 
				+					if (status != cudaErrorMemoryAllocation)
			
 
				+						STARPU_CUDA_REPORT_ERROR(status);
			
 
				+					fail = 1;
			
 
				+				}
			
 
				+				else {
			
 
				+					multiformat_interface->cuda_ptr = (void *)addr;
			
 
				+					multiformat_interface->dev_handle = addr;
			
 
				+				}
			
 
				+				break;
			
 
				+			}
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+		case STARPU_OPENCL_RAM:
			
 
				+			{
			
 
				+				int ret;
			
 
				+				void *ptr;
			
 
				+				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
			
 
				+				ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
			
 
				+				addr = (uintptr_t)ptr;
			
 
				+				if (ret) {
			
 
				+					fail = 1;
			
 
				+				}
			
 
				+				else {
			
 
				+					multiformat_interface->opencl_ptr = (void *)addr;
			
 
				+					multiformat_interface->dev_handle = addr;
			
 
				+				}
			
 
				+				break;
			
 
				+			}
			
 
				+#endif
			
 
				+		default:
			
 
				+			STARPU_ASSERT(0);
			
 
				+	}
			
 
				+
			
 
				+	if (fail)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	multiformat_interface->offset = 0;
			
 
				+	return allocated_memory;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Copy methods
			
 
				+ */
			
 
				+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
			
 
				+			   void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *src_multiformat;
			
 
				+	starpu_multiformat_interface_t *dst_multiformat;
			
 
				+
			
 
				+	src_multiformat = (starpu_multiformat_interface_t *) src_interface;
			
 
				+	dst_multiformat = (starpu_multiformat_interface_t *) dst_interface;
			
 
				+
			
 
				+	STARPU_ASSERT(src_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat->ops != NULL);
			
 
				+
			
 
				+	size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
			
 
				+	memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/* Synchronous transfer of multiformat data between main memory and a CUDA
			
 
				+ * device. The host replicate uses its cuda_ptr field as a host-side staging
			
 
				+ * buffer for the data in CUDA format:
			
 
				+ *  - host to device: the cpu_to_cuda codelet is run on the source interface,
			
 
				+ *    the time it took is stored in the destination's conversion_time, then
			
 
				+ *    the staging buffer is copied to the device;
			
 
				+ *  - device to host: the device buffer is copied into the staging buffer,
			
 
				+ *    then the cuda_to_cpu codelet is run on the destination interface.
			
 
				+ * Returns 0 on success and -ENOMEM if the staging buffer cannot be allocated. */
			
 
				+static int copy_cuda_common(void *src_interface, unsigned src_node,
			
 
				+			    void *dst_interface, unsigned dst_node,
			
 
				+			    enum cudaMemcpyKind kind)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *src_multiformat;
			
 
				+	starpu_multiformat_interface_t *dst_multiformat;
			
 
				+
			
 
				+	src_multiformat = (starpu_multiformat_interface_t *) src_interface;
			
 
				+	dst_multiformat = (starpu_multiformat_interface_t *) dst_interface;
			
 
				+
			
 
				+	size_t size;
			
 
				+
			
 
				+	cudaError_t status;
			
 
				+
			
 
				+	switch (kind) {
			
 
				+		case cudaMemcpyHostToDevice:
			
 
				+		{
			
 
				+			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
			
 
				+			if (src_multiformat->cuda_ptr == NULL) {
			
 
				+				src_multiformat->cuda_ptr = malloc(size);
			
 
				+				if (src_multiformat->cuda_ptr == NULL)
			
 
				+					return -ENOMEM;
			
 
				+			}
			
 
				+			/* Converting data , from host to host */
			
 
				+			double tmp = starpu_timing_now();
			
 
				+			void *buffers[1];
			
 
				+			buffers[0] = src_interface;
			
 
				+			starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
			
 
				+			cl->cpu_func(buffers, NULL);
			
 
				+			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				+
			
 
				+			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
			
 
				+			if (STARPU_UNLIKELY(status)) {
			
 
				+				STARPU_CUDA_REPORT_ERROR(status);
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		case cudaMemcpyDeviceToHost:
			
 
				+		{
			
 
				+			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
			
 
				+			/* The host staging buffer is otherwise only allocated by an
			
 
				+			 * upload: create it here when the first transfer of this
			
 
				+			 * replicate is a download. */
			
 
				+			if (dst_multiformat->cuda_ptr == NULL) {
			
 
				+				dst_multiformat->cuda_ptr = malloc(size);
			
 
				+				if (dst_multiformat->cuda_ptr == NULL)
			
 
				+					return -ENOMEM;
			
 
				+			}
			
 
				+			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
			
 
				+			if (STARPU_UNLIKELY(status))
			
 
				+				STARPU_CUDA_REPORT_ERROR(status);
			
 
				+
			
 
				+			/* Converting data, from host to host */
			
 
				+			void *buffers[1];
			
 
				+			starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
			
 
				+			buffers[0] = dst_interface;
			
 
				+			cl->cpu_func(buffers, NULL);
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		default:
			
 
				+			STARPU_ASSERT(0);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
			
 
				+}
			
 
				+
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
			
 
				+}
			
 
				+
			
 
				+/* Transfer of multiformat data between main memory and a CUDA device on
			
 
				+ * behalf of the asynchronous copy methods. The host replicate uses its
			
 
				+ * cuda_ptr field as a host-side staging buffer for the data in CUDA format.
			
 
				+ *  - host to device: the cpu_to_cuda codelet is run on the source interface,
			
 
				+ *    its duration is stored in the destination's conversion_time, then the
			
 
				+ *    staging buffer is sent with cudaMemcpyAsync on `stream`;
			
 
				+ *  - device to host: the copy is done with a blocking cudaMemcpy because the
			
 
				+ *    cuda_to_cpu codelet is run on the received data right afterwards.
			
 
				+ * Returns 0, or -ENOMEM if the staging buffer cannot be allocated. */
			
 
				+static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
			
 
				+{
			
 
				+	starpu_multiformat_interface_t *src_multiformat;
			
 
				+	starpu_multiformat_interface_t *dst_multiformat;
			
 
				+
			
 
				+	src_multiformat = (starpu_multiformat_interface_t *) src_interface;
			
 
				+	dst_multiformat = (starpu_multiformat_interface_t *) dst_interface;
			
 
				+
			
 
				+	size_t size;
			
 
				+	cudaError_t status;
			
 
				+
			
 
				+	switch (kind) {
			
 
				+		case cudaMemcpyHostToDevice:
			
 
				+		{
			
 
				+			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
			
 
				+			if (src_multiformat->cuda_ptr == NULL) {
			
 
				+				src_multiformat->cuda_ptr = malloc(size);
			
 
				+				if (src_multiformat->cuda_ptr == NULL)
			
 
				+					return -ENOMEM;
			
 
				+			}
			
 
				+
			
 
				+			/* Converting data , from host to host */
			
 
				+			double tmp = starpu_timing_now();
			
 
				+			void *buffers[1]; // XXX
			
 
				+			buffers[0] = src_interface;
			
 
				+			starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
			
 
				+			cl->cpu_func(buffers, NULL);
			
 
				+			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				+
			
 
				+			/* Actual copy from host to device */
			
 
				+			status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
			
 
				+			if (STARPU_UNLIKELY(status)) {
			
 
				+				STARPU_CUDA_REPORT_ERROR(status);
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		case cudaMemcpyDeviceToHost:
			
 
				+		{
			
 
				+			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
			
 
				+			/* The host staging buffer is otherwise only allocated by an
			
 
				+			 * upload: create it here when the first transfer of this
			
 
				+			 * replicate is a download. */
			
 
				+			if (dst_multiformat->cuda_ptr == NULL) {
			
 
				+				dst_multiformat->cuda_ptr = malloc(size);
			
 
				+				if (dst_multiformat->cuda_ptr == NULL)
			
 
				+					return -ENOMEM;
			
 
				+			}
			
 
				+			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
			
 
				+			if (STARPU_UNLIKELY(status))
			
 
				+				STARPU_CUDA_REPORT_ERROR(status);
			
 
				+
			
 
				+			/* Converting data */
			
 
				+			void *buffers[1];
			
 
				+			starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
			
 
				+			buffers[0] = dst_interface;
			
 
				+			cl->cpu_func(buffers, NULL);
			
 
				+
			
 
				+			break;
			
 
				+		}
			
 
				+		default:
			
 
				+			STARPU_ASSERT(0);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				+{
			
 
				+	return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
			
 
				+}
			
 
				+
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				+{
			
 
				+	return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
			
 
				+}
			
 
				+
			
 
				+/* Device-to-device CUDA copies are not implemented for the multiformat
			
 
				+ * interface yet: this stub asserts. */
			
 
				+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
			
 
				+{
			
 
				+	/* TODO */
			
 
				+	STARPU_ASSERT(0);
			
 
				+
			
 
				+	/* Only reached if the assertion does not stop the program: a non-void
			
 
				+	 * function must not end without returning a value. */
			
 
				+	return -ENOSYS;
			
 
				+}
			
 
				+
			
 
				+/* Asynchronous device-to-device CUDA copies are not implemented for the
			
 
				+ * multiformat interface yet: this stub asserts. */
			
 
				+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				+{
			
 
				+	/* TODO */
			
 
				+	STARPU_ASSERT(0);
			
 
				+
			
 
				+	/* Only reached if the assertion does not stop the program: a non-void
			
 
				+	 * function must not end without returning a value. */
			
 
				+	return -ENOSYS;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+/* Send multiformat data from main memory to an OpenCL device.
			
 
				+ * The host replicate uses its opencl_ptr field as a host-side staging buffer
			
 
				+ * for the data in OpenCL format. The cpu_to_opencl codelet is run on the
			
 
				+ * source interface before every transfer, and its duration is stored in the
			
 
				+ * destination's conversion_time; the staging buffer is then handed to
			
 
				+ * _starpu_opencl_copy_ram_to_opencl_async_sync together with `_event`.
			
 
				+ * Returns the value that helper stores in `ret`, or -ENOMEM if the staging
			
 
				+ * buffer cannot be allocated. */
			
 
				+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
			
 
				+				    void *dst_interface, unsigned dst_node,
			
 
				+				    void *_event)
			
 
				+{
			
 
				+	int err, ret;
			
 
				+	size_t size;
			
 
				+	starpu_multiformat_interface_t *src_multiformat;
			
 
				+	starpu_multiformat_interface_t *dst_multiformat;
			
 
				+
			
 
				+	src_multiformat = (starpu_multiformat_interface_t *) src_interface;
			
 
				+	dst_multiformat = (starpu_multiformat_interface_t *) dst_interface;
			
 
				+
			
 
				+	STARPU_ASSERT(src_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat != NULL);
			
 
				+	STARPU_ASSERT(src_multiformat->ops != NULL);
			
 
				+
			
 
				+	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
			
 
				+
			
 
				+	/* The staging buffer is allocated once and reused afterwards */
			
 
				+	if (src_multiformat->opencl_ptr == NULL) {
			
 
				+		src_multiformat->opencl_ptr = malloc(size);
			
 
				+		if (src_multiformat->opencl_ptr == NULL) {
			
 
				+			return -ENOMEM;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* Convert on every transfer, not only when the staging buffer has just
			
 
				+	 * been created: the CPU data may have changed since the last upload,
			
 
				+	 * and the CUDA copy methods of this file convert every time too. */
			
 
				+	double tmp = starpu_timing_now();
			
 
				+	void *buffers[1];
			
 
				+	starpu_codelet *cl = src_multiformat->ops->cpu_to_opencl_cl;
			
 
				+	buffers[0] = src_interface;
			
 
				+	cl->cpu_func(buffers, NULL);
			
 
				+	dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				+
			
 
				+	/* XXX : kept from the original code, in case the conversion codelet
			
 
				+	 * reset the staging buffer */
			
 
				+	if (src_multiformat->opencl_ptr == NULL)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->opencl_ptr,
			
 
				+							   src_node,
			
 
				+							   (cl_mem) dst_multiformat->dev_handle,
			
 
				+							   dst_node,
			
 
				+							   size,
			
 
				+							   dst_multiformat->offset,
			
 
				+							   (cl_event *) _event,
			
 
				+							   &ret);
			
 
				+	if (STARPU_UNLIKELY(err))
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* Fetch multiformat data from an OpenCL device into main memory.
			
 
				+ * The device buffer is read into the host replicate's opencl_ptr staging
			
 
				+ * buffer through _starpu_opencl_copy_opencl_to_ram_async_sync, then the
			
 
				+ * opencl_to_cpu codelet is run on the destination interface. When an event is
			
 
				+ * supplied, it is waited for before the conversion, so the call is in fact
			
 
				+ * blocking.
			
 
				+ * Returns the value the helper stores in `ret`, or -ENOMEM if the staging
			
 
				+ * buffer cannot be allocated. */
			
 
				+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
			
 
				+				    void *dst_interface, unsigned dst_node,
			
 
				+				    void *_event)
			
 
				+{
			
 
				+	int err, ret;
			
 
				+	size_t size;
			
 
				+	starpu_multiformat_interface_t *src_multiformat;
			
 
				+	starpu_multiformat_interface_t *dst_multiformat;
			
 
				+
			
 
				+	src_multiformat = (starpu_multiformat_interface_t *) src_interface;
			
 
				+	dst_multiformat = (starpu_multiformat_interface_t *) dst_interface;
			
 
				+
			
 
				+	STARPU_ASSERT(src_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat != NULL);
			
 
				+	STARPU_ASSERT(src_multiformat->ops != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat->ops != NULL);
			
 
				+
			
 
				+	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
			
 
				+
			
 
				+	/* The host staging buffer is otherwise only allocated by an upload:
			
 
				+	 * create it here when the first transfer of this replicate is a
			
 
				+	 * download. */
			
 
				+	if (dst_multiformat->opencl_ptr == NULL) {
			
 
				+		dst_multiformat->opencl_ptr = malloc(size);
			
 
				+		if (dst_multiformat->opencl_ptr == NULL)
			
 
				+			return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->dev_handle,
			
 
				+							   src_node,
			
 
				+							   dst_multiformat->opencl_ptr,
			
 
				+							   dst_node,
			
 
				+							   size,
			
 
				+							   src_multiformat->offset,
			
 
				+							   (cl_event *)_event,
			
 
				+							   &ret);
			
 
				+	if (STARPU_UNLIKELY(err))
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
			
 
				+
			
 
				+	/* XXX So much for asynchronicity */
			
 
				+	/* copy_opencl_to_ram passes a NULL event: there is then nothing to wait
			
 
				+	 * on (presumably the helper performs a blocking read in that case --
			
 
				+	 * to be confirmed against the OpenCL driver). */
			
 
				+	if (_event != NULL)
			
 
				+		clWaitForEvents(1, (cl_event *) _event);
			
 
				+	void *buffers[1];
			
 
				+	starpu_codelet *cl = src_multiformat->ops->opencl_to_cpu_cl;
			
 
				+	buffers[0] = dst_interface;
			
 
				+	cl->cpu_func(buffers, NULL);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
			
 
				+                              void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
			
 
				+{
			
 
				+        return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+}
			
 
				+
			
 
				+static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
			
 
				+				void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
			
 
				+{
			
 
				+        return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+}
			
 
				+
			
 
				+/* Device-to-device OpenCL copies are not implemented for the multiformat
			
 
				+ * interface yet. Like the CUDA device-to-device stubs of this file, assert
			
 
				+ * rather than return an indeterminate value without copying anything. */
			
 
				+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
			
 
				+                                 void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	/* TODO */
			
 
				+	STARPU_ASSERT(0);
			
 
				+
			
 
				+	/* Only reached if the assertion does not stop the program */
			
 
				+	return -ENOSYS;
			
 
				+}
			
 
				+#endif
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -77,7 +77,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args,
 
				 	if (rank == 0)
			
 
				 	{
			
 
				 		_starpu_driver_update_job_feedback(j, cpu_args,
			
 
				-				perf_arch, &codelet_start, &codelet_end);
			
 
				+				perf_arch, &codelet_start, &codelet_end, 0);
			
 
				 		_starpu_push_task_output(task, 0);
			
 
				 	}
			
 
				 
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -26,6 +26,9 @@
 
				 #include "driver_cuda.h"
			
 
				 #include <core/sched_policy.h>
			
 
				 
			
 
				+
			
 
				+double _starpu_task_get_conversion_time(struct starpu_task *task);
			
 
				+
			
 
				 /* the number of CUDA devices */
			
 
				 static int ncudagpus;
			
 
				 
			
@@ -198,6 +201,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
				 		return -EAGAIN;
			
 
				 	}
			
 
				 
			
 
				+	double conversion_time = _starpu_task_get_conversion_time(task);
			
 
				 	if (calibrate_model)
			
 
				 	{
			
 
				 		cures = cudaStreamSynchronize(starpu_cuda_get_local_transfer_stream());
			
@@ -226,7 +230,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
				 
			
 
				 	_starpu_driver_end_job(args, j, &codelet_end, 0);
			
 
				 
			
 
				-	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
			
 
				+	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end, conversion_time);
			
 
				 
			
 
				 	_starpu_push_task_output(task, mask);
			
 
				 
			
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -87,7 +87,7 @@ void _starpu_driver_end_job(struct starpu_worker_s *args, starpu_job_t j, struct
 
				 }
			
 
				 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
			
 
				 					enum starpu_perf_archtype perf_arch,
			
 
				-					struct timespec *codelet_start, struct timespec *codelet_end)
			
 
				+					struct timespec *codelet_start, struct timespec *codelet_end, double conversion_time)
			
 
				 {
			
 
				 	struct starpu_task_profiling_info *profiling_info = j->task->profiling_info;
			
 
				 	struct timespec measured_ts;
			
@@ -133,6 +133,12 @@ void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *
 
				 	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking) {
			
 
				 		_starpu_update_perfmodel_history(j, j->task->cl->power_model,  perf_arch, worker_args->devid, profiling_info->power_consumed,j->nimpl);
			
 
				 		}
			
 
				+
			
 
				+	if (j->task->cl->conversion_model) {
			
 
				+		_starpu_update_perfmodel_history(j, j->task->cl->conversion_model, perf_arch,
			
 
				+						 worker_args->devid, conversion_time, j->nimpl);
			
 
				+	}
			
 
				+
			
 
				 }
			
 
				 
			
 
				 /* Workers may block when there is no work to do at all. We assume that the
			
--- a/src/drivers/driver_common/driver_common.h
+++ b/src/drivers/driver_common/driver_common.h
@@ -29,7 +29,7 @@ void _starpu_driver_end_job(struct starpu_worker_s *args, starpu_job_t j,
 
				 		struct timespec *codelet_end, int rank);
			
 
				 void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
			
 
				 		enum starpu_perf_archtype perf_arch,
			
 
				-		struct timespec *codelet_start, struct timespec *codelet_end);
			
 
				+		struct timespec *codelet_start, struct timespec *codelet_end, double conversion_time); /* conversion_time is recorded in the codelet's conversion_model history when that model is set */
			
 
				 
			
 
				 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
			
 
				 
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -28,6 +28,8 @@
 
				 #include "driver_opencl_utils.h"
			
 
				 #include <common/utils.h>
			
 
				 
			
 
				+double _starpu_task_get_conversion_time(struct starpu_task *task); /* NOTE(review): ad-hoc local prototype; the same declaration is repeated in driver_cuda.c -- consider moving it to a shared internal header. */
			
 
				+
			
 
				 static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
			
 
				 
			
 
				 static cl_context contexts[STARPU_MAXOPENCLDEVS];
			
@@ -564,6 +566,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
				 		 * codelet back, and try it later */
			
 
				 		return -EAGAIN;
			
 
				 	}
			
 
				+	double conversion_time = _starpu_task_get_conversion_time(task);
			
 
				 
			
 
				 	_starpu_driver_start_job(args, j, &codelet_start, 0);
			
 
				 
			
@@ -582,7 +585,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
				 	_starpu_driver_end_job(args, j, &codelet_end, 0);
			
 
				 
			
 
				 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
			
 
				-							&codelet_start, &codelet_end);
			
 
				+							&codelet_start, &codelet_end, conversion_time);
			
 
				 
			
 
				 	_starpu_push_task_output(task, mask);
			
 
				 
			
--- a/src/sched_policies/heft.c
+++ b/src/sched_policies/heft.c
@@ -250,6 +250,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 
			
 
				 			if (bundle)
			
 
				 			{
			
 
				+				/* TODO: also account for the conversion time of bundled tasks, as is done for tasks that are not part of a bundle */
			
 
				 				local_task_length[worker][nimpl] = starpu_task_bundle_expected_length(bundle, perf_arch, nimpl);
			
 
				 				local_data_penalty[worker][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node);
			
 
				 				local_power[worker][nimpl] = starpu_task_bundle_expected_power(bundle, perf_arch,nimpl);
			
@@ -260,6 +261,10 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 				local_task_length[worker][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
			
 
				 				local_data_penalty[worker][nimpl] = starpu_task_expected_data_transfer_time(memory_node, task);
			
 
				 				local_power[worker][nimpl] = starpu_task_expected_power(task, perf_arch,nimpl);
			
 
				+
			
 
				+				double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl);
			
 
				+				if (conversion_time > 0.0)
			
 
				+					local_data_penalty[worker][nimpl] += conversion_time;
			
 
				 				//_STARPU_DEBUG("Scheduler heft: task length (%lf) local power (%lf) worker (%u) kernel (%u) \n", local_task_length[worker],local_power[worker],worker,nimpl);
			
 
				 
			
 
				 			}