13 anos atrás · d8e86d1388
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -125,7 +125,9 @@ noinst_HEADERS = 				\
 
				 	spmv/spmv.h				\
			
 
				 	spmv/dw_block_spmv.h                    \
			
 
				 	basic_examples/multiformat_types.h      \
			
 
				-	filters/multiformat/multiformat_types.h
			
 
				+	filters/multiformat/multiformat_types.h \
			
 
				+	filters/custom_mf/custom_interface.h    \
			
 
				+	filters/custom_mf/custom_types.h
			
 
				 
			
 
				 #####################################
			
 
				 # What to install and what to check #
			
@@ -156,6 +158,7 @@ examplebin_PROGRAMS +=				\
 
				 	basic_examples/block			\
			
 
				 	basic_examples/variable			\
			
 
				 	basic_examples/multiformat              \
			
 
				+	filters/custom_mf/custom_mf_filter      \
			
 
				 	filters/fvector				\
			
 
				 	filters/fblock				\
			
 
				 	filters/fmatrix				\
			
@@ -391,9 +394,21 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 
				 	filters/fblock_opencl_kernel.cl
			
 
				 endif
			
 
				 
			
 
				-#
			
 
				-#
			
 
				-#
			
 
				+
			
 
				+#############################
			
 
				+# Custom multiformat filter #
			
 
				+#############################
			
 
				+filters_custom_mf_custom_mf_filter_SOURCES=\
			
 
				+	filters/custom_mf/custom_mf_filter.c \
			
 
				+	filters/custom_mf/custom_interface.c   \
			
 
				+	filters/custom_mf/custom_conversion_codelets.c
			
 
				+
			
 
				+if STARPU_USE_CUDA
			
 
				+filters_custom_mf_custom_mf_filter_SOURCES+=\
			
 
				+	filters/custom_mf/conversion.cu \
			
 
				+	filters/custom_mf/cuda.cu
			
 
				+endif
			
 
				+
			
 
				 filters_multiformat_multiformat_filter_SOURCES=                \
			
 
				 	filters/multiformat/multiformat_filter.c               \
			
 
				 	filters/multiformat/multiformat_ops.c                  \
			
--- a/examples/filters/custom_mf/conversion.cu
+++ b/examples/filters/custom_mf/conversion.cu
@@ -0,0 +1,51 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "custom_types.h"
			
 
				+#include "custom_interface.h"
			
 
				+
			
 
				+/*
				+ * Conversion kernel: copies the x and y members of each struct point in
				+ * `aop` into the separate float arrays `x` and `y`.
				+ * Expects a 1D launch; each thread handles at most one element and threads
				+ * whose global index is not below `n` do nothing. Uses no shared memory.
				+ */
				+static __global__ void custom_cuda(struct point *aop,
				+				unsigned n,
				+				float *x,
				+				float *y)
				+{
				+	const unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
				+
				+	/* threads past the end of the data have no element to convert */
				+	if (idx >= n)
				+		return;
				+
				+	const struct point *src = &aop[idx];
				+	x[idx] = src->x;
				+	y[idx] = src->y;
				+}
			
 
				+
			
 
				+/*
				+ * CUDA implementation of the cpu_to_cuda conversion codelet.
				+ *
				+ * buffers[0] is a struct custom_data_interface. The array of struct point
				+ * found through CUSTOM_GET_CPU_PTR is converted into the two float arrays
				+ * found through CUSTOM_GET_X_PTR / CUSTOM_GET_Y_PTR by launching
				+ * custom_cuda on the worker's local stream with 64 threads per block.
				+ * The function waits for the stream before returning.
				+ *
				+ * _args is unused.
				+ */
				+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
				+{
				+	(void) _args;
				+
				+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
				+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
				+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
				+	struct point *aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);
				+
				+	cudaStream_t stream = starpu_cuda_get_local_stream();
				+	cudaError_t status;
				+
				+	/* Nothing to convert; a grid of zero blocks is an invalid launch. */
				+	if (n == 0)
				+		return;
				+
				+	unsigned threads_per_block = 64;
				+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
				+
				+	/* The kernel uses no dynamic shared memory, so request none. */
				+	custom_cuda<<<nblocks,threads_per_block,0,stream>>>(aop, n, x, y);
				+
				+	/* Launch-configuration errors are only visible through cudaGetLastError. */
				+	status = cudaGetLastError();
				+	if (status != cudaSuccess)
				+		STARPU_CUDA_REPORT_ERROR(status);
				+
				+	/* Execution errors surface at the synchronization point. */
				+	status = cudaStreamSynchronize(stream);
				+	if (status != cudaSuccess)
				+		STARPU_CUDA_REPORT_ERROR(status);
				+}
			
--- a/examples/filters/custom_mf/cuda.cu
+++ b/examples/filters/custom_mf/cuda.cu
@@ -0,0 +1,45 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include "custom_types.h"
			
 
				+#include "custom_interface.h"
			
 
				+
			
 
				+/*
				+ * Element-wise product kernel: x[i] *= y[i] for every i below `n`.
				+ * Expects a 1D launch; each thread handles at most one element and threads
				+ * whose global index is not below `n` do nothing. Uses no shared memory.
				+ */
				+static __global__ void scal_cuda(unsigned n,
				+				 float *x,
				+				 float *y)
				+{
				+	const unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
				+
				+	/* threads past the end of the data have nothing to scale */
				+	if (idx >= n)
				+		return;
				+
				+	x[idx] *= y[idx];
				+}
			
 
				+
			
 
				+/*
				+ * CUDA implementation of the scaling codelet.
				+ *
				+ * buffers[0] is a struct custom_data_interface. The float arrays found
				+ * through CUSTOM_GET_X_PTR / CUSTOM_GET_Y_PTR are processed by scal_cuda
				+ * (x[i] *= y[i]) on the worker's local stream with 64 threads per block.
				+ * The function waits for the stream before returning.
				+ *
				+ * _args is unused.
				+ */
				+extern "C" void custom_scal_cuda_func(void *buffers[], void *_args)
				+{
				+	(void) _args;
				+
				+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
				+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
				+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
				+
				+	cudaStream_t stream = starpu_cuda_get_local_stream();
				+	cudaError_t status;
				+
				+	/* Nothing to scale; a grid of zero blocks is an invalid launch. */
				+	if (n == 0)
				+		return;
				+
				+	unsigned threads_per_block = 64;
				+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
				+
				+	/* The kernel uses no dynamic shared memory, so request none. */
				+	scal_cuda<<<nblocks,threads_per_block,0,stream>>>(n, x, y);
				+
				+	/* Launch-configuration errors are only visible through cudaGetLastError. */
				+	status = cudaGetLastError();
				+	if (status != cudaSuccess)
				+		STARPU_CUDA_REPORT_ERROR(status);
				+
				+	/* Execution errors surface at the synchronization point. */
				+	status = cudaStreamSynchronize(stream);
				+	if (status != cudaSuccess)
				+		STARPU_CUDA_REPORT_ERROR(status);
				+}
			
--- a/examples/filters/custom_mf/custom_conversion_codelets.c
+++ b/examples/filters/custom_mf/custom_conversion_codelets.c
@@ -0,0 +1,57 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include "custom_interface.h"
			
 
				+#include "custom_types.h"
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/*
				+ * CPU implementation of the cuda_to_cpu conversion codelet.
				+ *
				+ * buffers[0] is a struct custom_data_interface. The two float arrays found
				+ * through CUSTOM_GET_X_PTR / CUSTOM_GET_Y_PTR are gathered back into the
				+ * array of struct point found through CUSTOM_GET_CPU_PTR.
				+ *
				+ * arg is unused.
				+ */
				+void cuda_to_cpu(void *buffers[], void *arg)
				+{
				+	(void) arg;
				+
				+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
				+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
				+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
				+	struct point *aop;
				+	aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);
				+
				+	/* unsigned index: n is unsigned, avoid a signed/unsigned comparison */
				+	unsigned int i;
				+	for (i = 0; i < n; i++)
				+	{
				+		aop[i].x = x[i];
				+		aop[i].y = y[i];
				+	}
				+}
			
 
				+
			
 
				+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
			
 
				+/*
				+ * Conversion codelet (CPU format -> CUDA format). It runs on CUDA workers
				+ * only and accesses its single buffer in read-write mode.
				+ */
				+struct starpu_codelet cpu_to_cuda_cl =
				+{
				+	.name = "codelet_cpu_to_cuda",
				+	.where = STARPU_CUDA,
				+	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
				+	.nbuffers = 1,
				+	.modes = { STARPU_RW }
				+};
			
 
				+
			
 
				+/*
				+ * Conversion codelet (CUDA format -> CPU format). It runs on CPU workers
				+ * only and accesses its single buffer in read-write mode.
				+ */
				+struct starpu_codelet cuda_to_cpu_cl =
				+{
				+	.name = "codelet_cuda_to_cpu",
				+	.where = STARPU_CPU,
				+	.cpu_funcs = {cuda_to_cpu, NULL},
				+	.nbuffers = 1,
				+	.modes = { STARPU_RW }
				+};
			
 
				+#endif
			
--- a/examples/filters/custom_mf/custom_interface.c
+++ b/examples/filters/custom_mf/custom_interface.c
@@ -0,0 +1,414 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_hash.h>
			
 
				+#include "custom_interface.h"
			
 
				+#include "custom_types.h"
			
 
				+
			
 
				+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
			
 
				+			   void *dst_interface, unsigned dst_node);
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node,
			
 
				+			    void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node,
			
 
				+			    void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
			
 
				+				  void *dst_interface, unsigned dst_node,
			
 
				+				  cudaStream_t stream);
			
 
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
			
 
				+				  void *dst_interface, unsigned dst_node,
			
 
				+				  cudaStream_t stream);
			
 
				+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
			
 
				+			     void *dst_interface, unsigned dst_node);
			
 
				+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
			
 
				+				   void *dst_interface, unsigned dst_node,
			
 
				+				   cudaStream_t stream);
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Transfer methods of the custom interface. Only the RAM and CUDA
				+ * directions have a function attached; every SPU and OpenCL entry is NULL.
				+ */
				+static const struct starpu_data_copy_methods custom_copy_data_methods_s =
				+{
				+	/* host <-> host */
				+	.ram_to_ram = copy_ram_to_ram,
				+
				+#ifdef STARPU_USE_CUDA
				+	/* host <-> CUDA device, and device <-> device */
				+	.ram_to_cuda        = copy_ram_to_cuda,
				+	.ram_to_cuda_async  = copy_ram_to_cuda_async,
				+	.cuda_to_ram        = copy_cuda_to_ram,
				+	.cuda_to_ram_async  = copy_cuda_to_ram_async,
				+	.cuda_to_cuda       = copy_cuda_to_cuda,
				+	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
				+#endif
				+
				+#ifdef STARPU_USE_OPENCL
				+	/* no OpenCL transfers */
				+	.ram_to_opencl       = NULL,
				+	.ram_to_opencl_async = NULL,
				+	.opencl_to_ram       = NULL,
				+	.opencl_to_ram_async = NULL,
				+	.opencl_to_opencl    = NULL,
				+#endif
				+
				+	/* no SPU transfers */
				+	.ram_to_spu  = NULL,
				+	.cuda_to_spu = NULL,
				+	.spu_to_ram  = NULL,
				+	.spu_to_cuda = NULL,
				+	.spu_to_spu  = NULL
				+};
			
 
				+
			
 
				+static void     register_custom_handle(starpu_data_handle_t handle,
			
 
				+				       uint32_t home_node,
			
 
				+				       void *data_interface);
			
 
				+static ssize_t  allocate_custom_buffer_on_node(void *data_interface_,
			
 
				+					       uint32_t dst_node);
			
 
				+static void*    custom_handle_to_pointer(starpu_data_handle_t data_handle,
			
 
				+					 uint32_t node);
			
 
				+static void     free_custom_buffer_on_node(void *data_interface, uint32_t node);
			
 
				+static size_t   custom_interface_get_size(starpu_data_handle_t handle);
			
 
				+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle);
			
 
				+static int      custom_compare(void *data_interface_a, void *data_interface_b);
			
 
				+static void     display_custom_interface(starpu_data_handle_t handle, FILE *f);
			
 
				+static uint32_t custom_get_nx(starpu_data_handle_t handle);
			
 
				+
			
 
				+
			
 
				+/*
				+ * Return the multiformat operations stored in a custom data interface.
				+ *
				+ * data_interface points to a struct custom_data_interface; its `ops`
				+ * member is returned unchanged.
				+ */
				+static struct starpu_multiformat_data_interface_ops*
				+get_mf_ops(void *data_interface)
				+{
				+	struct custom_data_interface *custom;
				+	/* cast to the type actually stored here, not to an unrelated struct */
				+	custom = (struct custom_data_interface *) data_interface;
				+
				+	return custom->ops;
				+}
			
 
				+
			
 
				+/*
				+ * Operation table handed to starpu_data_register() for the custom
				+ * interface. The interface is flagged as multiformat and exposes its
				+ * format operations through get_mf_ops.
				+ */
				+static struct starpu_data_interface_ops interface_custom_ops =
				+{
				+	/* identity */
				+	.interfaceid           = STARPU_NINTERFACES_ID+1, //XXX
				+	.interface_size        = sizeof(struct custom_data_interface),
				+	.is_multiformat        = 1,
				+	.get_mf_ops            = get_mf_ops,
				+
				+	/* registration, allocation and transfers */
				+	.register_data_handle  = register_custom_handle,
				+	.allocate_data_on_node = allocate_custom_buffer_on_node,
				+	.free_data_on_node     = free_custom_buffer_on_node,
				+	.handle_to_pointer     = custom_handle_to_pointer,
				+	.copy_methods          = &custom_copy_data_methods_s,
				+#ifdef STARPU_USE_GORDON
				+	.convert_to_gordon     = NULL,
				+#endif
				+
				+	/* queries */
				+	.get_size              = custom_interface_get_size,
				+	.footprint             = footprint_custom_interface_crc32,
				+	.compare               = custom_compare,
				+	.display               = display_custom_interface
				+};
			
 
				+
			
 
				+/*
				+ * Initialize the per-node interfaces of a freshly registered handle.
				+ *
				+ * For every memory node, the local interface receives the element count
				+ * and the format operations of `data_interface`. The buffer pointers are
				+ * copied only on `home_node`; on every other node they are set to NULL.
				+ */
				+static void
				+register_custom_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
				+{
				+	struct custom_data_interface *registered =
				+		(struct custom_data_interface *) data_interface;
				+	const unsigned node_count = starpu_memory_nodes_get_count();
				+	unsigned n;
				+
				+	for (n = 0; n < node_count; n++)
				+	{
				+		struct custom_data_interface *replica =
				+			(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, n);
				+		const int is_home = (n == home_node);
				+
				+		/* only the home node owns valid buffers at registration time */
				+		replica->cpu_ptr    = is_home ? registered->cpu_ptr : NULL;
				+#ifdef STARPU_USE_CUDA
				+		replica->cuda_ptr   = is_home ? registered->cuda_ptr : NULL;
				+#endif
				+		replica->nx = registered->nx;
				+		replica->ops = registered->ops;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Allocate the buffers of one interface on memory node `node`.
				+ *
				+ * On a CPU node, cpu_ptr (and cuda_ptr when CUDA is enabled) are allocated
				+ * with malloc; on a CUDA node both are allocated with cudaMalloc. Every
				+ * buffer is nx * ops->cpu_elemsize bytes.
				+ *
				+ * Returns the size of one buffer in bytes, or -ENOMEM if an allocation
				+ * failed. On failure no buffer stays allocated and both pointers touched
				+ * by the failing branch are reset to NULL.
				+ */
				+static ssize_t allocate_custom_buffer_on_node(void *data_interface, uint32_t node)
				+{
				+	ssize_t size = 0;
				+	struct custom_data_interface *custom_interface;
				+	custom_interface = (struct custom_data_interface *) data_interface;
				+
				+	switch(starpu_node_get_kind(node))
				+	{
				+	case STARPU_CPU_RAM:
				+		size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
				+		custom_interface->cpu_ptr = (void*) malloc(size);
				+		if (!custom_interface->cpu_ptr)
				+			return -ENOMEM;
				+#ifdef STARPU_USE_CUDA
				+		custom_interface->cuda_ptr = (void *) malloc(size);
				+		if (!custom_interface->cuda_ptr)
				+		{
				+			free(custom_interface->cpu_ptr);
				+			custom_interface->cpu_ptr = NULL;
				+			return -ENOMEM;
				+		}
				+#endif
				+		break;
				+#ifdef STARPU_USE_CUDA
				+	case STARPU_CUDA_RAM:
				+	{
				+		cudaError_t err;
				+		size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
				+		err = cudaMalloc(&custom_interface->cuda_ptr, size);
				+		if (err != cudaSuccess)
				+		{
				+			custom_interface->cuda_ptr = NULL;
				+			return -ENOMEM;
				+		}
				+
				+		err = cudaMalloc(&custom_interface->cpu_ptr, size);
				+		if (err != cudaSuccess)
				+		{
				+			cudaFree(custom_interface->cuda_ptr);
				+			/* do not leave a dangling or indeterminate pointer behind */
				+			custom_interface->cuda_ptr = NULL;
				+			custom_interface->cpu_ptr = NULL;
				+			return -ENOMEM;
				+		}
				+		break;
				+	}
				+#endif
				+	default:
				+		assert(0);
				+	}
				+
				+	/* XXX We may want to return cpu_size + cuda_size + ... */
				+	return size;
				+}
			
 
				+
			
 
				+/*
				+ * Release the buffers of one interface on memory node `node`.
				+ *
				+ * On a CPU node the buffers are released with free, on a CUDA node with
				+ * cudaFree (a failure is reported on stderr). Every released pointer is
				+ * reset to NULL so that later NULL checks on the interface stay reliable.
				+ */
				+static void free_custom_buffer_on_node(void *data_interface, uint32_t node)
				+{
				+	struct custom_data_interface *custom_interface;
				+	custom_interface = (struct custom_data_interface *) data_interface;
				+
				+	switch(starpu_node_get_kind(node))
				+	{
				+	case STARPU_CPU_RAM:
				+		if (custom_interface->cpu_ptr != NULL)
				+		{
				+			free(custom_interface->cpu_ptr);
				+			custom_interface->cpu_ptr = NULL;
				+		}
				+#ifdef STARPU_USE_CUDA
				+		if (custom_interface->cuda_ptr != NULL)
				+		{
				+			free(custom_interface->cuda_ptr);
				+			custom_interface->cuda_ptr = NULL;
				+		}
				+#endif /* !STARPU_USE_CUDA */
				+		break;
				+#ifdef STARPU_USE_CUDA
				+	case STARPU_CUDA_RAM:
				+		if (custom_interface->cpu_ptr != NULL)
				+		{
				+			cudaError_t err;
				+			err = cudaFree(custom_interface->cpu_ptr);
				+			if (err != cudaSuccess)
				+				fprintf(stderr, "cudaFree failed...\n");
				+			custom_interface->cpu_ptr = NULL;
				+		}
				+		if (custom_interface->cuda_ptr != NULL)
				+		{
				+			cudaError_t err;
				+			err = cudaFree(custom_interface->cuda_ptr);
				+			if (err != cudaSuccess)
				+				fprintf(stderr, "cudaFree failed...\n");
				+			custom_interface->cuda_ptr = NULL;
				+		}
				+		break;
				+#endif /* !STARPU_USE_CUDA */
				+	default:
				+		assert(0);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Return the buffer of `handle` on memory node `node`: cpu_ptr for a CPU
				+ * node, cuda_ptr for a CUDA node. Any other node kind trips an assertion;
				+ * when assertions are compiled out, NULL is returned instead.
				+ */
				+static void*
				+custom_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
				+{
				+	struct custom_data_interface *data_interface =
				+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
				+
				+	switch(starpu_node_get_kind(node))
				+	{
				+		case STARPU_CPU_RAM:
				+			return data_interface->cpu_ptr;
				+#ifdef STARPU_USE_CUDA
				+		case STARPU_CUDA_RAM:
				+			return data_interface->cuda_ptr;
				+#endif
				+		default:
				+			assert(0);
				+	}
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return NULL;
				+}
			
 
				+
			
 
				+/*
				+ * Size in bytes of the data behind `handle`, computed from the interface
				+ * on node 0 as nx * ops->cpu_elemsize.
				+ */
				+static size_t custom_interface_get_size(starpu_data_handle_t handle)
				+{
				+	struct custom_data_interface *iface =
				+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, 0);
				+
				+	return iface->nx * iface->ops->cpu_elemsize;
				+}
			
 
				+
			
 
				+/*
				+ * Footprint of `handle`: the CRC32 of its element count, seeded with 0.
				+ */
				+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle)
				+{
				+	uint32_t nx = custom_get_nx(handle);
				+
				+	return starpu_crc32_be(nx, 0);
				+}
			
 
				+
			
 
				+/*
				+ * Comparison of two custom interfaces. Not implemented: calling it trips
				+ * an assertion. When assertions are compiled out, 0 is returned so that
				+ * the function never falls off its end without a value.
				+ */
				+static int custom_compare(void *data_interface_a, void *data_interface_b)
				+{
				+	(void) data_interface_a;
				+	(void) data_interface_b;
				+
				+	/* TODO */
				+	assert(0);
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Display hook of the custom interface. Not implemented: calling it trips
				+ * an assertion.
				+ */
				+static void display_custom_interface(starpu_data_handle_t handle, FILE *f)
				+{
				+	(void) handle;
				+	(void) f;
				+
				+	/* TODO */
				+	assert(0);
				+}
			
 
				+
			
 
				+/*
				+ * Number of elements of `handle`, read from the interface on node 0.
				+ */
				+static uint32_t
				+custom_get_nx(starpu_data_handle_t handle)
				+{
				+	struct custom_data_interface *iface =
				+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, 0);
				+
				+	return iface->nx;
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * Register `nx` elements located at `ptr` with StarPU as a custom
				+ * multiformat piece of data.
				+ *
				+ * handle     : receives the new data handle
				+ * home_node  : memory node where `ptr` is valid
				+ * ptr        : stored as the CPU-format buffer (cpu_ptr)
				+ * nx         : number of elements
				+ * format_ops : multiformat operations attached to the data
				+ */
				+void custom_data_register(starpu_data_handle_t *handle,
				+				 uint32_t home_node,
				+				 void *ptr,
				+				 uint32_t nx,
				+				 struct starpu_multiformat_data_interface_ops *format_ops)
				+{
				+	/* XXX Deprecated fields ? */
				+	struct custom_data_interface initial_interface =
				+	{
				+		.nx  = nx,
				+		.ops = format_ops,
				+#ifdef STARPU_USE_CUDA
				+		/* no CUDA-format buffer exists yet */
				+		.cuda_ptr = NULL,
				+#endif
				+		.cpu_ptr = ptr
				+	};
				+
				+	starpu_data_register(handle, home_node, &initial_interface, &interface_custom_ops);
				+}
			
 
				+
			
 
				+/*
				+ * RAM -> RAM transfer. Not implemented: calling it trips an assertion.
				+ * When assertions are compiled out, -1 is returned so that the function
				+ * never falls off its end without a value.
				+ */
				+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
				+			   void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	/* TODO */
				+	assert(0);
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return -1;
				+}
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/*
				+ * Synchronous RAM -> CUDA transfer. Not implemented: calling it trips an
				+ * assertion. When assertions are compiled out, -1 is returned so that the
				+ * function never falls off its end without a value.
				+ */
				+static int copy_ram_to_cuda(void *src_interface, unsigned src_node,
				+			    void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	/* TODO */
				+	assert(0);
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return -1;
				+}
			
 
				+/*
				+ * Synchronous CUDA -> RAM transfer. Not implemented: calling it trips an
				+ * assertion. When assertions are compiled out, -1 is returned so that the
				+ * function never falls off its end without a value.
				+ */
				+static int copy_cuda_to_ram(void *src_interface, unsigned src_node,
				+			    void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	/* TODO */
				+	assert(0);
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return -1;
				+}
			
 
				+
			
 
				+/*
				+ * Common body of the asynchronous RAM <-> CUDA transfers.
				+ *
				+ * cudaMemcpyHostToDevice: copies the CPU-format buffer (cpu_ptr,
				+ *   nx * ops->cpu_elemsize bytes) to the destination's cpu_ptr, allocating
				+ *   it with cudaMalloc if it is NULL, and makes sure the destination also
				+ *   owns a cuda_ptr buffer of the same size.
				+ * cudaMemcpyDeviceToHost: copies the CUDA-format buffer (cuda_ptr,
				+ *   2 * nx floats) to the destination's cuda_ptr, allocating it with
				+ *   malloc if it is NULL.
				+ *
				+ * The copies are enqueued on `stream` with cudaMemcpyAsync; this function
				+ * does not wait for them. Any other `kind` trips an assertion.
				+ *
				+ * Returns 0, or -ENOMEM if the host allocation failed. CUDA failures trip
				+ * an assertion.
				+ */
				+static int
				+copy_cuda_common_async(void *src_interface, unsigned src_node,
				+		       void *dst_interface, unsigned dst_node,
				+		       cudaStream_t stream, enum cudaMemcpyKind kind)
				+{
				+	struct custom_data_interface *src_custom, *dst_custom;
				+
				+	(void) src_node;
				+	(void) dst_node;
				+
				+	src_custom = (struct custom_data_interface *) src_interface;
				+	dst_custom = (struct custom_data_interface *) dst_interface;
				+
				+	ssize_t size = 0;
				+	cudaError_t err;
				+
				+	switch (kind)
				+	{
				+	case cudaMemcpyHostToDevice:
				+	{
				+		size = src_custom->nx * src_custom->ops->cpu_elemsize;
				+		if (dst_custom->cpu_ptr == NULL)
				+		{
				+			err = cudaMalloc(&dst_custom->cpu_ptr, size);
				+			assert(err == cudaSuccess);
				+		}
				+
				+		err = cudaMemcpyAsync(dst_custom->cpu_ptr,
				+				      src_custom->cpu_ptr,
				+				      size, kind, stream);
				+		assert(err == cudaSuccess);
				+
				+		/*
				+		 * Allocate the CUDA-format buffer only if the destination does
				+		 * not own one yet: allocating unconditionally leaks the buffer
				+		 * set up by allocate_custom_buffer_on_node().
				+		 */
				+		if (dst_custom->cuda_ptr == NULL)
				+		{
				+			err = cudaMalloc(&dst_custom->cuda_ptr, size);
				+			assert(err == cudaSuccess);
				+		}
				+		break;
				+	}
				+	case cudaMemcpyDeviceToHost:
				+		/* CUDA-format data: nx floats for x followed by nx floats for y */
				+		size = 2*src_custom->nx*sizeof(float);
				+		if (dst_custom->cuda_ptr == NULL)
				+		{
				+			dst_custom->cuda_ptr = malloc(size);
				+			if (dst_custom->cuda_ptr == NULL)
				+				return -ENOMEM;
				+		}
				+		err = cudaMemcpyAsync(dst_custom->cuda_ptr,
				+				      src_custom->cuda_ptr,
				+				      size, kind, stream);
				+		assert(err == cudaSuccess);
				+		break;
				+	default:
				+		assert(0);
				+	}
				+
				+	/* err is only read by assert(); keep NDEBUG builds warning-free */
				+	(void) err;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Asynchronous RAM -> CUDA transfer: forwards to copy_cuda_common_async
				+ * with cudaMemcpyHostToDevice and returns its result.
				+ */
				+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
				+				  void *dst_interface, unsigned dst_node,
				+				  cudaStream_t stream)
				+{
				+	int ret;
				+
				+	ret = copy_cuda_common_async(src_interface, src_node,
				+				     dst_interface, dst_node,
				+				     stream, cudaMemcpyHostToDevice);
				+	return ret;
				+}
			
 
				+/*
				+ * Asynchronous CUDA -> RAM transfer: forwards to copy_cuda_common_async
				+ * with cudaMemcpyDeviceToHost and returns its result.
				+ */
				+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
				+				  void *dst_interface, unsigned dst_node,
				+				  cudaStream_t stream)
				+{
				+	int ret;
				+
				+	ret = copy_cuda_common_async(src_interface, src_node,
				+				     dst_interface, dst_node,
				+				     stream, cudaMemcpyDeviceToHost);
				+	return ret;
				+}
			
 
				+/*
				+ * Synchronous CUDA -> CUDA transfer. Not implemented: calling it trips an
				+ * assertion. When assertions are compiled out, -1 is returned so that the
				+ * function never falls off its end without a value.
				+ */
				+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
				+			     void *dst_interface, unsigned dst_node)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+
				+	assert(0);
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return -1;
				+}
			
 
				+/*
				+ * Asynchronous CUDA -> CUDA transfer. Not implemented: calling it trips an
				+ * assertion. When assertions are compiled out, -1 is returned so that the
				+ * function never falls off its end without a value.
				+ */
				+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
				+				   void *dst_interface, unsigned dst_node,
				+				   cudaStream_t stream)
				+{
				+	(void) src_interface;
				+	(void) src_node;
				+	(void) dst_interface;
				+	(void) dst_node;
				+	(void) stream;
				+
				+	assert(0);
				+
				+	/* only reached when assert() is disabled (NDEBUG) */
				+	return -1;
				+}
			
 
				+#endif /* !STARPU_USE_CUDA */
			
--- a/examples/filters/custom_mf/custom_interface.h
+++ b/examples/filters/custom_mf/custom_interface.h
@@ -0,0 +1,43 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#ifndef __CUSTOM_INTERFACE_H__
			
 
				+#define __CUSTOM_INTERFACE_H__
			
 
				+#include <starpu.h>
			
 
				+/* Per-node description of a piece of data registered with custom_data_register(). */
			
 
				+struct custom_data_interface
			
 
				+{
			
 
				+	/* CPU-format buffer; the codelets read it as an array of struct point */
			
 
				+	void *cpu_ptr;
			
 
				+	/* CUDA-format buffer; see CUSTOM_GET_X_PTR and CUSTOM_GET_Y_PTR */
			
 
				+	void *cuda_ptr;
			
 
				+	/* multiformat operations (element sizes, conversion codelets) */
			
 
				+	struct starpu_multiformat_data_interface_ops *ops;
			
 
				+	/* number of elements */
			
 
				+	uint32_t nx;
			
 
				+};
			
 
				+
			
 
				+void custom_data_register(starpu_data_handle_t *handle,
			
 
				+				 uint32_t home_node,
			
 
				+				 void *ptr,
			
 
				+				 uint32_t nx,
			
 
				+				 struct starpu_multiformat_data_interface_ops* ops);
			
 
				+
			
 
				+#define CUSTOM_GET_NX(interface) (((struct custom_data_interface*)(interface))->nx)
			
 
				+#define CUSTOM_GET_CPU_PTR(interface) (((struct custom_data_interface*)(interface))->cpu_ptr)
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
				+/*
				+ * Accessors for the CUDA-format buffer. The x values start at cuda_ptr and
				+ * the y values start CUSTOM_GET_NX(interface) floats after it.
				+ *
				+ * CUSTOM_GET_X_PTR yields the raw void pointer. CUSTOM_GET_Y_PTR is fully
				+ * parenthesized and does its arithmetic on a float pointer, so it expands
				+ * safely inside any expression and does not rely on void-pointer
				+ * arithmetic. Callers that prefix it with a (float*) cast get the same
				+ * address as before.
				+ */
				+#define CUSTOM_GET_X_PTR(interface) (((struct custom_data_interface*)(interface))->cuda_ptr)
				+#define CUSTOM_GET_Y_PTR(interface) \
				+	((float *) (((struct custom_data_interface*)(interface))->cuda_ptr) + \
				+	 CUSTOM_GET_NX((interface)))
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+#endif /* ! __CUSTOM_INTERFACE_H__ */
			
--- a/examples/filters/custom_mf/custom_mf_filter.c
+++ b/examples/filters/custom_mf/custom_mf_filter.c
@@ -0,0 +1,241 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <starpu.h>
			
 
				+#include "custom_interface.h"
			
 
				+#include "custom_types.h"
			
 
				+
			
 
				+#define N 20
			
 
				+
			
 
				+#define DEBUG 0
			
 
				+
			
 
				+static struct point array_of_structs[N];
			
 
				+static starpu_data_handle_t handle;
			
 
				+static unsigned int nchunks = 4;
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern struct starpu_codelet cpu_to_cuda_cl;
			
 
				+extern struct starpu_codelet cuda_to_cpu_cl;
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Format description of the example data: one struct point per element
				+ * on the CPU side and, when CUDA is enabled, the CUDA element size
				+ * together with the two conversion codelets.
				+ */
				+static struct starpu_multiformat_data_interface_ops format_ops =
				+{
				+	.cpu_elemsize = sizeof(struct point),
				+#ifdef STARPU_USE_CUDA
				+	.cuda_elemsize = sizeof(struct struct_of_arrays),
				+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
				+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
				+#endif
				+};
			
 
				+
			
 
				+
			
 
				+/*
				+ * Filter function: describes child number `id` out of `nchunks` equal
				+ * chunks of the father data.
				+ *
				+ * father / child : struct custom_data_interface of the parent and of the
				+ *                  child being filled in
				+ * f              : unused
				+ * id             : index of the child
				+ * nchunks        : number of children; must divide N
				+ *
				+ * The child receives the father's format operations, N/nchunks elements,
				+ * and a pointer offset by id * (N/nchunks) elements into the father's data.
				+ */
				+static void
				+custom_filter(void *father, void *child, struct starpu_data_filter *f,
				+		unsigned id, unsigned nchunks)
				+{
				+	struct custom_data_interface *custom_father, *custom_child;
				+	custom_father = (struct custom_data_interface *) father;
				+	custom_child = (struct custom_data_interface *) child;
				+
				+	(void) f;
				+
				+	assert(N % nchunks == 0); // XXX 
				+	ssize_t chunk_size = N/nchunks;
				+
				+	if (custom_father->cpu_ptr)
				+	{
				+		struct point *tmp = (struct point *) custom_father->cpu_ptr;
				+		tmp += id * chunk_size;
				+		custom_child->cpu_ptr = tmp;
				+	}
				+#ifdef STARPU_USE_CUDA
				+	else if (custom_father->cuda_ptr)
				+	{
				+		/*
				+		 * NOTE(review): this branch treats cuda_ptr as a struct_of_arrays
				+		 * and writes through the child's cuda_ptr, while the CUDA codelets
				+		 * access cuda_ptr as a flat float array -- confirm which layout
				+		 * is intended and that the child's cuda_ptr is valid here.
				+		 */
				+		struct struct_of_arrays *soa_father, *soa_child;
				+		soa_father = (struct struct_of_arrays*) custom_father->cuda_ptr;
				+		soa_child = (struct struct_of_arrays*) custom_child->cuda_ptr;
				+		/* each child starts id chunks into the father's arrays */
				+		soa_child->x = soa_father->x + id * chunk_size;
				+		soa_child->y = soa_father->y + id * chunk_size;
				+	}
				+#endif
				+
				+	custom_child->ops = custom_father->ops;
				+	custom_child->nx = chunk_size;
				+}
			
 
				+
			
 
				+/*
				+ * Fill array_of_structs (x = index + 1, y = 42), register it on node 0
				+ * through custom_data_register() and partition it into `nchunks` children
				+ * with custom_filter.
				+ */
				+static void
				+register_and_partition_data(void)
				+{
				+	int k = 0;
				+	while (k < N)
				+	{
				+		struct point *pt = &array_of_structs[k];
				+		pt->x = k+1.0;
				+		pt->y = 42.0;
				+		k++;
				+	}
				+	custom_data_register(&handle, 0, &array_of_structs, N, &format_ops);
				+
				+	struct starpu_data_filter chunk_filter =
				+	{
				+		.nchildren     = nchunks,
				+		.filter_func   = custom_filter,
				+		.get_child_ops = NULL,
				+		.get_nchildren = NULL
				+	};
				+	starpu_data_partition(handle, &chunk_filter);
				+}
			
 
				+
			
 
				+/*
				+ * Undo register_and_partition_data(): unpartition the handle, gathering
				+ * on node 0, then unregister it.
				+ */
				+static void
				+unpartition_and_unregister_data(void)
				+{
				+	const unsigned gathering_node = 0;
				+
				+	starpu_data_unpartition(handle, gathering_node);
				+	starpu_data_unregister(handle);
				+}
			
 
				+
			
 
				+/*
				+ * CPU implementation of the scaling codelet: multiplies the x member of
				+ * every point of buffers[0] by its y member. `args` is not read.
				+ */
				+static void
				+custom_scal_cpu_func(void *buffers[], void *args)
				+{
				+	struct point *cur = CUSTOM_GET_CPU_PTR(buffers[0]);
				+	unsigned int count = CUSTOM_GET_NX(buffers[0]);
				+	struct point *end = cur + count;
				+
				+	for (; cur != end; cur++)
				+		cur->x *= cur->y;
				+}
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+extern void custom_scal_cuda_func(void *buffers[], void *args);
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Scaling codelet restricted to CPU workers; one read-write buffer.
				+ */
				+static struct starpu_codelet cpu_cl =
				+{
				+	.name = "codelet_real",
				+	.where = STARPU_CPU,
				+	.cpu_funcs = { custom_scal_cpu_func, NULL},
				+	.modes = { STARPU_RW },
				+	.nbuffers = 1
				+};
			
 
				+
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+/*
				+ * Scaling codelet restricted to CUDA workers; one read-write buffer.
				+ */
				+static struct starpu_codelet cuda_cl =
				+{
				+	.name = "cuda_codelet",
				+	.where = STARPU_CUDA,
				+	.cuda_funcs = { custom_scal_cuda_func, NULL },
				+	.modes = { STARPU_RW },
				+	.nbuffers = 1
				+};
			
 
				+#endif /* !STARPU_USE_CUDA */
			
 
				+
			
 
				+/*
				+ * Submit one scaling task per chunk and wait for all of them.
				+ *
				+ * Even chunks use the CPU codelet. Odd chunks use the CUDA codelet when
				+ * CUDA is enabled and the CPU codelet otherwise.
				+ *
				+ * Returns 0 on success, or the first non-zero value returned by
				+ * starpu_task_submit() or starpu_task_wait_for_all().
				+ */
				+static int
				+create_and_submit_tasks(void)
				+{
				+	unsigned int chunk;
				+
				+	for (chunk = 0; chunk < nchunks; chunk++)
				+	{
				+		struct starpu_task *task = starpu_task_create();
				+		struct starpu_codelet *chosen = &cpu_cl;
				+		int submit_err;
				+
				+#ifdef STARPU_USE_CUDA
				+		if (chunk %2 != 0)
				+			chosen = &cuda_cl;
				+#endif /* !STARPU_USE_CUDA */
				+		task->cl = chosen;
				+
				+		task->handles[0] = starpu_data_get_sub_data(handle, 1, chunk);
				+		submit_err = starpu_task_submit(task);
				+		if (submit_err != 0)
				+			return submit_err;
				+	}
				+
				+	/* a non-zero result is reported as is; zero means success */
				+	return starpu_task_wait_for_all();
				+}
			
 
				+
			
 
				+#if DEBUG
			
 
				+/*
				+ * Debug helper: print every point of array_of_structs on stderr as
				+ * "(x, y) ", followed by a newline.
				+ */
				+static void
				+print_it(void)
				+{
				+	const struct point *pt = array_of_structs;
				+	const struct point *end = array_of_structs + N;
				+
				+	for (; pt != end; pt++)
				+	{
				+		FPRINTF(stderr, "(%.2f, %.2f) ",
				+			pt->x,
				+			pt->y);
				+	}
				+	FPRINTF(stderr, "\n");
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*
				+ * Verify the result: element i must hold x == (i + 1) * y.
				+ * Returns EXIT_SUCCESS if every element matches, EXIT_FAILURE otherwise.
				+ */
				+static int
				+check_it(void)
				+{
				+	int idx = 0;
				+
				+	while (idx < N)
				+	{
				+		const struct point *pt = &array_of_structs[idx];
				+		float want = idx + 1.0;
				+		want *= pt->y;
				+		if (pt->x != want)
				+			return EXIT_FAILURE;
				+		idx++;
				+	}
				+
				+	return EXIT_SUCCESS;
				+}
			
 
				+
			
 
				+/*
				+ * Entry point of the example: initialize StarPU, register and partition
				+ * the data, run the scaling tasks, unpartition and unregister, shut
				+ * StarPU down and check the result.
				+ *
				+ * Returns 77 when built without CPU support or when starpu_init() reports
				+ * -ENODEV, EXIT_FAILURE when task submission fails, and the result of
				+ * check_it() otherwise.
				+ */
				+int
				+main(void)
				+{
				+#ifndef STARPU_USE_CPU
				+	return 77;
				+#else
				+	int status = starpu_init(NULL);
				+
				+	/* no device to run on */
				+	if (status == -ENODEV)
				+		return 77;
				+
				+	register_and_partition_data();
				+#if DEBUG
				+	print_it();
				+#endif
				+	status = create_and_submit_tasks();
				+	if (status != 0)
				+	{
				+		FPRINTF(stderr, "create_submit_task : %s\n",
				+			strerror(-status));
				+		return EXIT_FAILURE;
				+	}
				+	unpartition_and_unregister_data();
				+#if DEBUG
				+	print_it();
				+#endif
				+	starpu_shutdown();
				+	return check_it();
				+#endif
				+}
			
--- a/examples/filters/custom_mf/custom_types.h
+++ b/examples/filters/custom_mf/custom_types.h
@@ -0,0 +1,36 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#ifndef __CUSTOM_TYPES_H__
			
 
				+#define __CUSTOM_TYPES_H__
			
 
				+
			
 
				+
			
 
				+/* A pair of float arrays: one holding the x values, one holding the y values. */
			
 
				+struct struct_of_arrays
			
 
				+{
			
 
				+	float *x, *y;
			
 
				+};
			
 
				+
			
 
				+/* One element of the example data: a pair of floats. */
			
 
				+struct point
			
 
				+{
			
 
				+	float x, y;
			
 
				+};
			
 
				+
			
 
				+/*
				+ * fprintf wrapper that prints nothing when the STARPU_SSILENT environment
				+ * variable is set.
				+ */
				+#define FPRINTF(ofile, fmt, ...) \
				+do {                                  \
				+	if (getenv("STARPU_SSILENT") == NULL)        \
				+		fprintf(ofile, fmt, ## __VA_ARGS__);  \
				+} while(0)
			
 
				+
			
 
				+#endif