Przeglądaj źródła

Add a custom multiformat example that does not use the multiformat interface.

It shows how to partition multiformat interfaces that use pointers. Only supports CUDA for the moment.
Cyril Roelandt 13 lat temu
rodzic
commit
d8e86d1388

+ 19 - 4
examples/Makefile.am

@@ -125,7 +125,9 @@ noinst_HEADERS = 				\
 	spmv/spmv.h				\
 	spmv/dw_block_spmv.h                    \
 	basic_examples/multiformat_types.h      \
-	filters/multiformat/multiformat_types.h
+	filters/multiformat/multiformat_types.h \
+	filters/custom_mf/custom_interface.h    \
+	filters/custom_mf/custom_types.h
 
 #####################################
 # What to install and what to check #
@@ -156,6 +158,7 @@ examplebin_PROGRAMS +=				\
 	basic_examples/block			\
 	basic_examples/variable			\
 	basic_examples/multiformat              \
+	filters/custom_mf/custom_mf_filter      \
 	filters/fvector				\
 	filters/fblock				\
 	filters/fmatrix				\
@@ -391,9 +394,21 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 	filters/fblock_opencl_kernel.cl
 endif
 
-#
-#
-#
+
+#############################
+# Custom multiformat filter #
+#############################
+filters_custom_mf_custom_mf_filter_SOURCES=\
+	filters/custom_mf/custom_mf_filter.c \
+	filters/custom_mf/custom_interface.c   \
+	filters/custom_mf/custom_conversion_codelets.c
+
+if STARPU_USE_CUDA
+filters_custom_mf_custom_mf_filter_SOURCES+=\
+	filters/custom_mf/conversion.cu \
+	filters/custom_mf/cuda.cu
+endif
+
 filters_multiformat_multiformat_filter_SOURCES=                \
 	filters/multiformat/multiformat_filter.c               \
 	filters/multiformat/multiformat_ops.c                  \

+ 51 - 0
examples/filters/custom_mf/conversion.cu

@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "custom_types.h"
+#include "custom_interface.h"
+
+/*
+ * Conversion kernel: each thread handles one element, copying the x and y
+ * fields of aop[i] into the separate arrays x[i] and y[i]. Threads whose
+ * global index is not below n do nothing.
+ */
+static __global__ void custom_cuda(struct point *aop,
+				unsigned n,
+				float *x,
+				float *y)
+{
+	const unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+	/* Guard the tail of the grid: the launch rounds the block count up. */
+	if (idx >= n)
+		return;
+
+	x[idx] = aop[idx].x;
+	y[idx] = aop[idx].y;
+}
+
+/*
+ * CUDA implementation of the CPU-format -> CUDA-format conversion codelet.
+ *
+ * buffers[0] is a struct custom_data_interface. The array of struct point
+ * reached through CUSTOM_GET_CPU_PTR is split into the x array and the y
+ * array reached through CUSTOM_GET_X_PTR / CUSTOM_GET_Y_PTR. The work is
+ * queued on StarPU's local CUDA stream and the function only returns once
+ * that stream has drained. _args is unused.
+ */
+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
+	struct point *aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);
+
+	/* Nothing to convert, and a grid of zero blocks is not a valid launch. */
+	if (n == 0)
+		return;
+
+	cudaStream_t stream = starpu_cuda_get_local_stream();
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+
+	/* The kernel declares no dynamic shared memory, so none is requested. */
+	custom_cuda<<<nblocks,threads_per_block,0,stream>>>(aop, n, x, y);
+
+	/* Launch-configuration errors are only visible through cudaGetLastError. */
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+
+	/* Execution errors surface at the next synchronizing call. */
+	status = cudaStreamSynchronize(stream);
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+}

+ 45 - 0
examples/filters/custom_mf/cuda.cu

@@ -0,0 +1,45 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "custom_types.h"
+#include "custom_interface.h"
+
+/*
+ * Element-wise scaling kernel: each thread multiplies x[i] by y[i] in place.
+ * Threads whose global index is not below n do nothing.
+ */
+static __global__ void scal_cuda(unsigned n,
+				 float *x,
+				 float *y)
+{
+	const unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+	/* Guard the tail of the grid: the launch rounds the block count up. */
+	if (idx >= n)
+		return;
+
+	x[idx] *= y[idx];
+}
+
+/*
+ * CUDA implementation of the scaling codelet.
+ *
+ * buffers[0] is a struct custom_data_interface; the x array reached through
+ * CUSTOM_GET_X_PTR is multiplied element-wise by the y array reached through
+ * CUSTOM_GET_Y_PTR. The work is queued on StarPU's local CUDA stream and the
+ * function only returns once that stream has drained. _args is unused.
+ */
+extern "C" void custom_scal_cuda_func(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
+
+	/* Nothing to scale, and a grid of zero blocks is not a valid launch. */
+	if (n == 0)
+		return;
+
+	cudaStream_t stream = starpu_cuda_get_local_stream();
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+
+	/* The kernel declares no dynamic shared memory, so none is requested. */
+	scal_cuda<<<nblocks,threads_per_block,0,stream>>>(n, x, y);
+
+	/* Launch-configuration errors are only visible through cudaGetLastError. */
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+
+	/* Execution errors surface at the next synchronizing call. */
+	status = cudaStreamSynchronize(stream);
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+}

+ 57 - 0
examples/filters/custom_mf/custom_conversion_codelets.c

@@ -0,0 +1,57 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+#ifdef STARPU_USE_CUDA
+/*
+ * CPU implementation of the CUDA-format -> CPU-format conversion codelet.
+ *
+ * buffers[0] is a struct custom_data_interface. The x and y arrays reached
+ * through CUSTOM_GET_X_PTR / CUSTOM_GET_Y_PTR are interleaved back into the
+ * array of struct point reached through CUSTOM_GET_CPU_PTR. arg is unused.
+ */
+void cuda_to_cpu(void *buffers[], void *arg)
+{
+	(void) arg;
+
+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
+	struct point *aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);
+
+	/* Unsigned index: n is unsigned, so a signed counter would be
+	 * compared after an implicit conversion. */
+	unsigned int i;
+	for (i = 0; i < n; i++)
+	{
+		aop[i].x = x[i];
+		aop[i].y = y[i];
+	}
+}
+
+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
+struct starpu_codelet cpu_to_cuda_cl = /* Conversion codelet wrapping cpu_to_cuda_cuda_func */
+{
+	.where = STARPU_CUDA, /* only a CUDA implementation is provided */
+	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
+	.modes = { STARPU_RW }, /* the single buffer is accessed read-write */
+	.nbuffers = 1,
+	.name = "codelet_cpu_to_cuda"
+};
+
+struct starpu_codelet cuda_to_cpu_cl = /* Conversion codelet wrapping cuda_to_cpu */
+{
+	.where = STARPU_CPU, /* only a CPU implementation is provided */
+	.cpu_funcs = {cuda_to_cpu, NULL},
+	.modes = { STARPU_RW }, /* the single buffer is accessed read-write */
+	.nbuffers = 1,
+	.name = "codelet_cuda_to_cpu"
+};
+#endif

+ 414 - 0
examples/filters/custom_mf/custom_interface.c

@@ -0,0 +1,414 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_hash.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node);
+#ifdef STARPU_USE_CUDA
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node);
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream);
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
+				   void *dst_interface, unsigned dst_node,
+				   cudaStream_t stream);
+#endif
+
+static const struct starpu_data_copy_methods custom_copy_data_methods_s = /* Transfer functions of the custom interface */
+{
+	.ram_to_ram = copy_ram_to_ram, /* stub: not implemented, asserts */
+	.ram_to_spu = NULL,
+#ifdef STARPU_USE_CUDA
+	.ram_to_cuda        = copy_ram_to_cuda, /* stub: not implemented, asserts */
+	.cuda_to_ram        = copy_cuda_to_ram, /* stub: not implemented, asserts */
+	.ram_to_cuda_async  = copy_ram_to_cuda_async, /* goes through copy_cuda_common_async */
+	.cuda_to_ram_async  = copy_cuda_to_ram_async, /* goes through copy_cuda_common_async */
+	.cuda_to_cuda       = copy_cuda_to_cuda, /* stub: not implemented, asserts */
+	.cuda_to_cuda_async = copy_cuda_to_cuda_async, /* stub: not implemented, asserts */
+#endif
+#ifdef STARPU_USE_OPENCL
+	.ram_to_opencl       = NULL, /* no OpenCL transfers are provided */
+	.opencl_to_ram       = NULL,
+	.opencl_to_opencl    = NULL,
+        .ram_to_opencl_async = NULL,
+	.opencl_to_ram_async = NULL,
+#endif
+	.cuda_to_spu = NULL,
+	.spu_to_ram  = NULL,
+	.spu_to_cuda = NULL,
+	.spu_to_spu  = NULL
+};
+
+static void     register_custom_handle(starpu_data_handle_t handle,
+				       uint32_t home_node,
+				       void *data_interface);
+static ssize_t  allocate_custom_buffer_on_node(void *data_interface_,
+					       uint32_t dst_node);
+static void*    custom_handle_to_pointer(starpu_data_handle_t data_handle,
+					 uint32_t node);
+static void     free_custom_buffer_on_node(void *data_interface, uint32_t node);
+static size_t   custom_interface_get_size(starpu_data_handle_t handle);
+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle);
+static int      custom_compare(void *data_interface_a, void *data_interface_b);
+static void     display_custom_interface(starpu_data_handle_t handle, FILE *f);
+static uint32_t custom_get_nx(starpu_data_handle_t handle);
+
+
+/*
+ * Return the multiformat operations stored in a custom data interface.
+ *
+ * data_interface points to a struct custom_data_interface; its `ops` field
+ * is returned unchanged.
+ */
+static struct starpu_multiformat_data_interface_ops*
+get_mf_ops(void *data_interface)
+{
+	struct custom_data_interface *custom;
+
+	/* Cast to the type actually stored behind the pointer; the previous
+	 * cast named a different struct than the variable it was assigned to. */
+	custom = (struct custom_data_interface *) data_interface;
+
+	return custom->ops;
+}
+
+static struct starpu_data_interface_ops interface_custom_ops = /* Operation table handed to starpu_data_register() by custom_data_register() */
+{
+	.register_data_handle  = register_custom_handle,
+	.allocate_data_on_node = allocate_custom_buffer_on_node,
+	.handle_to_pointer     = custom_handle_to_pointer,
+	.free_data_on_node     = free_custom_buffer_on_node,
+	.copy_methods          = &custom_copy_data_methods_s,
+	.get_size              = custom_interface_get_size,
+	.footprint             = footprint_custom_interface_crc32,
+	.compare               = custom_compare, /* stub: not implemented, asserts */
+#ifdef STARPU_USE_GORDON
+	.convert_to_gordon     = NULL,
+#endif
+	.interfaceid           = STARPU_NINTERFACES_ID+1, //XXX
+	.interface_size        = sizeof(struct custom_data_interface),
+	.display               = display_custom_interface, /* stub: not implemented, asserts */
+	.is_multiformat        = 1,
+	.get_mf_ops            = get_mf_ops /* returns the ops field of the interface */
+};
+
+/*
+ * Fill in the per-node interfaces of a freshly registered handle.
+ *
+ * The buffer pointers given in data_interface are copied to the interface
+ * of home_node only; every other node starts with NULL pointers. The
+ * element count and the multiformat ops are copied to every node.
+ */
+static void
+register_custom_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
+{
+	struct custom_data_interface *src =
+		(struct custom_data_interface *) data_interface;
+	unsigned nnodes = starpu_memory_nodes_get_count();
+	unsigned node;
+
+	for (node = 0; node < nnodes; node++)
+	{
+		struct custom_data_interface *dst =
+			(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
+		int is_home = (node == home_node);
+
+		dst->cpu_ptr = is_home ? src->cpu_ptr : NULL;
+#ifdef STARPU_USE_CUDA
+		dst->cuda_ptr = is_home ? src->cuda_ptr : NULL;
+#endif
+		dst->nx = src->nx;
+		dst->ops = src->ops;
+	}
+}
+
+/*
+ * Allocate the buffers of a custom interface on the given memory node.
+ *
+ * On a CPU RAM node both buffers come from malloc(); on a CUDA node both
+ * come from cudaMalloc(). Each buffer is nx * ops->cpu_elemsize bytes.
+ *
+ * Returns the size of one buffer on success, or -ENOMEM if an allocation
+ * fails. On failure nothing is left allocated and both pointers are NULL.
+ */
+static ssize_t allocate_custom_buffer_on_node(void *data_interface, uint32_t node)
+{
+	ssize_t size = 0;
+	struct custom_data_interface *custom_interface;
+	custom_interface = (struct custom_data_interface *) data_interface;
+
+	switch(starpu_node_get_kind(node))
+	{
+	case STARPU_CPU_RAM:
+		size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
+		custom_interface->cpu_ptr = (void*) malloc(size);
+		if (!custom_interface->cpu_ptr)
+			return -ENOMEM;
+#ifdef STARPU_USE_CUDA
+		custom_interface->cuda_ptr = (void *) malloc(size);
+		if (!custom_interface->cuda_ptr)
+		{
+			free(custom_interface->cpu_ptr);
+			custom_interface->cpu_ptr = NULL;
+			return -ENOMEM;
+		}
+#endif
+		break;
+#ifdef STARPU_USE_CUDA /* #ifdef, like every other CUDA guard in this file */
+	case STARPU_CUDA_RAM:
+	{
+		cudaError_t err;
+		size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
+		err = cudaMalloc(&custom_interface->cuda_ptr, size);
+		if (err != cudaSuccess)
+		{
+			/* Do not leave an indeterminate pointer behind. */
+			custom_interface->cuda_ptr = NULL;
+			return -ENOMEM;
+		}
+
+		err = cudaMalloc(&custom_interface->cpu_ptr, size);
+		if (err != cudaSuccess)
+		{
+			cudaFree(custom_interface->cuda_ptr);
+			/* Reset both so a later free cannot touch stale memory. */
+			custom_interface->cuda_ptr = NULL;
+			custom_interface->cpu_ptr = NULL;
+			return -ENOMEM;
+		}
+		break;
+	}
+#endif
+	default:
+		assert(0);
+	}
+
+	/* XXX We may want to return cpu_size + cuda_size + ... */
+	return size;
+}
+
+/*
+ * Release the buffers of a custom interface on the given memory node.
+ *
+ * Buffers on a CPU RAM node are released with free(), buffers on a CUDA
+ * node with cudaFree(). Every pointer is reset to NULL afterwards so that
+ * the interface never keeps a dangling pointer. A cudaFree() failure is
+ * reported on stderr and otherwise ignored.
+ */
+static void free_custom_buffer_on_node(void *data_interface, uint32_t node)
+{
+	struct custom_data_interface *custom_interface;
+	custom_interface = (struct custom_data_interface *) data_interface;
+
+	switch(starpu_node_get_kind(node))
+	{
+	case STARPU_CPU_RAM:
+		if (custom_interface->cpu_ptr != NULL)
+		{
+			free(custom_interface->cpu_ptr);
+			custom_interface->cpu_ptr = NULL;
+		}
+#ifdef STARPU_USE_CUDA
+		if (custom_interface->cuda_ptr != NULL)
+		{
+			free(custom_interface->cuda_ptr);
+			custom_interface->cuda_ptr = NULL;
+		}
+#endif /* !STARPU_USE_CUDA */
+		break;
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+		if (custom_interface->cpu_ptr != NULL)
+		{
+			cudaError_t err;
+			err = cudaFree(custom_interface->cpu_ptr);
+			if (err != cudaSuccess)
+				fprintf(stderr, "cudaFree failed...\n");
+			/* Same as the CPU branch: forget the released buffer. */
+			custom_interface->cpu_ptr = NULL;
+		}
+		if (custom_interface->cuda_ptr != NULL)
+		{
+			cudaError_t err;
+			err = cudaFree(custom_interface->cuda_ptr);
+			if (err != cudaSuccess)
+				fprintf(stderr, "cudaFree failed...\n");
+			custom_interface->cuda_ptr = NULL;
+		}
+		break;
+#endif /* !STARPU_USE_CUDA */
+	default:
+		assert(0);
+	}
+}
+
+/*
+ * Return the buffer that belongs to the handle on the given memory node:
+ * cpu_ptr for a CPU RAM node, cuda_ptr for a CUDA node. Any other node
+ * kind trips an assertion; if assertions are compiled out, NULL is
+ * returned instead.
+ */
+static void*
+custom_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
+{
+	struct custom_data_interface *data_interface =
+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
+
+
+	switch(starpu_node_get_kind(node))
+	{
+		case STARPU_CPU_RAM:
+			return data_interface->cpu_ptr;
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			return data_interface->cuda_ptr;
+#endif
+		default:
+			assert(0);
+	}
+
+	/* Only reached when assert() is disabled: give callers a defined
+	 * value rather than falling off the end of a non-void function. */
+	return NULL;
+}
+
+/*
+ * Size in bytes of the data behind a handle: the element count times the
+ * CPU element size, both read from the interface of memory node 0.
+ */
+static size_t custom_interface_get_size(starpu_data_handle_t handle)
+{
+	struct custom_data_interface *node0 =
+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, 0);
+
+	return node0->nx * node0->ops->cpu_elemsize;
+}
+
+/*
+ * Footprint of a handle: starpu_crc32_be() applied to its element count
+ * with an initial value of 0.
+ */
+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle)
+{
+	uint32_t nx = custom_get_nx(handle);
+
+	return starpu_crc32_be(nx, 0);
+}
+
+/*
+ * Comparison of two custom interfaces. Not implemented yet: calling it
+ * trips an assertion. If assertions are compiled out, 0 is returned so
+ * that the function still has a defined result.
+ */
+static int custom_compare(void *data_interface_a, void *data_interface_b)
+{
+	(void) data_interface_a;
+	(void) data_interface_b;
+
+	/* TODO */
+	assert(0);
+	return 0;
+}
+
+/*
+ * Display hook of the custom interface. Not implemented yet: calling it
+ * trips an assertion and nothing is written to f.
+ */
+static void display_custom_interface(starpu_data_handle_t handle, FILE *f)
+{
+	(void) handle;
+	(void) f;
+
+	/* TODO */
+	assert(0);
+}
+
+/*
+ * Number of elements of a handle, read from the interface of memory
+ * node 0.
+ */
+static uint32_t
+custom_get_nx(starpu_data_handle_t handle)
+{
+	struct custom_data_interface *node0 =
+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, 0);
+
+	return node0->nx;
+}
+
+
+/*
+ * Register a buffer with StarPU through the custom interface.
+ *
+ * handle     - receives the new data handle
+ * home_node  - memory node on which ptr lives
+ * ptr        - buffer stored as the CPU-format pointer of the interface
+ * nx         - number of elements
+ * format_ops - multiformat operations attached to the data
+ *
+ * The CUDA-format pointer starts out as NULL.
+ */
+void custom_data_register(starpu_data_handle_t *handle,
+				 uint32_t home_node,
+				 void *ptr,
+				 uint32_t nx,
+				 struct starpu_multiformat_data_interface_ops *format_ops)
+{
+	/* XXX Deprecated fields ? */
+	struct custom_data_interface custom = { NULL };
+
+	custom.cpu_ptr = ptr;
+#ifdef STARPU_USE_CUDA
+	custom.cuda_ptr = NULL;
+#endif
+	custom.nx = nx;
+	custom.ops = format_ops;
+
+	starpu_data_register(handle, home_node, &custom, &interface_custom_ops);
+}
+
+/*
+ * RAM -> RAM transfer. Not implemented yet: calling it trips an assertion.
+ * If assertions are compiled out, -ENOSYS is returned so that the function
+ * still has a defined result.
+ */
+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	/* TODO */
+	assert(0);
+	return -ENOSYS;
+}
+#ifdef STARPU_USE_CUDA
+/*
+ * Synchronous RAM -> CUDA transfer. Not implemented yet: calling it trips
+ * an assertion. If assertions are compiled out, -ENOSYS is returned so
+ * that the function still has a defined result.
+ */
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	/* TODO */
+	assert(0);
+	return -ENOSYS;
+}
+/*
+ * Synchronous CUDA -> RAM transfer. Not implemented yet: calling it trips
+ * an assertion. If assertions are compiled out, -ENOSYS is returned so
+ * that the function still has a defined result.
+ */
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	/* TODO */
+	assert(0);
+	return -ENOSYS;
+}
+
+/*
+ * Common implementation of the RAM <-> CUDA transfers.
+ *
+ * Host to device: the CPU-format buffer (cpu_ptr) of the source is copied
+ * into the destination's cpu_ptr, and the destination's cuda_ptr is made
+ * sure to exist. Both destination buffers are obtained with cudaMalloc()
+ * when they are still NULL.
+ *
+ * Device to host: the CUDA-format buffer (cuda_ptr, 2 * nx floats) of the
+ * source is copied into the destination's cuda_ptr, which is obtained with
+ * malloc() when it is still NULL.
+ *
+ * The copy is queued on `stream` and the stream is synchronized before
+ * returning, so a return value of 0 really means the data has arrived.
+ * Returns -ENOMEM if the host buffer cannot be allocated. CUDA failures
+ * trip an assertion.
+ */
+static int
+copy_cuda_common_async(void *src_interface, unsigned src_node,
+		       void *dst_interface, unsigned dst_node,
+		       cudaStream_t stream, enum cudaMemcpyKind kind)
+{
+	struct custom_data_interface *src_custom, *dst_custom;
+
+	src_custom = (struct custom_data_interface *) src_interface;
+	dst_custom = (struct custom_data_interface *) dst_interface;
+
+	(void) src_node;
+	(void) dst_node;
+
+	ssize_t size = 0;
+	cudaError_t err;
+
+	switch (kind)
+	{
+	case cudaMemcpyHostToDevice:
+	{
+		size = src_custom->nx * src_custom->ops->cpu_elemsize;
+		if (dst_custom->cpu_ptr == NULL)
+		{
+			err = cudaMalloc(&dst_custom->cpu_ptr, size);
+			assert(err == cudaSuccess);
+		}
+
+		err = cudaMemcpyAsync(dst_custom->cpu_ptr,
+				      src_custom->cpu_ptr,
+				      size, kind, stream);
+		assert(err == cudaSuccess);
+
+		/* Allocate the CUDA-format buffer only if it is missing: an
+		 * unconditional cudaMalloc() would overwrite, and thereby
+		 * leak, a buffer that was already allocated on this node. */
+		if (dst_custom->cuda_ptr == NULL)
+		{
+			err = cudaMalloc(&dst_custom->cuda_ptr, size);
+			assert(err == cudaSuccess);
+		}
+		break;
+	}
+	case cudaMemcpyDeviceToHost:
+		size = 2*src_custom->nx*sizeof(float);
+		if (dst_custom->cuda_ptr == NULL)
+		{
+			dst_custom->cuda_ptr = malloc(size);
+			if (dst_custom->cuda_ptr == NULL)
+				return -ENOMEM;
+		}
+		err = cudaMemcpyAsync(dst_custom->cuda_ptr,
+				      src_custom->cuda_ptr,
+				      size, kind, stream);
+		assert(err == cudaSuccess);
+		break;
+	default:
+		assert(0);
+	}
+
+	/* Returning 0 reports the transfer as finished, so wait for the
+	 * queued copy instead of returning while it may still be in flight. */
+	err = cudaStreamSynchronize(stream);
+	assert(err == cudaSuccess);
+	(void) err; /* silences "set but unused" when assert() is disabled */
+
+	return 0;
+}
+
+/*
+ * RAM -> CUDA transfer on the given stream: forwards to
+ * copy_cuda_common_async() with the host-to-device direction and returns
+ * its result.
+ */
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream)
+{
+	const enum cudaMemcpyKind direction = cudaMemcpyHostToDevice;
+	int status;
+
+	status = copy_cuda_common_async(src_interface, src_node,
+					dst_interface, dst_node,
+					stream, direction);
+	return status;
+}
+/*
+ * CUDA -> RAM transfer on the given stream: forwards to
+ * copy_cuda_common_async() with the device-to-host direction and returns
+ * its result.
+ */
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream)
+{
+	const enum cudaMemcpyKind direction = cudaMemcpyDeviceToHost;
+	int status;
+
+	status = copy_cuda_common_async(src_interface, src_node,
+					dst_interface, dst_node,
+					stream, direction);
+	return status;
+}
+/*
+ * Synchronous CUDA -> CUDA transfer. Not implemented: calling it trips an
+ * assertion. If assertions are compiled out, -ENOSYS is returned so that
+ * the function still has a defined result.
+ */
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	assert(0);
+	return -ENOSYS;
+}
+/*
+ * CUDA -> CUDA transfer on a stream. Not implemented: calling it trips an
+ * assertion. If assertions are compiled out, -ENOSYS is returned so that
+ * the function still has a defined result.
+ */
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
+				   void *dst_interface, unsigned dst_node,
+				   cudaStream_t stream)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+	(void) stream;
+
+	assert(0);
+	return -ENOSYS;
+}
+#endif /* !STARPU_USE_CUDA */

+ 43 - 0
examples/filters/custom_mf/custom_interface.h

@@ -0,0 +1,43 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef __CUSTOM_INTERFACE_H__
+#define __CUSTOM_INTERFACE_H__
+#include <starpu.h>
+struct custom_data_interface /* Per-node description of one piece of data handled by the custom interface */
+{
+	void *cpu_ptr; /* buffer in CPU format; the example code reads it as an array of struct point */
+	void *cuda_ptr; /* buffer in CUDA format; CUSTOM_GET_X_PTR / CUSTOM_GET_Y_PTR read it as x values followed by y values */
+	struct starpu_multiformat_data_interface_ops *ops; /* multiformat operations attached at registration */
+	uint32_t nx; /* number of elements */
+};
+
+void custom_data_register(starpu_data_handle_t *handle, /* receives the new data handle */
+				 uint32_t home_node, /* memory node on which ptr lives */
+				 void *ptr, /* buffer stored as the CPU-format pointer */
+				 uint32_t nx, /* number of elements */
+				 struct starpu_multiformat_data_interface_ops* ops); /* multiformat operations attached to the data */
+
+/* Number of elements described by a struct custom_data_interface. */
+#define CUSTOM_GET_NX(interface) (((struct custom_data_interface*)(interface))->nx)
+/* CPU-format buffer of a struct custom_data_interface (void *). */
+#define CUSTOM_GET_CPU_PTR(interface) (((struct custom_data_interface*)(interface))->cpu_ptr)
+
+#ifdef STARPU_USE_CUDA
+/* Start of the x values inside the CUDA-format buffer (void *). */
+#define CUSTOM_GET_X_PTR(interface) (((struct custom_data_interface*)(interface))->cuda_ptr)
+/*
+ * Start of the y values inside the CUDA-format buffer (void *): nx floats
+ * past the start of the x values. The expression is fully parenthesized
+ * and does its arithmetic on a float pointer, so the result no longer
+ * depends on a cast written in front of the macro by the caller, nor on
+ * arithmetic on a void pointer.
+ */
+#define CUSTOM_GET_Y_PTR(interface) \
+	((void *) ((float *) CUSTOM_GET_X_PTR(interface) + \
+		   CUSTOM_GET_NX(interface)))
+#endif /* !STARPU_USE_CUDA */
+
+#endif /* ! __CUSTOM_INTERFACE_H__ */

+ 241 - 0
examples/filters/custom_mf/custom_mf_filter.c

@@ -0,0 +1,241 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+#define N 20
+
+#define DEBUG 0
+
+static struct point array_of_structs[N];
+static starpu_data_handle_t handle;
+static unsigned int nchunks = 4;
+
+#ifdef STARPU_USE_CUDA
+extern struct starpu_codelet cpu_to_cuda_cl;
+extern struct starpu_codelet cuda_to_cpu_cl;
+#endif
+
+static struct starpu_multiformat_data_interface_ops format_ops = /* Multiformat operations passed to custom_data_register() */
+{
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize = sizeof(struct struct_of_arrays), /* NOTE(review): this is the size of two pointers; confirm it is the intended per-element size */
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl, /* conversion codelets defined in custom_conversion_codelets.c */
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+#endif
+	.cpu_elemsize = sizeof(struct point), /* one (x, y) pair per element */
+};
+
+
+/*
+ * Filter function: fill in the interface of child number `id` when the
+ * father is cut into `nchunks` equal parts of N / nchunks elements.
+ *
+ * If the father has a CPU-format buffer, the child's cpu_ptr points at its
+ * own slice of that buffer. Otherwise, if the father has a CUDA-format
+ * buffer, the child's x and y pointers are offset to its own slice. The
+ * child inherits the father's ops and gets nx = N / nchunks. `f` is unused.
+ */
+static void
+custom_filter(void *father, void *child, struct starpu_data_filter *f,
+		unsigned id, unsigned nchunks)
+{
+	struct custom_data_interface *custom_father, *custom_child;
+	custom_father = (struct custom_data_interface *) father;
+	custom_child = (struct custom_data_interface *) child;
+
+	(void) f;
+
+	assert(N % nchunks == 0); // XXX
+	ssize_t chunk_size = N/nchunks;
+
+	if (custom_father->cpu_ptr)
+	{
+		struct point *tmp = (struct point *) custom_father->cpu_ptr;
+		tmp += id * chunk_size;
+		custom_child->cpu_ptr = tmp;
+	}
+#ifdef STARPU_USE_CUDA
+	else if (custom_father->cuda_ptr)
+	{
+		/* NOTE(review): this branch dereferences both cuda_ptr values
+		 * as struct struct_of_arrays, whereas the CUSTOM_GET_X_PTR /
+		 * CUSTOM_GET_Y_PTR macros treat cuda_ptr as a flat array of
+		 * floats - confirm which layout is intended. */
+		struct struct_of_arrays *soa_father, *soa_child;
+		soa_father = (struct struct_of_arrays*) custom_father->cuda_ptr;
+		soa_child = (struct struct_of_arrays*) custom_child->cuda_ptr;
+		/* Offset by the child's index, as the CPU branch does;
+		 * a bare chunk_size gave every child the same slice. */
+		soa_child->x = soa_father->x + id * chunk_size;
+		soa_child->y = soa_father->y + id * chunk_size;
+	}
+#endif
+
+	custom_child->ops = custom_father->ops;
+	custom_child->nx = chunk_size;
+}
+
+/*
+ * Initialise array_of_structs (x = index + 1, y = 42), register it with
+ * StarPU on memory node 0 through the custom interface, and partition the
+ * resulting handle into nchunks children with custom_filter.
+ */
+static void
+register_and_partition_data(void)
+{
+	int idx = 0;
+
+	while (idx < N)
+	{
+		array_of_structs[idx].x = idx+1.0;
+		array_of_structs[idx].y = 42.0;
+		idx++;
+	}
+
+	custom_data_register(&handle, 0, &array_of_structs, N, &format_ops);
+
+	/* Fixed number of children; no callbacks to compute them. */
+	struct starpu_data_filter filter =
+	{
+		.filter_func   = custom_filter,
+		.nchildren     = nchunks,
+		.get_nchildren = NULL,
+		.get_child_ops = NULL
+	};
+
+	starpu_data_partition(handle, &filter);
+}
+
+/*
+ * Undo register_and_partition_data(): unpartition the handle, passing
+ * memory node 0 to starpu_data_unpartition(), then unregister it.
+ */
+static void
+unpartition_and_unregister_data(void)
+{
+	const unsigned gather_node = 0;
+
+	starpu_data_unpartition(handle, gather_node);
+	starpu_data_unregister(handle);
+}
+
+/*
+ * CPU implementation of the scaling codelet: for every element of the
+ * CPU-format buffer of buffers[0], x is multiplied by y in place.
+ * args is not used.
+ */
+static void
+custom_scal_cpu_func(void *buffers[], void *args)
+{
+	struct point *pts = CUSTOM_GET_CPU_PTR(buffers[0]);
+	unsigned int count = CUSTOM_GET_NX(buffers[0]);
+	unsigned int k = 0;
+
+	while (k < count)
+	{
+		pts[k].x *= pts[k].y;
+		k++;
+	}
+}
+
+#ifdef STARPU_USE_CUDA
+extern void custom_scal_cuda_func(void *buffers[], void *args);
+#endif
+
+static struct starpu_codelet cpu_cl = /* Scaling codelet, CPU version */
+{
+	.where = STARPU_CPU, /* only a CPU implementation is provided */
+	.cpu_funcs = { custom_scal_cpu_func, NULL},
+	.nbuffers = 1,
+	.modes = { STARPU_RW }, /* the single buffer is accessed read-write */
+	.name = "codelet_real"
+};
+
+#ifdef STARPU_USE_CUDA
+static struct starpu_codelet cuda_cl = /* Scaling codelet, CUDA version */
+{
+	.where = STARPU_CUDA, /* only a CUDA implementation is provided */
+	.cuda_funcs = { custom_scal_cuda_func, NULL },
+	.nbuffers = 1,
+	.modes = { STARPU_RW }, /* the single buffer is accessed read-write */
+	.name = "cuda_codelet"
+};
+#endif /* !STARPU_USE_CUDA */
+
+/*
+ * Submit one scaling task per chunk, then wait for all of them.
+ *
+ * Even-numbered chunks always use the CPU codelet. Odd-numbered chunks use
+ * the CUDA codelet when STARPU_USE_CUDA is defined and the CPU codelet
+ * otherwise.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * starpu_task_submit() or starpu_task_wait_for_all().
+ */
+static int
+create_and_submit_tasks(void)
+{
+	unsigned int chunk;
+	int ret;
+
+	for (chunk = 0; chunk < nchunks; chunk++)
+	{
+		struct starpu_task *task = starpu_task_create();
+
+		/* CPU by default; odd chunks go to CUDA when available. */
+		task->cl = &cpu_cl;
+#ifdef STARPU_USE_CUDA
+		if (chunk % 2 != 0)
+			task->cl = &cuda_cl;
+#endif /* !STARPU_USE_CUDA */
+
+		task->handles[0] = starpu_data_get_sub_data(handle, 1, chunk);
+
+		ret = starpu_task_submit(task);
+		if (ret != 0)
+			return ret;
+	}
+
+	return starpu_task_wait_for_all();
+}
+
+#if DEBUG
+/*
+ * Debug helper: print every (x, y) pair of array_of_structs on stderr,
+ * followed by a newline, through the FPRINTF macro.
+ */
+static void
+print_it(void)
+{
+	const struct point *p = array_of_structs;
+	const struct point *end = array_of_structs + N;
+
+	for (; p != end; p++)
+		FPRINTF(stderr, "(%.2f, %.2f) ", p->x, p->y);
+
+	FPRINTF(stderr, "\n");
+}
+#endif
+
+/*
+ * Verify the result: element i must hold x == (i + 1) * y.
+ * Returns EXIT_SUCCESS if every element matches, EXIT_FAILURE at the
+ * first one that does not.
+ */
+static int
+check_it(void)
+{
+	int idx = 0;
+
+	while (idx < N)
+	{
+		const struct point *p = &array_of_structs[idx];
+		float expected_value = idx + 1.0;
+
+		expected_value *= p->y;
+		if (p->x != expected_value)
+			return EXIT_FAILURE;
+
+		idx++;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Entry point of the example.
+ *
+ * Returns 77 when StarPU was built without CPU support or when
+ * starpu_init() reports -ENODEV, EXIT_FAILURE when initialisation or task
+ * submission fails, and the result of check_it() otherwise.
+ */
+int
+main(void)
+{
+#ifndef STARPU_USE_CPU
+	return 77;
+#else
+	int err;
+
+	err = starpu_init(NULL);
+	if (err == -ENODEV)
+		goto enodev;
+	if (err != 0)
+	{
+		/* Any other failure: do not go on with an uninitialised
+		 * runtime. */
+		FPRINTF(stderr, "starpu_init : %s\n",
+			strerror(-err));
+		return EXIT_FAILURE;
+	}
+
+
+	register_and_partition_data();
+#if DEBUG
+	print_it();
+#endif
+	err = create_and_submit_tasks();
+	if (err != 0)
+	{
+		FPRINTF(stderr, "create_submit_task : %s\n",
+			strerror(-err));
+		/* Release the data and the runtime on the error path too. */
+		unpartition_and_unregister_data();
+		starpu_shutdown();
+		return EXIT_FAILURE;
+	}
+	unpartition_and_unregister_data();
+#if DEBUG
+	print_it();
+#endif
+	starpu_shutdown();
+	return check_it();
+
+
+enodev:
+	return 77;
+#endif
+}

+ 36 - 0
examples/filters/custom_mf/custom_types.h

@@ -0,0 +1,36 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef __CUSTOM_TYPES_H__
+#define __CUSTOM_TYPES_H__
+
+
+struct struct_of_arrays /* Two separate float arrays, one for the x values and one for the y values */
+{
+	float *x, *y;
+};
+
+struct point /* One (x, y) pair; the example's CPU-format data is an array of these */
+{
+	float x, y;
+};
+
+#define FPRINTF(ofile, fmt, args ...) \
+do {                                  \
+if (!getenv("STARPU_SSILENT"))        \
+	fprintf(ofile, fmt, ##args);  \
+} while(0) /* fprintf() that prints nothing when the STARPU_SSILENT environment variable is set */
+
+#endif