Просмотр исходного кода

Add a custom multiformat example that does not use the multiformat interface.

It shows how to partition multiformat interfaces that use pointers. Only supports CUDA for the moment.
Cyril Roelandt лет назад: 13
Родитель
Сommit
d8e86d1388

+ 19 - 4
examples/Makefile.am

@@ -125,7 +125,9 @@ noinst_HEADERS = 				\
 	spmv/spmv.h				\
 	spmv/dw_block_spmv.h                    \
 	basic_examples/multiformat_types.h      \
-	filters/multiformat/multiformat_types.h
+	filters/multiformat/multiformat_types.h \
+	filters/custom_mf/custom_interface.h    \
+	filters/custom_mf/custom_types.h
 
 #####################################
 # What to install and what to check #
@@ -156,6 +158,7 @@ examplebin_PROGRAMS +=				\
 	basic_examples/block			\
 	basic_examples/variable			\
 	basic_examples/multiformat              \
+	filters/custom_mf/custom_mf_filter      \
 	filters/fvector				\
 	filters/fblock				\
 	filters/fmatrix				\
@@ -391,9 +394,21 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 	filters/fblock_opencl_kernel.cl
 endif
 
-#
-#
-#
+
+#############################
+# Custom multiformat filter #
+#############################
+filters_custom_mf_custom_mf_filter_SOURCES=\
+	filters/custom_mf/custom_mf_filter.c \
+	filters/custom_mf/custom_interface.c   \
+	filters/custom_mf/custom_conversion_codelets.c
+
+if STARPU_USE_CUDA
+filters_custom_mf_custom_mf_filter_SOURCES+=\
+	filters/custom_mf/conversion.cu \
+	filters/custom_mf/cuda.cu
+endif
+
 filters_multiformat_multiformat_filter_SOURCES=                \
 	filters/multiformat/multiformat_filter.c               \
 	filters/multiformat/multiformat_ops.c                  \

+ 51 - 0
examples/filters/custom_mf/conversion.cu

@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "custom_types.h"
+#include "custom_interface.h"
+
+/*
+ * Device kernel: unpack an array of points into two separate float arrays.
+ * Each thread converts at most one element; threads whose index is not
+ * below n do nothing.
+ */
+static __global__ void custom_cuda(struct point *aop,
+				unsigned n,
+				float *x,
+				float *y)
+{
+	unsigned idx = threadIdx.x + blockDim.x*blockIdx.x;
+
+	/* Surplus threads of the last block have no element to convert. */
+	if (idx >= n)
+		return;
+
+	x[idx] = aop[idx].x;
+	y[idx] = aop[idx].y;
+}
+
+/*
+ * CUDA implementation of the CPU-format -> CUDA-format conversion codelet.
+ *
+ * buffers[0] is a struct custom_data_interface. The points read through
+ * CUSTOM_GET_CPU_PTR() are unpacked into the x and y arrays returned by
+ * CUSTOM_GET_X_PTR() / CUSTOM_GET_Y_PTR(). All three pointers are handed to
+ * the kernel, so they are presumably device addresses -- TODO confirm
+ * against the copy methods of the interface.
+ *
+ * The call blocks until the kernel has finished on the local StarPU stream.
+ */
+extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
+	struct point *aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);
+
+	/* A grid of zero blocks is an invalid launch configuration. */
+	if (n == 0)
+		return;
+
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+	cudaStream_t stream = starpu_cuda_get_local_stream();
+
+	/* The kernel uses no dynamic shared memory, so none is requested. */
+	custom_cuda<<<nblocks,threads_per_block,0,stream>>>(aop, n, x, y);
+
+	/* Launch errors are only visible through cudaGetLastError(). */
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+
+	/* Execution errors surface at the next synchronisation point. */
+	status = cudaStreamSynchronize(stream);
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+}

+ 45 - 0
examples/filters/custom_mf/cuda.cu

@@ -0,0 +1,45 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "custom_types.h"
+#include "custom_interface.h"
+
+/*
+ * Device kernel: multiply every x[i] by the matching y[i], in place.
+ * Each thread handles at most one element; threads whose index is not
+ * below n do nothing.
+ */
+static __global__ void scal_cuda(unsigned n,
+				 float *x,
+				 float *y)
+{
+	unsigned idx = threadIdx.x + blockDim.x*blockIdx.x;
+
+	/* Surplus threads of the last block have nothing to scale. */
+	if (idx >= n)
+		return;
+
+	x[idx] *= y[idx];
+}
+
+/*
+ * CUDA implementation of the scaling codelet.
+ *
+ * buffers[0] is a struct custom_data_interface; the x array obtained with
+ * CUSTOM_GET_X_PTR() is multiplied element-wise, in place, by the y array
+ * obtained with CUSTOM_GET_Y_PTR().
+ *
+ * The call blocks until the kernel has finished on the local StarPU stream.
+ */
+extern "C" void custom_scal_cuda_func(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
+
+	/* A grid of zero blocks is an invalid launch configuration. */
+	if (n == 0)
+		return;
+
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+	cudaStream_t stream = starpu_cuda_get_local_stream();
+
+	/* The kernel uses no dynamic shared memory, so none is requested. */
+	scal_cuda<<<nblocks,threads_per_block,0,stream>>>(n, x, y);
+
+	/* Launch errors are only visible through cudaGetLastError(). */
+	cudaError_t status = cudaGetLastError();
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+
+	/* Execution errors surface at the next synchronisation point. */
+	status = cudaStreamSynchronize(stream);
+	if (status != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(status);
+}

+ 57 - 0
examples/filters/custom_mf/custom_conversion_codelets.c

@@ -0,0 +1,57 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+#ifdef STARPU_USE_CUDA
+/*
+ * CPU implementation of the CUDA-format -> CPU-format conversion codelet.
+ *
+ * buffers[0] is a struct custom_data_interface. The x and y arrays obtained
+ * with CUSTOM_GET_X_PTR() / CUSTOM_GET_Y_PTR() are packed back into the
+ * array of points obtained with CUSTOM_GET_CPU_PTR(). `arg' is unused.
+ */
+void cuda_to_cpu(void *buffers[], void *arg)
+{
+	(void) arg;
+
+	unsigned int n = CUSTOM_GET_NX(buffers[0]);
+	float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]);
+	float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]);
+	struct point *aop;
+	aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);
+
+	/* Same type as n: no signed/unsigned comparison, no int overflow. */
+	unsigned int i;
+	for (i = 0; i < n; i++)
+	{
+		aop[i].x = x[i];
+		aop[i].y = y[i];
+	}
+}
+
+/* Implemented in conversion.cu. */
+extern void cpu_to_cuda_cuda_func(void *buffers[], void *args);
+
+/* Codelet converting one buffer to the CUDA format; runs on CUDA only. */
+struct starpu_codelet cpu_to_cuda_cl =
+{
+	.name       = "codelet_cpu_to_cuda",
+	.where      = STARPU_CUDA,
+	.cuda_funcs = {cpu_to_cuda_cuda_func, NULL},
+	.nbuffers   = 1,
+	.modes      = { STARPU_RW }
+};
+
+/* Codelet converting one buffer back to the CPU format; runs on CPU only. */
+struct starpu_codelet cuda_to_cpu_cl =
+{
+	.name      = "codelet_cuda_to_cpu",
+	.where     = STARPU_CPU,
+	.cpu_funcs = {cuda_to_cpu, NULL},
+	.nbuffers  = 1,
+	.modes     = { STARPU_RW }
+};
+#endif

+ 414 - 0
examples/filters/custom_mf/custom_interface.c

@@ -0,0 +1,414 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_hash.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node);
+#ifdef STARPU_USE_CUDA
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node);
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream);
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream);
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node);
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
+				   void *dst_interface, unsigned dst_node,
+				   cudaStream_t stream);
+#endif
+
+/*
+ * Copy methods of the custom interface. Only the RAM and CUDA entries point
+ * at functions of this file; every SPU and OpenCL entry is NULL.
+ */
+static const struct starpu_data_copy_methods custom_copy_data_methods_s =
+{
+	/* Main memory */
+	.ram_to_ram = copy_ram_to_ram,
+
+	/* CUDA */
+#ifdef STARPU_USE_CUDA
+	.ram_to_cuda        = copy_ram_to_cuda,
+	.ram_to_cuda_async  = copy_ram_to_cuda_async,
+	.cuda_to_ram        = copy_cuda_to_ram,
+	.cuda_to_ram_async  = copy_cuda_to_ram_async,
+	.cuda_to_cuda       = copy_cuda_to_cuda,
+	.cuda_to_cuda_async = copy_cuda_to_cuda_async,
+#endif
+
+	/* OpenCL: not supported */
+#ifdef STARPU_USE_OPENCL
+	.ram_to_opencl       = NULL,
+	.ram_to_opencl_async = NULL,
+	.opencl_to_ram       = NULL,
+	.opencl_to_ram_async = NULL,
+	.opencl_to_opencl    = NULL,
+#endif
+
+	/* SPU: not supported */
+	.ram_to_spu  = NULL,
+	.cuda_to_spu = NULL,
+	.spu_to_ram  = NULL,
+	.spu_to_cuda = NULL,
+	.spu_to_spu  = NULL
+};
+
+static void     register_custom_handle(starpu_data_handle_t handle,
+				       uint32_t home_node,
+				       void *data_interface);
+static ssize_t  allocate_custom_buffer_on_node(void *data_interface_,
+					       uint32_t dst_node);
+static void*    custom_handle_to_pointer(starpu_data_handle_t data_handle,
+					 uint32_t node);
+static void     free_custom_buffer_on_node(void *data_interface, uint32_t node);
+static size_t   custom_interface_get_size(starpu_data_handle_t handle);
+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle);
+static int      custom_compare(void *data_interface_a, void *data_interface_b);
+static void     display_custom_interface(starpu_data_handle_t handle, FILE *f);
+static uint32_t custom_get_nx(starpu_data_handle_t handle);
+
+
+/*
+ * Return the multiformat operations stored in a custom data interface.
+ *
+ * data_interface must point to a struct custom_data_interface, which is the
+ * type registered through interface_custom_ops.interface_size.
+ */
+static struct starpu_multiformat_data_interface_ops*
+get_mf_ops(void *data_interface)
+{
+	struct custom_data_interface *custom;
+	/* Cast to the type actually stored, not to the stock multiformat one. */
+	custom = (struct custom_data_interface *) data_interface;
+
+	return custom->ops;
+}
+
+/* Operation table handed to starpu_data_register() for the custom interface. */
+static struct starpu_data_interface_ops interface_custom_ops =
+{
+	/* Identity of the interface */
+	.interfaceid           = STARPU_NINTERFACES_ID+1, //XXX
+	.interface_size        = sizeof(struct custom_data_interface),
+	.is_multiformat        = 1,
+	.get_mf_ops            = get_mf_ops,
+
+	/* Life cycle of the replicates */
+	.register_data_handle  = register_custom_handle,
+	.allocate_data_on_node = allocate_custom_buffer_on_node,
+	.free_data_on_node     = free_custom_buffer_on_node,
+	.copy_methods          = &custom_copy_data_methods_s,
+
+	/* Introspection */
+	.handle_to_pointer     = custom_handle_to_pointer,
+	.get_size              = custom_interface_get_size,
+	.footprint             = footprint_custom_interface_crc32,
+	.compare               = custom_compare,
+	.display               = display_custom_interface,
+#ifdef STARPU_USE_GORDON
+	.convert_to_gordon     = NULL,
+#endif
+};
+
+/*
+ * Initialise the per-node replicates of a freshly registered handle.
+ *
+ * The replicate on home_node receives the pointers found in data_interface;
+ * every other replicate starts with NULL pointers. nx and ops are copied to
+ * all replicates.
+ */
+static void
+register_custom_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
+{
+	struct custom_data_interface *src =
+		(struct custom_data_interface *) data_interface;
+	unsigned nnodes = starpu_memory_nodes_get_count();
+	unsigned node;
+
+	for (node = 0; node < nnodes; node++)
+	{
+		struct custom_data_interface *local =
+			(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
+		int is_home = (node == home_node);
+
+		/* Only the home node owns valid buffers at registration time. */
+		local->cpu_ptr = is_home ? src->cpu_ptr : NULL;
+#ifdef STARPU_USE_CUDA
+		local->cuda_ptr = is_home ? src->cuda_ptr : NULL;
+#endif
+		local->nx = src->nx;
+		local->ops = src->ops;
+	}
+}
+
+/*
+ * Allocate the buffers of one replicate on memory node `node'.
+ *
+ * Both cpu_ptr and (when CUDA is enabled) cuda_ptr are allocated, each with
+ * nx * ops->cpu_elemsize bytes: with malloc() on a CPU node, with
+ * cudaMalloc() on a CUDA node.
+ *
+ * Returns the size of one buffer on success and -ENOMEM on failure; on
+ * failure no buffer stays allocated and the pointers touched are NULL.
+ */
+static ssize_t allocate_custom_buffer_on_node(void *data_interface, uint32_t node)
+{
+	ssize_t size = 0;
+	struct custom_data_interface *custom_interface;
+	custom_interface = (struct custom_data_interface *) data_interface;
+
+	switch(starpu_node_get_kind(node))
+	{
+	case STARPU_CPU_RAM:
+		size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
+		custom_interface->cpu_ptr = (void*) malloc(size);
+		if (!custom_interface->cpu_ptr)
+			return -ENOMEM;
+#ifdef STARPU_USE_CUDA
+		custom_interface->cuda_ptr = (void *) malloc(size);
+		if (!custom_interface->cuda_ptr)
+		{
+			free(custom_interface->cpu_ptr);
+			custom_interface->cpu_ptr = NULL;
+			return -ENOMEM;
+		}
+#endif
+		break;
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+	{
+		cudaError_t err;
+		size = custom_interface->nx * custom_interface->ops->cpu_elemsize;
+		err = cudaMalloc(&custom_interface->cuda_ptr, size);
+		if (err != cudaSuccess)
+		{
+			custom_interface->cuda_ptr = NULL;
+			return -ENOMEM;
+		}
+
+		err = cudaMalloc(&custom_interface->cpu_ptr, size);
+		if (err != cudaSuccess)
+		{
+			/* Roll back so that no stale pointer is freed later on. */
+			cudaFree(custom_interface->cuda_ptr);
+			custom_interface->cuda_ptr = NULL;
+			custom_interface->cpu_ptr = NULL;
+			return -ENOMEM;
+		}
+		break;
+	}
+#endif
+	default:
+		assert(0);
+	}
+
+	/* XXX We may want to return cpu_size + cuda_size + ... */
+	return size;
+}
+
+/*
+ * Release the buffers of one replicate on memory node `node'.
+ *
+ * Buffers are released with free() on a CPU node and with cudaFree() on a
+ * CUDA node. Every pointer is reset to NULL afterwards, so the replicate
+ * never keeps a dangling pointer and freeing twice is harmless.
+ */
+static void free_custom_buffer_on_node(void *data_interface, uint32_t node)
+{
+	struct custom_data_interface *custom_interface;
+	custom_interface = (struct custom_data_interface *) data_interface;
+
+	switch(starpu_node_get_kind(node))
+	{
+	case STARPU_CPU_RAM:
+		if (custom_interface->cpu_ptr != NULL)
+		{
+			free(custom_interface->cpu_ptr);
+			custom_interface->cpu_ptr = NULL;
+		}
+#ifdef STARPU_USE_CUDA
+		if (custom_interface->cuda_ptr != NULL)
+		{
+			free(custom_interface->cuda_ptr);
+			custom_interface->cuda_ptr = NULL;
+		}
+#endif /* !STARPU_USE_CUDA */
+		break;
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+		if (custom_interface->cpu_ptr != NULL)
+		{
+			cudaError_t err;
+			err = cudaFree(custom_interface->cpu_ptr);
+			if (err != cudaSuccess)
+				fprintf(stderr, "cudaFree failed...\n");
+			custom_interface->cpu_ptr = NULL;
+		}
+		if (custom_interface->cuda_ptr != NULL)
+		{
+			cudaError_t err;
+			err = cudaFree(custom_interface->cuda_ptr);
+			if (err != cudaSuccess)
+				fprintf(stderr, "cudaFree failed...\n");
+			custom_interface->cuda_ptr = NULL;
+		}
+		break;
+#endif /* !STARPU_USE_CUDA */
+	default:
+		assert(0);
+	}
+}
+
+/*
+ * Return the buffer of `handle' that is native to memory node `node':
+ * cpu_ptr on a CPU node, cuda_ptr on a CUDA node.
+ *
+ * Any other kind of node is a programming error: it trips the assertion and,
+ * when assertions are compiled out, yields NULL.
+ */
+static void*
+custom_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
+{
+	struct custom_data_interface *data_interface =
+		(struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node);
+
+
+	switch(starpu_node_get_kind(node))
+	{
+		case STARPU_CPU_RAM:
+			return data_interface->cpu_ptr;
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			return data_interface->cuda_ptr;
+#endif
+		default:
+			assert(0);
+	}
+
+	/* Reached only with NDEBUG; never fall off a non-void function. */
+	return NULL;
+}
+
+/*
+ * Size in bytes of the data behind `handle', computed from the replicate on
+ * node 0 as nx * ops->cpu_elemsize.
+ */
+static size_t custom_interface_get_size(starpu_data_handle_t handle)
+{
+	struct custom_data_interface *replicate =
+		(struct custom_data_interface *)
+				starpu_data_get_interface_on_node(handle, 0);
+	size_t size = replicate->nx * replicate->ops->cpu_elemsize;
+
+	return size;
+}
+
+/* Footprint of `handle': the CRC32 of its number of elements, seeded with 0. */
+static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle)
+{
+	uint32_t nx = custom_get_nx(handle);
+
+	return starpu_crc32_be(nx, 0);
+}
+
+/*
+ * Comparison of two custom data interfaces. Not implemented yet: calling it
+ * terminates the program.
+ */
+static int custom_compare(void *data_interface_a, void *data_interface_b)
+{
+	(void) data_interface_a;
+	(void) data_interface_b;
+
+	/* TODO */
+	assert(0);
+	/* Still fail loudly, instead of returning garbage, under NDEBUG. */
+	abort();
+}
+
+/* Display of a custom data interface on stream `f'. Not implemented yet. */
+static void display_custom_interface(starpu_data_handle_t handle, FILE *f)
+{
+	(void) handle;
+	(void) f;
+
+	/* TODO */
+	assert(0);
+}
+
+/* Number of elements of `handle', read from the replicate on node 0. */
+static uint32_t
+custom_get_nx(starpu_data_handle_t handle)
+{
+	struct custom_data_interface *replicate =
+		(struct custom_data_interface *)
+				starpu_data_get_interface_on_node(handle, 0);
+
+	return replicate->nx;
+}
+
+
+/*
+ * Register `nx' elements stored at `ptr' (CPU format) on node `home_node' as
+ * a piece of data using the custom interface. The CUDA pointer starts out
+ * NULL. `format_ops' is stored in the interface as its multiformat ops.
+ */
+void custom_data_register(starpu_data_handle_t *handle,
+				 uint32_t home_node,
+				 void *ptr,
+				 uint32_t nx,
+				 struct starpu_multiformat_data_interface_ops *format_ops)
+{
+	/* XXX Deprecated fields ? */
+	struct custom_data_interface template_interface =
+	{
+		.nx  = nx,
+		.ops = format_ops,
+#ifdef STARPU_USE_CUDA
+		.cuda_ptr = NULL,
+#endif
+		.cpu_ptr = ptr
+	};
+
+	starpu_data_register(handle, home_node, &template_interface,
+			     &interface_custom_ops);
+}
+
+/* RAM -> RAM copy. Not implemented yet: calling it terminates the program. */
+static int copy_ram_to_ram(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	/* TODO */
+	assert(0);
+	/* Still fail loudly, instead of returning garbage, under NDEBUG. */
+	abort();
+}
+#ifdef STARPU_USE_CUDA
+/*
+ * Synchronous RAM -> CUDA copy. Not implemented yet: calling it terminates
+ * the program.
+ */
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	/* TODO */
+	assert(0);
+	/* Still fail loudly, instead of returning garbage, under NDEBUG. */
+	abort();
+}
+/*
+ * Synchronous CUDA -> RAM copy. Not implemented yet: calling it terminates
+ * the program.
+ */
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node,
+			    void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	/* TODO */
+	assert(0);
+	/* Still fail loudly, instead of returning garbage, under NDEBUG. */
+	abort();
+}
+
+/*
+ * Common implementation of the asynchronous RAM <-> CUDA copies.
+ *
+ * cudaMemcpyHostToDevice: the CPU-format buffer (cpu_ptr) of the source is
+ * copied into the cpu_ptr of the destination, and the destination gets a
+ * cuda_ptr buffer of the same size. Nothing is copied into that second
+ * buffer here.
+ *
+ * cudaMemcpyDeviceToHost: the CUDA-format buffer (cuda_ptr, 2 * nx floats)
+ * of the source is copied into the cuda_ptr of the destination.
+ *
+ * Missing destination buffers are allocated on the fly; buffers that already
+ * exist are reused, never reallocated.
+ *
+ * Returns 0 once the copy has been enqueued on `stream', or -ENOMEM when a
+ * host buffer cannot be allocated.
+ * NOTE(review): StarPU's own asynchronous copy methods appear to return
+ * -EAGAIN while a transfer is still in flight -- confirm that 0 is intended.
+ */
+static int
+copy_cuda_common_async(void *src_interface, unsigned src_node,
+		       void *dst_interface, unsigned dst_node,
+		       cudaStream_t stream, enum cudaMemcpyKind kind)
+{
+	struct custom_data_interface *src_custom, *dst_custom;
+
+	(void) src_node;
+	(void) dst_node;
+
+	src_custom = (struct custom_data_interface *) src_interface;
+	dst_custom = (struct custom_data_interface *) dst_interface;
+
+	ssize_t size = 0;
+	cudaError_t err;
+
+	switch (kind)
+	{
+	case cudaMemcpyHostToDevice:
+	{
+		size = src_custom->nx * src_custom->ops->cpu_elemsize;
+		if (dst_custom->cpu_ptr == NULL)
+		{
+			err = cudaMalloc(&dst_custom->cpu_ptr, size);
+			assert(err == cudaSuccess);
+		}
+
+		err = cudaMemcpyAsync(dst_custom->cpu_ptr,
+				      src_custom->cpu_ptr,
+				      size, kind, stream);
+		assert(err == cudaSuccess);
+
+		/*
+		 * Only allocate the CUDA-format buffer when it is missing:
+		 * overwriting a live pointer would leak device memory.
+		 */
+		if (dst_custom->cuda_ptr == NULL)
+		{
+			err = cudaMalloc(&dst_custom->cuda_ptr, size);
+			assert(err == cudaSuccess);
+		}
+		break;
+	}
+	case cudaMemcpyDeviceToHost:
+		size = 2*src_custom->nx*sizeof(float);
+		if (dst_custom->cuda_ptr == NULL)
+		{
+			dst_custom->cuda_ptr = malloc(size);
+			if (dst_custom->cuda_ptr == NULL)
+				return -ENOMEM;
+		}
+		err = cudaMemcpyAsync(dst_custom->cuda_ptr,
+				      src_custom->cuda_ptr,
+				      size, kind, stream);
+		assert(err == cudaSuccess);
+		break;
+	default:
+		assert(0);
+	}
+
+	return 0;
+}
+
+/* Asynchronous RAM -> CUDA copy: the host-to-device case of the common helper. */
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream)
+{
+	int ret;
+
+	ret = copy_cuda_common_async(src_interface, src_node,
+				     dst_interface, dst_node,
+				     stream, cudaMemcpyHostToDevice);
+	return ret;
+}
+/* Asynchronous CUDA -> RAM copy: the device-to-host case of the common helper. */
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+				  void *dst_interface, unsigned dst_node,
+				  cudaStream_t stream)
+{
+	int ret;
+
+	ret = copy_cuda_common_async(src_interface, src_node,
+				     dst_interface, dst_node,
+				     stream, cudaMemcpyDeviceToHost);
+	return ret;
+}
+/*
+ * Synchronous CUDA -> CUDA copy. Not implemented: calling it terminates the
+ * program.
+ */
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node,
+			     void *dst_interface, unsigned dst_node)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+
+	assert(0);
+	/* Still fail loudly, instead of returning garbage, under NDEBUG. */
+	abort();
+}
+/*
+ * Asynchronous CUDA -> CUDA copy. Not implemented: calling it terminates the
+ * program.
+ */
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
+				   void *dst_interface, unsigned dst_node,
+				   cudaStream_t stream)
+{
+	(void) src_interface;
+	(void) src_node;
+	(void) dst_interface;
+	(void) dst_node;
+	(void) stream;
+
+	assert(0);
+	/* Still fail loudly, instead of returning garbage, under NDEBUG. */
+	abort();
+}
+#endif /* !STARPU_USE_CUDA */

+ 43 - 0
examples/filters/custom_mf/custom_interface.h

@@ -0,0 +1,43 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef __CUSTOM_INTERFACE_H__
+#define __CUSTOM_INTERFACE_H__
+#include <starpu.h>
+struct custom_data_interface
+{
+	void *cpu_ptr;	/* buffer in the CPU format; the CPU codelets read it as an array of struct point */
+	void *cuda_ptr;	/* buffer in the CUDA format; CUSTOM_GET_X_PTR() / CUSTOM_GET_Y_PTR() index into it */
+	struct starpu_multiformat_data_interface_ops *ops;	/* element sizes and conversion codelets */
+	uint32_t nx;	/* number of elements */
+};
+
+void custom_data_register(starpu_data_handle_t *handle,
+				 uint32_t home_node,
+				 void *ptr,
+				 uint32_t nx,
+				 struct starpu_multiformat_data_interface_ops* ops);
+
+/* Number of elements described by a struct custom_data_interface. */
+#define CUSTOM_GET_NX(interface) (((struct custom_data_interface*)(interface))->nx)
+/* Buffer holding the data in the CPU format. */
+#define CUSTOM_GET_CPU_PTR(interface) (((struct custom_data_interface*)(interface))->cpu_ptr)
+
+#ifdef STARPU_USE_CUDA
+/* Start of the x values: the beginning of the CUDA-format buffer. */
+#define CUSTOM_GET_X_PTR(interface) (((struct custom_data_interface*)(interface))->cuda_ptr)
+/*
+ * Start of the y values: nx floats after the x values.
+ *
+ * The offset is applied to a float pointer and the whole expression is
+ * parenthesised, so the result no longer depends on a cast written by the
+ * caller nor on arithmetic on a void pointer.
+ */
+#define CUSTOM_GET_Y_PTR(interface) \
+	((float *)(((struct custom_data_interface*)(interface))->cuda_ptr) + \
+	 CUSTOM_GET_NX((interface)))
+#endif /* !STARPU_USE_CUDA */
+
+#endif /* ! __CUSTOM_INTERFACE_H__ */

+ 241 - 0
examples/filters/custom_mf/custom_mf_filter.c

@@ -0,0 +1,241 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "custom_interface.h"
+#include "custom_types.h"
+
+#define N 20
+
+#define DEBUG 0
+
+static struct point array_of_structs[N];
+static starpu_data_handle_t handle;
+static unsigned int nchunks = 4;
+
+#ifdef STARPU_USE_CUDA
+extern struct starpu_codelet cpu_to_cuda_cl;
+extern struct starpu_codelet cuda_to_cpu_cl;
+#endif
+
+/* Element sizes and conversion codelets of the two formats of the data. */
+static struct starpu_multiformat_data_interface_ops format_ops =
+{
+	.cpu_elemsize = sizeof(struct point),
+#ifdef STARPU_USE_CUDA
+	.cuda_elemsize  = sizeof(struct struct_of_arrays),
+	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
+	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
+#endif
+};
+
+
+/*
+ * Filter function: make `child' describe chunk number `id' out of `nchunks'
+ * equal chunks of `father'.
+ *
+ * N must be a multiple of nchunks. When the father holds CPU-format data,
+ * the child's cpu_ptr points at its slice of the array of points. Otherwise,
+ * when the father holds a CUDA-format buffer, it is read as a
+ * struct struct_of_arrays and the child's x and y pointers are set to the
+ * slice of chunk `id'. `f' is unused.
+ *
+ * NOTE(review): the CUDA branch writes through custom_child->cuda_ptr, which
+ * therefore has to point to a valid struct struct_of_arrays beforehand, and
+ * the rest of the example indexes cuda_ptr as a flat float array -- confirm
+ * the intended layout before relying on this branch.
+ */
+static void
+custom_filter(void *father, void *child, struct starpu_data_filter *f,
+		unsigned id, unsigned nchunks)
+{
+	struct custom_data_interface *custom_father, *custom_child;
+	custom_father = (struct custom_data_interface *) father;
+	custom_child = (struct custom_data_interface *) child;
+
+	(void) f;
+
+	assert(N % nchunks == 0); // XXX 
+	ssize_t chunk_size = N/nchunks;
+
+	if (custom_father->cpu_ptr)
+	{
+		struct point *tmp = (struct point *) custom_father->cpu_ptr;
+		tmp += id * chunk_size;
+		custom_child->cpu_ptr = tmp;
+	}
+#ifdef STARPU_USE_CUDA
+	else if (custom_father->cuda_ptr)
+	{
+		struct struct_of_arrays *soa_father, *soa_child;
+		soa_father = (struct struct_of_arrays*) custom_father->cuda_ptr;
+		soa_child = (struct struct_of_arrays*) custom_child->cuda_ptr;
+		/* Offset by the chunk index, like the CPU branch does. */
+		soa_child->x = soa_father->x + id * chunk_size;
+		soa_child->y = soa_father->y + id * chunk_size;
+	}
+#endif
+
+	custom_child->ops = custom_father->ops;
+	custom_child->nx = chunk_size;
+}
+
+/*
+ * Fill array_of_structs with (i+1, 42) points, register it on node 0 with
+ * the custom interface, and partition it into nchunks children with
+ * custom_filter().
+ */
+static void
+register_and_partition_data(void)
+{
+	struct starpu_data_filter filter =
+	{
+		.filter_func   = custom_filter,
+		.nchildren     = nchunks,
+		.get_nchildren = NULL,
+		.get_child_ops = NULL
+	};
+	int idx;
+
+	for (idx = 0; idx < N; idx++)
+	{
+		struct point *p = &array_of_structs[idx];
+
+		p->x = idx+1.0;
+		p->y = 42.0;
+	}
+
+	custom_data_register(&handle, 0, &array_of_structs, N, &format_ops);
+	starpu_data_partition(handle, &filter);
+}
+
+/* Undo register_and_partition_data(): gather the children on node 0, then unregister. */
+static void
+unpartition_and_unregister_data(void)
+{
+	const unsigned gathering_node = 0;
+
+	starpu_data_unpartition(handle, gathering_node);
+	starpu_data_unregister(handle);
+}
+
+/*
+ * CPU implementation of the scaling codelet: multiply the x field of every
+ * point of buffers[0] by its y field, in place. `args' is unused.
+ */
+static void
+custom_scal_cpu_func(void *buffers[], void *args)
+{
+	struct point *cur = CUSTOM_GET_CPU_PTR(buffers[0]);
+	unsigned int remaining = CUSTOM_GET_NX(buffers[0]);
+
+	while (remaining > 0)
+	{
+		cur->x *= cur->y;
+		cur++;
+		remaining--;
+	}
+}
+
+#ifdef STARPU_USE_CUDA
+extern void custom_scal_cuda_func(void *buffers[], void *args);
+#endif
+
+/* Scaling codelet restricted to CPU workers. */
+static struct starpu_codelet cpu_cl =
+{
+	.name      = "codelet_real",
+	.where     = STARPU_CPU,
+	.cpu_funcs = { custom_scal_cpu_func, NULL},
+	.modes     = { STARPU_RW },
+	.nbuffers  = 1
+};
+
+#ifdef STARPU_USE_CUDA
+/* Scaling codelet restricted to CUDA workers. */
+static struct starpu_codelet cuda_cl =
+{
+	.name       = "cuda_codelet",
+	.where      = STARPU_CUDA,
+	.cuda_funcs = { custom_scal_cuda_func, NULL },
+	.modes      = { STARPU_RW },
+	.nbuffers   = 1
+};
+#endif /* !STARPU_USE_CUDA */
+
+/*
+ * Submit one scaling task per chunk and wait for all of them.
+ *
+ * Even chunks use the CPU codelet; odd chunks use the CUDA codelet when
+ * StarPU was built with CUDA, the CPU codelet otherwise.
+ *
+ * Returns 0 on success, or the first error reported by starpu_task_submit()
+ * or starpu_task_wait_for_all().
+ */
+static int
+create_and_submit_tasks(void)
+{
+	unsigned int chunk;
+
+	for (chunk = 0; chunk < nchunks; chunk++)
+	{
+		struct starpu_task *task = starpu_task_create();
+		int ret;
+
+		task->cl = &cpu_cl;
+#ifdef STARPU_USE_CUDA
+		if (chunk % 2 != 0)
+			task->cl = &cuda_cl;
+#endif /* !STARPU_USE_CUDA */
+
+		task->handles[0] = starpu_data_get_sub_data(handle, 1, chunk);
+
+		ret = starpu_task_submit(task);
+		if (ret != 0)
+			return ret;
+	}
+
+	return starpu_task_wait_for_all();
+}
+
+#if DEBUG
+/* Debugging aid: print every point of array_of_structs on one line of stderr. */
+static void
+print_it(void)
+{
+	const struct point *p = array_of_structs;
+	const struct point *end = array_of_structs + N;
+
+	for (; p != end; p++)
+		FPRINTF(stderr, "(%.2f, %.2f) ", p->x, p->y);
+
+	FPRINTF(stderr, "\n");
+}
+#endif
+
+/*
+ * Check the result of the computation: every x must equal its initial value
+ * (index + 1) multiplied by y. Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int
+check_it(void)
+{
+	int status = EXIT_SUCCESS;
+	int idx = 0;
+
+	while (idx < N && status == EXIT_SUCCESS)
+	{
+		float expected = idx + 1.0;
+		expected *= array_of_structs[idx].y;
+
+		if (array_of_structs[idx].x != expected)
+			status = EXIT_FAILURE;
+		idx++;
+	}
+
+	return status;
+}
+
+/*
+ * Entry point of the example.
+ *
+ * Returns 77 when StarPU was built without CPU support or when starpu_init()
+ * reports -ENODEV, EXIT_FAILURE when the tasks cannot be submitted or the
+ * result is wrong, and EXIT_SUCCESS otherwise.
+ */
+int
+main(void)
+{
+#ifndef STARPU_USE_CPU
+	return 77;
+#else
+	int err = starpu_init(NULL);
+
+	/* No worker available: tell the test harness to skip this example. */
+	if (err == -ENODEV)
+		return 77;
+
+	register_and_partition_data();
+#if DEBUG
+	print_it();
+#endif
+
+	err = create_and_submit_tasks();
+	if (err != 0)
+	{
+		FPRINTF(stderr, "create_submit_task : %s\n",
+			strerror(-err));
+		return EXIT_FAILURE;
+	}
+
+	unpartition_and_unregister_data();
+#if DEBUG
+	print_it();
+#endif
+
+	starpu_shutdown();
+	return check_it();
+#endif
+}

+ 36 - 0
examples/filters/custom_mf/custom_types.h

@@ -0,0 +1,36 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#ifndef __CUSTOM_TYPES_H__
+#define __CUSTOM_TYPES_H__
+
+
+struct struct_of_arrays
+{
+	float *x, *y;	/* one array per coordinate, instead of one struct point per element */
+};
+
+struct point
+{
+	float x, y;	/* both coordinates of one element, stored side by side */
+};
+
+/* fprintf() that stays silent when the STARPU_SSILENT environment variable is set. */
+#define FPRINTF(ofile, fmt, args ...)                 \
+	do                                            \
+	{                                             \
+		if (!getenv("STARPU_SSILENT"))        \
+			fprintf(ofile, fmt, ##args);  \
+	} while(0)
+
+#endif