Преглед изворни кода

Add starpu_block_shadow_filter_func_vector, which supports shadow for vector partitioning. Add example/test and documentation.

Samuel Thibault пре 13 година
родитељ
комит
4d21f76259

+ 12 - 0
doc/chapters/basic-api.texi

@@ -1148,6 +1148,18 @@ vector represented by @var{father_interface} once partitioned in
 @var{nparts} chunks of equal size.
 @var{nparts} chunks of equal size.
 @end deftypefun
 @end deftypefun
 
 
+@deftypefun void starpu_block_shadow_filter_func_vector (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
+Return in @code{*@var{child_interface}} the @var{id}th element of the
+vector represented by @var{father_interface} once partitioned in
+@var{nparts} chunks of equal size with a shadow border @code{filter_arg_ptr}
+
+The @code{filter_arg_ptr} field must be the shadow size casted into @code{void*}.
+
+IMPORTANT: This can only be used for read-only access, as no coherency is
+enforced for the shadowed parts.
+
+A usage example is available in examples/filters/shadow.c
+@end deftypefun
 
 
 @deftypefun void starpu_vector_list_filter_func (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
 @deftypefun void starpu_vector_list_filter_func (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
 Return in @code{*@var{child_interface}} the @var{id}th element of the
 Return in @code{*@var{child_interface}} the @var{id}th element of the

+ 1 - 0
examples/Makefile.am

@@ -179,6 +179,7 @@ examplebin_PROGRAMS +=				\
 	filters/fvector				\
 	filters/fvector				\
 	filters/fblock				\
 	filters/fblock				\
 	filters/fmatrix				\
 	filters/fmatrix				\
+	filters/shadow				\
 	tag_example/tag_example			\
 	tag_example/tag_example			\
 	tag_example/tag_example2		\
 	tag_example/tag_example2		\
 	tag_example/tag_example3		\
 	tag_example/tag_example3		\

+ 172 - 0
examples/filters/shadow.c

@@ -0,0 +1,172 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This examplifies the use of the shadow filter: a source vector of NX
+ * elements (plus 2*SHADOW wrap-around elements) is copied into a destination
+ * vector of NX+NPARTS*2*SHADOW elements, thus showing how shadowing shows up.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+
+/* Shadow width */
+#define SHADOW 2
+#define NX    30
+#define PARTS 3
+
+#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+
+void cpu_func(void *buffers[], void *cl_arg)
+{
+        unsigned i;
+
+        /* length of the shadowed source vector */
+        unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+        /* local copy of the shadowed source vector pointer */
+        int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
+
+        /* length of the destination vector */
+        unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]);
+        /* local copy of the destination vector pointer */
+        int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]);
+
+	/* If things go right, sizes should match */
+	STARPU_ASSERT(n == n2);
+	for (i = 0; i < n; i++)
+		val2[i] = val[i];
+}
+
+#ifdef STARPU_USE_CUDA
+void cuda_func(void *buffers[], void *cl_arg)
+{
+        /* length of the shadowed source vector */
+        unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+        /* local copy of the shadowed source vector pointer */
+        int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
+
+        /* length of the destination vector */
+        unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]);
+        /* local copy of the destination vector pointer */
+        int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]);
+
+	/* If things go right, sizes should match */
+	STARPU_ASSERT(n == n2);
+	cudaMemcpy(val2, val, n*sizeof(*val), cudaMemcpyDeviceToDevice);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+int main(int argc, char **argv)
+{
+	unsigned i, j;
+        int vector[NX + 2*SHADOW];
+        int vector2[NX + PARTS*2*SHADOW];
+	starpu_data_handle_t handle, handle2;
+        int factor=1;
+	int ret;
+
+        struct starpu_codelet cl =
+	{
+                .where = STARPU_CPU
+#ifdef STARPU_USE_CUDA
+			|STARPU_CUDA
+#endif
+			,
+                .cpu_funcs = {cpu_func, NULL},
+#ifdef STARPU_USE_CUDA
+                .cuda_funcs = {cuda_func, NULL},
+#endif
+                .nbuffers = 2,
+		.modes = {STARPU_R, STARPU_W}
+        };
+
+        for(i=0 ; i<NX ; i++) vector[SHADOW+i] = i;
+	for(i=0 ; i<SHADOW ; i++) vector[i] = vector[i+NX];
+	for(i=0 ; i<SHADOW ; i++) vector[SHADOW+NX+i] = vector[SHADOW+i];
+        FPRINTF(stderr,"IN  Vector: ");
+        for(i=0 ; i<NX + 2*SHADOW ; i++) FPRINTF(stderr, "%5d ", vector[i]);
+        FPRINTF(stderr,"\n");
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		exit(77);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Declare source vector to StarPU */
+	starpu_vector_data_register(&handle, 0, (uintptr_t)vector, NX + 2*SHADOW, sizeof(vector[0]));
+
+	/* Declare destination vector to StarPU */
+	starpu_vector_data_register(&handle2, 0, (uintptr_t)vector2, NX + PARTS*2*SHADOW, sizeof(vector[0]));
+
+        /* Partition the source vector in PARTS sub-vectors with shadows */
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_block_shadow_filter_func_vector,
+		.nchildren = PARTS,
+		.filter_arg_ptr = (void*)(uintptr_t) SHADOW /* Shadow width */
+	};
+	starpu_data_partition(handle, &f);
+
+        /* Partition the destination vector in PARTS sub-vectors */
+	struct starpu_data_filter f2 =
+	{
+		.filter_func = starpu_block_filter_func_vector,
+		.nchildren = PARTS,
+	};
+	starpu_data_partition(handle2, &f2);
+
+        /* Submit a task on each sub-vector */
+	for (i=0; i<starpu_data_get_nb_children(handle); i++)
+	{
+                starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i);
+                starpu_data_handle_t sub_handle2 = starpu_data_get_sub_data(handle2, 1, i);
+                struct starpu_task *task = starpu_task_create();
+
+                factor *= 10;
+		task->handles[0] = sub_handle;
+		task->handles[1] = sub_handle2;
+                task->cl = &cl;
+                task->synchronous = 1;
+                task->cl_arg = &factor;
+                task->cl_arg_size = sizeof(factor);
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	starpu_data_unpartition(handle, 0);
+	starpu_data_unpartition(handle2, 0);
+        starpu_data_unregister(handle);
+        starpu_data_unregister(handle2);
+	starpu_shutdown();
+
+        FPRINTF(stderr,"OUT Vector: ");
+        for(i=0 ; i<NX + PARTS*2*SHADOW ; i++) FPRINTF(stderr, "%5d ", vector2[i]);
+        FPRINTF(stderr,"\n");
+	for(i=0 ; i<PARTS ; i++)
+		for (j=0 ; j<NX/PARTS ; j++)
+			STARPU_ASSERT(vector2[i*(NX/PARTS+2*SHADOW)+j] == vector[i*(NX/PARTS)+j]);
+
+	return 0;
+
+enodev:
+	FPRINTF(stderr, "WARNING: No one can execute this task\n");
+	starpu_shutdown();
+	return 77;
+}

+ 2 - 1
include/starpu_data_filters.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
@@ -66,6 +66,7 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 
 
 /* for vector */
 /* for vector */
 void starpu_block_filter_func_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_block_filter_func_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+void starpu_block_shadow_filter_func_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_list_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_list_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 
 

+ 33 - 1
src/datawizard/interfaces/vector_filters.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
@@ -49,6 +49,38 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 }
 }
 
 
 
 
+void starpu_block_shadow_filter_func_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
+{
+        struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
+        struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
+
+        uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+	/* actual number of elements */
+	uint32_t nx = vector_father->nx - 2 * shadow_size;
+	size_t elemsize = vector_father->elemsize;
+
+	STARPU_ASSERT(nchunks <= nx);
+
+	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
+	size_t offset = id*chunk_size*elemsize;
+
+	uint32_t child_nx =
+	  STARPU_MIN(chunk_size, nx - id*chunk_size) + 2 * shadow_size;
+
+	vector_child->nx = child_nx;
+	vector_child->elemsize = elemsize;
+
+	if (vector_father->dev_handle)
+	{
+		if (vector_father->ptr)
+			vector_child->ptr = vector_father->ptr + offset;
+		vector_child->dev_handle = vector_father->dev_handle;
+		vector_child->offset = vector_father->offset + offset;
+	}
+}
+
+
 void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks)
 void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks)
 {
 {
         /* there cannot be more than 2 chunks */
         /* there cannot be more than 2 chunks */