浏览代码

As a follow-up to r13691: it turns out that the offset semantics of
StarPU's data handles do not exactly match the semantics needed for
implementing OpenMP 4.0 vector slices. Thus, revert the r13691 changes and
use a different approach instead: vector data handles may now be annotated
with a dedicated slice base, expressed in number of elements,
which can subsequently be used to adjust array indices within subtasks as
needed. An example of use is given in tests/openmp/array_slice_01.c.

Olivier Aumage 10 年之前
父节点
当前提交
2109141a9c

+ 7 - 0
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -488,6 +488,13 @@ designated by \p interface.
 Return the size of each element of the array designated by
 \p interface.
 
+\def STARPU_VECTOR_GET_SLICE_BASE(interface)
+\ingroup API_Data_Interfaces
+Return the OpenMP slice base annotation, expressed in number of elements, of the array designated by
+\p interface.
+
+\sa starpu_omp_vector_annotate
+
 @name Accessing Matrix Data Interfaces
 \ingroup API_Data_Interfaces
 

+ 9 - 0
doc/doxygen/chapters/api/openmp_runtime_support.doxy

@@ -952,4 +952,13 @@ This function returns the precision of the time used by \p starpu_omp_get_wtime.
 
 \sa starpu_omp_get_wtime
 
+\fn void starpu_omp_vector_annotate (starpu_data_handle_t handle, uint32_t slice_base)
+\ingroup API_OpenMP_Runtime_Support
+This function enables setting additional vector metadata needed by the OpenMP Runtime Support.
+
+\p handle is a vector data handle.
+\p slice_base is the base of an array slice, expressed in number of vector elements from the array base.
+
+\sa STARPU_VECTOR_GET_SLICE_BASE
+
 */

+ 6 - 3
include/starpu_data_interfaces.h

@@ -248,12 +248,12 @@ struct starpu_vector_interface
 	size_t offset;
 	uint32_t nx;
 	size_t elemsize;
+#ifdef STARPU_OPENMP
+	uint32_t slice_base;
+#endif /* STARPU_OPENMP */
 };
 
 void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize);
-#ifdef STARPU_OPENMP
-void starpu_vector_data_register_with_offset(starpu_data_handle_t *handleptr, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize, size_t offset);
-#endif /* STARPU_OPENMP */
 void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset);
 uint32_t starpu_vector_get_nx(starpu_data_handle_t handle);
 size_t starpu_vector_get_elemsize(starpu_data_handle_t handle);
@@ -264,6 +264,9 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
 #define STARPU_VECTOR_GET_OFFSET(interface)	(((struct starpu_vector_interface *)(interface))->offset)
 #define STARPU_VECTOR_GET_NX(interface)	(((struct starpu_vector_interface *)(interface))->nx)
 #define STARPU_VECTOR_GET_ELEMSIZE(interface)	(((struct starpu_vector_interface *)(interface))->elemsize)
+#ifdef STARPU_OPENMP
+#define STARPU_VECTOR_GET_SLICE_BASE(interface)	(((struct starpu_vector_interface *)(interface))->slice_base)
+#endif /* STARPU_OPENMP */
 
 struct starpu_variable_interface
 {

+ 1 - 0
include/starpu_openmp.h

@@ -160,6 +160,7 @@ extern void starpu_omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW;
 extern void starpu_omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW;
 extern double starpu_omp_get_wtime (void) __STARPU_OMP_NOTHROW;
 extern double starpu_omp_get_wtick (void) __STARPU_OMP_NOTHROW;
+extern void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base) __STARPU_OMP_NOTHROW;
 
 #ifdef __cplusplus
 }

+ 11 - 26
src/datawizard/interfaces/vector_interface.c

@@ -100,18 +100,12 @@ static void register_vector_handle(starpu_data_handle_t handle, unsigned home_no
 		local_interface->id = vector_interface->id;
 		local_interface->nx = vector_interface->nx;
 		local_interface->elemsize = vector_interface->elemsize;
+#ifdef STARPU_OPENMP
+		local_interface->slice_base = vector_interface->slice_base;
+#endif /* STARPU_OPENMP */
 	}
 }
 
-static void _starpu_vector_data_register(starpu_data_handle_t * const handleptr, unsigned home_node, struct starpu_vector_interface * const vector)
-{
-#ifdef STARPU_USE_SCC
-	_starpu_scc_set_offset_in_shared_memory((void*)vector->ptr, (void**)&(vector->dev_handle), &(vector->offset));
-#endif
-
-	starpu_data_register(handleptr, home_node, vector, &starpu_interface_vector_ops);
-}
-
 /* declare a new data with the vector interface */
 void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
                         uintptr_t ptr, uint32_t nx, size_t elemsize)
@@ -123,27 +117,18 @@ void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_
 		.nx = nx,
 		.elemsize = elemsize,
                 .dev_handle = ptr,
+#ifdef STARPU_OPENMP
+		.slice_base = 0,
+#endif /* STARPU_OPENMP */
                 .offset = 0
 	};
-	_starpu_vector_data_register(handleptr, home_node, &vector);
-}
 
-#ifdef STARPU_OPENMP
-void starpu_vector_data_register_with_offset(starpu_data_handle_t *handleptr, unsigned home_node,
-                        uintptr_t ptr, uint32_t nx, size_t elemsize, size_t offset)
-{
-	struct starpu_vector_interface vector =
-	{
-		.id = STARPU_VECTOR_INTERFACE_ID,
-		.ptr = ptr,
-		.nx = nx,
-		.elemsize = elemsize,
-                .dev_handle = ptr,
-                .offset = offset
-	};
-	_starpu_vector_data_register(handleptr, home_node, &vector);
+#ifdef STARPU_USE_SCC
+	_starpu_scc_set_offset_in_shared_memory((void*)vector.ptr, (void**)&(vector.dev_handle), &(vector.offset));
+#endif
+
+	starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops);
 }
-#endif /* STARPU_OPENMP */
 
 void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node,
 			uintptr_t ptr, uintptr_t dev_handle, size_t offset)

+ 8 - 0
src/util/openmp_runtime_support.c

@@ -2396,6 +2396,14 @@ void starpu_omp_atomic_fallback_inline_end(void)
 	_starpu_spin_unlock(&device->atomic_lock);
 }
 
+/*
+ * Store slice_base in the vector interface that
+ * starpu_data_get_interface_on_node() returns for handle on STARPU_MAIN_RAM.
+ * The handle must use the vector interface; this is checked by the assert.
+ * NOTE(review): only the STARPU_MAIN_RAM interface is written here -- confirm
+ * whether the interfaces of other memory nodes need the same annotation.
+ */
+void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base)
+{
+	struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
+		starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+	assert(vector_interface->id == STARPU_VECTOR_INTERFACE_ID);
+	vector_interface->slice_base = slice_base;
+}
+
 /*
  * restore deprecated diagnostics (-Wdeprecated-declarations)
  */

+ 4 - 0
tests/Makefile.am

@@ -254,6 +254,7 @@ noinst_PROGRAMS =				\
 	openmp/taskwait_01			\
 	openmp/taskgroup_01			\
 	openmp/taskgroup_02			\
+	openmp/array_slice_01			\
 	overlap/overlap				\
 	overlap/gpu_concurrency			\
 	parallel_tasks/explicit_combined_worker	\
@@ -568,6 +569,9 @@ openmp_taskgroup_01_SOURCES = 	\
 openmp_taskgroup_02_SOURCES = 	\
 	openmp/taskgroup_02.c
 
+openmp_array_slice_01_SOURCES = 	\
+	openmp/array_slice_01.c
+
 ###################
 # Block interface #
 ###################

+ 237 - 0
tests/openmp/array_slice_01.c

@@ -0,0 +1,237 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014  Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <pthread.h>
+#include <starpu.h>
+#include "../helper.h"
+#include <stdio.h>
+
+#if !defined(STARPU_OPENMP)
+/* Variant built when STARPU_OPENMP is not defined: report the test as skipped. */
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+#define	NX	64
+int global_vector[NX];
+
+/*
+ * Constructor-attributed hook: calls starpu_omp_init() and checks its return
+ * value with STARPU_CHECK_RETURN_VALUE.
+ */
+__attribute__((constructor))
+static void omp_constructor(void)
+{
+	int ret = starpu_omp_init();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init");
+}
+
+/* Destructor-attributed hook: calls starpu_omp_shutdown(). */
+__attribute__((destructor))
+static void omp_destructor(void)
+{
+	starpu_omp_shutdown();
+}
+
+/*
+ * Depth-2 task body: add f to the elements of the vector slice in buffers[0]
+ * for the indices [imin, imax).
+ *
+ * _args is an array of three pointer-sized values carrying, in order,
+ * f, imin and imax. The slice base annotation of the vector is subtracted
+ * from the slice pointer, so an index i addresses slice element
+ * i - slice_base.
+ */
+void task_region_h(void *buffers[], void *_args)
+{
+	void **args = _args;
+	struct starpu_vector_interface *_vector = buffers[0];
+	int nx = STARPU_VECTOR_GET_NX(_vector);
+	int elemsize = STARPU_VECTOR_GET_ELEMSIZE(_vector);
+	int slice_base = STARPU_VECTOR_GET_SLICE_BASE(_vector);
+	int *v = (int *)STARPU_VECTOR_GET_PTR(_vector);
+	int f = (int)(intptr_t)args[0];
+	int imin = (int)(intptr_t)args[1];
+	int imax = (int)(intptr_t)args[2];
+	int i;
+
+	assert(elemsize == sizeof(v[0]));
+
+	/* %p requires a void * argument */
+	printf("depth 2 task, entry: vector ptr = %p, slice_base = %d, imin = %d, imax = %d\n", (void *)v, slice_base, imin, imax);
+
+	for (i = imin; i < imax; i++)
+	{
+		/* the slice-local index must stay within the nx elements of this slice */
+		assert(i-slice_base>=0);
+		assert(i-slice_base<nx);
+		(v-slice_base)[i] += f;
+	}
+
+	printf("depth 2 task ending\n");
+}
+
+/*
+ * Depth-1 task body.
+ *
+ * Adds f to every element of the vector in buffers[0], then registers two
+ * vector handles over that same memory: slice 1 covers the first nx/2
+ * elements, slice 2 covers the remaining ones and is annotated with a slice
+ * base of nx/2. Both handles are then retrieved again with
+ * starpu_data_lookup(), one task_region_h task is submitted per slice, and
+ * the function waits for them with starpu_omp_taskwait().
+ *
+ * args is not dereferenced: the integer f is carried in the pointer value.
+ *
+ * NOTE(review): the slice handles registered here are never unregistered in
+ * this function -- confirm whether that is intended.
+ * NOTE(review): slice 1 starts at &v[0], the same address as the enclosing
+ * vector -- confirm that starpu_data_lookup(&v[0]) yields the slice handle.
+ */
+void task_region_g(void *buffers[], void *args)
+{
+	struct starpu_vector_interface *_vector = buffers[0];
+
+	int nx = STARPU_VECTOR_GET_NX(_vector);
+	int *v = (int *)STARPU_VECTOR_GET_PTR(_vector);
+	int f = (int)(intptr_t)args;
+	
+	printf("depth 1 task, entry: vector ptr = %p\n", v);
+
+	/* update the whole vector */
+	{
+		int i;
+
+		for (i = 0; i < nx; i++)
+		{
+			v[i] += f;
+		}
+	}
+
+	/* block 1: register the two slices */
+	{
+		const int half_nx = nx/2;
+
+		starpu_data_handle_t vector_slice_1_handle;
+		starpu_vector_data_register(&vector_slice_1_handle, STARPU_MAIN_RAM, (uintptr_t)&v[0], half_nx, sizeof(v[0]));
+		printf("depth 1 task, block 1: vector_slice_1_handle = %p\n", vector_slice_1_handle);
+
+		starpu_data_handle_t vector_slice_2_handle;
+		starpu_vector_data_register(&vector_slice_2_handle, STARPU_MAIN_RAM, (uintptr_t)&v[half_nx], nx-half_nx, sizeof(v[0]));
+		/* set slice base */
+		starpu_omp_vector_annotate(vector_slice_2_handle, half_nx);
+		printf("depth 1 task, block 1: vector_slice_2_handle = %p\n", vector_slice_2_handle);
+
+	}
+
+	/* declared outside block 2, presumably so that the argument arrays are
+	 * still alive when starpu_omp_taskwait() is reached (cl_arg_free is 0) */
+	void *cl_arg_1[3];
+	void *cl_arg_2[3];
+
+	/* block 2: look the slice handles up again and submit one task per slice */
+	{
+		struct starpu_omp_task_region_attr attr;
+		const int half_nx = nx/2;
+		int i;
+
+		starpu_data_handle_t vector_slice_1_handle = starpu_data_lookup(&v[0]);
+		printf("depth 1 task, block 2: vector_slice_1_handle = %p\n", vector_slice_1_handle);
+
+		starpu_data_handle_t vector_slice_2_handle = starpu_data_lookup(&v[half_nx]);
+		printf("depth 1 task, block 2: vector_slice_2_handle = %p\n", vector_slice_2_handle);
+
+		memset(&attr, 0, sizeof(attr));
+		attr.cl.cpu_funcs[0]  = task_region_h;
+		attr.cl.where         = STARPU_CPU;
+		attr.cl.nbuffers      = 1;
+		attr.cl.modes[0]      = STARPU_RW;
+		attr.cl_arg_size      = 3*sizeof(void *);
+		attr.cl_arg_free      = 0;
+		attr.if_clause        = 1;
+		attr.final_clause     = 0;
+		attr.untied_clause    = 1;
+		attr.mergeable_clause = 0;
+
+		i = 0;
+
+		/* slice 1: f = 0, indices [0, half_nx) */
+		cl_arg_1[0] = (void *)(intptr_t)i++;
+		cl_arg_1[1] = (void *)(intptr_t)0;
+		cl_arg_1[2] = (void *)(intptr_t)half_nx;
+		attr.cl_arg           = cl_arg_1;
+		attr.handles          = &vector_slice_1_handle;
+		starpu_omp_task_region(&attr);
+
+		/* slice 2: f = 1, indices [half_nx, nx) */
+		cl_arg_2[0] = (void *)(intptr_t)i++;
+		cl_arg_2[1] = (void *)(intptr_t)half_nx;
+		cl_arg_2[2] = (void *)(intptr_t)nx;
+		attr.cl_arg           = cl_arg_2;
+		attr.handles          = &vector_slice_2_handle;
+		starpu_omp_task_region(&attr);
+	}
+
+	starpu_omp_taskwait();
+}
+
+/*
+ * Callback passed to starpu_omp_master(): set every element of global_vector
+ * to 1 and register the array as a vector handle on STARPU_MAIN_RAM.
+ * The handle is only printed here; other functions of this file obtain it
+ * again through starpu_data_lookup(global_vector).
+ */
+void master_g1(void *arg)
+{
+	(void)arg;
+	starpu_data_handle_t region_vector_handle;
+	int i;
+
+	printf("master_g1: vector ptr = %p\n", global_vector);
+	for (i = 0; i < NX; i++)
+	{
+		global_vector[i] = 1;
+	}
+
+	starpu_vector_data_register(&region_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector, NX, sizeof(global_vector[0]));
+	printf("master_g1: region_vector_handle = %p\n", region_vector_handle);
+}
+
+/*
+ * Callback passed to starpu_omp_master(): look up the handle registered for
+ * global_vector and submit four task_region_g tasks on it in STARPU_RW mode.
+ * Each task receives its integer argument (0, 1, 2, 3) directly in the
+ * cl_arg pointer value.
+ */
+void master_g2(void *arg)
+{
+	(void)arg;
+	starpu_data_handle_t region_vector_handle;
+	struct starpu_omp_task_region_attr attr;
+	int i;
+
+	region_vector_handle = starpu_data_lookup(global_vector);
+	printf("master_g2: region_vector_handle = %p\n", region_vector_handle);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.cl.cpu_funcs[0]  = task_region_g;
+	attr.cl.where         = STARPU_CPU;
+	attr.cl.nbuffers      = 1;
+	attr.cl.modes[0]      = STARPU_RW;
+	attr.handles          = &region_vector_handle;
+	attr.cl_arg_size      = sizeof(void *);
+	attr.cl_arg_free      = 0;
+	attr.if_clause        = 1;
+	attr.final_clause     = 0;
+	attr.untied_clause    = 1;
+	attr.mergeable_clause = 0;
+
+	i = 0;
+
+	/* same attributes for all four tasks; only cl_arg changes */
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+}
+
+/*
+ * Parallel region body, in three phases separated by starpu_omp_barrier():
+ *   1. master_g1 is run through starpu_omp_master();
+ *   2. the handle of global_vector is looked up and printed;
+ *   3. master_g2 is run through starpu_omp_master().
+ * After a final barrier the handle is looked up and printed once more.
+ * buffers and args are unused.
+ */
+void parallel_region_f(void *buffers[], void *args)
+{
+	(void)buffers;
+	(void)args;
+	starpu_omp_master(master_g1, NULL);
+	starpu_omp_barrier();
+	{
+		starpu_data_handle_t region_vector_handle;
+		region_vector_handle = starpu_data_lookup(global_vector);
+		printf("parallel_region block 1: region_vector_handle = %p\n", region_vector_handle);
+	}
+	starpu_omp_barrier();
+	starpu_omp_master(master_g2, NULL);
+	starpu_omp_barrier();
+	{
+		starpu_data_handle_t region_vector_handle;
+		region_vector_handle = starpu_data_lookup(global_vector);
+		printf("parallel_region block 2: region_vector_handle = %p\n", region_vector_handle);
+	}
+}
+
+/*
+ * Test entry point: run parallel_region_f as a CPU parallel region through
+ * starpu_omp_parallel_region() and return 0.
+ * NX must be at least 2 (asserted below).
+ * NOTE(review): the final contents of global_vector are not checked here.
+ */
+int
+main (int argc, char *argv[]) {
+	(void)argc;
+	(void)argv;
+	struct starpu_omp_parallel_region_attr attr;
+
+	assert(NX >= 2);
+	memset(&attr, 0, sizeof(attr));
+	attr.cl.cpu_funcs[0] = parallel_region_f;
+	attr.cl.where        = STARPU_CPU;
+	attr.if_clause       = 1;
+	starpu_omp_parallel_region(&attr);
+	return 0;
+}
+#endif