Przeglądaj źródła

- add support for OpenMP region/task context-aware starpu_data_lookup
- add test case

Olivier Aumage 11 lat temu
rodzic
commit
3c50f4fc6f

+ 4 - 0
src/datawizard/coherency.h

@@ -210,6 +210,10 @@ struct _starpu_data_state
 
 	unsigned lazy_unregister;
 
+#ifdef STARPU_OPENMP
+	unsigned removed_from_context_hash;
+#endif
+
         /* Used for MPI */
         int rank;
 	int tag;

+ 134 - 25
src/datawizard/interfaces/data_interface.c

@@ -24,14 +24,9 @@
 #include <core/task.h>
 #include <core/workers.h>
 #include <datawizard/memstats.h>
-
-/* Entry in the `registered_handles' hash table.  */
-struct handle_entry
-{
-	UT_hash_handle hh;
-	void *pointer;
-	starpu_data_handle_t handle;
-};
+#ifdef STARPU_OPENMP
+#include <util/openmp_runtime_support.h>
+#endif
 
 /* Hash table mapping host pointers to data handles.  */
 static struct handle_entry *registered_handles;
@@ -85,6 +80,34 @@ void _starpu_data_interface_shutdown()
 	registered_tag_handles = NULL;
 }
 
+#ifdef STARPU_OPENMP
+void _starpu_omp_unregister_region_handles(struct starpu_omp_region *region) /* Empty REGION's pointer->handle table, submitting an unregistration for every handle found. */
+{
+	_starpu_spin_lock(&region->registered_handles_lock); /* held for the whole drain */
+	struct handle_entry *entry, *tmp; /* tmp is the extra cursor HASH_ITER needs; entry is deleted and freed in the body */
+	HASH_ITER(hh, (region->registered_handles), entry, tmp)
+	{
+		entry->handle->removed_from_context_hash = 1; /* makes _starpu_data_unregister_ram_pointer() return early for this handle */
+		HASH_DEL(region->registered_handles, entry);
+		starpu_data_unregister_submit(entry->handle); /* NOTE(review): called with the spinlock held -- confirm it cannot block */
+		free(entry); /* the entry itself is released here */
+	}
+	_starpu_spin_unlock(&region->registered_handles_lock);
+}
+
+void _starpu_omp_unregister_task_handles(struct starpu_omp_task *task) /* Empty TASK's pointer->handle table, submitting an unregistration for every handle found. */
+{
+	struct handle_entry *entry, *tmp; /* no lock is taken, unlike the region variant -- presumably the table is private to the task; TODO confirm */
+	HASH_ITER(hh, task->registered_handles, entry, tmp)
+	{
+		entry->handle->removed_from_context_hash = 1; /* makes _starpu_data_unregister_ram_pointer() return early for this handle */
+		HASH_DEL(task->registered_handles, entry);
+		starpu_data_unregister_submit(entry->handle);
+		free(entry); /* the entry itself is released here */
+	}
+}
+#endif
+
 struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id)
 {
 	switch (interface_id)
@@ -131,26 +154,80 @@ void _starpu_data_register_ram_pointer(starpu_data_handle_t handle, void *ptr)
 	entry->pointer = ptr;
 	entry->handle = handle;
 
-	_starpu_spin_lock(&registered_handles_lock);
-	HASH_ADD_PTR(registered_handles, pointer, entry);
-	_starpu_spin_unlock(&registered_handles_lock);
+#ifdef STARPU_OPENMP
+	struct starpu_omp_task *task = _starpu_omp_get_task();
+	if (task)
+	{
+		if (task->is_implicit)
+		{
+			struct starpu_omp_region *parallel_region = task->owner_region;
+			_starpu_spin_lock(&parallel_region->registered_handles_lock);
+			HASH_ADD_PTR(parallel_region->registered_handles, pointer, entry);
+			_starpu_spin_unlock(&parallel_region->registered_handles_lock);
+		}
+		else
+		{
+			HASH_ADD_PTR(task->registered_handles, pointer, entry);
+		}
+	}
+	else
+#endif
+	{
+		_starpu_spin_lock(&registered_handles_lock);
+		HASH_ADD_PTR(registered_handles, pointer, entry);
+		_starpu_spin_unlock(&registered_handles_lock);
+	}
 }
 
 starpu_data_handle_t starpu_data_lookup(const void *ptr)
 {
 	starpu_data_handle_t result;
 
-	_starpu_spin_lock(&registered_handles_lock);
+#ifdef STARPU_OPENMP
+	struct starpu_omp_task *task = _starpu_omp_get_task(); /* a non-NULL task selects a per-context table instead of the global one */
+	if (task)
 	{
-		struct handle_entry *entry;
+		if (task->is_implicit) /* implicit task: use the table of the owning parallel region */
+		{
+			struct starpu_omp_region *parallel_region = task->owner_region;
+			_starpu_spin_lock(&parallel_region->registered_handles_lock); /* region table is accessed under its own spinlock */
+			{
+				struct handle_entry *entry;
+
-		HASH_FIND_PTR(registered_handles, &ptr, entry);
-		if(STARPU_UNLIKELY(entry == NULL))
-			result = NULL;
+				HASH_FIND_PTR(parallel_region->registered_handles, &ptr, entry);
+				if(STARPU_UNLIKELY(entry == NULL))
+					result = NULL; /* a miss is final: the global table is not consulted as a fallback */
+				else
+					result = entry->handle;
+			}
+			_starpu_spin_unlock(&parallel_region->registered_handles_lock);
+		}
 		else
-			result = entry->handle;
+		{ /* non-implicit task: use the task's own table, accessed without a lock */
+			struct handle_entry *entry;
+
+			HASH_FIND_PTR(task->registered_handles, &ptr, entry);
+			if(STARPU_UNLIKELY(entry == NULL))
+				result = NULL; /* a miss is final here too */
+			else
+				result = entry->handle;
+		}
+	}
+	else
+#endif
+	{ /* no current OpenMP task (or OpenMP support compiled out): global table, as before */
+		_starpu_spin_lock(&registered_handles_lock);
+		{
+			struct handle_entry *entry;
+
+			HASH_FIND_PTR(registered_handles, &ptr, entry);
+			if(STARPU_UNLIKELY(entry == NULL))
+				result = NULL;
+			else
+				result = entry->handle;
+		}
+		_starpu_spin_unlock(&registered_handles_lock);
 	}
-	_starpu_spin_unlock(&registered_handles_lock);
 
 	return result;
 }
@@ -489,21 +566,53 @@ struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_hand
 void _starpu_data_unregister_ram_pointer(starpu_data_handle_t handle)
 {
 	const void *ram_ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
+#ifdef STARPU_OPENMP
+	if (handle->removed_from_context_hash) /* entry was already dropped by _starpu_omp_unregister_{region,task}_handles() */
+		return;
+#endif
 	if (ram_ptr != NULL)
 	{
 		/* Remove the PTR -> HANDLE mapping.  If a mapping from PTR
 		 * to another handle existed before (e.g., when using
 		 * filters), it becomes visible again.  */
-		struct handle_entry *entry;
+#ifdef STARPU_OPENMP
+		struct starpu_omp_task *task = _starpu_omp_get_task(); /* selects the table to search: region, task, or global */
+		if (task)
+		{
+			if (task->is_implicit) /* implicit task: entry lives in the owning region's table */
+			{
+				struct starpu_omp_region *parallel_region = task->owner_region;
+				struct handle_entry *entry;
+				_starpu_spin_lock(&parallel_region->registered_handles_lock);
+				HASH_FIND_PTR(parallel_region->registered_handles, &ram_ptr, entry);
+				STARPU_ASSERT(entry != NULL); /* the handle must have been registered in this same region */
+				HASH_DEL(parallel_region->registered_handles, entry); /* delete from the table the entry was found in, not the global one */
+				free(entry);
+				_starpu_spin_unlock(&parallel_region->registered_handles_lock);
+			}
+			else
+			{ /* non-implicit task: entry lives in the task's own table, accessed without a lock */
+				struct handle_entry *entry;
+				HASH_FIND_PTR(task->registered_handles, &ram_ptr, entry);
+				STARPU_ASSERT(entry != NULL); /* the handle must have been registered in this same task */
+				HASH_DEL(task->registered_handles, entry);
+				free(entry);
+			}
+		}
+		else
+#endif
+		{ /* no current OpenMP task (or OpenMP support compiled out): global table, as before */
+			struct handle_entry *entry;
 
-		_starpu_spin_lock(&registered_handles_lock);
-		HASH_FIND_PTR(registered_handles, &ram_ptr, entry);
-		STARPU_ASSERT(entry != NULL);
+			_starpu_spin_lock(&registered_handles_lock);
+			HASH_FIND_PTR(registered_handles, &ram_ptr, entry);
+			STARPU_ASSERT(entry != NULL);
 
-		HASH_DEL(registered_handles, entry);
-		free(entry);
+			HASH_DEL(registered_handles, entry);
+			free(entry);
 
-		_starpu_spin_unlock(&registered_handles_lock);
+			_starpu_spin_unlock(&registered_handles_lock);
+		}
 	}
 }
 

+ 17 - 0
src/datawizard/interfaces/data_interface.h

@@ -20,6 +20,18 @@
 
 #include <starpu.h>
 #include <common/config.h>
+#include <common/uthash.h>
+#ifdef STARPU_OPENMP
+#include <util/openmp_runtime_support.h>
+#endif
+
+/* Entry in a `registered_handles' hash table: the global one, or the one owned by an OpenMP region or task.  */
+struct handle_entry
+{
+	UT_hash_handle hh; /* uthash bookkeeping; the HASH_ITER calls name this field `hh' */
+	void *pointer; /* key: host pointer, the field named in HASH_ADD_PTR */
+	starpu_data_handle_t handle; /* value: the handle starpu_data_lookup() returns for `pointer' */
+};
 
 /* Generic type representing an interface, for now it's only used before
  * execution on message-passing devices but it can be useful in other cases.
@@ -57,6 +69,11 @@ extern void _starpu_data_interface_init(void) STARPU_ATTRIBUTE_INTERNAL;
 extern int _starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_INTERNAL;
 extern void _starpu_data_interface_shutdown(void) STARPU_ATTRIBUTE_INTERNAL;
 
+#ifdef STARPU_OPENMP
+void _starpu_omp_unregister_region_handles(struct starpu_omp_region *region);
+void _starpu_omp_unregister_task_handles(struct starpu_omp_task *task);
+#endif
+
 struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id);
 
 extern void _starpu_data_register_ram_pointer(starpu_data_handle_t handle,

+ 10 - 2
src/util/openmp_runtime_support.c

@@ -28,6 +28,7 @@
 #include <common/list.h>
 #include <common/starpu_spinlock.h>
 #include <common/uthash.h>
+#include <datawizard/interfaces/data_interface.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <strings.h>
@@ -218,6 +219,7 @@ static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_regi
 	region->implicit_task_list = starpu_omp_task_list_new();
 
 	_starpu_spin_init(&region->lock);
+	_starpu_spin_init(&region->registered_handles_lock);
 	region->level = (parent_region != NULL)?parent_region->level+1:0;
 	return region;
 }
@@ -230,6 +232,7 @@ static void destroy_omp_region_struct(struct starpu_omp_region *region)
 	STARPU_ASSERT(region->continuation_starpu_task == NULL);
 	starpu_omp_thread_list_delete(region->thread_list);
 	starpu_omp_task_list_delete(region->implicit_task_list);
+	_starpu_spin_destroy(&region->registered_handles_lock);
 	_starpu_spin_destroy(&region->lock);
 	memset(region, 0, sizeof(*region));
 	free(region);
@@ -278,6 +281,7 @@ static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task)
 {
 	STARPU_ASSERT(!task->is_implicit);
 	task->f(task->starpu_buffers, task->starpu_cl_arg);
+	_starpu_omp_unregister_task_handles(task);
 	task->state = starpu_omp_task_state_terminated;
 	struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
 	/* 
@@ -291,11 +295,15 @@ static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task)
 
 static void starpu_omp_implicit_task_entry(struct starpu_omp_task *task)
 {
+	struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
 	STARPU_ASSERT(task->is_implicit);
 	task->f(task->starpu_buffers, task->starpu_cl_arg);
 	starpu_omp_barrier();
+	if (thread == task->owner_region->master_thread)
+	{
+		_starpu_omp_unregister_region_handles(task->owner_region);
+	}
 	task->state = starpu_omp_task_state_terminated;
-	struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
 	/* 
 	 * the task reached the terminated state, definitively give hand back to the worker code.
 	 *
@@ -517,7 +525,7 @@ static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *pa
 
 static void destroy_omp_task_struct(struct starpu_omp_task *task)
 {
-	STARPU_ASSERT(task->state == starpu_omp_task_state_terminated);
+	STARPU_ASSERT(task->state == starpu_omp_task_state_terminated || (task->state == starpu_omp_task_state_zombie && task->child_task_count == 0));
 	STARPU_ASSERT(task->nested_region == NULL);
 	STARPU_ASSERT(task->starpu_task == NULL);
 	STARPU_ASSERT(task->stack == NULL);

+ 3 - 0
src/util/openmp_runtime_support.h

@@ -231,6 +231,7 @@ LIST_TYPE(starpu_omp_task,
 	int sections_id;
 	struct starpu_omp_data_environment_icvs data_env_icvs;
 	struct starpu_omp_implicit_task_icvs implicit_task_icvs;
+	struct handle_entry *registered_handles;
 
 	struct starpu_task *starpu_task;
 	struct starpu_codelet cl;
@@ -318,6 +319,8 @@ struct starpu_omp_region
 	struct starpu_omp_loop *loop_list;
 	struct starpu_omp_sections *sections_list;
 	struct starpu_task *continuation_starpu_task;
+	struct handle_entry *registered_handles;
+	struct _starpu_spinlock registered_handles_lock;
 };
 
 struct starpu_omp_device

+ 4 - 0
tests/Makefile.am

@@ -242,6 +242,7 @@ noinst_PROGRAMS =				\
 	openmp/parallel_sections_01		\
 	openmp/parallel_sections_combined_01	\
 	openmp/task_01				\
+	openmp/task_02				\
 	openmp/taskwait_01			\
 	openmp/taskgroup_01			\
 	overlap/overlap				\
@@ -520,6 +521,9 @@ openmp_parallel_sections_combined_01_SOURCES = 	\
 openmp_task_01_SOURCES = 	\
 	openmp/task_01.c
 
+openmp_task_02_SOURCES = 	\
+	openmp/task_02.c
+
 openmp_taskwait_01_SOURCES = 	\
 	openmp/taskwait_01.c
 

+ 196 - 0
tests/openmp/task_02.c

@@ -0,0 +1,196 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014  Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <pthread.h>
+#include <starpu.h>
+#include "../helper.h"
+#include <stdio.h>
+
+#if !defined(STARPU_OPENMP)
+int main(int argc, char **argv) /* entry point used when STARPU_OPENMP is not defined */
+{
+	return STARPU_TEST_SKIPPED; /* nothing to exercise without OpenMP support */
+}
+#else
+#define	NX	64
+int global_vector[NX];
+
+__attribute__((constructor)) /* compiler extension: run this function before main() */
+static void omp_constructor(void)
+{
+	int ret = starpu_omp_init(); /* bring up the StarPU OpenMP runtime */
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); /* check ret; the string names the call being checked */
+}
+
+__attribute__((destructor)) /* compiler extension: run this function after main() returns or exit() is called */
+static void omp_destructor(void)
+{
+	starpu_omp_shutdown(); /* counterpart of the starpu_omp_init() call made in omp_constructor() */
+}
+
+void task_region_h(void *buffers[], void *args) /* depth-2 task body: adds the integer carried in `args' to every vector element */
+{
+	struct starpu_vector_interface *_vector = buffers[0]; /* the single buffer is a vector interface */
+	int nx = STARPU_VECTOR_GET_NX(_vector);
+	int *v = (int *)STARPU_VECTOR_GET_PTR(_vector);
+	int f = (int)(intptr_t)args; /* args is an integer smuggled through the pointer, not an address */
+	int i;
+
+	printf("depth 2 task, entry: vector ptr = %p\n", (void *)v); /* %p requires a void * argument */
+
+	for (i = 0; i < nx; i++)
+	{
+                v[i] += f;
+	}
+
+	printf("depth 2 task ending\n");
+}
+
+void task_region_g(void *buffers[], void *args) /* depth-1 task body: updates the vector, registers it from inside the task, looks it up again, spawns two task_region_h tasks */
+{
+	struct starpu_vector_interface *_vector = buffers[0]; /* the single buffer is a vector interface */
+
+	int nx = STARPU_VECTOR_GET_NX(_vector);
+	int *v = (int *)STARPU_VECTOR_GET_PTR(_vector);
+	int f = (int)(intptr_t)args; /* args is an integer smuggled through the pointer, not an address */
+	
+	printf("depth 1 task, entry: vector ptr = %p\n", (void *)v); /* %p requires a void * argument */
+
+	{
+		starpu_data_handle_t task_vector_handle;
+		int i;
+
+		for (i = 0; i < nx; i++)
+		{
+			v[i] += f;
+		}
+
+		starpu_vector_data_register(&task_vector_handle, STARPU_MAIN_RAM, (uintptr_t)v, NX, sizeof(v[0])); /* registration made while running inside this task */
+		printf("depth 1 task, block 1: task_vector_handle = %p\n", (void *)task_vector_handle);
+	}
+
+	{
+		starpu_data_handle_t task_vector_handle; /* fresh variable: the handle is recovered by lookup, not carried over from block 1 */
+		starpu_omp_task_region_attr_t attr;
+		int i;
+
+		task_vector_handle = starpu_data_lookup(v); /* expected to print the same handle value as block 1 */
+		printf("depth 1 task, block 2: task_vector_handle = %p\n", (void *)task_vector_handle);
+
+		memset(&attr, 0, sizeof(attr)); /* start from all-zero attributes, then set only what the test needs */
+		attr.cl.cpu_funcs[0]  = task_region_h;
+		attr.cl.where         = STARPU_CPU;
+		attr.cl.nbuffers      = 1;
+		attr.cl.modes[0]      = STARPU_RW;
+		attr.handles          = &task_vector_handle;
+		attr.cl_arg_size      = sizeof(void *);
+		attr.cl_arg_free      = 0;
+		attr.if_clause        = 1;
+		attr.final_clause     = 0;
+		attr.untied_clause    = 1;
+		attr.mergeable_clause = 0;
+
+		i = 0;
+
+		attr.cl_arg = (void *)(intptr_t)i++; /* first child receives 0 */
+		starpu_omp_task_region(&attr);
+		attr.cl_arg = (void *)(intptr_t)i++; /* second child receives 1 */
+		starpu_omp_task_region(&attr);
+	}
+
+	starpu_omp_taskwait(); /* called after both submissions, before this task body returns */
+}
+
+void master_g1(void *arg) /* first master section: fills global_vector with 1 and registers it as a StarPU vector */
+{
+	starpu_data_handle_t region_vector_handle; /* not stored anywhere: later code recovers it with starpu_data_lookup() */
+	int i;
+
+	printf("master_g1: vector ptr = %p\n", (void *)global_vector); /* %p requires a void * argument */
+	for (i = 0; i < NX; i++)
+	{
+		global_vector[i] = 1;
+	}
+
+	starpu_vector_data_register(&region_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector, NX, sizeof(global_vector[0]));
+	printf("master_g1: region_vector_handle = %p\n", (void *)region_vector_handle);
+}
+
+void master_g2(void *arg) /* second master section: looks up the handle for global_vector and submits four task_region_g tasks on it */
+{
+	starpu_data_handle_t region_vector_handle;
+	starpu_omp_task_region_attr_t attr;
+	int i;
+
+	region_vector_handle = starpu_data_lookup(global_vector); /* expected to print the handle value master_g1 printed */
+	printf("master_g2: region_vector_handle = %p\n", (void *)region_vector_handle); /* %p requires a void * argument */
+
+	memset(&attr, 0, sizeof(attr)); /* start from all-zero attributes, then set only what the test needs */
+	attr.cl.cpu_funcs[0]  = task_region_g;
+	attr.cl.where         = STARPU_CPU;
+	attr.cl.nbuffers      = 1;
+	attr.cl.modes[0]      = STARPU_RW;
+	attr.handles          = &region_vector_handle;
+	attr.cl_arg_size      = sizeof(void *);
+	attr.cl_arg_free      = 0;
+	attr.if_clause        = 1;
+	attr.final_clause     = 0;
+	attr.untied_clause    = 1;
+	attr.mergeable_clause = 0;
+
+	i = 0;
+
+	attr.cl_arg = (void *)(intptr_t)i++; /* the four submissions carry 0, 1, 2 and 3 as their integer argument */
+	starpu_omp_task_region(&attr);
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+	attr.cl_arg = (void *)(intptr_t)i++;
+	starpu_omp_task_region(&attr);
+}
+
+void parallel_region_f(void *buffers[], void *args) /* parallel region body: registers in master_g1, then looks the handle up before and after master_g2 submits tasks */
+{
+	starpu_omp_master(master_g1, NULL); /* registration step */
+	starpu_omp_barrier(); /* separates the registration from the lookups below */
+	{
+		starpu_data_handle_t region_vector_handle;
+		region_vector_handle = starpu_data_lookup(global_vector); /* expected to print the handle value master_g1 printed */
+		printf("parallel_region block 1: region_vector_handle = %p\n", (void *)region_vector_handle); /* %p requires a void * argument */
+	}
+	starpu_omp_barrier();
+	starpu_omp_master(master_g2, NULL); /* task submission step */
+	starpu_omp_barrier();
+	{
+		starpu_data_handle_t region_vector_handle;
+		region_vector_handle = starpu_data_lookup(global_vector); /* same lookup again, after the tasks were submitted */
+		printf("parallel_region block 2: region_vector_handle = %p\n", (void *)region_vector_handle);
+	}
+}
+
+int
+main (int argc, char *argv[]) { /* runs parallel_region_f as a single OpenMP parallel region */
+	starpu_omp_parallel_region_attr_t attr;
+
+	memset(&attr, 0, sizeof(attr)); /* start from all-zero attributes, then set only what the test needs */
+	attr.cl.cpu_funcs[0] = parallel_region_f;
+	attr.cl.where        = STARPU_CPU;
+	attr.if_clause       = 1;
+	starpu_omp_parallel_region(&attr);
+	return 0; /* always 0: the test prints handle values but asserts nothing about them */
+}
+#endif