12 年之前 · 63423b6e8a
--- a/Makefile.am
+++ b/Makefile.am
@@ -77,7 +77,8 @@ versinclude_HEADERS = 				\
 
				 	include/starpu_hash.h			\
			
 
				 	include/starpu_rand.h			\
			
 
				 	include/starpu_cublas.h			\
			
 
				-	include/starpu_driver.h
			
 
				+	include/starpu_driver.h			\
			
 
				+	include/starpu_stdlib.h
			
 
				 
			
 
				 nodist_versinclude_HEADERS = 			\
			
 
				 	include/starpu_config.h
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -43,6 +43,7 @@ typedef unsigned long long uint64_t;
 
				 #include <starpu_data.h>
			
 
				 #include <starpu_data_interfaces.h>
			
 
				 #include <starpu_data_filters.h>
			
 
				+#include <starpu_stdlib.h>
			
 
				 #include <starpu_perfmodel.h>
			
 
				 #include <starpu_worker.h>
			
 
				 #include <starpu_task.h>
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -85,9 +85,6 @@ int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, unsigned node, e
 
				 void starpu_data_release(starpu_data_handle_t handle);
			
 
				 void starpu_data_release_on_node(starpu_data_handle_t handle, unsigned node);
			
 
				 
			
 
				-void starpu_malloc_set_align(size_t align);
			
 
				-int starpu_malloc(void **A, size_t dim);
			
 
				-int starpu_free(void *A);
			
 
				 void starpu_memory_display_stats();
			
 
				 
			
 
				 /* XXX These macros are provided to avoid breaking old codes. But consider
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -135,11 +135,6 @@ int starpu_data_interface_get_next_id(void);
 
				 void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops);
			
 
				 void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc);
			
 
				 
			
 
				-/* Allocate SIZE bytes on node NODE */
			
 
				-uintptr_t starpu_allocate_buffer_on_node(unsigned dst_node, size_t size);
			
 
				-/* Free ADDR on node NODE */
			
 
				-void starpu_free_buffer_on_node(unsigned dst_node, uintptr_t addr, size_t size);
			
 
				-
			
 
				 /* Return the pointer associated with HANDLE on node NODE or NULL if HANDLE's
			
 
				  * interface does not support this operation or data for this handle is not
			
 
				  * allocated on that node. */
			
--- a/include/starpu_stdlib.h
+++ b/include/starpu_stdlib.h
@@ -0,0 +1,41 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2013  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_STDLIB_H__
			
 
				+#define __STARPU_STDLIB_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+void starpu_malloc_set_align(size_t align);
			
 
				+int starpu_malloc(void **A, size_t dim);
			
 
				+int starpu_free(void *A);
			
 
				+
			
 
				+/* Allocate SIZE bytes on node DST_NODE. Returns 0 on failure. */
			
 
				+uintptr_t starpu_allocate_buffer_on_node(unsigned dst_node, size_t size);
			
 
				+/* Free the SIZE bytes at ADDR on node DST_NODE */
			
 
				+void starpu_free_buffer_on_node(unsigned dst_node, uintptr_t addr, size_t size);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_STDLIB_H__ */
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -20,8 +20,6 @@
 
				 #include <core/workers.h>
			
 
				 #include <common/config.h>
			
 
				 #include <starpu.h>
			
 
				-#include <starpu_data.h>
			
 
				-#include <starpu_cuda.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 
			
 
				 static size_t malloc_align = sizeof(void*);
			
@@ -283,3 +281,145 @@ int starpu_free(void *A)
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+static _starpu_pthread_mutex_t cuda_alloc_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				+static _starpu_pthread_mutex_t opencl_alloc_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				+#endif
			
 
				+
			
 
				+/* Allocate SIZE bytes on memory node DST_NODE.
				+ *
				+ * SIZE is first requested from the memory manager; if that request is refused,
				+ * or if the allocation itself fails, 0 is returned and the request is handed
				+ * back.  Under STARPU_SIMGRID, CUDA and OpenCL allocations are simulated by a
				+ * 10µs sleep and yield the placeholder address 1.  Any other node kind ends in
				+ * STARPU_ABORT(). */
				+uintptr_t
				+starpu_allocate_buffer_on_node(unsigned dst_node, size_t size)
				+{
				+	uintptr_t addr = 0;
				+#ifdef STARPU_USE_CUDA
				+	cudaError_t status;
				+#endif
				+
				+	if (_starpu_memory_manager_can_allocate_size(size, dst_node) == 0)
				+		return 0;
				+
				+#ifdef STARPU_DEVEL
				+#warning TODO: we need to use starpu_malloc which should itself inquire from the memory manager is there is enough available memory
				+#endif
				+	switch(starpu_node_get_kind(dst_node))
				+	{
				+		case STARPU_CPU_RAM:
				+			addr = (uintptr_t)malloc(size);
				+			break;
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
				+		case STARPU_CUDA_RAM:
				+#ifdef STARPU_SIMGRID
				+#ifdef STARPU_DEVEL
				+#warning TODO: record used memory, using a simgrid property to know the available memory
				+#endif
				+			/* Sleep 10µs for the allocation */
				+			_STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
				+			MSG_process_sleep(0.000010);
				+			addr = 1;
				+			_STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
				+#else
				+			status = cudaMalloc((void **)&addr, size);
				+			if (!addr || (status != cudaSuccess))
				+			{
				+				/* cudaErrorMemoryAllocation is not reported: the caller just gets 0 */
				+				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
				+					STARPU_CUDA_REPORT_ERROR(status);
				+				addr = 0;
				+			}
				+#endif
				+			break;
				+#endif
				+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
				+		case STARPU_OPENCL_RAM:
				+		{
				+#ifdef STARPU_SIMGRID
				+			/* Sleep 10µs for the allocation */
				+			_STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
				+			MSG_process_sleep(0.000010);
				+			addr = 1;
				+			_STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
				+#else
				+			cl_mem ptr;
				+			int ret = starpu_opencl_allocate_memory(&ptr, size, CL_MEM_READ_WRITE);
				+
				+			addr = ret ? 0 : (uintptr_t)ptr;
				+#endif
				+			/* This break must stay outside the #ifdef: when it lived only
				+			 * in the #else branch, the simulated path fell through to the
				+			 * STARPU_ABORT() below. */
				+			break;
				+		}
				+#endif
				+		default:
				+			STARPU_ABORT();
				+	}
				+
				+	if (addr == 0)
				+	{
				+		/* Allocation failed: give the memory back to the memory manager */
				+		_starpu_memory_manager_deallocate_size(size, dst_node);
				+	}
				+	return addr;
				+}
			
 
				+
			
 
				+/* Free ADDR on memory node DST_NODE and give SIZE bytes back to the memory
				+ * manager.  Under STARPU_SIMGRID, CUDA and OpenCL frees are simulated by a
				+ * 10µs sleep.  Any other node kind ends in STARPU_ABORT(). */
				+void
				+starpu_free_buffer_on_node(unsigned dst_node, uintptr_t addr, size_t size)
				+{
				+	switch(starpu_node_get_kind(dst_node))
				+	{
				+#ifdef STARPU_DEVEL
				+#warning TODO we need to call starpu_free
				+#endif
				+		case STARPU_CPU_RAM:
				+			free((void*)addr);
				+			break;
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
				+		case STARPU_CUDA_RAM:
				+		{
				+#ifdef STARPU_SIMGRID
				+			_STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
				+			/* Sleep 10µs for the free */
				+			MSG_process_sleep(0.000010);
				+			_STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
				+#else
				+			cudaError_t err = cudaFree((void*)addr);
				+			if (STARPU_UNLIKELY(err != cudaSuccess))
				+				STARPU_CUDA_REPORT_ERROR(err);
				+#endif
				+			break;
				+		}
				+#endif
				+#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
				+		case STARPU_OPENCL_RAM:
				+		{
				+#ifdef STARPU_SIMGRID
				+			_STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
				+			/* Sleep 10µs for the free */
				+			MSG_process_sleep(0.000010);
				+			_STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
				+#else
				+			cl_int err = clReleaseMemObject((void*)addr);
				+			if (STARPU_UNLIKELY(err != CL_SUCCESS))
				+				STARPU_OPENCL_REPORT_ERROR(err);
				+#endif
				+			break;
				+		}
				+#endif
				+		default:
				+			STARPU_ABORT();
				+	}
				+	/* Done for every node kind, simulated ones included: allocation asks
				+	 * the memory manager for SIZE in all configurations. */
				+	_starpu_memory_manager_deallocate_size(size, dst_node);
				+}
			
 
				+
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -18,8 +18,7 @@
 
				 #include <datawizard/memory_manager.h>
			
 
				 #include <datawizard/memalloc.h>
			
 
				 #include <datawizard/footprint.h>
			
 
				-#include <starpu_cuda.h>
			
 
				-#include <starpu_opencl.h>
			
 
				+#include <starpu.h>
			
 
				 
			
 
				 /* This per-node RW-locks protect mc_list and memchunk_cache entries */
			
 
				 static _starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
			
@@ -723,147 +722,6 @@ void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned nod
 
				 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
			
 
				 }
			
 
				 
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-static _starpu_pthread_mutex_t cuda_alloc_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				-static _starpu_pthread_mutex_t opencl_alloc_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
			
 
				-#endif
			
 
				-
			
 
				-uintptr_t
			
 
				-starpu_allocate_buffer_on_node(unsigned dst_node, size_t size)
			
 
				-{
			
 
				-	uintptr_t addr = 0;
			
 
				-
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-	cudaError_t status;
			
 
				-#endif
			
 
				-
			
 
				-	if (_starpu_memory_manager_can_allocate_size(size, dst_node) == 0)
			
 
				-		return 0;
			
 
				-
			
 
				-#ifdef STARPU_DEVEL
			
 
				-#warning TODO: we need to use starpu_malloc which should itself inquire from the memory manager is there is enough available memory
			
 
				-#endif
			
 
				-	switch(starpu_node_get_kind(dst_node))
			
 
				-	{
			
 
				-		case STARPU_CPU_RAM:
			
 
				-		{
			
 
				-			addr = (uintptr_t)malloc(size);
			
 
				-			break;
			
 
				-		}
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				-		case STARPU_CUDA_RAM:
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-#ifdef STARPU_DEVEL
			
 
				-#warning TODO: record used memory, using a simgrid property to know the available memory
			
 
				-#endif
			
 
				-			/* Sleep 10µs for the allocation */
			
 
				-			_STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			
 
				-			MSG_process_sleep(0.000010);
			
 
				-			addr = 1;
			
 
				-			_STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
			
 
				-#else
			
 
				-			status = cudaMalloc((void **)&addr, size);
			
 
				-			if (!addr || (status != cudaSuccess))
			
 
				-			{
			
 
				-				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
			
 
				-					STARPU_CUDA_REPORT_ERROR(status);
			
 
				-				addr = 0;
			
 
				-			}
			
 
				-#endif
			
 
				-			break;
			
 
				-#endif
			
 
				-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				-	        case STARPU_OPENCL_RAM:
			
 
				-			{
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-				/* Sleep 10µs for the allocation */
			
 
				-				_STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			
 
				-				MSG_process_sleep(0.000010);
			
 
				-				addr = 1;
			
 
				-				_STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
			
 
				-#else
			
 
				-                                int ret;
			
 
				-				cl_mem ptr;
			
 
				-
			
 
				-				ret = starpu_opencl_allocate_memory(&ptr, size, CL_MEM_READ_WRITE);
			
 
				-				if (ret)
			
 
				-				{
			
 
				-					addr = 0;
			
 
				-				}
			
 
				-				else
			
 
				-				{
			
 
				-					addr = (uintptr_t)ptr;
			
 
				-				}
			
 
				-				break;
			
 
				-#endif
			
 
				-			}
			
 
				-#endif
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-	}
			
 
				-
			
 
				-	if (addr == 0)
			
 
				-	{
			
 
				-		// Allocation failed, gives the memory back to the memory manager
			
 
				-		_starpu_memory_manager_deallocate_size(size, dst_node);
			
 
				-	}
			
 
				-	return addr;
			
 
				-}
			
 
				-
			
 
				-void
			
 
				-starpu_free_buffer_on_node(unsigned dst_node, uintptr_t addr, size_t size)
			
 
				-{
			
 
				-	enum starpu_node_kind kind = starpu_node_get_kind(dst_node);
			
 
				-	switch(kind)
			
 
				-	{
			
 
				-#ifdef STARPU_DEVEL
			
 
				-#warning TODO we need to call starpu_free
			
 
				-#endif
			
 
				-		case STARPU_CPU_RAM:
			
 
				-			free((void*)addr);
			
 
				-			_starpu_memory_manager_deallocate_size(size, dst_node);
			
 
				-			break;
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
			
 
				-		case STARPU_CUDA_RAM:
			
 
				-		{
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-			_STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
			
 
				-			/* Sleep 10µs for the free */
			
 
				-			MSG_process_sleep(0.000010);
			
 
				-			_STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
			
 
				-#else
			
 
				-			cudaError_t err;
			
 
				-			err = cudaFree((void*)addr);
			
 
				-			if (STARPU_UNLIKELY(err != cudaSuccess))
			
 
				-				STARPU_CUDA_REPORT_ERROR(err);
			
 
				-			_starpu_memory_manager_deallocate_size(size, dst_node);
			
 
				-#endif
			
 
				-			break;
			
 
				-		}
			
 
				-#endif
			
 
				-#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				-                case STARPU_OPENCL_RAM:
			
 
				-		{
			
 
				-#ifdef STARPU_SIMGRID
			
 
				-			_STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
			
 
				-			/* Sleep 10µs for the free */
			
 
				-			MSG_process_sleep(0.000010);
			
 
				-			_STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
			
 
				-#else
			
 
				-			cl_int err;
			
 
				-                        err = clReleaseMemObject((void*)addr);
			
 
				-			if (STARPU_UNLIKELY(err != CL_SUCCESS))
			
 
				-				STARPU_OPENCL_REPORT_ERROR(err);
			
 
				-			_starpu_memory_manager_deallocate_size(size, dst_node);
			
 
				-#endif
			
 
				-                        break;
			
 
				-		}
			
 
				-#endif
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * In order to allocate a piece of data, we try to reuse existing buffers if
			
 
				  * its possible.