6 years ago · 2eedccc841
--- a/ChangeLog
+++ b/ChangeLog
@@ -22,6 +22,8 @@ New features:
 
				   * New schedulers modular-pheft, modular-prandom and modular-prandom-prio
			
 
				   * Add STARPU_MATRIX_SET_NX/NY/LD and STARPU_VECTOR_SET_NX to change a matrix
			
 
				     tile or vector size without reallocating the buffer.
			
 
				+  * Applications can change the allocation functions used by StarPU with
			
 
				+    starpu_malloc_set_hooks()
			
 
				 
			
 
				 StarPU 1.3.0 (svn revision xxxx)
			
 
				 ==============================================
			
--- a/doc/doxygen/chapters/310_data_management.doxy
+++ b/doc/doxygen/chapters/310_data_management.doxy
@@ -183,6 +183,10 @@ transfer to overlap with computations. Otherwise, the trace will show
 
				 that the <c>DriverCopyAsync</c> state takes a lot of time, this is
			
 
				 because CUDA or OpenCL then reverts to synchronous transfers.
			
 
				 
			
 
				+The application can provide its own allocation functions by calling
			
 
				+starpu_malloc_set_hooks(). StarPU will then use them for all data handle
			
 
				+allocations in the main memory.
			
 
				+
			
 
				 By default, StarPU leaves replicates of data wherever they were used, in case they
			
 
				 will be re-used by other tasks, thus saving the data transfer time. When some
			
 
				 task modifies some data, all the other replicates are invalidated, and only the
			
--- a/doc/doxygen/chapters/api/standard_memory_library.doxy
+++ b/doc/doxygen/chapters/api/standard_memory_library.doxy
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2013,2015-2017                      CNRS
			
 
				- * Copyright (C) 2009-2011,2014-2017                      Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011,2014-2017,2019                 Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -100,6 +100,20 @@ Free memory by specifying its size. The given
 
				 flags should be consistent with the ones given to starpu_malloc_flags()
			
 
				 when allocating the memory.
			
 
				 
			
 
				+\fn void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook)
			
 
				+\ingroup API_Standard_Memory_Library
			
 
				+Set the allocation functions to be used by StarPU. By default, StarPU will use
			
 
				+\c malloc() (or \c cudaHostAlloc() if CUDA GPUs are used) for all its data
			
 
				+handle allocations. The application can specify another allocation primitive
			
 
				+by calling this function. The \c malloc_hook should pass the allocated pointer through
			
 
				+the \c A parameter, and return 0 on success. On allocation failure, it should
			
 
				+return -ENOMEM. The \c flags parameter contains STARPU_MALLOC_PINNED if the
			
 
				+memory should be pinned by the hook for GPU transfer efficiency. The hook
			
 
				+can use starpu_memory_pin() to achieve this.  The \c dst_node parameter
			
 
				+is the StarPU memory node; one can convert it to an hwloc logical id with
			
 
				+starpu_memory_nodes_numa_id_to_hwloclogid() or to an OS NUMA number with
			
 
				+starpu_memory_nodes_numa_devid_to_id().
			
 
				+
			
 
				 \fn int starpu_memory_pin(void *addr, size_t size)
			
 
				 \ingroup API_Standard_Memory_Library
			
 
				 Pin the given memory area, so that CPU-GPU transfers can be done
			
--- a/include/starpu_stdlib.h
+++ b/include/starpu_stdlib.h
@@ -56,6 +56,11 @@ int starpu_free(void *A);
 
				 int starpu_malloc_flags(void **A, size_t dim, int flags);
			
 
				 int starpu_free_flags(void *A, size_t dim, int flags);
			
 
				 
			
 
				+typedef int (*starpu_malloc_hook)(unsigned dst_node, void **A, size_t dim, int flags);
			
 
				+typedef int (*starpu_free_hook)(unsigned dst_node, void *A, size_t dim, int flags);
			
 
				+
			
 
				+void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook);
			
 
				+
			
 
				 int starpu_memory_pin(void *addr, size_t size);
			
 
				 int starpu_memory_unpin(void *addr, size_t size);
			
 
				 
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -3,7 +3,7 @@
 
				  * Copyright (C) 2011-2014,2016,2017                      Inria
			
 
				  * Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
			
 
				  * Copyright (C) 2010-2017, 2019                          CNRS
			
 
				- * Copyright (C) 2009-2018                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2019                                Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -68,6 +68,15 @@ static unsigned long _starpu_malloc_simulation_fold;
 
				 #endif
			
 
				 #endif
			
 
				 
			
 
				+static starpu_malloc_hook malloc_hook; /* application allocation hook; when non-NULL, _starpu_malloc_flags_on_node() calls it and skips the built-in allocation path */
			
 
				+static starpu_free_hook free_hook; /* application free hook; when non-NULL, _starpu_free_flags_on_node() calls it and skips the built-in free path */
			
 
				+
			
 
				+void starpu_malloc_set_hooks(starpu_malloc_hook _malloc_hook, starpu_free_hook _free_hook)
			
 
				+{ /* Record both hooks exactly as given; no check is made here that they are set (or cleared) as a pair. */
			
 
				+	malloc_hook = _malloc_hook;
			
 
				+	free_hook = _free_hook;
			
 
				+}
			
 
				+
			
 
				 void starpu_malloc_set_align(size_t align)
			
 
				 {
			
 
				 	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align);
			
@@ -175,6 +184,12 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 
				 			starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW);
			
 
				 	}
			
 
				 
			
 
				+	if (malloc_hook)
			
 
				+	{
			
 
				+		ret = malloc_hook(dst_node, A, dim, flags);
			
 
				+		goto end;
			
 
				+	}
			
 
				+
			
 
				 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
			
 
				 	{
			
 
				 		if (_starpu_can_submit_cuda_task())
			
@@ -376,9 +391,7 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 
				 				ret = -ENOMEM;
			
 
				 		}
			
 
				 
			
 
				-#if (defined(STARPU_SIMGRID) && (SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559)) || defined(STARPU_USE_CUDA)
			
 
				 end:
			
 
				-#endif
			
 
				 	if (ret == 0)
			
 
				 	{
			
 
				 		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim);
			
@@ -440,6 +453,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
				 
			
 
				 int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags)
			
 
				 {
			
 
				+	if (free_hook)
			
 
				+	{
			
 
				+		free_hook(dst_node, A, dim, flags);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
			
 
				 	{
			
 
				 		if (_starpu_can_submit_cuda_task())
			
@@ -540,9 +559,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 
				 	else
			
 
				 		free(A);
			
 
				 
			
 
				-#if !defined(STARPU_SIMGRID) && defined(STARPU_USE_CUDA)
			
 
				 out:
			
 
				-#endif
			
 
				 	if (flags & STARPU_MALLOC_COUNT)
			
 
				 	{
			
 
				 		starpu_memory_deallocate(dst_node, dim);