Browse Source

Application can change the allocation used by StarPU with starpu_malloc_set_hooks()

Samuel Thibault 6 years ago
parent
commit
2eedccc841

+ 2 - 0
ChangeLog

@@ -22,6 +22,8 @@ New features:
   * New schedulers modular-pheft, modular-prandom and modular-prandom-prio
   * Add STARPU_MATRIX_SET_NX/NY/LD and STARPU_VECTOR_SET_NX to change a matrix
     tile or vector size without reallocating the buffer.
+  * Application can change the allocation functions used by StarPU with
+    starpu_malloc_set_hooks()
 
 StarPU 1.3.0 (svn revision xxxx)
 ==============================================

+ 4 - 0
doc/doxygen/chapters/310_data_management.doxy

@@ -183,6 +183,10 @@ transfer to overlap with computations. Otherwise, the trace will show
 that the <c>DriverCopyAsync</c> state takes a lot of time, this is
 because CUDA or OpenCL then reverts to synchronous transfers.
 
+The application can provide its own allocation functions by calling
+starpu_malloc_set_hooks(). StarPU will then use them for all data handle
+allocations in the main memory.
+
 By default, StarPU leaves replicates of data wherever they were used, in case they
 will be re-used by other tasks, thus saving the data transfer time. When some
 task modifies some data, all the other replicates are invalidated, and only the

+ 15 - 1
doc/doxygen/chapters/api/standard_memory_library.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2013,2015-2017                      CNRS
- * Copyright (C) 2009-2011,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2017,2019                 Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -100,6 +100,20 @@ Free memory by specifying its size. The given
 flags should be consistent with the ones given to starpu_malloc_flags()
 when allocating the memory.
 
+\fn void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook)
+\ingroup API_Standard_Memory_Library
+Set the allocation functions to be used by StarPU. By default, StarPU uses
+\c malloc() (or \c cudaHostAlloc() if CUDA GPUs are used) for all its data
+handle allocations. The application can specify other allocation primitives
+by calling this function. The malloc_hook should pass the allocated pointer
+through the \c A parameter and return 0 on success. On allocation failure, it
+should return -ENOMEM. The \c flags parameter contains STARPU_MALLOC_PINNED if
+the memory should be pinned by the hook for GPU transfer efficiency. The hook
+can use starpu_memory_pin() to achieve this. The \c dst_node parameter
+is the StarPU memory node; one can convert it to an hwloc logical id with
+starpu_memory_nodes_numa_id_to_hwloclogid() or to an OS NUMA number with
+starpu_memory_nodes_numa_devid_to_id().
+
 \fn int starpu_memory_pin(void *addr, size_t size)
 \ingroup API_Standard_Memory_Library
 Pin the given memory area, so that CPU-GPU transfers can be done

+ 5 - 0
include/starpu_stdlib.h

@@ -56,6 +56,11 @@ int starpu_free(void *A);
 int starpu_malloc_flags(void **A, size_t dim, int flags);
 int starpu_free_flags(void *A, size_t dim, int flags);
 
+typedef int (*starpu_malloc_hook)(unsigned dst_node, void **A, size_t dim, int flags);
+typedef int (*starpu_free_hook)(unsigned dst_node, void *A, size_t dim, int flags);
+
+void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook);
+
 int starpu_memory_pin(void *addr, size_t size);
 int starpu_memory_unpin(void *addr, size_t size);
 

+ 22 - 5
src/datawizard/malloc.c

@@ -3,7 +3,7 @@
  * Copyright (C) 2011-2014,2016,2017                      Inria
  * Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
  * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2009-2019                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -68,6 +68,15 @@ static unsigned long _starpu_malloc_simulation_fold;
 #endif
 #endif
 
+static starpu_malloc_hook malloc_hook;
+static starpu_free_hook free_hook;
+
+void starpu_malloc_set_hooks(starpu_malloc_hook _malloc_hook, starpu_free_hook _free_hook)
+{
+	malloc_hook = _malloc_hook;
+	free_hook = _free_hook;
+}
+
 void starpu_malloc_set_align(size_t align)
 {
 	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align);
@@ -175,6 +184,12 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 			starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW);
 	}
 
+	if (malloc_hook)
+	{
+		ret = malloc_hook(dst_node, A, dim, flags);
+		goto end;
+	}
+
 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
 	{
 		if (_starpu_can_submit_cuda_task())
@@ -376,9 +391,7 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 				ret = -ENOMEM;
 		}
 
-#if (defined(STARPU_SIMGRID) && (SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559)) || defined(STARPU_USE_CUDA)
 end:
-#endif
 	if (ret == 0)
 	{
 		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim);
@@ -440,6 +453,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
 int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags)
 {
+	if (free_hook)
+	{
+		free_hook(dst_node, A, dim, flags);
+		goto out;
+	}
+
 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
 	{
 		if (_starpu_can_submit_cuda_task())
@@ -540,9 +559,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 	else
 		free(A);
 
-#if !defined(STARPU_SIMGRID) && defined(STARPU_USE_CUDA)
 out:
-#endif
 	if (flags & STARPU_MALLOC_COUNT)
 	{
 		starpu_memory_deallocate(dst_node, dim);