Browse Source

Application can change the allocation used by StarPU with starpu_malloc_set_hooks()

Samuel Thibault 6 years ago
parent
commit
2eedccc841

+ 2 - 0
ChangeLog

@@ -22,6 +22,8 @@ New features:
   * New schedulers modular-pheft, modular-prandom and modular-prandom-prio
   * New schedulers modular-pheft, modular-prandom and modular-prandom-prio
   * Add STARPU_MATRIX_SET_NX/NY/LD and STARPU_VECTOR_SET_NX to change a matrix
   * Add STARPU_MATRIX_SET_NX/NY/LD and STARPU_VECTOR_SET_NX to change a matrix
     tile or vector size without reallocating the buffer.
     tile or vector size without reallocating the buffer.
+  * Applications can change the allocation functions used by StarPU with
+    starpu_malloc_set_hooks()
 
 
 StarPU 1.3.0 (svn revision xxxx)
 StarPU 1.3.0 (svn revision xxxx)
 ==============================================
 ==============================================

+ 4 - 0
doc/doxygen/chapters/310_data_management.doxy

@@ -183,6 +183,10 @@ transfer to overlap with computations. Otherwise, the trace will show
 that the <c>DriverCopyAsync</c> state takes a lot of time, this is
 that the <c>DriverCopyAsync</c> state takes a lot of time, this is
 because CUDA or OpenCL then reverts to synchronous transfers.
 because CUDA or OpenCL then reverts to synchronous transfers.
 
 
+The application can provide its own allocation and deallocation functions by
+calling starpu_malloc_set_hooks(). StarPU will then use them for all data handle
+allocations in the main memory.
+
 By default, StarPU leaves replicates of data wherever they were used, in case they
 By default, StarPU leaves replicates of data wherever they were used, in case they
 will be re-used by other tasks, thus saving the data transfer time. When some
 will be re-used by other tasks, thus saving the data transfer time. When some
 task modifies some data, all the other replicates are invalidated, and only the
 task modifies some data, all the other replicates are invalidated, and only the

+ 15 - 1
doc/doxygen/chapters/api/standard_memory_library.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2013,2015-2017                      CNRS
  * Copyright (C) 2010-2013,2015-2017                      CNRS
- * Copyright (C) 2009-2011,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2017,2019                 Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -100,6 +100,20 @@ Free memory by specifying its size. The given
 flags should be consistent with the ones given to starpu_malloc_flags()
 flags should be consistent with the ones given to starpu_malloc_flags()
 when allocating the memory.
 when allocating the memory.
 
 
+\fn void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook)
+\ingroup API_Standard_Memory_Library
+Set the allocation functions to be used by StarPU. By default, StarPU uses
+\c malloc() (or \c cudaHostAlloc() if CUDA GPUs are used) for all its data
+handle allocations. The application can specify other allocation primitives
+by calling this function. The malloc_hook should return the allocated pointer
+through the \c A parameter, and return 0 on success. On allocation failure, it
+should return -ENOMEM. The \c flags parameter contains STARPU_MALLOC_PINNED if
+the memory should be pinned by the hook for GPU transfer efficiency. The hook
+can use starpu_memory_pin() to achieve this. The \c dst_node parameter
+is the StarPU memory node; one can convert it to an hwloc logical id with
+starpu_memory_nodes_numa_id_to_hwloclogid() or to an OS NUMA number with
+starpu_memory_nodes_numa_devid_to_id().
+
 \fn int starpu_memory_pin(void *addr, size_t size)
 \fn int starpu_memory_pin(void *addr, size_t size)
 \ingroup API_Standard_Memory_Library
 \ingroup API_Standard_Memory_Library
 Pin the given memory area, so that CPU-GPU transfers can be done
 Pin the given memory area, so that CPU-GPU transfers can be done

+ 5 - 0
include/starpu_stdlib.h

@@ -56,6 +56,11 @@ int starpu_free(void *A);
 int starpu_malloc_flags(void **A, size_t dim, int flags);
 int starpu_malloc_flags(void **A, size_t dim, int flags);
 int starpu_free_flags(void *A, size_t dim, int flags);
 int starpu_free_flags(void *A, size_t dim, int flags);
 
 
+typedef int (*starpu_malloc_hook)(unsigned dst_node, void **A, size_t dim, int flags);
+typedef int (*starpu_free_hook)(unsigned dst_node, void *A, size_t dim, int flags);
+
+void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook);
+
 int starpu_memory_pin(void *addr, size_t size);
 int starpu_memory_pin(void *addr, size_t size);
 int starpu_memory_unpin(void *addr, size_t size);
 int starpu_memory_unpin(void *addr, size_t size);
 
 

+ 22 - 5
src/datawizard/malloc.c

@@ -3,7 +3,7 @@
  * Copyright (C) 2011-2014,2016,2017                      Inria
  * Copyright (C) 2011-2014,2016,2017                      Inria
  * Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
  * Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
  * Copyright (C) 2010-2017, 2019                          CNRS
  * Copyright (C) 2010-2017, 2019                          CNRS
- * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2009-2019                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -68,6 +68,15 @@ static unsigned long _starpu_malloc_simulation_fold;
 #endif
 #endif
 #endif
 #endif
 
 
+static starpu_malloc_hook malloc_hook;
+static starpu_free_hook free_hook;
+
+void starpu_malloc_set_hooks(starpu_malloc_hook _malloc_hook, starpu_free_hook _free_hook)
+{
+	malloc_hook = _malloc_hook; /* when non-NULL, _starpu_malloc_flags_on_node() calls this hook instead of its built-in allocation path */
+	free_hook = _free_hook; /* when non-NULL, _starpu_free_flags_on_node() calls this hook instead of its built-in free path; its return value is not checked there */
+}
+
 void starpu_malloc_set_align(size_t align)
 void starpu_malloc_set_align(size_t align)
 {
 {
 	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align);
 	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align);
@@ -175,6 +184,12 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 			starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW);
 			starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW);
 	}
 	}
 
 
+	if (malloc_hook)
+	{
+		ret = malloc_hook(dst_node, A, dim, flags);
+		goto end;
+	}
+
 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
 	{
 	{
 		if (_starpu_can_submit_cuda_task())
 		if (_starpu_can_submit_cuda_task())
@@ -376,9 +391,7 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 				ret = -ENOMEM;
 				ret = -ENOMEM;
 		}
 		}
 
 
-#if (defined(STARPU_SIMGRID) && (SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559)) || defined(STARPU_USE_CUDA)
 end:
 end:
-#endif
 	if (ret == 0)
 	if (ret == 0)
 	{
 	{
 		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim);
 		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim);
@@ -440,6 +453,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
 
 int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags)
 int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags)
 {
 {
+	if (free_hook)
+	{
+		free_hook(dst_node, A, dim, flags);
+		goto out;
+	}
+
 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
 	if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0)
 	{
 	{
 		if (_starpu_can_submit_cuda_task())
 		if (_starpu_can_submit_cuda_task())
@@ -540,9 +559,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
 	else
 	else
 		free(A);
 		free(A);
 
 
-#if !defined(STARPU_SIMGRID) && defined(STARPU_USE_CUDA)
 out:
 out:
-#endif
 	if (flags & STARPU_MALLOC_COUNT)
 	if (flags & STARPU_MALLOC_COUNT)
 	{
 	{
 		starpu_memory_deallocate(dst_node, dim);
 		starpu_memory_deallocate(dst_node, dim);