Browse Source

Add STARPU_MALLOC_NORECLAIM flag to allocate without running a reclaim if the node is out of memory

Samuel Thibault 10 years ago
parent
commit
a60fedf197

+ 10 - 0
doc/doxygen/chapters/api/standard_memory_library.doxy

@@ -34,6 +34,16 @@ If no memory is available, it tries to reclaim memory from StarPU.
 Memory allocated this way needs to be freed by calling the function
 starpu_free_flags() with the same flag.
 
+\def STARPU_MALLOC_NORECLAIM
+\ingroup API_Standard_Memory_Library
+Value passed to the function starpu_malloc_flags() along with STARPU_MALLOC_COUNT
+to indicate that while the memory allocation should be kept within the limits
+defined for STARPU_MALLOC_COUNT, no reclaiming should be performed by
+starpu_malloc_flags() itself, thus potentially overflowing the memory node a
+bit. StarPU will reclaim memory after the next task termination, according to
+the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM environment
+variables.
+
 \fn int starpu_malloc_flags(void **A, size_t dim, int flags)
 \ingroup API_Standard_Memory_Library
 Performs a memory allocation based on the constraints defined

+ 1 - 0
include/starpu_stdlib.h

@@ -27,6 +27,7 @@ extern "C"
 
 #define STARPU_MALLOC_PINNED	((1ULL)<<1)
 #define STARPU_MALLOC_COUNT	((1ULL)<<3)
+#define STARPU_MALLOC_NORECLAIM	((1ULL)<<4)
 
 void starpu_malloc_set_align(size_t align);
 

+ 27 - 13
src/datawizard/malloc.c

@@ -92,21 +92,24 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
 	if (flags & STARPU_MALLOC_COUNT)
 	{
-		while (_starpu_memory_manager_can_allocate_size(dim, STARPU_MAIN_RAM) == 0)
-		{
-			size_t freed;
-			size_t reclaim = 2 * dim;
-			_STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", reclaim);
-			_STARPU_TRACE_START_MEMRECLAIM(0,0);
-			freed = _starpu_memory_reclaim_generic(0, 0, reclaim);
-			_STARPU_TRACE_END_MEMRECLAIM(0,0);
-			if (freed < dim)
+		if (!(flags & STARPU_MALLOC_NORECLAIM))
+			while (_starpu_memory_manager_can_allocate_size(dim, STARPU_MAIN_RAM) == 0)
 			{
-				// We could not reclaim enough memory
-				*A = NULL;
-				return -ENOMEM;
+				size_t freed;
+				size_t reclaim = 2 * dim;
+				_STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", reclaim);
+				_STARPU_TRACE_START_MEMRECLAIM(0,0);
+				freed = _starpu_memory_reclaim_generic(0, 0, reclaim);
+				_STARPU_TRACE_END_MEMRECLAIM(0,0);
+				if (freed < dim)
+				{
+					// We could not reclaim enough memory
+					*A = NULL;
+					return -ENOMEM;
+				}
 			}
-		}
+		else
+			_starpu_memory_manager_allocate_size(dim, STARPU_MAIN_RAM);
 	}
 
 	if (flags & STARPU_MALLOC_PINNED && starpu_get_env_number("STARPU_DISABLE_PINNING") <= 0 && RUNNING_ON_VALGRIND == 0)
@@ -125,7 +128,10 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 			cudaError_t cures;
 			cures = cudaHostAlloc(A, dim, cudaHostAllocPortable);
 			if (STARPU_UNLIKELY(cures))
+			{
 				STARPU_CUDA_REPORT_ERROR(cures);
+				ret = -ENOMEM;
+			}
 			goto end;
 #else
 			int push_res;
@@ -207,11 +213,15 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 		if (_malloc_align != sizeof(void*))
 		{
 			*A = memalign(_malloc_align, dim);
+			if (!*A)
+				ret = -ENOMEM;
 		}
 		else
 #endif /* STARPU_HAVE_POSIX_MEMALIGN */
 		{
 			*A = malloc(dim);
+			if (!*A)
+				ret = -ENOMEM;
 		}
 
 end:
@@ -219,6 +229,10 @@ end:
 	{
 		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %ld b\n", dim);
 	}
+	else if (flags & STARPU_MALLOC_COUNT)
+	{
+		_starpu_memory_manager_deallocate_size(dim, 0);
+	}
 
 	return ret;
 }

+ 7 - 0
src/datawizard/memory_manager.c

@@ -72,6 +72,13 @@ int _starpu_memory_manager_can_allocate_size(size_t size, unsigned node)
 	return ret;
 }
 
+void _starpu_memory_manager_allocate_size(size_t size, unsigned node)
+{
+	STARPU_PTHREAD_MUTEX_LOCK(&lock_nodes[node]);
+	used_size[node] += size;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&lock_nodes[node]);
+}
+
 void _starpu_memory_manager_deallocate_size(size_t size, unsigned node)
 {
 	STARPU_PTHREAD_MUTEX_LOCK(&lock_nodes[node]);

+ 10 - 2
src/datawizard/memory_manager.h

@@ -42,15 +42,23 @@ void _starpu_memory_manager_set_global_memory_size(unsigned node, size_t size);
 size_t _starpu_memory_manager_get_global_memory_size(unsigned node);
 
 /**
- * Indicates if memory can be allocated on the given node
+ * Try to allocate memory on the given node
  *
  * @param size amount of memory to allocate
  * @param node node where the memory is to be allocated
- * @return 1 if the given amount of memory can be allocated on the given node
+ * @return 1 if the given amount of memory was allocated on the given node
  */
 int _starpu_memory_manager_can_allocate_size(size_t size, unsigned node) STARPU_WARN_UNUSED_RESULT;
 
 /**
+ * Allocate memory on the given node, without caring about overflowing
+ *
+ * @param size amount of memory to allocate
+ * @param node node where the memory is to be allocated
+ */
+void _starpu_memory_manager_allocate_size(size_t size, unsigned node);
+
+/**
  * Indicates the given amount of memory is going to be deallocated from the given node
  *
  * @param size amount of memory to be deallocated

+ 1 - 0
tests/Makefile.am

@@ -182,6 +182,7 @@ noinst_PROGRAMS =				\
 	datawizard/handle_to_pointer		\
 	datawizard/lazy_allocation		\
 	datawizard/lazy_unregister		\
+	datawizard/noreclaim			\
 	datawizard/interfaces/copy_interfaces	\
 	datawizard/interfaces/block/block_interface \
 	datawizard/interfaces/bcsr/bcsr_interface \

+ 129 - 0
tests/datawizard/noreclaim.c

@@ -0,0 +1,129 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014  Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This test stresses the memory allocation system and should force StarPU to
+ * reclaim memory from time to time.
+ */
+
+#include <assert.h>
+#include <starpu.h>
+#include "../helper.h"
+
+void dummy_func(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
+{
+}
+
+static struct starpu_codelet dummy_cl =
+{
+	.cpu_funcs = {dummy_func},
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+static void emit_task(starpu_data_handle_t handle)
+{
+	struct starpu_task *task = starpu_task_create();
+	int ret;
+	task->cl = &dummy_cl;
+	task->handles[0] = handle;
+	ret = starpu_task_submit(task);
+	STARPU_ASSERT(ret == 0);
+}
+
+static struct starpu_codelet empty_cl =
+{
+	.cpu_funcs = {dummy_func},
+	.nbuffers = 0,
+};
+
+static void emit_empty_task(void)
+{
+	struct starpu_task *task = starpu_task_create();
+	int ret;
+	task->cl = &empty_cl;
+	ret = starpu_task_submit(task);
+	STARPU_ASSERT(ret == 0);
+}
+
+#define TOTAL "100"
+#define FILL (99*1024*1024)
+
+int main(int argc, char **argv)
+{
+	int i, ret;
+	struct starpu_conf conf;
+	starpu_data_handle_t handle;
+	void *allocated;
+
+	setenv("STARPU_LIMIT_CPU_MEM", TOTAL, 1);
+
+	starpu_conf_init(&conf);
+	conf.ncpus = 1;
+	conf.ncuda = 0;
+	conf.nopencl = 0;
+	conf.nmic = 0;
+	conf.nscc = 0;
+
+        ret = starpu_initialize(&conf, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, -1, 0, FILL);
+
+	/* This makes the data allocated */
+	emit_task(handle);
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT);
+	/* Room should be busy due to data */
+	STARPU_ASSERT(ret == -ENOMEM);
+
+	ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT|STARPU_MALLOC_NORECLAIM);
+	/* But we should be able to tell we don't care */
+	STARPU_ASSERT(ret == 0);
+	((char*)allocated)[FILL-1] = 0;
+	starpu_free_flags(allocated, FILL, STARPU_MALLOC_COUNT);
+
+	/* Release the automatically allocated data */
+	starpu_data_unregister(handle);
+
+	/* Memory may not be available immediately, make sure the driver has
+	 * the opportunity to release it */
+	emit_empty_task();
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	emit_empty_task();
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT);
+	/* Room should now be available */
+	STARPU_ASSERT(ret == 0);
+	starpu_free_flags(allocated, FILL, STARPU_MALLOC_COUNT);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}