Browse Source

Periodically reclaim memory instead of waiting until the last moment.

Samuel Thibault 11 years ago
parent
commit
d737e2d729

+ 2 - 0
ChangeLog

@@ -45,6 +45,8 @@ New features:
   * Out-of-core support, through registration of disk areas as additional memory
     nodes. It can be enabled programmatically or through the STARPU_DISK_SWAP*
     environment variables.
+  * Reclaiming is now periodically done before memory becomes full. This can
+    be controlled through the STARPU_*_AVAILABLE_MEM environment variables.
   * New hierarchical schedulers which allow the user to easily build
     its own scheduler, by coding itself each "box" it wants, or by
     combining existing boxes in StarPU to build it. Hierarchical

+ 18 - 0
doc/doxygen/chapters/40environment_variables.doxy

@@ -568,6 +568,24 @@ This variable specifies the maximum number of megabytes that should be
 available to the application on each CPU device.
 </dd>
 
+<dt>STARPU_MINIMUM_AVAILABLE_MEM</dt>
+<dd>
+\anchor STARPU_MINIMUM_AVAILABLE_MEM
+\addindex __env__STARPU_MINIMUM_AVAILABLE_MEM
+This specifies the minimum percentage of memory that should be available in GPUs
+(or in main memory, when using out-of-core support), below which a reclaiming
+pass is performed. The default is 5%.
+</dd>
+
+<dt>STARPU_TARGET_AVAILABLE_MEM</dt>
+<dd>
+\anchor STARPU_TARGET_AVAILABLE_MEM
+\addindex __env__STARPU_TARGET_AVAILABLE_MEM
+This specifies the target percentage of available memory that should be reached
+in GPUs (or in main memory, when using out-of-core support), when performing a
+periodic reclaiming pass. The default is 10%.
+</dd>
+
 <dt>STARPU_DISK_SWAP</dt>
 <dd>
 \anchor STARPU_DISK_SWAP

+ 3 - 0
src/datawizard/datawizard.c

@@ -18,6 +18,7 @@
 #include <starpu.h>
 #include <common/config.h>
 #include <datawizard/datawizard.h>
+#include <datawizard/memalloc.h>
 #include <core/workers.h>
 #include <core/progress_hook.h>
 #ifdef STARPU_SIMGRID
@@ -33,6 +34,8 @@ int __starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsig
 #endif
 	STARPU_UYIELD();
 
+	_starpu_memchunk_tidy(memory_node);
+
 	/* in case some other driver requested data */
 	if (_starpu_handle_pending_node_data_requests(memory_node))
 		ret = 1;

+ 41 - 3
src/datawizard/memalloc.c

@@ -72,6 +72,7 @@ void _starpu_deinit_mem_chunk_lists(void)
 		}
 		STARPU_ASSERT(mc_cache_nb[i] == 0);
 		STARPU_ASSERT(mc_cache_size[i] == 0);
+		STARPU_HG_DISABLE_CHECKING(mc_cache_size[i]);
 		_starpu_spin_destroy(&mc_lock[i]);
 	}
 }
@@ -710,10 +711,10 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 	if (reclaim && !force)
 	{
 		static int warned;
-		char name[32];
-		_starpu_memory_node_get_name(node, name, sizeof(name));
 		if (!warned) {
-			_STARPU_DISP("Not enough memory left on node %s. Your application data set seems to huge to fit on the device, StarPU will cope by trying to purge %lu bytes out. This message will not be printed again for further purges\n", name, (unsigned long) reclaim);
+			char name[32];
+			_starpu_memory_node_get_name(node, name, sizeof(name));
+			_STARPU_DISP("Not enough memory left on node %s. Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges\n", name, (unsigned long) (reclaim / 1048576));
 			warned = 1;
 		}
 	}
@@ -739,6 +740,43 @@ size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
 	return _starpu_memory_reclaim_generic(node, 1, 0);
 }
 
+/* Periodic tidy of available memory */
+void _starpu_memchunk_tidy(unsigned node)
+{
+	starpu_ssize_t total = starpu_memory_get_total(node);
+	starpu_ssize_t available = starpu_memory_get_available(node);
+	size_t target, amount;
+	unsigned minimum_p = starpu_get_env_number_default("STARPU_MINIMUM_AVAILABLE_MEM", 5);
+	unsigned target_p = starpu_get_env_number_default("STARPU_TARGET_AVAILABLE_MEM", 10);
+
+	if (total <= 0)
+		return;
+
+	/* TODO: only request writebacks to get buffers clean, without waiting
+	 * for them */
+
+	/* Count cached allocations as being available */
+	available += mc_cache_size[node];
+
+	if (available >= (total * minimum_p) / 100)
+		/* Enough available space, do not trigger reclaiming */
+		return;
+
+	/* Not enough available space, reclaim until we reach the target.  */
+	target = (total * target_p) / 100;
+	amount = target - available;
+
+	static int warned;
+	if (!warned) {
+		char name[32];
+		_starpu_memory_node_get_name(node, name, sizeof(name));
+		_STARPU_DISP("Low memory left on node %s (%luMiB over %luMiB). Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. The thresholds can be tuned using the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM environment variables.\n", name, (unsigned long) (available / 1048576), (unsigned long) (total / 1048576), (unsigned long) (amount / 1048576));
+		warned = 1;
+	}
+
+	free_potentially_in_use_mc(node, 0, amount);
+}
+
 static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
 {
 	struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();

+ 1 - 0
src/datawizard/memalloc.h

@@ -65,6 +65,7 @@ void _starpu_deinit_mem_chunk_lists(void);
 void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size);
 int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch);
 size_t _starpu_free_all_automatically_allocated_buffers(unsigned node);
+void _starpu_memchunk_tidy(unsigned node);
 void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node);
 
 void _starpu_display_memory_stats_by_node(int node);