Selaa lähdekoodia

Make reclaiming prevent further fetches on the node being cleaned up, to avoid a permanent battle between reclaiming and fetching

Samuel Thibault 10 vuotta sitten
vanhempi
commit
d06eb3bbd4

+ 2 - 1
src/datawizard/copy_driver.c

@@ -488,7 +488,8 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d
 	/* first make sure the destination has an allocated buffer */
 	if (!dst_replicate->allocated)
 	{
-		if (!may_alloc)
+		if (!may_alloc || _starpu_is_reclaiming(dst_node))
+			/* We're not supposed to allocate there at the moment */
 			return -ENOMEM;
 
 		ret_alloc = _starpu_allocate_memory_on_node(handle, dst_replicate, req ? req->prefetch : 0);

+ 3 - 1
src/datawizard/data_request.c

@@ -524,7 +524,9 @@ static int __starpu_handle_node_data_requests(struct _starpu_data_request_list *
 			ret = res;
 			/* Prefetch requests might have gotten promoted while in tmp list */
 			_starpu_data_request_list_push_back(&new_data_requests[r->prefetch], r);
-			break;
+			if (prefetch)
+				/* Prefetching more there would make the situation even worse */
+				break;
 		}
 
 		(*pushed)++;

+ 18 - 0
src/datawizard/memalloc.c

@@ -102,6 +102,13 @@ static starpu_ssize_t mc_cache_size[STARPU_MAXNODES];
 
 /* Whether some thread is currently tidying this node */
 static unsigned tidying[STARPU_MAXNODES];
+/* Whether some thread is currently reclaiming memory for this node */
+static unsigned reclaiming[STARPU_MAXNODES];
+
+int _starpu_is_reclaiming(unsigned node)
+{
+	return tidying[node] || reclaiming[node];
+}
 
 /* When reclaiming memory to allocate, we reclaim MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
 const unsigned starpu_memstrategy_data_size_coefficient=2;
@@ -1192,6 +1199,7 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 	starpu_ssize_t allocated_memory;
 	int ret;
 	starpu_ssize_t data_size = _starpu_data_get_size(handle);
+	int told_reclaiming = 0;
 
 	_starpu_spin_checklocked(&handle->header_lock);
 
@@ -1246,6 +1254,12 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 
 		if (allocated_memory == -ENOMEM)
 		{
+			if (!told_reclaiming)
+			{
+				/* Prevent prefetches and such from happening */
+				(void) STARPU_ATOMIC_ADD(&reclaiming[dst_node], 1);
+				told_reclaiming = 1;
+			}
 			size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
 			size_t handle_size = handle->ops->get_size(handle);
 			if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
@@ -1279,6 +1293,10 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 	ret = _starpu_data_check_not_busy(handle);
 	STARPU_ASSERT(ret == 0);
 
+	if (told_reclaiming)
+		/* We've finished with reclaiming memory, let prefetches start again */
+		(void) STARPU_ATOMIC_ADD(&reclaiming[dst_node], -1);
+
 	if (allocated_memory == -ENOMEM)
 	{
 		if (replicate->allocated)

+ 1 - 0
src/datawizard/memalloc.h

@@ -86,5 +86,6 @@ void _starpu_memchunk_dirty(struct _starpu_mem_chunk *mc, unsigned node);
 
 void _starpu_display_memory_stats_by_node(int node);
 size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim);
+int _starpu_is_reclaiming(unsigned node);
 
 #endif