Переглянути джерело

Improve data reuse: before calling generic reclaiming, try to reuse potentially-in-use mc of the same interface. Avoids having generic reclaiming free data just to be able to reallocate exactly the same kind of data

Samuel Thibault 8 роки тому
батько
коміт
33399e8f5a
1 змінених файлів з 182 додано та 146 видалено
  1. 182 146
      src/datawizard/memalloc.c

+ 182 - 146
src/datawizard/memalloc.c

@@ -428,10 +428,56 @@ static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 	return size;
 }
 
+/* We assume that mc_lock[node] is taken. is_already_in_mc_list indicates
+ * that the mc is already in the list of buffers that are possibly used, and
+ * therefore not in the cache. */
+static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
+{
+	void *data_interface;
+
+	/* we found an appropriate mem chunk: so we get it out
+	 * of the "to free" list, and reassign it to the new
+	 * piece of data */
+
+	struct _starpu_data_replicate *old_replicate = mc->replicate;
+	if (old_replicate)
+	{
+		old_replicate->allocated = 0;
+		old_replicate->automatically_allocated = 0;
+		old_replicate->initialized = 0;
+		data_interface = old_replicate->data_interface;
+	}
+	else
+		data_interface = mc->chunk_interface;
+
+	STARPU_ASSERT(new_replicate->data_interface);
+	STARPU_ASSERT(data_interface);
+	memcpy(new_replicate->data_interface, data_interface, mc->size_interface);
+
+	if (!old_replicate)
+	{
+		/* Free the copy that we made */
+		free(mc->chunk_interface);
+		mc->chunk_interface = NULL;
+	}
+
+	/* XXX: We do not actually reuse the mc at the moment, only the interface */
+
+	/* mc->data = new_replicate->handle; */
+	/* mc->footprint, mc->ops, mc->size_interface, mc->automatically_allocated should be
+ 	 * unchanged ! */
+
+	/* remove the mem chunk from the list of active memory chunks, register_mem_chunk will put it back later */
+	if (is_already_in_mc_list)
+		MC_LIST_ERASE(node, mc);
+
+	free(mc);
+}
+
 /* This function is called for memory chunks that are possibly in used (ie. not
  * in the cache). They should therefore still be associated to a handle. */
 /* mc_lock is held and may be temporarily released! */
-static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
+static size_t try_to_throw_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
 {
 	size_t freed = 0;
 
@@ -496,6 +542,8 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 			/* choose the best target */
 			target = choose_target(handle, node);
 
+			/* TODO: only transfer if !reuse */
+
 			if (target != -1)
 			{
 				int res;
@@ -544,8 +592,18 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 						 */
 
 						if (handle->per_node[node].refcnt == 0)
-							/* And still nobody on it, now the actual buffer may be freed */
-							freed = do_free_mem_chunk(mc, node);
+						{
+							/* And still nobody on it, now the actual buffer may be reused or freed */
+							if (replicate)
+							{
+								/* Reuse for this replicate */
+								reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
+								freed = 1;
+							}
+							else
+								/* Free */
+								freed = do_free_mem_chunk(mc, node);
+						}
 					}
 				}
 			}
@@ -558,122 +616,6 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 }
 
 #ifdef STARPU_USE_ALLOCATION_CACHE
-/* We assume that mc_lock[node] is taken. is_already_in_mc_list indicates
- * that the mc is already in the list of buffers that are possibly used, and
- * therefore not in the cache. */
-static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
-{
-	void *data_interface;
-
-	/* we found an appropriate mem chunk: so we get it out
-	 * of the "to free" list, and reassign it to the new
-	 * piece of data */
-
-	struct _starpu_data_replicate *old_replicate = mc->replicate;
-	if (old_replicate)
-	{
-		old_replicate->allocated = 0;
-		old_replicate->automatically_allocated = 0;
-		old_replicate->initialized = 0;
-		data_interface = old_replicate->data_interface;
-	}
-	else
-		data_interface = mc->chunk_interface;
-
-	STARPU_ASSERT(new_replicate->data_interface);
-	STARPU_ASSERT(data_interface);
-	memcpy(new_replicate->data_interface, data_interface, mc->size_interface);
-
-	if (!old_replicate)
-	{
-		/* Free the copy that we made */
-		free(mc->chunk_interface);
-		mc->chunk_interface = NULL;
-	}
-
-	/* XXX: We do not actually reuse the mc at the moment, only the interface */
-
-	/* mc->data = new_replicate->handle; */
-	/* mc->footprint, mc->ops, mc->size_interface, mc->automatically_allocated should be
- 	 * unchanged ! */
-
-	/* remove the mem chunk from the list of active memory chunks, register_mem_chunk will put it back later */
-	if (is_already_in_mc_list)
-		MC_LIST_ERASE(node, mc);
-
-	free(mc);
-}
-
-/* mc_lock is held and may be temporarily released! */
-static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
-{
-	unsigned success = 0;
-
-	starpu_data_handle_t old_data;
-
-	old_data = mc->data;
-
-	STARPU_ASSERT(old_data);
-
-	/* try to lock all the subtree */
-	/* and check if they are all "free" */
-	if (lock_all_subtree(old_data))
-	{
-		if (may_free_subtree(old_data, node))
-		{
-			int res;
-			/* Should have been avoided in our caller */
-			STARPU_ASSERT(!mc->remove_notify);
-			mc->remove_notify = &mc;
-			/* in case there was nobody using that buffer, throw it
-			 * away after writing it back to main memory */
-			_starpu_spin_unlock(&mc_lock[node]);
-			_STARPU_TRACE_START_WRITEBACK(node);
-			res = transfer_subtree_to_node(old_data, node, STARPU_MAIN_RAM);
-			_STARPU_TRACE_END_WRITEBACK(node);
-			_starpu_spin_lock(&mc_lock[node]);
-
-			if (!mc)
-			{
-				if (res == -1)
-				{
-					/* handle disappeared, abort without unlocking it */
-					return 0;
-				}
-			}
-			else
-			{
-				STARPU_ASSERT(mc->remove_notify == &mc);
-				mc->remove_notify = NULL;
-
-				if (res == -1)
-				{
-					/* handle disappeared, abort without unlocking it */
-					return 0;
-				}
-
-				if (res == 1)
-				{
-					/* mc is still associated with the old
-					 * handle, now replace the previous data
-					 */
-					if (old_data->per_node[node].refcnt == 0)
-					{
-						/* And still nobody on it, now the actual buffer may be reused */
-						reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
-						success = 1;
-					}
-				}
-			}
-		}
-
-		/* unlock the tree */
-		unlock_all_subtree(old_data);
-	}
-
-	return success;
-}
-
 static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
                                           void *data_interface_b, struct starpu_data_interface_ops *ops_b)
 {
@@ -722,9 +664,9 @@ static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned n
 
 /* this function looks for a memory chunk that matches a given footprint in the
  * list of mem chunk that need to be freed. */
-static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
+static int try_to_find_reusable_mc(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
 {
-	struct _starpu_mem_chunk *mc, *orig_next_mc, *next_mc;
+	struct _starpu_mem_chunk *mc;
 	int success = 0;
 
 	_starpu_spin_lock(&mc_lock[node]);
@@ -734,16 +676,21 @@ static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle
 	{
 		/* We found an entry in the cache so we can reuse it */
 		reuse_mem_chunk(node, replicate, mc, 0);
-		_starpu_spin_unlock(&mc_lock[node]);
-		return 1;
+		success = 1;
 	}
+	_starpu_spin_unlock(&mc_lock[node]);
+	return success;
+}
+#endif
 
-	if (!_starpu_has_not_important_data)
-	{
-		_starpu_spin_unlock(&mc_lock[node]);
-		return 0;
-	}
+/* this function looks for a memory chunk that matches a given footprint in the
+ * list of mem chunk that are not important */
+static int try_to_reuse_not_important_mc(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
+{
+	struct _starpu_mem_chunk *mc, *orig_next_mc, *next_mc;
+	int success = 0;
 
+	_starpu_spin_lock(&mc_lock[node]);
 restart:
 	/* now look for some non essential data in the active list */
 	for (mc = _starpu_mem_chunk_list_begin(&mc_list[node]);
@@ -757,6 +704,12 @@ restart:
 		if (mc->remove_notify)
 			/* Somebody already working here, skip */
 			continue;
+		if (!mc->data->is_not_important)
+			/* Important data, skip */
+			continue;
+		if (mc->footprint != footprint || _starpu_data_interface_compare(data->per_node[node].data_interface, data->ops, mc->data->per_node[node].data_interface, mc->ops) != 1 )
+			/* Not the right type of interface, skip */
+			continue;
 		if (next_mc)
 		{
 			if (next_mc->remove_notify)
@@ -765,12 +718,73 @@ restart:
 			next_mc->remove_notify = &next_mc;
 		}
 
-		if (mc->data->is_not_important && (mc->footprint == footprint))
+		/* Note: this may unlock mc_list! */
+		success = try_to_throw_mem_chunk(mc, node, replicate, 1);
+
+		if (orig_next_mc)
 		{
-			/* Note: this may unlock mc_list! */
-			success = try_to_reuse_mem_chunk(mc, node, replicate, 1);
+			if (!next_mc)
+				/* Oops, somebody dropped the next item while we were
+				 * not keeping the mc_lock. Restart from the beginning
+				 * of the list */
+				goto restart;
+			else
+			{
+				STARPU_ASSERT(next_mc->remove_notify == &next_mc);
+				next_mc->remove_notify = NULL;
+			}
+		}
+	}
+	_starpu_spin_unlock(&mc_lock[node]);
+
+	return success;
+}
+
+/*
+ * Try to find a buffer currently in use on the memory node which has the given
+ * footprint.
+ */
+static int try_to_reuse_potentially_in_use_mc(unsigned node, starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, uint32_t footprint)
+{
+	struct _starpu_mem_chunk *mc, *next_mc, *orig_next_mc;
+	int success = 0;
+
+	/*
+	 * We have to unlock mc_lock before locking header_lock, so we have
+	 * to be careful with the list.  We try to do just one pass, by
+	 * remembering the next mc to be tried. If it gets dropped, we restart
+	 * from zero. So we continue until we go through the whole list without
+	 * finding anything to free.
+	 */
+
+	_starpu_spin_lock(&mc_lock[node]);
+
+restart:
+	for (mc = _starpu_mem_chunk_list_begin(&mc_list[node]);
+	     mc != _starpu_mem_chunk_list_end(&mc_list[node]) && !success;
+	     mc = next_mc)
+	{
+		/* mc hopefully gets out of the list, we thus need to prefetch
+		 * the next element */
+		orig_next_mc = next_mc = _starpu_mem_chunk_list_next(mc);
+
+		if (mc->remove_notify)
+			/* Somebody already working here, skip */
+			continue;
+		if (mc->footprint != footprint || _starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->data->per_node[node].data_interface, mc->ops) != 1)
+			/* Not the right type of interface, skip */
+			continue;
+		if (next_mc)
+		{
+			if (next_mc->remove_notify)
+				/* Somebody already working here, skip */
+				continue;
+			next_mc->remove_notify = &next_mc;
 		}
 
+		/* Note: this may unlock mc_list! */
+		success = try_to_throw_mem_chunk(mc, node, replicate, 1);
+
 		if (orig_next_mc)
 		{
 			if (!next_mc)
@@ -789,7 +803,6 @@ restart:
 
 	return success;
 }
-#endif
 
 /*
  * Free the memory chuncks that are explicitely tagged to be freed.
@@ -875,7 +888,7 @@ restart2:
 				next_mc->remove_notify = &next_mc;
 			}
 			/* Note: this may unlock mc_list! */
-			freed += try_to_free_mem_chunk(mc, node);
+			freed += try_to_throw_mem_chunk(mc, node, NULL, 0);
 
 			if (orig_next_mc)
 			{
@@ -1284,6 +1297,7 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 	int ret;
 	starpu_ssize_t data_size = _starpu_data_get_size(handle);
 	int told_reclaiming = 0;
+	int reused = 0;
 
 	_starpu_spin_checklocked(&handle->header_lock);
 
@@ -1294,7 +1308,7 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 	uint32_t footprint = _starpu_compute_data_footprint(handle);
 
 	_STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size);
-	if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
+	if (try_to_find_reusable_mc(dst_node, handle, replicate, footprint))
 	{
 		_starpu_allocation_cache_hit(dst_node);
 		return data_size;
@@ -1338,24 +1352,42 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 
 		if (allocated_memory == -ENOMEM)
 		{
-			if (!told_reclaiming)
-			{
-				/* Prevent prefetches and such from happening */
-				(void) STARPU_ATOMIC_ADD(&reclaiming[dst_node], 1);
-				told_reclaiming = 1;
-			}
 			size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
 			size_t handle_size = handle->ops->get_size(handle);
 			if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
 				reclaim = starpu_memstrategy_data_size_coefficient*handle_size;
 
-			_STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
+			/* First try to flush data explicitly marked for freeing */
+			size_t freed = flush_memchunk_cache(dst_node, reclaim);
+
+			if (freed >= reclaim)
+				/* That freed enough data, retry allocating */
+				continue;
+			reclaim -= freed;
+
 			if (is_prefetch)
+				/* It's just prefetch, don't bother existing allocations */
+				continue;
+
+			/* Try to reuse an allocated data with the same interface (to avoid spurious free/alloc) */
+			if (_starpu_has_not_important_data && try_to_reuse_not_important_mc(dst_node, handle, replicate, footprint))
+				break;
+			if (try_to_reuse_potentially_in_use_mc(dst_node, handle, replicate, footprint))
 			{
-				flush_memchunk_cache(dst_node, reclaim);
+				reused = 1;
+				allocated_memory = data_size;
+				break;
 			}
-			else
-				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
+
+			if (!told_reclaiming)
+			{
+				/* Prevent prefetches and such from happening */
+				(void) STARPU_ATOMIC_ADD(&reclaiming[dst_node], 1);
+				told_reclaiming = 1;
+			}
+			/* That was not enough, we have to really reclaim */
+			_STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch);
+			_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 			_STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch);
 		}
 	}
@@ -1389,7 +1421,11 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 		goto out;
 	}
 
-	if (replicate->allocated)
+	if (reused)
+	{
+		/* We just reused an allocation, nothing more to do */
+	}
+	else if (replicate->allocated)
 	{
 		/* Argl, somebody allocated it in between already, drop this one */
 		_STARPU_TRACE_START_FREE(dst_node, data_size);
@@ -1398,7 +1434,7 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 		allocated_memory = 0;
 	}
 	else
-		/* Install allocated interface */
+		/* Install newly-allocated interface */
 		memcpy(replicate->data_interface, data_interface, handle->ops->interface_size);
 
 out: