Parcourir la source

rework deallocating handle memory chunks: we do not actually need to take the burden of going through the whole mc_list, since we already have the mc pointer. This also permits to properly mark per-worker chunks as deallocated

Samuel Thibault il y a 12 ans
Parent
commit
f24ad3cbab

+ 9 - 6
src/datawizard/filters.c

@@ -345,13 +345,14 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 	/* still valid ? */
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
+		struct _starpu_data_replicate *local;
 		/* until an issue is found the data is assumed to be valid */
 		unsigned isvalid = 1;
 
 		for (child = 0; child < root_handle->nchildren; child++)
 		{
 			starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child);
-			struct _starpu_data_replicate *local = &child_handle->per_node[node];
+			local = &child_handle->per_node[node];
 
 			if (local->state == STARPU_INVALID)
 			{
@@ -359,19 +360,21 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 				isvalid = 0;
 			}
 
-			if (local->allocated && local->automatically_allocated)
+			if (local->mc && local->allocated && local->automatically_allocated)
 				/* free the child data copy in a lazy fashion */
-				_starpu_request_mem_chunk_removal(child_handle, node, sizes[child]);
+				_starpu_request_mem_chunk_removal(child_handle, local, node, sizes[child]);
 		}
 
-		if (!root_handle->per_node[node].allocated)
+		local = &root_handle->per_node[node];
+
+		if (!local->allocated)
 			/* Even if we have all the bits, if we don't have the
 			 * whole data, it's not valid */
 			isvalid = 0;
 
-		if (!isvalid && root_handle->per_node[node].allocated && root_handle->per_node[node].automatically_allocated)
+		if (!isvalid && local->mc && local->allocated && local->automatically_allocated)
 			/* free the data copy in a lazy fashion */
-			_starpu_request_mem_chunk_removal(root_handle, node, _starpu_data_get_size(root_handle));
+			_starpu_request_mem_chunk_removal(root_handle, local, node, _starpu_data_get_size(root_handle));
 
 		/* if there was no invalid copy, the node still has a valid copy */
 		still_valid[node] = isvalid;

+ 30 - 9
src/datawizard/interfaces/data_interface.c

@@ -553,16 +553,28 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 	/* Wait for finished requests to release the handle */
 	_starpu_spin_lock(&handle->header_lock);
 
+	_starpu_data_free_interfaces(handle);
+
 	/* Destroy the data now */
-	unsigned node;
 	size_t size = _starpu_data_get_size(handle);
+	unsigned node;
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
+		struct _starpu_data_replicate *local = &handle->per_node[node];
+		/* free the data copy in a lazy fashion */
+		if (local->allocated && local->automatically_allocated)
+			_starpu_request_mem_chunk_removal(handle, local, node, size);
+	}
+	unsigned worker;
+	unsigned nworkers = starpu_worker_get_count();
+	for (worker = 0; worker < nworkers; worker++)
+	{
+		struct _starpu_data_replicate *local = &handle->per_worker[worker];
 		/* free the data copy in a lazy fashion */
-		_starpu_request_mem_chunk_removal(handle, node, size);
+		if (local->allocated && local->automatically_allocated)
+			_starpu_request_mem_chunk_removal(handle, local, starpu_worker_get_memory_node(worker), size);
 	}
 
-	_starpu_data_free_interfaces(handle);
 	_starpu_memory_stats_free(handle);
 	_starpu_data_requester_list_delete(handle->req_list);
 	_starpu_data_requester_list_delete(handle->reduction_req_list);
@@ -604,13 +616,22 @@ static void _starpu_data_invalidate(void *data)
 	{
 		struct _starpu_data_replicate *local = &handle->per_node[node];
 
-		if (local->allocated && local->automatically_allocated)
-		{
+		if (local->mc && local->allocated && local->automatically_allocated)
 			/* free the data copy in a lazy fashion */
-			_starpu_request_mem_chunk_removal(handle, node, size);
-			local->allocated = 0;
-			local->automatically_allocated = 0;
-		}
+			_starpu_request_mem_chunk_removal(handle, local, node, size);
+
+		local->state = STARPU_INVALID;
+	}
+
+	unsigned worker;
+	unsigned nworkers = starpu_worker_get_count();
+	for (worker = 0; worker < nworkers; worker++)
+	{
+		struct _starpu_data_replicate *local = &handle->per_worker[worker];
+
+		if (local->mc && local->allocated && local->automatically_allocated)
+			/* free the data copy in a lazy fashion */
+			_starpu_request_mem_chunk_removal(handle, local, starpu_worker_get_memory_node(worker), size);
 
 		local->state = STARPU_INVALID;
 	}

+ 29 - 42
src/datawizard/memalloc.c

@@ -694,57 +694,44 @@ static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigne
  * unregister or unpartition). It puts all the memchunks that refer to the
  * specified handle into the cache.
  */
-void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned node, size_t size)
+void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
 {
-	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
+	struct _starpu_mem_chunk *mc = replicate->mc;
 
-	/* TODO: expensive, handle should have its own list of chunks? */
-	/* iterate over the list of memory chunks and remove the entry */
-	struct _starpu_mem_chunk *mc, *next_mc;
-	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
-	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
-	     mc = next_mc)
-	{
-		next_mc = _starpu_mem_chunk_list_next(mc);
+	STARPU_ASSERT(mc->data == handle);
 
-		if (mc->data == handle)
-		{
-			/* we found the data */
+	/* Record the allocated size, so that later in memory
+	 * reclaiming we can estimate how much memory we free
+	 * by freeing this.  */
+	mc->size = size;
+	/* This memchunk doesn't have to do with the data any more. */
+	mc->data = NULL;
+	replicate->mc = NULL;
+	replicate->allocated = 0;
+	replicate->automatically_allocated = 0;
 
-			/* Record the allocated size, so that later in memory
-			 * reclaiming we can estimate how much memory we free
-			 * by freeing this.  */
-			mc->size = size;
-			/* This memchunk doesn't have to do with the data any more. */
-			mc->data = NULL;
+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
 
-			/* remove it from the main list */
-			_starpu_mem_chunk_list_erase(mc_list[node], mc);
+	/* remove it from the main list */
+	_starpu_mem_chunk_list_erase(mc_list[node], mc);
 
-			/* We would never flush the node 0 cache, unless
-			 * malloc() returns NULL, which is very unlikely... */
-			/* This is particularly important when
-			 * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
-			 * wouldn't even re-use these allocations! */
-			if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
-			{
-				free_memory_on_node(mc, node);
+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
 
-				free(mc->chunk_interface);
-				_starpu_mem_chunk_delete(mc);
-			}
-			else
-				/* put it in the list of buffers to be removed */
-				_starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
+	/* We would never flush the node 0 cache, unless
+	 * malloc() returns NULL, which is very unlikely... */
+	/* This is particularly important when
+	 * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
+	 * wouldn't even re-use these allocations! */
+	if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
+	{
+		free_memory_on_node(mc, node);
 
-			/* Note that we do not stop here because there can be
-			 * multiple replicates associated to the same handle on
-			 * the same memory node.  */
-		}
+		free(mc->chunk_interface);
+		_starpu_mem_chunk_delete(mc);
 	}
-
-	/* there was no corresponding buffer ... */
-	_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
+	else
+		/* put it in the list of buffers to be removed */
+		_starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
 }
 
 /*

+ 1 - 1
src/datawizard/memalloc.h

@@ -62,7 +62,7 @@ LIST_TYPE(_starpu_mem_chunk_lru,
 
 void _starpu_init_mem_chunk_lists(void);
 void _starpu_deinit_mem_chunk_lists(void);
-void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned node, size_t size);
+void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size);
 int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch);
 size_t _starpu_free_all_automatically_allocated_buffers(unsigned node);
 void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node);