Ver código fonte

Fix some rare cases where 'allocated' unexpectedly becomes 0. This was happening
because reclaim_memory_generic was not taking the header lock all the time,
while free_memory_on_node indeed needs the lock to be held. Unfortunately,
header locks have to be taken before mc_rwlock, so we have to rework the
reclaiming loops into being able to release it before taking the header lock.

Samuel Thibault 12 anos atrás
pai
commit
7f1972c484
3 arquivos alterados com 86 adições e 50 exclusões
  1. 4 0
      src/datawizard/filters.c
  2. 81 48
      src/datawizard/memalloc.c
  3. 1 2
      src/datawizard/memalloc.h

+ 4 - 0
src/datawizard/filters.c

@@ -357,11 +357,15 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 			}
 			}
 
 
 			if (local->allocated && local->automatically_allocated)
 			if (local->allocated && local->automatically_allocated)
+			{
 				/* free the child data copy in a lazy fashion */
 				/* free the child data copy in a lazy fashion */
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
 #warning FIXME!! this needs access to the child interface, which was freed above!
 #warning FIXME!! this needs access to the child interface, which was freed above!
 #endif
 #endif
+				_starpu_spin_lock(&root_handle->header_lock);
 				_starpu_request_mem_chunk_removal(child_handle, node, 1);
 				_starpu_request_mem_chunk_removal(child_handle, node, 1);
+				_starpu_spin_unlock(&root_handle->header_lock);
+			}
 		}
 		}
 
 
 		if (!root_handle->per_node[node].allocated)
 		if (!root_handle->per_node[node].allocated)

+ 81 - 48
src/datawizard/memalloc.c

@@ -21,6 +21,7 @@
 #include <starpu.h>
 #include <starpu.h>
 
 
 /* This per-node RW-locks protect mc_list and memchunk_cache entries */
 /* This per-node RW-locks protect mc_list and memchunk_cache entries */
+/* Note: handle header lock is always taken before this */
 static _starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
 static _starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
 
 
 /* This per-node spinlock protect lru_list */
 /* This per-node spinlock protect lru_list */
@@ -173,7 +174,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 			dst_replicate->refcnt--;
 			dst_replicate->refcnt--;
 			STARPU_ASSERT(handle->busy_count >= 2);
 			STARPU_ASSERT(handle->busy_count >= 2);
 			handle->busy_count -= 2;
 			handle->busy_count -= 2;
-			_starpu_data_check_not_busy(handle);
+			ret = _starpu_data_check_not_busy(handle);
+			STARPU_ASSERT(ret == 0);
 
 
 			break;
 			break;
 		case STARPU_SHARED:
 		case STARPU_SHARED:
@@ -225,24 +227,15 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 
 
 	starpu_data_handle_t handle = mc->data;
 	starpu_data_handle_t handle = mc->data;
 
 
-	/* Does this memory chunk refers to a handle that does not exist
-	 * anymore ? */
-	unsigned data_was_deleted = mc->data_was_deleted;
-
 	struct _starpu_data_replicate *replicate = mc->replicate;
 	struct _starpu_data_replicate *replicate = mc->replicate;
 
 
-//	while (_starpu_spin_trylock(&handle->header_lock))
-//		_starpu_datawizard_progress(_starpu_memory_node_get_local_key());
-
-#ifdef STARPU_DEVEL
-#warning can we block here ?
-#endif
-//	_starpu_spin_lock(&handle->header_lock);
+	if (handle)
+		_starpu_spin_checklocked(&handle->header_lock);
 
 
 	if (mc->automatically_allocated &&
 	if (mc->automatically_allocated &&
-		(!handle || data_was_deleted || replicate->refcnt == 0))
+		(!handle || replicate->refcnt == 0))
 	{
 	{
-		if (handle && !data_was_deleted)
+		if (handle)
 			STARPU_ASSERT(replicate->allocated);
 			STARPU_ASSERT(replicate->allocated);
 
 
 #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
@@ -258,7 +251,7 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 
 
 		mc->ops->free_data_on_node(mc->chunk_interface, node);
 		mc->ops->free_data_on_node(mc->chunk_interface, node);
 
 
-		if (handle && !data_was_deleted)
+		if (handle)
 		{
 		{
 			replicate->allocated = 0;
 			replicate->allocated = 0;
 
 
@@ -268,12 +261,10 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 
 
 		freed = mc->size;
 		freed = mc->size;
 
 
-		if (handle && !data_was_deleted)
+		if (handle)
 			STARPU_ASSERT(replicate->refcnt == 0);
 			STARPU_ASSERT(replicate->refcnt == 0);
 	}
 	}
 
 
-//	_starpu_spin_unlock(&handle->header_lock);
-
 	return freed;
 	return freed;
 }
 }
 
 
@@ -282,6 +273,10 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 {
 {
 	size_t size;
 	size_t size;
+	starpu_data_handle_t handle = mc->data;
+
+	if (handle)
+		_starpu_spin_checklocked(&handle->header_lock);
 
 
 	mc->replicate->mc=NULL;
 	mc->replicate->mc=NULL;
 
 
@@ -401,7 +396,6 @@ static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_re
 	memcpy(new_replicate->data_interface, mc->chunk_interface, old_replicate->handle->ops->interface_size);
 	memcpy(new_replicate->data_interface, mc->chunk_interface, old_replicate->handle->ops->interface_size);
 
 
 	mc->data = new_replicate->handle;
 	mc->data = new_replicate->handle;
-	mc->data_was_deleted = 0;
 	/* mc->ops, mc->footprint and mc->interface should be
 	/* mc->ops, mc->footprint and mc->interface should be
  	 * unchanged ! */
  	 * unchanged ! */
 
 
@@ -528,26 +522,33 @@ static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle
  */
  */
 static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
 static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
 {
 {
-	struct _starpu_mem_chunk *mc, *next_mc;
+	struct _starpu_mem_chunk *mc;
 
 
 	size_t freed = 0;
 	size_t freed = 0;
 
 
-	for (mc = _starpu_mem_chunk_list_begin(memchunk_cache[node]);
-	     mc != _starpu_mem_chunk_list_end(memchunk_cache[node]);
-	     mc = next_mc)
-	{
-		next_mc = _starpu_mem_chunk_list_next(mc);
+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
+	while (!_starpu_mem_chunk_list_empty(memchunk_cache[node])) {
+		mc = _starpu_mem_chunk_list_pop_front(memchunk_cache[node]);
+		_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
 
 
-		freed += free_memory_on_node(mc, node);
+		starpu_data_handle_t handle = mc->data;
 
 
-		_starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
+		if (handle)
+			while (_starpu_spin_trylock(&handle->header_lock))
+				_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
+		freed += free_memory_on_node(mc, node);
+		if (handle)
+			_starpu_spin_unlock(&handle->header_lock);
 
 
 		free(mc->chunk_interface);
 		free(mc->chunk_interface);
 		_starpu_mem_chunk_delete(mc);
 		_starpu_mem_chunk_delete(mc);
+
 		if (reclaim && freed>reclaim)
 		if (reclaim && freed>reclaim)
 			break;
 			break;
-	}
 
 
+		_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
+	}
+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
 	return freed;
 	return freed;
 }
 }
 
 
@@ -561,30 +562,59 @@ static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t r
 {
 {
 	size_t freed = 0;
 	size_t freed = 0;
 
 
-	struct _starpu_mem_chunk *mc, *next_mc;
+	struct _starpu_mem_chunk *mc, *next_mc = NULL;
 
 
-	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
-	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
-	     mc = next_mc)
+	/*
+	 * We have to unlock mc_rwlock before locking header_lock, so we have
+	 * to be careful with the list.  We try to do just one pass, by
+	 * remembering the next mc to be tried. If it gets dropped, we restart
+	 * from zero. So we continue until we go through the whole list without
+	 * finding anything to free.
+	 */
+
+	while (1)
 	{
 	{
-		/* there is a risk that the memory chunk is freed
-		   before next iteration starts: so we compute the next
-		   element of the list now */
+		_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
+		/* A priori, start from the beginning */
+		mc = _starpu_mem_chunk_list_begin(mc_list[node]);
+		if (next_mc)
+			/* Unless we might restart from where we were */
+			for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
+			     mc != _starpu_mem_chunk_list_end(mc_list[node]);
+			     mc = _starpu_mem_chunk_list_next(mc))
+				if (mc == next_mc)
+					/* Yes, restart from there.  */
+					break;
+
+		if (mc == _starpu_mem_chunk_list_end(mc_list[node]))
+		{
+			/* But it was the last one of the list :/ */
+			_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
+			break;
+		}
+		/* Remember where to try next */
 		next_mc = _starpu_mem_chunk_list_next(mc);
 		next_mc = _starpu_mem_chunk_list_next(mc);
+		_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
 
 
 		if (!force)
 		if (!force)
 		{
 		{
 			freed += try_to_free_mem_chunk(mc, node);
 			freed += try_to_free_mem_chunk(mc, node);
-			#if 1
+
 			if (reclaim && freed > reclaim)
 			if (reclaim && freed > reclaim)
 				break;
 				break;
-			#endif
 		}
 		}
 		else
 		else
 		{
 		{
-			/* We must free the memory now: note that data
-			 * coherency is not maintained in that case ! */
+			starpu_data_handle_t handle = mc->data;
+
+			_starpu_spin_lock(&handle->header_lock);
+
+			/* We must free the memory now, because we are
+			 * terminating the drivers: note that data coherency is
+			 * not maintained in that case ! */
 			freed += do_free_mem_chunk(mc, node);
 			freed += do_free_mem_chunk(mc, node);
+
+			_starpu_spin_unlock(&handle->header_lock);
 		}
 		}
 	}
 	}
 
 
@@ -595,8 +625,6 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 {
 {
 	size_t freed = 0;
 	size_t freed = 0;
 
 
-	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
-
 	starpu_lru(node);
 	starpu_lru(node);
 
 
 	/* remove all buffers for which there was a removal request */
 	/* remove all buffers for which there was a removal request */
@@ -606,8 +634,6 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 	if (reclaim && freed<reclaim)
 	if (reclaim && freed<reclaim)
 		freed += free_potentially_in_use_mc(node, force, reclaim);
 		freed += free_potentially_in_use_mc(node, force, reclaim);
 
 
-	_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
-
 	return freed;
 	return freed;
 
 
 }
 }
@@ -633,7 +659,6 @@ static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_repli
 	mc->data = handle;
 	mc->data = handle;
 	mc->footprint = _starpu_compute_data_footprint(handle);
 	mc->footprint = _starpu_compute_data_footprint(handle);
 	mc->ops = handle->ops;
 	mc->ops = handle->ops;
-	mc->data_was_deleted = 0;
 	mc->automatically_allocated = automatically_allocated;
 	mc->automatically_allocated = automatically_allocated;
 	mc->relaxed_coherency = replicate->relaxed_coherency;
 	mc->relaxed_coherency = replicate->relaxed_coherency;
 	mc->replicate = replicate;
 	mc->replicate = replicate;
@@ -674,6 +699,8 @@ static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigne
  */
  */
 void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned node, int handle_deleted)
 void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned node, int handle_deleted)
 {
 {
+	_starpu_spin_checklocked(&handle->header_lock);
+
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
 
 
 	size_t size = _starpu_data_get_size(handle);
 	size_t size = _starpu_data_get_size(handle);
@@ -691,7 +718,9 @@ void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned nod
 		{
 		{
 			/* we found the data */
 			/* we found the data */
 			mc->size = size;
 			mc->size = size;
-			mc->data_was_deleted = handle_deleted;
+			if (handle_deleted)
+				/* This memchunk doesn't have to do with the data any more. */
+				mc->data = NULL;
 
 
 			/* remove it from the main list */
 			/* remove it from the main list */
 			_starpu_mem_chunk_list_erase(mc_list[node], mc);
 			_starpu_mem_chunk_list_erase(mc_list[node], mc);
@@ -738,6 +767,7 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 {
 {
 	unsigned attempts = 0;
 	unsigned attempts = 0;
 	ssize_t allocated_memory;
 	ssize_t allocated_memory;
+	int ret;
 
 
 	_starpu_spin_checklocked(&handle->header_lock);
 	_starpu_spin_checklocked(&handle->header_lock);
 
 
@@ -799,9 +829,7 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 			_STARPU_TRACE_START_MEMRECLAIM(dst_node);
 			_STARPU_TRACE_START_MEMRECLAIM(dst_node);
 			if (is_prefetch)
 			if (is_prefetch)
 			{
 			{
-				_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
 				flush_memchunk_cache(dst_node, reclaim);
 				flush_memchunk_cache(dst_node, reclaim);
-				_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
 			}
 			}
 			else
 			else
 				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
 				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
@@ -814,7 +842,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 			STARPU_ASSERT(replicate->refcnt >= 0);
 			STARPU_ASSERT(replicate->refcnt >= 0);
 			STARPU_ASSERT(handle->busy_count > 0);
 			STARPU_ASSERT(handle->busy_count > 0);
 			handle->busy_count--;
 			handle->busy_count--;
-			_starpu_data_check_not_busy(handle);
+			ret = _starpu_data_check_not_busy(handle);
+			STARPU_ASSERT(ret == 0);
 		}
 		}
 
 
 	}
 	}
@@ -894,6 +923,8 @@ static void _starpu_memchunk_recently_used_move(struct _starpu_mem_chunk *mc, un
 
 
 static void starpu_lru(unsigned node)
 static void starpu_lru(unsigned node)
 {
 {
+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
+
 	_starpu_spin_lock(&lru_rwlock[node]);
 	_starpu_spin_lock(&lru_rwlock[node]);
 	while (!_starpu_mem_chunk_lru_list_empty(starpu_lru_list[node]))
 	while (!_starpu_mem_chunk_lru_list_empty(starpu_lru_list[node]))
 	{
 	{
@@ -903,6 +934,8 @@ static void starpu_lru(unsigned node)
 		_starpu_mem_chunk_lru_delete(mc_lru);
 		_starpu_mem_chunk_lru_delete(mc_lru);
 	}
 	}
 	_starpu_spin_unlock(&lru_rwlock[node]);
 	_starpu_spin_unlock(&lru_rwlock[node]);
+
+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
 }
 }
 
 
 #ifdef STARPU_MEMORY_STATS
 #ifdef STARPU_MEMORY_STATS

+ 1 - 2
src/datawizard/memalloc.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010, 2012-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -42,7 +42,6 @@ LIST_TYPE(_starpu_mem_chunk,
 	struct starpu_data_interface_ops *ops;
 	struct starpu_data_interface_ops *ops;
 	void *chunk_interface;
 	void *chunk_interface;
 	unsigned automatically_allocated;
 	unsigned automatically_allocated;
-	unsigned data_was_deleted;
 
 
 	/* the size is only set when calling _starpu_request_mem_chunk_removal(),
 	/* the size is only set when calling _starpu_request_mem_chunk_removal(),
          * it is needed by free_memory_on_node() which is called when
          * it is needed by free_memory_on_node() which is called when