Browse Source

Memory reclaiming bug fix: while reclaiming memory, we should not hold the lock of the header describing the entire state of the handle while other handles are inspected to find available memory.

Cédric Augonnet 15 years ago
parent
commit
dabb837619
2 changed files with 13 additions and 4 deletions
  1. 3 3
      src/datawizard/data_request.c
  2. 10 1
      src/datawizard/memalloc.c

+ 3 - 3
src/datawizard/data_request.c

@@ -304,8 +304,11 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->allocated);
 	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->refcnt);
 
+	_starpu_spin_unlock(&r->lock);
+
 	/* perform the transfer */
 	/* the header of the data must be locked by the worker that submitted the request */
+
 	r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate,
 			dst_replicate, !(r_mode & STARPU_R), r, may_alloc);
 
@@ -313,9 +316,7 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 	{
 		/* If there was not enough memory, we will try to redo the
 		 * request later. */
-		_starpu_spin_unlock(&r->lock);
 		_starpu_spin_unlock(&handle->header_lock);
-
 		return -ENOMEM;
 	}
 
@@ -325,7 +326,6 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 		 * immediately. We will handle the completion of the request
 		 * asynchronously. The request is put in the list of "pending"
 		 * requests in the meantime. */
-		_starpu_spin_unlock(&r->lock);
 		_starpu_spin_unlock(&handle->header_lock);
 
 		PTHREAD_MUTEX_LOCK(&data_requests_pending_list_mutex[r->handling_node]);

+ 10 - 1
src/datawizard/memalloc.c

@@ -693,10 +693,19 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct star
 		allocated_memory = handle->ops->allocate_data_on_node(replicate->interface, dst_node);
 		STARPU_TRACE_END_ALLOC(dst_node);
 
-		if (allocated_memory == -ENOMEM) {
+		if (allocated_memory == -ENOMEM)
+		{
+			replicate->refcnt++;
+			_starpu_spin_unlock(&handle->header_lock);
+
 			STARPU_TRACE_START_MEMRECLAIM(dst_node);
 			reclaim_memory_generic(dst_node, 0);
 			STARPU_TRACE_END_MEMRECLAIM(dst_node);
+
+		        while (_starpu_spin_trylock(&handle->header_lock))
+		                _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
+		
+			replicate->refcnt--;
 		}
 		
 	} while((allocated_memory == -ENOMEM) && attempts++ < 2);