Kaynağa Gözat

Memory reclaiming bug fix: when reclaiming memory, we must not keep holding the handle's header lock (which protects the entire state of the handle) while other handles are inspected to find available memory.

Cédric Augonnet 15 yıl önce
ebeveyn
işleme
dabb837619
2 değiştirilmiş dosya ile 13 ekleme ve 4 silme
  1. 3 3
      src/datawizard/data_request.c
  2. 10 1
      src/datawizard/memalloc.c

+ 3 - 3
src/datawizard/data_request.c

@@ -304,8 +304,11 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->allocated);
 	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->refcnt);
 
+	_starpu_spin_unlock(&r->lock);
+
 	/* perform the transfer */
 	/* the header of the data must be locked by the worker that submitted the request */
+
 	r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate,
 			dst_replicate, !(r_mode & STARPU_R), r, may_alloc);
 
@@ -313,9 +316,7 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 	{
 		/* If there was not enough memory, we will try to redo the
 		 * request later. */
-		_starpu_spin_unlock(&r->lock);
 		_starpu_spin_unlock(&handle->header_lock);
-
 		return -ENOMEM;
 	}
 
@@ -325,7 +326,6 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 		 * immediatly. We will handle the completion of the request
 		 * asynchronously. The request is put in the list of "pending"
 		 * requests in the meantime. */
-		_starpu_spin_unlock(&r->lock);
 		_starpu_spin_unlock(&handle->header_lock);
 
 		PTHREAD_MUTEX_LOCK(&data_requests_pending_list_mutex[r->handling_node]);

+ 10 - 1
src/datawizard/memalloc.c

@@ -693,10 +693,19 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct star
 		allocated_memory = handle->ops->allocate_data_on_node(replicate->interface, dst_node);
 		STARPU_TRACE_END_ALLOC(dst_node);
 
-		if (allocated_memory == -ENOMEM) {
+		if (allocated_memory == -ENOMEM)
+		{
+			replicate->refcnt++;
+			_starpu_spin_unlock(&handle->header_lock);
+
 			STARPU_TRACE_START_MEMRECLAIM(dst_node);
 			reclaim_memory_generic(dst_node, 0);
 			STARPU_TRACE_END_MEMRECLAIM(dst_node);
+
+		        while (_starpu_spin_trylock(&handle->header_lock))
+		                _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
+		
+			replicate->refcnt--;
 		}
 		
 	} while((allocated_memory == -ENOMEM) && attempts++ < 2);