소스 검색

Memory reclaiming bug fix: while reclaiming memory, we should not hold the lock of the header describing the entire state of the handle while other handles are inspected to find available memory.

Cédric Augonnet 15 년 전
부모
커밋
dabb837619
2개의 변경된 파일13개의 추가작업 그리고 4개의 파일을 삭제
  1. 3 3
      src/datawizard/data_request.c
  2. 10 1
      src/datawizard/memalloc.c

+ 3 - 3
src/datawizard/data_request.c

@@ -304,8 +304,11 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->allocated);
 	STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->refcnt);
 
+	_starpu_spin_unlock(&r->lock);
+
 	/* perform the transfer */
 	/* the header of the data must be locked by the worker that submitted the request */
+
 	r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate,
 			dst_replicate, !(r_mode & STARPU_R), r, may_alloc);
 
@@ -313,9 +316,7 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 	{
 		/* If there was not enough memory, we will try to redo the
 		 * request later. */
-		_starpu_spin_unlock(&r->lock);
 		_starpu_spin_unlock(&handle->header_lock);
-
 		return -ENOMEM;
 	}
 
@@ -325,7 +326,6 @@ static int starpu_handle_data_request(starpu_data_request_t r, unsigned may_allo
 		 * immediatly. We will handle the completion of the request
 		 * asynchronously. The request is put in the list of "pending"
 		 * requests in the meantime. */
-		_starpu_spin_unlock(&r->lock);
 		_starpu_spin_unlock(&handle->header_lock);
 
 		PTHREAD_MUTEX_LOCK(&data_requests_pending_list_mutex[r->handling_node]);

+ 10 - 1
src/datawizard/memalloc.c

@@ -693,10 +693,19 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct star
 		allocated_memory = handle->ops->allocate_data_on_node(replicate->interface, dst_node);
 		STARPU_TRACE_END_ALLOC(dst_node);
 
-		if (allocated_memory == -ENOMEM) {
+		if (allocated_memory == -ENOMEM)
+		{
+			replicate->refcnt++;
+			_starpu_spin_unlock(&handle->header_lock);
+
 			STARPU_TRACE_START_MEMRECLAIM(dst_node);
 			reclaim_memory_generic(dst_node, 0);
 			STARPU_TRACE_END_MEMRECLAIM(dst_node);
+
+		        while (_starpu_spin_trylock(&handle->header_lock))
+		                _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
+		
+			replicate->refcnt--;
 		}
 		
 	} while((allocated_memory == -ENOMEM) && attempts++ < 2);