瀏覽代碼

Cope with the case when a data handle is unregistered just while being evicted

Samuel Thibault 10 年之前
父節點
當前提交
681aa0a73e
共有 1 個文件被更改,包括 22 次插入6 次删除
  1. 22 6
      src/datawizard/memalloc.c

+ 22 - 6
src/datawizard/memalloc.c

@@ -223,7 +223,10 @@ static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
 	return 1;
 }
 
-static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
+/* Warn: this releases the header lock of the handle during the transfer
+ * The handle may thus unexpectedly disappear. This returns 1 in that case.
+ */
+static int transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
 				     unsigned dst_node)
 {
 	unsigned i;
@@ -253,8 +256,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 			_starpu_wait_data_request_completion(r, 1);
 			_starpu_spin_lock(&handle->header_lock);
 			handle->busy_count--;
-			int ret = _starpu_data_check_not_busy(handle);
-			STARPU_ASSERT(!ret);
+			if (_starpu_data_check_not_busy(handle))
+				return 1;
 		}
 
 		if (src_replicate->state == STARPU_SHARED)
@@ -286,12 +289,17 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 	{
 		/* lock all sub-subtrees children */
 		unsigned child;
+		int res;
 		for (child = 0; child < handle->nchildren; child++)
 		{
 			starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
-			transfer_subtree_to_node(child_handle, src_node, dst_node);
+			res = transfer_subtree_to_node(child_handle, src_node, dst_node);
+			/* There is no way children have disappeared since we
+			 * keep the parent lock held */
+			STARPU_ASSERT(!res);
 		}
 	}
+	return 0;
 }
 
 static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
@@ -463,6 +471,7 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
 			if (target != -1)
 			{
+				int res;
 				/* Should have been avoided in our caller */
 				STARPU_ASSERT(!mc->remove_notify);
 				mc->remove_notify = &mc;
@@ -475,8 +484,11 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 				/* Note: this may need to allocate data etc.
 				 * and thus release the header lock, take
 				 * mc_lock, etc. */
-				transfer_subtree_to_node(handle, node, target);
+				res = transfer_subtree_to_node(handle, node, target);
 				_STARPU_TRACE_END_WRITEBACK(node);
+				if (res)
+					/* Oops, the handle has disappeared in the meanwhile */
+					return 0;
 #ifdef STARPU_MEMORY_STATS
 				_starpu_memory_handle_stats_loaded_owner(handle, target);
 #endif
@@ -567,6 +579,7 @@ static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned no
 	{
 		if (may_free_subtree(old_data, node))
 		{
+			int res;
 			/* Should have been avoided in our caller */
 			STARPU_ASSERT(!mc->remove_notify);
 			mc->remove_notify = &mc;
@@ -574,8 +587,11 @@ static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned no
 			 * away after writing it back to main memory */
 			_starpu_spin_unlock(&mc_lock[node]);
 			_STARPU_TRACE_START_WRITEBACK(node);
-			transfer_subtree_to_node(old_data, node, STARPU_MAIN_RAM);
+			res = transfer_subtree_to_node(old_data, node, STARPU_MAIN_RAM);
 			_STARPU_TRACE_END_WRITEBACK(node);
+			if (res)
+				/* Oops, the handle has disappeared in the meanwhile */
+				return 0;
 			_starpu_spin_lock(&mc_lock[node]);
 
 			if (mc)