瀏覽代碼

- add some sanity checks
- make the data transfer code a bit simpler

Cédric Augonnet 16 年之前
父節點
當前提交
b110f4e08a
共有 3 個文件被更改,包括 40 次插入 和 52 次删除
  1. 35 4
      src/datawizard/coherency.c
  2. 5 42
      src/datawizard/copy-driver.c
  3. 0 6
      src/datawizard/copy-driver.h

+ 35 - 4
src/datawizard/coherency.c

@@ -20,10 +20,11 @@
 #include <datawizard/write_back.h>
 #include <core/dependencies/data-concurrency.h>
 
-/* this function will actually copy a valid data into the requesting node */
-static int __attribute__((warn_unused_result)) copy_data_to_node(data_state *state, uint32_t requesting_node, 
-						 unsigned donotread)
+static uint32_t choose_src_node(data_state *state)
 {
+	unsigned src_node = 0;
+	unsigned i;
+
 	/* first find a valid copy, either a OWNER or a SHARED */
 	int ret;
 	uint32_t node;
@@ -39,7 +40,37 @@ static int __attribute__((warn_unused_result)) copy_data_to_node(data_state *sta
 	/* we should have found at least one copy ! */
 	STARPU_ASSERT(src_node_mask != 0);
 
-	ret = driver_copy_data(state, src_node_mask, requesting_node, donotread);
+	mem_node_descr * const descr = get_memory_node_description();
+
+	/* find the node that will be the actual source */
+	for (i = 0; i < MAXNODES; i++)
+	{
+		if (src_node_mask & (1<<i))
+		{
+			/* this is a potential candidate */
+			src_node = i;
+
+			/* however GPU are expensive sources, really !
+			 * 	other should be ok */
+			if (descr->nodes[i] != CUDA_RAM)
+				break;
+
+			/* XXX do a better algorithm to distribute the memory copies */
+			/* TODO : use the "requesting_node" as an argument to do so */
+		}
+	}
+
+	return src_node;
+}
+/* this function will actually copy a valid data into the requesting node */
+static int __attribute__((warn_unused_result)) copy_data_to_node(data_state *state, uint32_t dst_node, 
+						 unsigned donotread)
+{
+	int ret;
+	uint32_t src_node = choose_src_node(state);
+
+	/* possibly returns -1 if there was no memory left */
+	ret = driver_copy_data_1_to_1(state, src_node, dst_node, donotread);
 
 	return ret;
 }

+ 5 - 42
src/datawizard/copy-driver.c

@@ -107,8 +107,9 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 		switch (src_kind) {
 			case RAM:
 				/* RAM -> RAM */
-				 copy_methods->ram_to_ram(state, src_node, dst_node);
-				 break;
+				STARPU_ASSERT(copy_methods->ram_to_ram);
+				copy_methods->ram_to_ram(state, src_node, dst_node);
+				break;
 #ifdef USE_CUDA
 			case CUDA_RAM:
 				/* CUBLAS_RAM -> RAM */
@@ -116,6 +117,7 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 				if (get_local_memory_node() == src_node)
 				{
 					/* only the proper CUBLAS thread can initiate this directly ! */
+					STARPU_ASSERT(copy_methods->cuda_to_ram);
 					copy_methods->cuda_to_ram(state, src_node, dst_node);
 				}
 				else
@@ -142,6 +144,7 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 				/* RAM -> CUBLAS_RAM */
 				/* only the proper CUBLAS thread can initiate this ! */
 				STARPU_ASSERT(get_local_memory_node() == dst_node);
+				STARPU_ASSERT(copy_methods->ram_to_cuda);
 				copy_methods->ram_to_cuda(state, src_node, dst_node);
 				break;
 			case CUDA_RAM:
@@ -209,43 +212,3 @@ int __attribute__((warn_unused_result)) driver_copy_data_1_to_1(data_state *stat
 nomem:
 	return -ENOMEM;
 }
-
-static uint32_t choose_src_node(uint32_t src_node_mask)
-{
-	unsigned src_node = 0;
-	unsigned i;
-
-	mem_node_descr * const descr = get_memory_node_description();
-
-	/* first find the node that will be the actual source */
-	for (i = 0; i < MAXNODES; i++)
-	{
-		if (src_node_mask & (1<<i))
-		{
-			/* this is a potential candidate */
-			src_node = i;
-
-			/* however GPU are expensive sources, really !
-			 * 	other should be ok */
-			if (descr->nodes[i] != CUDA_RAM)
-				break;
-
-			/* XXX do a better algorithm to distribute the memory copies */
-		}
-	}
-
-	return src_node;
-}
-
-__attribute__((warn_unused_result))
-int driver_copy_data(data_state *state, uint32_t src_node_mask,
-			 uint32_t dst_node, unsigned donotread)
-{
-	int ret;
-	uint32_t src_node = choose_src_node(src_node_mask);
-
-	/* possibly returns -1 if there was no memory left */
-	ret = driver_copy_data_1_to_1(state, src_node, dst_node, donotread);
-
-	return ret;
-}

+ 0 - 6
src/datawizard/copy-driver.h

@@ -45,12 +45,6 @@ struct copy_data_methods_s {
 	int (*spu_to_spu)(struct starpu_data_state_t *state, uint32_t src, uint32_t dst);
 };
 
-__attribute__((warn_unused_result))
-int driver_copy_data(struct starpu_data_state_t *state, 
-			uint32_t src_node_mask,
-			uint32_t dst_node,
-			unsigned donotread);
-
 void wake_all_blocked_workers(void);
 void wake_all_blocked_workers_on_node(unsigned nodeid);