瀏覽代碼

choose the best NUMA node when doing transfers

Corentin Salingue 8 年之前
父節點
當前提交
db813919c0
共有 1 個文件被更改,包括 27 次插入3 次删除
  1. 27 3
      src/datawizard/coherency.c

+ 27 - 3
src/datawizard/coherency.c

@@ -320,6 +320,29 @@ static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned
 	return 0;
 }
 
+/* Return the NUMA node index that minimizes 1/bandwidth(src,numa) + 1/bandwidth(numa,dst).
+ * Only slowness (inverse bandwidth) is compared for now; whether latency would be a better metric is an open question. */
+static unsigned chose_best_numa_between_src_and_dest(int src, int dst)
+{
+	const unsigned numa_count = _starpu_get_nb_numa_nodes();
+	double lowest_cost = 0.0;
+	int chosen = -1;
+	unsigned candidate;
+
+	for (candidate = 0; candidate < numa_count; candidate++)
+	{
+		/* Summed slowness of the two hops src -> candidate -> dst: the lower, the better */
+		const double cost = 1.0/starpu_transfer_bandwidth(src, candidate) + 1.0/starpu_transfer_bandwidth(candidate, dst);
+		if (chosen >= 0 && !(cost < lowest_cost))
+			continue;
+		chosen = candidate;
+		lowest_cost = cost;
+	}
+	/* chosen is still -1 only when the loop body never ran (numa_count == 0) */
+	STARPU_ASSERT(chosen >= 0);
+	return chosen;
+}
+
 /* Determines the path of a request : each hop is defined by (src,dst) and the
  * node that handles the hop. The returned value indicates the number of hops,
  * and the max_len is the maximum number of hops (ie. the size of the
@@ -351,7 +374,6 @@ static int determine_request_path(starpu_data_handle_t handle,
 	unsigned handling_node;
 	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node);
 
-	/* TODO: NUMA nodes */
 	if (!link_is_valid)
 	{
 		int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
@@ -363,9 +385,11 @@ static int determine_request_path(starpu_data_handle_t handle,
 		STARPU_ASSERT(max_len >= 2);
 		STARPU_ASSERT(src_node >= 0);
 
+		unsigned numa = chose_best_numa_between_src_and_dest(src_node, dst_node);
+
 		/* GPU -> RAM */
 		src_nodes[0] = src_node;
-		dst_nodes[0] = STARPU_MAIN_RAM;
+		dst_nodes[0] = numa;
 
 		if (starpu_node_get_kind(src_node) == STARPU_DISK_RAM)
 			/* Disks don't have their own driver thread */
@@ -381,7 +405,7 @@ static int determine_request_path(starpu_data_handle_t handle,
 		}
 
 		/* RAM -> GPU */
-		src_nodes[1] = STARPU_MAIN_RAM;
+		src_nodes[1] = numa;
 		dst_nodes[1] = dst_node;
 
 		if (starpu_node_get_kind(dst_node) == STARPU_DISK_RAM)