Преглед изворни кода

Add some support for NUMA proximity of CPU and GPU devices

Samuel Thibault пре 7 година
родитељ
комит
4072404abc
1 измењених фајлова са 71 додато и 5 уклоњено
  1. 71 5
      src/core/topology.c

+ 71 - 5
src/core/topology.c

@@ -156,6 +156,7 @@ static int numa_get_physical_id(hwloc_obj_t obj)
 }
 #endif
 
+/* This returns the exact NUMA node next to a worker */
 static int _starpu_get_logical_numa_node_worker(unsigned workerid)
 {
 #if defined(STARPU_HAVE_HWLOC)
@@ -185,6 +186,7 @@ static int _starpu_get_logical_numa_node_worker(unsigned workerid)
 	}
 }
 
+/* This returns the exact NUMA node next to a worker */
 static int _starpu_get_physical_numa_node_worker(unsigned workerid)
 {
 #if defined(STARPU_HAVE_HWLOC)
@@ -214,6 +216,43 @@ static int _starpu_get_physical_numa_node_worker(unsigned workerid)
 	}
 }
 
+/* This returns the CPU NUMA memory close to a worker.
+ *
+ * With hwloc support compiled in (STARPU_HAVE_HWLOC) and the STARPU_USE_NUMA
+ * environment number (default 0) non-zero, the hwloc object matching the
+ * worker is looked up and handed to numa_get_logical_id(), whose result is
+ * returned.  Worker kinds that have no lookup below make the function
+ * return 0.  In every other configuration STARPU_NUMA_MAIN_RAM is returned.
+ *
+ * NOTE(review): the hwloc documentation says
+ * hwloc_cuda_get_device_osdev_by_index() may return NULL when no matching
+ * OS device exists; confirm that numa_get_logical_id() tolerates a NULL obj.
+ */
+static int _starpu_get_logical_close_numa_node_worker(unsigned workerid)
+{
+#if defined(STARPU_HAVE_HWLOC)
+	if (starpu_get_env_number_default("STARPU_USE_NUMA", 0)) /* off unless the variable is set to non-zero */
+	{
+		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config() ;
+		struct _starpu_machine_topology *topology = &config->topology ;
+
+		hwloc_obj_t obj; /* assigned by every case that reaches the final return */
+		switch(worker->arch)
+		{
+			case STARPU_CPU_WORKER:
+				obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid) ; /* PU object selected by the worker's bindid */
+				break;
+#ifndef STARPU_SIMGRID
+#if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX
+			case STARPU_CUDA_WORKER:
+				obj = hwloc_cuda_get_device_osdev_by_index(topology->hwtopology, worker->devid); /* OS device object for the worker's devid */
+				break;
+#endif
+#endif
+			default:
+				return 0; /* no hwloc lookup for this worker kind; NOTE(review): literal 0, presumably equal to STARPU_NUMA_MAIN_RAM - confirm */
+		}
+
+		return numa_get_logical_id(obj);
+	}
+	else
+#endif
+	{
+		(void) workerid; /* unused */
+		return STARPU_NUMA_MAIN_RAM;
+	}
+}
+
 //TODO: change this into an array
 int starpu_memory_nodes_numa_hwloclogid_to_id(int logid)
 {
@@ -246,6 +285,8 @@ int starpu_memory_nodes_numa_id_to_devid(int osid)
 	return -1;
 }
 
+// TODO: cache the values instead of looking in hwloc each time
+
 /* Avoid using this one, prefer _starpu_task_data_get_node_on_worker */
 int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned local_node)
 {
@@ -254,11 +295,19 @@ int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index,
 		node = STARPU_CODELET_GET_NODE(task->cl, index);
 	switch (node) {
 	case STARPU_SPECIFIC_NODE_LOCAL:
+		// TODO: rather find MCDRAM
 		node = local_node;
 		break;
 	case STARPU_SPECIFIC_NODE_CPU:
-		// TODO: rather take close NUMA node
-		node = STARPU_MAIN_RAM;
+		switch (starpu_node_get_kind(local_node)) {
+		case STARPU_CPU_RAM:
+			node = local_node;
+			break;
+		default:
+			// TODO: rather take close NUMA node
+			node = STARPU_MAIN_RAM;
+			break;
+		}
 		break;
 	case STARPU_SPECIFIC_NODE_SLOW:
 		// TODO: rather leave in DDR
@@ -270,9 +319,26 @@ int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index,
 
 int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker)
 { /* Picks the memory node for data number `index` of `task` when it runs on
    * `worker`.  The '-' lines are the implementation removed by this commit
    * (delegation to _starpu_task_data_get_node_on_node with the worker's
    * memory node); the '+' lines are its replacement.
    * NOTE(review): task->cl is dereferenced without a NULL check - confirm
    * callers always pass a task with a codelet. */
-	/* TODO: choose memory node according to proximity to worker rather than memory node */
-	unsigned target_node = starpu_worker_get_memory_node(worker);
-	return _starpu_task_data_get_node_on_node(task, index, target_node);
+	unsigned local_node = starpu_worker_get_memory_node(worker);
+	int node = STARPU_SPECIFIC_NODE_LOCAL; /* used when the codelet sets no specific_nodes */
+	if (task->cl->specific_nodes)
+		node = STARPU_CODELET_GET_NODE(task->cl, index);
+	switch (node) { /* no default case: any other value is returned unchanged */
+	case STARPU_SPECIFIC_NODE_LOCAL:
+		// TODO: rather find MCDRAM
+		node = local_node;
+		break;
+	case STARPU_SPECIFIC_NODE_CPU:
+		node = starpu_memory_nodes_numa_hwloclogid_to_id(_starpu_get_logical_close_numa_node_worker(worker)); /* map the close NUMA node's hwloc logical id to a memory node id */
+		if (node == -1) /* no memory node matched that logical id */
+			node = STARPU_MAIN_RAM;
+		break;
+	case STARPU_SPECIFIC_NODE_SLOW:
+		// TODO: rather leave in DDR
+		node = local_node;
+		break;
+	}
+	return node;
 }
 
 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)