Browse Source

Get NUMA nodes attached to the CUDA/OpenCL devices if no CPU workers found

Corentin Salingue 8 years ago
parent
commit
a2ea8df04a
1 changed file with 121 additions and 3 deletions
  1. 121 3
      src/core/topology.c

+ 121 - 3
src/core/topology.c

@@ -30,6 +30,7 @@
 #include <drivers/mpi/driver_mpi_common.h>
 #include <drivers/mp_common/source_common.h>
 #include <drivers/opencl/driver_opencl.h>
+#include <drivers/opencl/driver_opencl_utils.h>
 #include <profiling/profiling.h>
 #include <datawizard/datastats.h>
 #include <datawizard/memory_nodes.h>
@@ -54,6 +55,10 @@
 #include <hwloc/cuda.h>
 #endif
 
+#if defined(STARPU_USE_OPENCL)
+#include <hwloc/opencl.h>
+#endif
+
 static unsigned topology_is_initialized = 0;
 static int nobind;
 
@@ -1847,15 +1852,128 @@ static void _starpu_init_numa_node(struct _starpu_machine_config *config)
 			}
 		}
 	}
-	
+
 	/* If we found NUMA nodes from CPU workers, it's good */
 	if (nb_numa_nodes != 0)
 		return;
 
-	//TODO IF NO NUMA MEMNODE check GPUS
+#if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
+	_STARPU_DISP("No NUMA nodes found when checking CPU workers. Take NUMA nodes attached to CUDA and OpenCL devices... \n");
+#endif
+
+
+#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_HWLOC)
+	for (i = 0; i < config->topology.ncudagpus; i++)
+	{
+		hwloc_obj_t obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, i);
+		while (obj->type != HWLOC_OBJ_NODE)
+		{
+			obj = obj->parent;
+
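+			/* If we reach the root without finding a "node" obj, this means
+			 * hwloc does not know whether there are NUMA nodes or not, so
+			 * this CUDA device cannot be attached to a NUMA node.
+			 * NOTE(review): the `continue` below binds to this while loop,
+			 * so the loop condition is re-evaluated with obj == NULL;
+			 * confirm whether skipping to the next device was intended. */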
+			if (!obj)
+				continue;
+		}
+		int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
+
+		if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
+		{
+			_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
+			/* Don't create a new NUMA node */
+			numa_starpu_id = STARPU_MAIN_RAM;
+		}
+
+		if (numa_starpu_id == -1)
+		{
+			int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
+			STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
+			numa_memory_nodes[memnode] = obj->logical_index;
+			nb_numa_nodes++;
+#ifdef STARPU_SIMGRID
+			snprintf(name, sizeof(name), "RAM%d", memnode);
+			host = _starpu_simgrid_get_host_by_name(name);
+			STARPU_ASSERT(host);
+			_starpu_simgrid_memory_node_set_host(memnode, host);
+#endif
+		}
+	}	
+#endif
+#if defined(STARPU_USE_OPENCL) && defined(STARPU_HAVE_HWLOC)
+	if (config->topology.nopenclgpus > 0)
+	{
+		cl_int err;
+		cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
+		cl_uint nb_platforms;
+		unsigned platform;
+		unsigned nb_opencl_devices = 0, num = 0;
+
+		err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
+		if (STARPU_UNLIKELY(err != CL_SUCCESS)) 
+			nb_platforms=0;
+
+		cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+		if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
+			device_type |= CL_DEVICE_TYPE_CPU;
+		if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
+			device_type = CL_DEVICE_TYPE_CPU;
+
+		for (platform = 0; platform < nb_platforms ; platform++)
+		{
+			err = clGetDeviceIDs(platform_id[platform], device_type, 0, NULL, &num);
+			if (err != CL_SUCCESS)
+				num = 0;
+			nb_opencl_devices += num;
+
+			for (i = 0; i < num; i++)
+			{
+				hwloc_obj_t obj = hwloc_opencl_get_device_osdev_by_index(config->topology.hwtopology, platform, i);
+				while (obj->type != HWLOC_OBJ_NODE)
+				{
+					obj = obj->parent;
+
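+					/* If we reach the root without finding a "node" obj, this means
+					 * hwloc does not know whether there are NUMA nodes or not, so
+					 * this OpenCL device cannot be attached to a NUMA node.
+					 * NOTE(review): the `continue` below binds to this while loop,
+					 * so the loop condition is re-evaluated with obj == NULL;
+					 * confirm whether skipping to the next device was intended. */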
+					if (!obj)
+						continue;
+				}
+				int numa_starpu_id = _starpu_numa_logid_to_id(obj->logical_index);
+
+				if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES)
+				{
+					_STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES);
+					/* Don't create a new NUMA node */
+					numa_starpu_id = STARPU_MAIN_RAM;
+				}
+
+				if (numa_starpu_id == -1)
+				{
+					int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index);
+					STARPU_ASSERT_MSG(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES);
+					numa_memory_nodes[memnode] = obj->logical_index;
+					nb_numa_nodes++;
+#ifdef STARPU_SIMGRID
+					snprintf(name, sizeof(name), "RAM%d", memnode);
+					host = _starpu_simgrid_get_host_by_name(name);
+					STARPU_ASSERT(host);
+					_starpu_simgrid_memory_node_set_host(memnode, host);
+#endif
+				}
+			}	
+		}
+	}
+#endif
+	
+#if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && defined(STARPU_HAVE_HWLOC)
+	/* NUMA nodes were found from the CUDA and/or OpenCL devices, it's good */
+	if (nb_numa_nodes != 0)
+		return;
+#endif
 
 	/* If we still did not find any NUMA node, we take all of them */
-	_STARPU_MSG("No NUMA nodes found when checking workers. Take all NUMA nodes available... \n");
+	_STARPU_DISP("No NUMA nodes found when checking workers. Take all NUMA nodes available... \n");
 
 	unsigned nnuma = _starpu_topology_get_nnumanodes(config);
 	if (nnuma > STARPU_MAXNUMANODES)