Browse Source

Add more ways to get the CUDA hwloc location

Samuel Thibault 4 years ago
parent
commit
9e78d29691

+ 45 - 3
src/core/perfmodel/perfmodel_bus.c

@@ -49,6 +49,9 @@
 
 
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
 #include <hwloc.h>
 #include <hwloc.h>
+#ifdef STARPU_HAVE_LIBNVIDIA_ML
+#include <hwloc/nvml.h>
+#endif
 #ifndef HWLOC_API_VERSION
 #ifndef HWLOC_API_VERSION
 #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
 #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
 #endif
 #endif
@@ -2366,12 +2369,51 @@ static int find_platform_path_up(hwloc_obj_t obj1, hwloc_obj_t obj2, double band
 	return ret;
 	return ret;
 }
 }
 
 
+/* Return the hwloc object matching CUDA device DEVID in TOPOLOGY, or NULL when
+ * none of the lookups below finds one.  The lookups are tried in this order:
+ * hwloc's CUDA helper, the PCI bus id reported by the CUDA runtime, then (when
+ * built with NVML support) hwloc's NVML helpers. */
+static hwloc_obj_t get_hwloc_cuda_obj(hwloc_topology_t topology, unsigned devid)
+{
+	struct cudaDeviceProp props;
+	hwloc_obj_t obj;
+
+	/* First choice: ask hwloc directly by CUDA device index */
+	obj = hwloc_cuda_get_device_osdev_by_index(topology, devid);
+	if (obj)
+		return obj;
+
+	/* All remaining lookups need the device properties */
+	if (cudaGetDeviceProperties(&props, devid) != cudaSuccess)
+		return NULL;
+
+	/* Second choice: look for the PCI device at the reported bus id */
+	obj = hwloc_get_pcidev_by_busid(topology, props.pciDomainID, props.pciBusID, props.pciDeviceID, 0);
+	if (obj)
+		return obj;
+
+#ifdef STARPU_HAVE_LIBNVIDIA_ML
+	{
+		/* Last resort: go through the NVML handle of the device */
+		nvmlDevice_t nvmldev = _starpu_cuda_get_nvmldev(&props);
+
+		if (nvmldev)
+		{
+			unsigned int nvml_index;
+
+			/* Try by NVML index first, when NVML can provide one */
+			if (nvmlDeviceGetIndex(nvmldev, &nvml_index) == NVML_SUCCESS)
+			{
+				obj = hwloc_nvml_get_device_osdev_by_index(topology, nvml_index);
+				if (obj)
+					return obj;
+			}
+
+			/* Then by NVML handle */
+			obj = hwloc_nvml_get_device_osdev(topology, nvmldev);
+			if (obj)
+				return obj;
+		}
+	}
+#endif
+	return NULL;
+}
+
 /* find the path between cuda i and cuda j, and update the maximum bandwidth along the path */
 /* find the path between cuda i and cuda j, and update the maximum bandwidth along the path */
 static int find_platform_cuda_path(hwloc_topology_t topology, unsigned i, unsigned j, double bandwidth)
 static int find_platform_cuda_path(hwloc_topology_t topology, unsigned i, unsigned j, double bandwidth)
 {
 {
 	hwloc_obj_t cudai, cudaj;
 	hwloc_obj_t cudai, cudaj;
-	cudai = hwloc_cuda_get_device_osdev_by_index(topology, i);
-	cudaj = hwloc_cuda_get_device_osdev_by_index(topology, j);
+	cudai = get_hwloc_cuda_obj(topology, i);
+	cudaj = get_hwloc_cuda_obj(topology, j);
 
 
 	if (!cudai || !cudaj)
 	if (!cudai || !cudaj)
 		return 0;
 		return 0;
@@ -2832,7 +2874,7 @@ static void write_bus_platform_file_content(int version)
 				if (i != j)
 				if (i != j)
 					if (!find_platform_cuda_path(topology, i, j, 1000000. / cudadev_timing_dtod[i][j]))
 					if (!find_platform_cuda_path(topology, i, j, 1000000. / cudadev_timing_dtod[i][j]))
 					{
 					{
-						_STARPU_DISP("Warning: could not get CUDA location from hwloc, please make sure that hwloc has its cuda plugin enabled\n");
+						_STARPU_DISP("Warning: could not get CUDA location from hwloc\n");
 						clean_topology(hwloc_get_root_obj(topology));
 						clean_topology(hwloc_get_root_obj(topology));
 						hwloc_topology_destroy(topology);
 						hwloc_topology_destroy(topology);
 						goto flat_cuda;
 						goto flat_cuda;

+ 13 - 3
src/drivers/cuda/driver_cuda.c

@@ -106,6 +106,18 @@ static size_t _starpu_cuda_get_global_mem_size(unsigned devid)
 }
 }
 
 
 #ifdef STARPU_HAVE_LIBNVIDIA_ML
 #ifdef STARPU_HAVE_LIBNVIDIA_ML
+/* Look up the NVML device handle of the GPU described by PROPS, using its PCI
+ * domain, bus and device numbers.  Returns NULL when the bus id cannot be
+ * formatted or when NVML does not return a handle for it. */
+nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props)
+{
+	/* A 13-byte buffer only fits a 16-bit PCI domain ("dddd:bb:dd.0"); a
+	 * larger domain would be silently truncated into a malformed bus id.
+	 * 32 bytes are enough even if all three numbers take 8 hex digits
+	 * (8+1+8+1+8+2 characters plus the terminating NUL). */
+	char busid[32];
+	nvmlDevice_t ret;
+	int len;
+
+	/* %x expects unsigned values, while the properties are plain ints */
+	len = snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", (unsigned) props->pciDomainID, (unsigned) props->pciBusID, (unsigned) props->pciDeviceID);
+	if (len < 0 || (size_t) len >= sizeof(busid))
+		/* Never hand a truncated bus id to NVML */
+		return NULL;
+
+	/* On failure the handle is not trustworthy, so report NULL instead */
+	if (nvmlDeviceGetHandleByPciBusId(busid, &ret) != NVML_SUCCESS)
+		ret = NULL;
+
+	return ret;
+}
+
 nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid)
 nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid)
 {
 {
 	return nvmlDev[devid];
 	return nvmlDev[devid];
@@ -746,9 +758,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 #if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID)
 #if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID)
 #if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID)
 #if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID)
 #ifdef STARPU_HAVE_LIBNVIDIA_ML
 #ifdef STARPU_HAVE_LIBNVIDIA_ML
-		char busid[13];
-		snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
-		nvmlDeviceGetHandleByPciBusId(busid, &nvmlDev[devid]);
+		nvmlDev[devid] = _starpu_cuda_get_nvmldev(&props[devid]);
 #endif
 #endif
 		if (props[devid].pciDomainID)
 		if (props[devid].pciDomainID)
 			snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
 			snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);

+ 6 - 0
src/drivers/cuda/driver_cuda.h

@@ -28,6 +28,9 @@ void _starpu_cuda_preinit(void);
 #include <cuda.h>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include <cuda_runtime_api.h>
 #include <cublas.h>
 #include <cublas.h>
+#ifdef STARPU_HAVE_LIBNVIDIA_ML
+#include <nvml.h>
+#endif
 #endif
 #endif
 
 
 #include <starpu.h>
 #include <starpu.h>
@@ -45,6 +48,9 @@ extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_M
 void _starpu_cuda_discover_devices (struct _starpu_machine_config *);
 void _starpu_cuda_discover_devices (struct _starpu_machine_config *);
 void _starpu_init_cuda(void);
 void _starpu_init_cuda(void);
 void *_starpu_cuda_worker(void *);
 void *_starpu_cuda_worker(void *);
+#ifdef STARPU_HAVE_LIBNVIDIA_ML
+nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props);
+#endif
 #else
 #else
 #  define _starpu_cuda_discover_devices(config) ((void) config)
 #  define _starpu_cuda_discover_devices(config) ((void) config)
 #endif
 #endif