4 years ago · 9e78d29691
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -49,6 +49,9 @@
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 #include <hwloc.h>
															
 
																+#ifdef STARPU_HAVE_LIBNVIDIA_ML
															
 
																+#include <hwloc/nvml.h>
															
 
																+#endif
															
 
																 #ifndef HWLOC_API_VERSION
															
 
																 #define HWLOC_OBJ_PU HWLOC_OBJ_PROC
															
 
																 #endif
															
@@ -2366,12 +2369,51 @@ static int find_platform_path_up(hwloc_obj_t obj1, hwloc_obj_t obj2, double band
 
																 	return ret;
															
 
																 }
															
 
																+/* Return the hwloc object matching CUDA device devid in topology, or NULL
																+ * when none of the lookups below finds one.  The lookups are tried in
																+ * order: hwloc's CUDA device index, the PCI location reported by CUDA,
																+ * and, only when STARPU_HAVE_LIBNVIDIA_ML is defined, the NVML device
																+ * index followed by the NVML device handle. */
																+static hwloc_obj_t get_hwloc_cuda_obj(hwloc_topology_t topology, unsigned devid)
																+{
																+	struct cudaDeviceProp props;
																+	hwloc_obj_t obj;
																+
																+	/* First try: ask hwloc directly with the CUDA device index */
																+	obj = hwloc_cuda_get_device_osdev_by_index(topology, devid);
																+	if (obj != NULL)
																+		return obj;
																+
																+	/* Every remaining lookup needs the device properties from CUDA */
																+	if (cudaGetDeviceProperties(&props, devid) != cudaSuccess)
																+		return NULL;
																+
																+	/* Second try: look the device up by its PCI domain/bus/device, function 0 */
																+	obj = hwloc_get_pcidev_by_busid(topology, props.pciDomainID, props.pciBusID, props.pciDeviceID, 0);
																+	if (obj != NULL)
																+		return obj;
																+
																+#ifdef STARPU_HAVE_LIBNVIDIA_ML
																+	{
																+		nvmlDevice_t nvmldev = _starpu_cuda_get_nvmldev(&props);
																+
																+		if (nvmldev)
																+		{
																+			unsigned int nvml_index;
																+
																+			/* Third try: go through the NVML index of the device */
																+			if (nvmlDeviceGetIndex(nvmldev, &nvml_index) == NVML_SUCCESS)
																+			{
																+				obj = hwloc_nvml_get_device_osdev_by_index(topology, nvml_index);
																+				if (obj != NULL)
																+					return obj;
																+			}
																+
																+			/* Last try: hand the NVML handle itself to hwloc; a NULL
																+			 * result here is also the final answer */
																+			return hwloc_nvml_get_device_osdev(topology, nvmldev);
																+		}
																+	}
																+#endif
																+
																+	return NULL;
																+}
															
 
																+
															
 
																 /* find the path between cuda i and cuda j, and update the maximum bandwidth along the path */
															
 
																 static int find_platform_cuda_path(hwloc_topology_t topology, unsigned i, unsigned j, double bandwidth)
															
 
																 {
															
 
																 	hwloc_obj_t cudai, cudaj;
															
 
																-	cudai = hwloc_cuda_get_device_osdev_by_index(topology, i);
															
 
																-	cudaj = hwloc_cuda_get_device_osdev_by_index(topology, j);
															
 
																+	cudai = get_hwloc_cuda_obj(topology, i);
															
 
																+	cudaj = get_hwloc_cuda_obj(topology, j);
															
 
																 	if (!cudai || !cudaj)
															
 
																 		return 0;
															
@@ -2832,7 +2874,7 @@ static void write_bus_platform_file_content(int version)
 
																 				if (i != j)
															
 
																 					if (!find_platform_cuda_path(topology, i, j, 1000000. / cudadev_timing_dtod[i][j]))
															
 
																 					{
															
 
																-						_STARPU_DISP("Warning: could not get CUDA location from hwloc, please make sure that hwloc has its cuda plugin enabled\n");
															
 
																+						_STARPU_DISP("Warning: could not get CUDA location from hwloc\n");
															
 
																 						clean_topology(hwloc_get_root_obj(topology));
															
 
																 						hwloc_topology_destroy(topology);
															
 
																 						goto flat_cuda;
															
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -106,6 +106,18 @@ static size_t _starpu_cuda_get_global_mem_size(unsigned devid)
 
																 }
															
 
																 #ifdef STARPU_HAVE_LIBNVIDIA_ML
															
 
																+/* Return the NVML device handle for the CUDA device described by props,
																+ * looked up through its PCI bus id (domain:bus:device, function 0).
																+ * Returns NULL when the bus id cannot be formatted or when NVML does not
																+ * report success for the lookup. */
																+nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props)
																+{
																+	/* "%04x" is only a minimum width: a PCI domain above 0xffff prints
																+	 * more than four digits.  Three 32-bit hexadecimal fields plus the
																+	 * separators and ".0" need at most 28 characters, so 32 bytes always
																+	 * hold the whole string and its terminator. */
																+	char busid[32];
																+	nvmlDevice_t ret;
																+	int len;
																+
																+	/* %x expects unsigned values, hence the explicit conversions */
																+	len = snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", (unsigned) props->pciDomainID, (unsigned) props->pciBusID, (unsigned) props->pciDeviceID);
																+	/* Never hand a truncated (hence wrong) bus id to NVML */
																+	if (len < 0 || (size_t) len >= sizeof(busid))
																+		return NULL;
																+
																+	if (nvmlDeviceGetHandleByPciBusId(busid, &ret) != NVML_SUCCESS)
																+		ret = NULL;
																+
																+	return ret;
																+}
															
 
																+
															
 
																 nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid)
															
 
																 {
															
 
																 	return nvmlDev[devid];
															
@@ -746,9 +758,7 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
																 #if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID)
															
 
																 #if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID)
															
 
																 #ifdef STARPU_HAVE_LIBNVIDIA_ML
															
 
																-		char busid[13];
															
 
																-		snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
															
 
																-		nvmlDeviceGetHandleByPciBusId(busid, &nvmlDev[devid]);
															
 
																+		nvmlDev[devid] = _starpu_cuda_get_nvmldev(&props[devid]);
															
 
																 #endif
															
 
																 		if (props[devid].pciDomainID)
															
 
																 			snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
															
--- a/src/drivers/cuda/driver_cuda.h
+++ b/src/drivers/cuda/driver_cuda.h
@@ -28,6 +28,9 @@ void _starpu_cuda_preinit(void);
 
																 #include <cuda.h>
															
 
																 #include <cuda_runtime_api.h>
															
 
																 #include <cublas.h>
															
 
																+#ifdef STARPU_HAVE_LIBNVIDIA_ML
															
 
																+#include <nvml.h>
															
 
																+#endif
															
 
																 #endif
															
 
																 #include <starpu.h>
															
@@ -45,6 +48,9 @@ extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_M
 
																 void _starpu_cuda_discover_devices (struct _starpu_machine_config *);
															
 
																 void _starpu_init_cuda(void);
															
 
																 void *_starpu_cuda_worker(void *);
															
 
																+#ifdef STARPU_HAVE_LIBNVIDIA_ML
															
 
																+nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props);
															
 
																+#endif
															
 
																 #else
															
 
																 #  define _starpu_cuda_discover_devices(config) ((void) config)
															
 
																 #endif