浏览代码

get CUDA properties once for good, and expose to application

Samuel Thibault 14 年之前
父节点
当前提交
ea8cb715a7
共有 3 个文件被更改,包括 29 次插入23 次删除
  1. 6 1
      doc/chapters/basic-api.texi
  2. 2 0
      include/starpu_cuda.h
  3. 21 22
      src/drivers/cuda/driver_cuda.c

+ 6 - 1
doc/chapters/basic-api.texi

@@ -356,7 +356,7 @@ scheduled (which may however have to wait for a task completion).
 @end deftypefun
 @end deftypefun
 
 
 @deftypefun starpu_data_handle_t starpu_data_lookup ({const void *}@var{ptr})
 @deftypefun starpu_data_handle_t starpu_data_lookup ({const void *}@var{ptr})
-todo
+Return the handle associated to ptr @var{ptr}.
 @end deftypefun
 @end deftypefun
 
 
 @deftypefun int starpu_data_request_allocation (starpu_data_handle_t @var{handle}, uint32_t @var{node})
 @deftypefun int starpu_data_request_allocation (starpu_data_handle_t @var{handle}, uint32_t @var{node})
@@ -1579,6 +1579,11 @@ Synchronizing with @code{cudaThreadSynchronize()} is allowed, but will reduce
 the likelihood of having all transfers overlapped.
 the likelihood of having all transfers overlapped.
 @end deftypefun
 @end deftypefun
 
 
+@deftypefun {const struct cudaDeviceProp *} starpu_cuda_get_device_properties (unsigned @var{workerid})
+This function returns a pointer to device properties for worker @var{workerid}
+(assumed to be a CUDA worker).
+@end deftypefun
+
 @deftypefun void starpu_helper_cublas_init (void)
 @deftypefun void starpu_helper_cublas_init (void)
 This function initializes CUBLAS on every CUDA device.
 This function initializes CUBLAS on every CUDA device.
 The CUBLAS library must be initialized prior to any CUBLAS call. Calling
 The CUBLAS library must be initialized prior to any CUBLAS call. Calling

+ 2 - 0
include/starpu_cuda.h

@@ -40,6 +40,8 @@ void starpu_cuda_report_error(const char *func, const char *file, int line, cuda
 size_t starpu_cuda_get_global_mem_size(int devid);
 size_t starpu_cuda_get_global_mem_size(int devid);
 cudaStream_t starpu_cuda_get_local_stream(void);
 cudaStream_t starpu_cuda_get_local_stream(void);
 
 
+const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 21 - 22
src/drivers/cuda/driver_cuda.c

@@ -34,6 +34,7 @@ static int ncudagpus;
 
 
 static cudaStream_t streams[STARPU_NMAXWORKERS];
 static cudaStream_t streams[STARPU_NMAXWORKERS];
 static cudaStream_t transfer_streams[STARPU_NMAXWORKERS];
 static cudaStream_t transfer_streams[STARPU_NMAXWORKERS];
+static struct cudaDeviceProp props[STARPU_MAXCUDADEVS];
 
 
 /* In case we want to cap the amount of memory available on the GPUs by the
 /* In case we want to cap the amount of memory available on the GPUs by the
  * mean of the STARPU_LIMIT_GPU_MEM, we allocate a big buffer when the driver
  * mean of the STARPU_LIMIT_GPU_MEM, we allocate a big buffer when the driver
@@ -52,16 +53,13 @@ static void limit_gpu_mem_if_needed(int devid)
 	}
 	}
 
 
 	/* Find the size of the memory on the device */
 	/* Find the size of the memory on the device */
-	struct cudaDeviceProp prop;
-	cures = cudaGetDeviceProperties(&prop, devid);
-	if (STARPU_UNLIKELY(cures))
-		STARPU_CUDA_REPORT_ERROR(cures);
-
-	size_t totalGlobalMem = prop.totalGlobalMem;
+	size_t totalGlobalMem = props[devid].totalGlobalMem;
 
 
 	/* How much memory to waste ? */
 	/* How much memory to waste ? */
 	size_t to_waste = totalGlobalMem - (size_t)limit*1024*1024;
 	size_t to_waste = totalGlobalMem - (size_t)limit*1024*1024;
 
 
+	props[devid].totalGlobalMem -= to_waste;
+
 	_STARPU_DEBUG("CUDA device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
 	_STARPU_DEBUG("CUDA device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
 			devid, (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024),
 			devid, (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024),
 			(size_t)(totalGlobalMem - to_waste)/(1024*1024));
 			(size_t)(totalGlobalMem - to_waste)/(1024*1024));
@@ -88,15 +86,7 @@ static void unlimit_gpu_mem_if_needed(int devid)
 
 
 size_t starpu_cuda_get_global_mem_size(int devid)
 size_t starpu_cuda_get_global_mem_size(int devid)
 {
 {
-	cudaError_t cures;
-	struct cudaDeviceProp prop;
-
-	/* Find the size of the memory on the device */
-	cures = cudaGetDeviceProperties(&prop, devid);
-	if (STARPU_UNLIKELY(cures))
-		STARPU_CUDA_REPORT_ERROR(cures);
-
-	return (size_t)prop.totalGlobalMem;
+	return (size_t)props[devid].totalGlobalMem;
 }
 }
 
 
 cudaStream_t starpu_cuda_get_local_transfer_stream(void)
 cudaStream_t starpu_cuda_get_local_transfer_stream(void)
@@ -113,6 +103,13 @@ cudaStream_t starpu_cuda_get_local_stream(void)
 	return streams[worker];
 	return streams[worker];
 }
 }
 
 
+const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid)
+{
+	struct _starpu_machine_config *config = _starpu_get_machine_config();
+	unsigned devid = config->workers[workerid].devid;
+	return &props[devid];
+}
+
 static void init_context(int devid)
 static void init_context(int devid)
 {
 {
 	cudaError_t cures;
 	cudaError_t cures;
@@ -125,6 +122,10 @@ static void init_context(int devid)
 	/* force CUDA to initialize the context for real */
 	/* force CUDA to initialize the context for real */
 	cudaFree(0);
 	cudaFree(0);
 
 
+	cures = cudaGetDeviceProperties(&props[devid], devid);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
 	limit_gpu_mem_if_needed(devid);
 	limit_gpu_mem_if_needed(devid);
 
 
 	cures = cudaStreamCreate(&streams[workerid]);
 	cures = cudaStreamCreate(&streams[workerid]);
@@ -267,18 +268,16 @@ void *_starpu_cuda_worker(void *arg)
 
 
 	/* get the device's name */
 	/* get the device's name */
 	char devname[128];
 	char devname[128];
-	struct cudaDeviceProp prop;
-	cudaGetDeviceProperties(&prop, devid);
-	strncpy(devname, prop.name, 128);
-	float size = (float) prop.totalGlobalMem / (1<<30);
+	strncpy(devname, props[devid].name, 128);
+	float size = (float) props[devid].totalGlobalMem / (1<<30);
 
 
 #ifdef STARPU_HAVE_BUSID
 #ifdef STARPU_HAVE_BUSID
 #ifdef STARPU_HAVE_DOMAINID
 #ifdef STARPU_HAVE_DOMAINID
-	if (prop.pciDomainID)
-		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %04x:%02x:%02x.0)", args->devid, devname, size, prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
+	if (props[devid].pciDomainID)
+		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %04x:%02x:%02x.0)", args->devid, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
 	else
 	else
 #endif
 #endif
-		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %02x:%02x.0)", args->devid, devname, size, prop.pciBusID, prop.pciDeviceID);
+		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %02x:%02x.0)", args->devid, devname, size, props[devid].pciBusID, props[devid].pciDeviceID);
 #else
 #else
 	snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB)", args->devid, devname, size);
 	snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB)", args->devid, devname, size);
 #endif
 #endif