浏览代码

get CUDA properties once for good, and expose to application

Samuel Thibault 14 年之前
父节点
当前提交
ea8cb715a7
共有 3 个文件被更改,包括 29 次插入23 次删除
  1. 6 1
      doc/chapters/basic-api.texi
  2. 2 0
      include/starpu_cuda.h
  3. 21 22
      src/drivers/cuda/driver_cuda.c

+ 6 - 1
doc/chapters/basic-api.texi

@@ -356,7 +356,7 @@ scheduled (which may however have to wait for a task completion).
 @end deftypefun
 
 @deftypefun starpu_data_handle_t starpu_data_lookup ({const void *}@var{ptr})
-todo
+Return the handle associated to ptr @var{ptr}.
 @end deftypefun
 
 @deftypefun int starpu_data_request_allocation (starpu_data_handle_t @var{handle}, uint32_t @var{node})
@@ -1579,6 +1579,11 @@ Synchronizing with @code{cudaThreadSynchronize()} is allowed, but will reduce
 the likelihood of having all transfers overlapped.
 @end deftypefun
 
+@deftypefun {const struct cudaDeviceProp *} starpu_cuda_get_device_properties (unsigned @var{workerid})
+This function returns a pointer to device properties for worker @var{workerid}
+(assumed to be a CUDA worker).
+@end deftypefun
+
 @deftypefun void starpu_helper_cublas_init (void)
 This function initializes CUBLAS on every CUDA device.
 The CUBLAS library must be initialized prior to any CUBLAS call. Calling

+ 2 - 0
include/starpu_cuda.h

@@ -40,6 +40,8 @@ void starpu_cuda_report_error(const char *func, const char *file, int line, cuda
 size_t starpu_cuda_get_global_mem_size(int devid);
 cudaStream_t starpu_cuda_get_local_stream(void);
 
+const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
+
 #ifdef __cplusplus
 }
 #endif

+ 21 - 22
src/drivers/cuda/driver_cuda.c

@@ -34,6 +34,7 @@ static int ncudagpus;
 
 static cudaStream_t streams[STARPU_NMAXWORKERS];
 static cudaStream_t transfer_streams[STARPU_NMAXWORKERS];
+static struct cudaDeviceProp props[STARPU_MAXCUDADEVS];
 
 /* In case we want to cap the amount of memory available on the GPUs by the
  * mean of the STARPU_LIMIT_GPU_MEM, we allocate a big buffer when the driver
@@ -52,16 +53,13 @@ static void limit_gpu_mem_if_needed(int devid)
 	}
 
 	/* Find the size of the memory on the device */
-	struct cudaDeviceProp prop;
-	cures = cudaGetDeviceProperties(&prop, devid);
-	if (STARPU_UNLIKELY(cures))
-		STARPU_CUDA_REPORT_ERROR(cures);
-
-	size_t totalGlobalMem = prop.totalGlobalMem;
+	size_t totalGlobalMem = props[devid].totalGlobalMem;
 
 	/* How much memory to waste ? */
 	size_t to_waste = totalGlobalMem - (size_t)limit*1024*1024;
 
+	props[devid].totalGlobalMem -= to_waste;
+
 	_STARPU_DEBUG("CUDA device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
 			devid, (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024),
 			(size_t)(totalGlobalMem - to_waste)/(1024*1024));
@@ -88,15 +86,7 @@ static void unlimit_gpu_mem_if_needed(int devid)
 
 size_t starpu_cuda_get_global_mem_size(int devid)
 {
-	cudaError_t cures;
-	struct cudaDeviceProp prop;
-
-	/* Find the size of the memory on the device */
-	cures = cudaGetDeviceProperties(&prop, devid);
-	if (STARPU_UNLIKELY(cures))
-		STARPU_CUDA_REPORT_ERROR(cures);
-
-	return (size_t)prop.totalGlobalMem;
+	return (size_t)props[devid].totalGlobalMem;
 }
 
 cudaStream_t starpu_cuda_get_local_transfer_stream(void)
@@ -113,6 +103,13 @@ cudaStream_t starpu_cuda_get_local_stream(void)
 	return streams[worker];
 }
 
+const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid)
+{
+	struct _starpu_machine_config *config = _starpu_get_machine_config();
+	unsigned devid = config->workers[workerid].devid;
+	return &props[devid];
+}
+
 static void init_context(int devid)
 {
 	cudaError_t cures;
@@ -125,6 +122,10 @@ static void init_context(int devid)
 	/* force CUDA to initialize the context for real */
 	cudaFree(0);
 
+	cures = cudaGetDeviceProperties(&props[devid], devid);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
 	limit_gpu_mem_if_needed(devid);
 
 	cures = cudaStreamCreate(&streams[workerid]);
@@ -267,18 +268,16 @@ void *_starpu_cuda_worker(void *arg)
 
 	/* get the device's name */
 	char devname[128];
-	struct cudaDeviceProp prop;
-	cudaGetDeviceProperties(&prop, devid);
-	strncpy(devname, prop.name, 128);
-	float size = (float) prop.totalGlobalMem / (1<<30);
+	strncpy(devname, props[devid].name, 128);
+	float size = (float) props[devid].totalGlobalMem / (1<<30);
 
 #ifdef STARPU_HAVE_BUSID
 #ifdef STARPU_HAVE_DOMAINID
-	if (prop.pciDomainID)
-		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %04x:%02x:%02x.0)", args->devid, devname, size, prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
+	if (props[devid].pciDomainID)
+		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %04x:%02x:%02x.0)", args->devid, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
 	else
 #endif
-		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %02x:%02x.0)", args->devid, devname, size, prop.pciBusID, prop.pciDeviceID);
+		snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB %02x:%02x.0)", args->devid, devname, size, props[devid].pciBusID, props[devid].pciDeviceID);
 #else
 	snprintf(args->name, sizeof(args->name), "CUDA %d (%s %.1f GiB)", args->devid, devname, size);
 #endif