浏览代码

The STARPU_LIMIT_GPU_MEM environment variable indicates the maximum number of
MB that should be available on the different CUDA devices. This should
typically be used for experimental purposes (eg. in order to stress the memory
subsystem).

Cédric Augonnet 15 年之前
父节点
当前提交
1640dc460d
共有 1 个文件被更改,包括 51 次插入0 次删除
  1. 51 0
      src/drivers/cuda/driver_cuda.c

+ 51 - 0
src/drivers/cuda/driver_cuda.c

@@ -30,6 +30,53 @@ static int ncudagpus;
 
 static cudaStream_t streams[STARPU_NMAXWORKERS];
 
+/* In case we want to cap the amount of memory available on the GPUs by the
+ * mean of the STARPU_LIMIT_GPU_MEM, we allocate a big buffer when the driver
+ * is launched. */
+static char *wasted_memory[STARPU_NMAXWORKERS];
+
+static void limit_gpu_mem_if_needed(int devid)
+{
+	cudaError_t cures;
+	int workerid = starpu_worker_get_id();
+	int limit = starpu_get_env_number("STARPU_LIMIT_GPU_MEM");
+
+	if (limit == -1)
+	{
+		wasted_memory[workerid] = NULL;
+		return;
+	}
+
+	/* Find the size of the memory on the device */
+	struct cudaDeviceProp prop;
+	cures = cudaGetDeviceProperties(&prop, devid);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	size_t totalGlobalMem = prop.totalGlobalMem;
+	size_t to_waste = totalGlobalMem - (size_t)limit*1024*1024;
+
+	_STARPU_DEBUG(stderr, "Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024), (size_t)(totalGlobalMem - to_waste)/(1024*1024));
+
+	cures = cudaMalloc((void **)&wasted_memory[workerid], to_waste);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+}
+
+static void unlimit_gpu_mem_if_needed(int workerid)
+{
+	cudaError_t cures;
+
+	if (wasted_memory[workerid])
+	{
+		cures = cudaFree(wasted_memory[workerid]);
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
+
+		wasted_memory[workerid] = NULL;
+	}
+}
+
 cudaStream_t *starpu_cuda_get_local_stream(void)
 {
 	int worker = starpu_worker_get_id();
@@ -48,6 +95,8 @@ static void init_context(int devid)
 	/* force CUDA to initialize the context for real */
 	cudaFree(0);
 
+	limit_gpu_mem_if_needed(devid);
+
 	cures = cudaStreamCreate(starpu_cuda_get_local_stream());
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
@@ -59,6 +108,8 @@ static void deinit_context(int workerid)
 
 	cudaStreamDestroy(streams[workerid]);
 
+	unlimit_gpu_mem_if_needed(workerid);
+
 	/* cleanup the runtime API internal stuffs (which CUBLAS is using) */
 	cures = cudaThreadExit();
 	if (cures)