Browse Source

The STARPU_LIMIT_GPU_MEM environment variable indicates the maximum number of
MB that should be available on the different CUDA devices. This should
typically be used for experimental purposes (eg. in order to stress the memory
subsystem).

Cédric Augonnet 15 years ago
parent
commit
1640dc460d
1 changed files with 51 additions and 0 deletions
  1. 51 0
      src/drivers/cuda/driver_cuda.c

+ 51 - 0
src/drivers/cuda/driver_cuda.c

@@ -30,6 +30,53 @@ static int ncudagpus;
 
 static cudaStream_t streams[STARPU_NMAXWORKERS];
 
+/* In case we want to cap the amount of memory available on the GPUs by the
+ * mean of the STARPU_LIMIT_GPU_MEM, we allocate a big buffer when the driver
+ * is launched. */
+static char *wasted_memory[STARPU_NMAXWORKERS];
+
+static void limit_gpu_mem_if_needed(int devid)
+{
+	cudaError_t cures;
+	int workerid = starpu_worker_get_id();
+	int limit = starpu_get_env_number("STARPU_LIMIT_GPU_MEM");
+
+	if (limit == -1)
+	{
+		wasted_memory[workerid] = NULL;
+		return;
+	}
+
+	/* Find the size of the memory on the device */
+	struct cudaDeviceProp prop;
+	cures = cudaGetDeviceProperties(&prop, devid);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	size_t totalGlobalMem = prop.totalGlobalMem;
+	size_t to_waste = totalGlobalMem - (size_t)limit*1024*1024;
+
+	_STARPU_DEBUG(stderr, "Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024), (size_t)(totalGlobalMem - to_waste)/(1024*1024));
+
+	cures = cudaMalloc((void **)&wasted_memory[workerid], to_waste);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+}
+
+static void unlimit_gpu_mem_if_needed(int workerid)
+{
+	cudaError_t cures;
+
+	if (wasted_memory[workerid])
+	{
+		cures = cudaFree(wasted_memory[workerid]);
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
+
+		wasted_memory[workerid] = NULL;
+	}
+}
+
 cudaStream_t *starpu_cuda_get_local_stream(void)
 {
 	int worker = starpu_worker_get_id();
@@ -48,6 +95,8 @@ static void init_context(int devid)
 	/* force CUDA to initialize the context for real */
 	cudaFree(0);
 
+	limit_gpu_mem_if_needed(devid);
+
 	cures = cudaStreamCreate(starpu_cuda_get_local_stream());
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
@@ -59,6 +108,8 @@ static void deinit_context(int workerid)
 
 	cudaStreamDestroy(streams[workerid]);
 
+	unlimit_gpu_mem_if_needed(workerid);
+
 	/* cleanup the runtime API internal stuffs (which CUBLAS is using) */
 	cures = cudaThreadExit();
 	if (cures)