Forráskód Böngészése

Store GPU size in platform file

Samuel Thibault 11 éve
szülő
commit
b1dfd30389

+ 7 - 2
src/core/perfmodel/perfmodel_bus.c

@@ -73,6 +73,7 @@ static unsigned nmic = 0;
 /* Benchmarking the performance of the bus */
 
 #ifdef STARPU_USE_CUDA
+static uint64_t cuda_size[STARPU_MAXCUDADEVS]; /* Per-CUDA-device totalGlobalMem as returned by cudaGetDeviceProperties() during the bus benchmark; written as the 'memsize' host property of the platform file */
 static int cuda_affinity_matrix[STARPU_MAXCUDADEVS][STARPU_MAXCPUS];
 static double cudadev_timing_htod[STARPU_MAXNODES] = {0.0};
 static double cudadev_latency_htod[STARPU_MAXNODES] = {0.0};
@@ -85,6 +86,7 @@ static double cudadev_latency_dtod[STARPU_MAXNODES][STARPU_MAXNODES] = {{0.0}};
 static struct dev_timing cudadev_timing_per_cpu[STARPU_MAXNODES*STARPU_MAXCPUS];
 #endif
 #ifdef STARPU_USE_OPENCL
+static uint64_t opencl_size[STARPU_MAXOPENCLDEVS]; /* Per-OpenCL-device size recorded during the bus benchmark (indexed by OpenCL device id, hence sized by the OpenCL limit, not the CUDA one); written as the 'memsize' host property of the platform file */
 static int opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][STARPU_MAXCPUS];
 static double opencldev_timing_htod[STARPU_MAXNODES] = {0.0};
 static double opencldev_latency_htod[STARPU_MAXNODES] = {0.0};
@@ -131,6 +133,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 	cudaError_t cures;
 	cures = cudaGetDeviceProperties(&prop, dev);
 	if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures);
+	cuda_size[dev] = prop.totalGlobalMem;
         if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4;
 
 	/* Allocate a buffer on the device */
@@ -344,6 +347,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
         starpu_opencl_get_device(dev, &device);
 	err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxMemAllocSize), &maxMemAllocSize, NULL);
         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
+	opencl_size[dev] = maxMemAllocSize;
         if (size > (size_t)maxMemAllocSize/4) size = maxMemAllocSize/4;
 
 	if (_starpu_opencl_get_device_type(dev) == CL_DEVICE_TYPE_CPU)
@@ -1668,13 +1672,14 @@ static void write_bus_platform_file_content(void)
 		);
 
 	for (i = 0; i < ncpus; i++)
+		/* TODO: host memory for out-of-core simulation */
 		fprintf(f, "   <host id='CPU%d' power='2000000000'/>\n", i);
 
 	for (i = 0; i < ncuda; i++)
-		fprintf(f, "   <host id='CUDA%d' power='2000000000'/>\n", i);
+		fprintf(f, "   <host id='CUDA%d' power='2000000000'>\n    <prop id='memsize' value='%llu'/>\n   </host>\n", i, (unsigned long long) cuda_size[i]);
 
 	for (i = 0; i < nopencl; i++)
-		fprintf(f, "   <host id='OpenCL%d' power='2000000000'/>\n", i);
+		fprintf(f, "   <host id='OpenCL%d' power='2000000000'>\n    <prop id='memsize' value='%llu'/>\n   </host>\n", i, (unsigned long long) opencl_size[i]);
 
 	fprintf(f, "\n   <host id='RAM' power='1'/>\n");
 

+ 19 - 0
src/core/simgrid.c

@@ -57,6 +57,25 @@ int _starpu_simgrid_get_nbhosts(const char *prefix)
 	return ret;
 }
 
+/* Return the value of the "memsize" property of the simgrid host whose name
+ * is PREFIX immediately followed by DEVID (e.g. "CUDA0"), as found in the
+ * platform file.
+ * Return 0 when the host name does not fit the lookup buffer, when the host
+ * or the property does not exist, or when the property does not start with a
+ * number. */
+unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid)
+{
+	char name[32];
+	msg_host_t host;
+	const char *memsize;
+	char *end;
+	unsigned long long size;
+	int len;
+
+	len = snprintf(name, sizeof(name), "%s%u", prefix, devid);
+	/* A truncated name could designate another host: rather report no size */
+	if (len < 0 || (size_t) len >= sizeof(name))
+		return 0;
+
+	host = MSG_get_host_by_name(name);
+	if (!host)
+		return 0;
+
+	memsize = MSG_host_get_property_value(host, "memsize");
+	if (!memsize)
+		return 0;
+
+	/* strtoull, unlike atoll, has a defined result on out-of-range input
+	 * and lets us detect a value which is not a number at all */
+	size = strtoull(memsize, &end, 10);
+	if (end == memsize)
+		return 0;
+
+	return size;
+}
+
 #ifdef STARPU_DEVEL
 #warning TODO: use another way to start main, when simgrid provides it, and then include the application-provided configuration for platform numbers
 #endif

+ 2 - 1
src/core/simgrid.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012-2013  Université de Bordeaux 1
+ * Copyright (C) 2012-2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,6 +34,7 @@ void _starpu_simgrid_execute_job(struct _starpu_job *job, struct starpu_perfmode
 int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req);
 /* Return the number of hosts prefixed by PREFIX */
 int _starpu_simgrid_get_nbhosts(const char *prefix);
+unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid); /* Return the "memsize" property of the host named PREFIX followed by DEVID, or 0 if the host or the property is missing */
 void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 #endif
 

+ 3 - 1
src/drivers/cuda/driver_cuda.c

@@ -79,7 +79,9 @@ static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid)
 	size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0;
 	char name[30];
 
-#ifdef STARPU_USE_CUDA
+#ifdef STARPU_SIMGRID
+	totalGlobalMem = _starpu_simgrid_get_memsize("CUDA", devid);
+#elif defined(STARPU_USE_CUDA)
 	/* Find the size of the memory on the device */
 	totalGlobalMem = props[devid].totalGlobalMem;
 #endif

+ 3 - 1
src/drivers/opencl/driver_opencl.c

@@ -67,7 +67,9 @@ static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
 	size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0;
 	char name[30];
 
-#ifdef STARPU_USE_OPENCL
+#ifdef STARPU_SIMGRID
+	totalGlobalMem = _starpu_simgrid_get_memsize("OpenCL", devid);
+#elif defined(STARPU_USE_OPENCL)
 	/* Request the size of the current device's memory */
 	cl_int err;
 	err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);