Browse Source

core/perfmodel_bus: check maximum size available on OpenCL device

Nathalie Furmento 14 years ago
parent
commit
e1d768fab3
1 changed files with 22 additions and 15 deletions
  1. 22 15
      src/core/perfmodel/perfmodel_bus.c

+ 22 - 15
src/core/perfmodel/perfmodel_bus.c

@@ -34,7 +34,7 @@
 #ifdef STARPU_HAVE_WINDOWS
 #ifdef STARPU_HAVE_WINDOWS
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
-		
+
 #define SIZE	(32*1024*1024*sizeof(char))
 #define SIZE	(32*1024*1024*sizeof(char))
 #define NITER	128
 #define NITER	128
 
 
@@ -52,6 +52,7 @@ static unsigned was_benchmarked = 0;
 static unsigned ncpus = 0;
 static unsigned ncpus = 0;
 static int ncuda = 0;
 static int ncuda = 0;
 static int nopencl = 0;
 static int nopencl = 0;
+static size_t opencl_size = SIZE;
 
 
 /* Benchmarking the performance of the bus */
 /* Benchmarking the performance of the bus */
 
 
@@ -161,6 +162,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 {
 {
         cl_context context;
         cl_context context;
         cl_command_queue queue;
         cl_command_queue queue;
+        cl_int err=0;
 
 
         struct starpu_machine_config_s *config = _starpu_get_machine_config();
         struct starpu_machine_config_s *config = _starpu_get_machine_config();
 	_starpu_bind_thread_on_cpu(config, cpu);
 	_starpu_bind_thread_on_cpu(config, cpu);
@@ -170,31 +172,35 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
         starpu_opencl_get_context(dev, &context);
         starpu_opencl_get_context(dev, &context);
         starpu_opencl_get_queue(dev, &queue);
         starpu_opencl_get_queue(dev, &queue);
 
 
+        /* Get the maximum size which can be allocated on the device */
+        cl_device_id device;
+	cl_ulong maxMemAllocSize;
+        starpu_opencl_get_device(dev, &device);
+	err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxMemAllocSize), &maxMemAllocSize, NULL);
+        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+        if (opencl_size > maxMemAllocSize) opencl_size = maxMemAllocSize;
+
 	/* hack to avoid third party libs to rebind threads */
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(config, cpu);
 	_starpu_bind_thread_on_cpu(config, cpu);
 
 
 	/* Allocate a buffer on the device */
 	/* Allocate a buffer on the device */
-        int err;
 	cl_mem d_buffer;
 	cl_mem d_buffer;
-	d_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, SIZE, NULL, &err);
+	d_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, opencl_size, NULL, &err);
 	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 
 
 	/* hack to avoid third party libs to rebind threads */
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(config, cpu);
 	_starpu_bind_thread_on_cpu(config, cpu);
-
         /* Allocate a buffer on the host */
         /* Allocate a buffer on the host */
 	unsigned char *h_buffer;
 	unsigned char *h_buffer;
-        h_buffer = malloc(SIZE);
+        h_buffer = malloc(opencl_size);
 	assert(h_buffer);
 	assert(h_buffer);
 
 
 	/* hack to avoid third party libs to rebind threads */
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(config, cpu);
 	_starpu_bind_thread_on_cpu(config, cpu);
-
         /* Fill them */
         /* Fill them */
-	memset(h_buffer, 0, SIZE);
-        err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, SIZE, h_buffer, 0, NULL, NULL);
+	memset(h_buffer, 0, opencl_size);
+        err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, opencl_size, h_buffer, 0, NULL, NULL);
         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-
 	/* hack to avoid third party libs to rebind threads */
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(config, cpu);
 	_starpu_bind_thread_on_cpu(config, cpu);
 
 
@@ -207,7 +213,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 	gettimeofday(&start, NULL);
 	gettimeofday(&start, NULL);
 	for (iter = 0; iter < NITER; iter++)
 	for (iter = 0; iter < NITER; iter++)
 	{
 	{
-                err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, SIZE, h_buffer, 0, NULL, NULL);
+                err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, opencl_size, h_buffer, 0, NULL, NULL);
                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
 	}
 	gettimeofday(&end, NULL);
 	gettimeofday(&end, NULL);
@@ -219,7 +225,7 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 	gettimeofday(&start, NULL);
 	gettimeofday(&start, NULL);
 	for (iter = 0; iter < NITER; iter++)
 	for (iter = 0; iter < NITER; iter++)
 	{
 	{
-                err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, SIZE, h_buffer, 0, NULL, NULL);
+                err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, opencl_size, h_buffer, 0, NULL, NULL);
                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}
 	}
 	gettimeofday(&end, NULL);
 	gettimeofday(&end, NULL);
@@ -841,21 +847,22 @@ static void write_bus_bandwidth_file_content(void)
 			else if (src != dst)
 			else if (src != dst)
 			{
 			{
                                 double time_src_to_ram=0.0, time_ram_to_dst=0.0;
                                 double time_src_to_ram=0.0, time_ram_to_dst=0.0;
+                                double timing;
                                 /* Bandwidth = (SIZE)/(time i -> ram + time ram -> j)*/
                                 /* Bandwidth = (SIZE)/(time i -> ram + time ram -> j)*/
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 				time_src_to_ram = (src==0)?0.0:cudadev_timing_dtoh[src];
 				time_src_to_ram = (src==0)?0.0:cudadev_timing_dtoh[src];
                                 time_ram_to_dst = (dst==0)?0.0:cudadev_timing_htod[dst];
                                 time_ram_to_dst = (dst==0)?0.0:cudadev_timing_htod[dst];
+				timing =time_src_to_ram + time_ram_to_dst;
+				bandwidth = 1.0*SIZE/timing;
 #endif
 #endif
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
                                 if (src > ncuda)
                                 if (src > ncuda)
                                         time_src_to_ram = (src==0)?0.0:opencldev_timing_dtoh[src-ncuda];
                                         time_src_to_ram = (src==0)?0.0:opencldev_timing_dtoh[src-ncuda];
                                 if (dst > ncuda)
                                 if (dst > ncuda)
                                         time_ram_to_dst = (dst==0)?0.0:opencldev_timing_htod[dst-ncuda];
                                         time_ram_to_dst = (dst==0)?0.0:opencldev_timing_htod[dst-ncuda];
+				timing =time_src_to_ram + time_ram_to_dst;
+				bandwidth = 1.0*opencl_size/timing;
 #endif
 #endif
-
-				double timing =time_src_to_ram + time_ram_to_dst;
-
-				bandwidth = 1.0*SIZE/timing;
 			}
 			}
 #endif
 #endif
 			else {
 			else {