Преглед на файлове

Completely fix the global work size for the stencil

Samuel Thibault преди 11 години
родител
ревизия
b4b685b944
променени са 2 файла, в които са добавени 7 реда и са изтрити 23 реда
  1. +3 -11
      examples/stencil/life_opencl.c
  2. +4 -12
      examples/stencil/shadow_opencl.c

+ 3 - 11
examples/stencil/life_opencl.c

@@ -84,18 +84,10 @@ void opencl_life_free(void)
 void
 opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
 {
-	unsigned max_parallelism = 512;
-	unsigned threads_per_dim_x = max_parallelism;
-	while (threads_per_dim_x / 2 >= nx)
-		threads_per_dim_x /= 2;
-	unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x;
-	while (threads_per_dim_y / 2 >= ny)
-		threads_per_dim_y /= 2;
 #if 0
-	unsigned threads_per_dim_z = 4;
-	size_t dimGrid[] = {nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z};
+	size_t dim[] = {nx, ny, nz};
 #else
-	size_t dimGrid[] = {((nx + threads_per_dim_x-1) / threads_per_dim_x)*threads_per_dim_x, ((ny + threads_per_dim_y-1) / threads_per_dim_y)*threads_per_dim_y, 1};
+	size_t dim[] = {nx, ny, 1};
 #endif
 
   int devid,id;
@@ -117,7 +109,7 @@ opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int
   clSetKernelArg(kernel, 8, sizeof(iter), &iter);
 
   cl_event ev;
-  clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dimGrid, NULL, 0, NULL, &ev);
+  clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
   clWaitForEvents(1, &ev);
   starpu_opencl_collect_stats(ev);
   clReleaseEvent(ev);

+ 4 - 12
examples/stencil/shadow_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -81,18 +81,10 @@ void opencl_shadow_free(void)
 void
 opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
 {
-	unsigned max_parallelism = 512;
-	unsigned threads_per_dim_x = max_parallelism;
-	while (threads_per_dim_x / 2 >= nx)
-		threads_per_dim_x /= 2;
-	unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x;
-	while (threads_per_dim_y / 2 >= ny)
-		threads_per_dim_y /= 2;
 #if 0
-	unsigned threads_per_dim_z = 4;
-	size_t dimGrid[] = {nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z};
+	size_t dim[] = {nx, ny, nz};
 #else
-	size_t dimGrid[] = {((nx + threads_per_dim_x-1) / threads_per_dim_x)*threads_per_dim_x, ((ny + threads_per_dim_y-1) / threads_per_dim_y)*threads_per_dim_y, 1};
+	size_t dim[] = {nx, ny, 1};
 #endif
 
         int devid,id;
@@ -113,7 +105,7 @@ opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz,
         clSetKernelArg(kernel, 7, sizeof(i), &i);
 
         cl_event ev;
-        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dimGrid, NULL, 0, NULL, &ev);
+        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
         if (err != CL_SUCCESS)
                 STARPU_OPENCL_REPORT_ERROR(err);
         clWaitForEvents(1, &ev);