|
@@ -1,6 +1,6 @@
|
|
|
/* StarPU --- Runtime system for heterogeneous multicore architectures.
|
|
|
*
|
|
|
- * Copyright (C) 2010-2011 Université de Bordeaux 1
|
|
|
+ * Copyright (C) 2010-2011, 2013 Université de Bordeaux 1
|
|
|
*
|
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
|
* it under the terms of the GNU Lesser General Public License as published by
|
|
@@ -81,18 +81,10 @@ void opencl_shadow_free(void)
|
|
|
void
|
|
|
opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
|
|
|
{
|
|
|
- unsigned max_parallelism = 512;
|
|
|
- unsigned threads_per_dim_x = max_parallelism;
|
|
|
- while (threads_per_dim_x / 2 >= nx)
|
|
|
- threads_per_dim_x /= 2;
|
|
|
- unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x;
|
|
|
- while (threads_per_dim_y / 2 >= ny)
|
|
|
- threads_per_dim_y /= 2;
|
|
|
#if 0
|
|
|
- unsigned threads_per_dim_z = 4;
|
|
|
- size_t dimGrid[] = {nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z};
|
|
|
+ size_t dim[] = {nx, ny, nz};
|
|
|
#else
|
|
|
- size_t dimGrid[] = {((nx + threads_per_dim_x-1) / threads_per_dim_x)*threads_per_dim_x, ((ny + threads_per_dim_y-1) / threads_per_dim_y)*threads_per_dim_y, 1};
|
|
|
+ size_t dim[] = {nx, ny, 1};
|
|
|
#endif
|
|
|
|
|
|
int devid,id;
|
|
@@ -113,7 +105,7 @@ opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz,
|
|
|
clSetKernelArg(kernel, 7, sizeof(i), &i);
|
|
|
|
|
|
cl_event ev;
|
|
|
- cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dimGrid, NULL, 0, NULL, &ev);
|
|
|
+ cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, &ev);
|
|
|
if (err != CL_SUCCESS)
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
clWaitForEvents(1, &ev);
|