5 years ago · 485327669e
--- a/examples/filters/fblock_opencl.c
+++ b/examples/filters/fblock_opencl.c
@@ -60,8 +60,8 @@ void opencl_func(void *buffers[], void *cl_arg)
 
				 	CHECK_CL_SET_KERNEL_ARG(kernel, 7, sizeof(*factor), factor);
			
 
				 
			
 
				 	{
			
 
				-		size_t global=nx*ny*nz;
			
 
				-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
			
 
				+		size_t global[3]={nx,ny,nz};
			
 
				+		err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
			
 
				 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				 	}
			
 
				 	starpu_opencl_release_kernel(kernel);
			
--- a/examples/filters/fblock_opencl_kernel.cl
+++ b/examples/filters/fblock_opencl_kernel.cl
@@ -18,14 +18,17 @@
 
				 
			
 
				 __kernel void fblock_opencl(__global int* block, unsigned offset, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor)
			
 
				 {
			
 
				-        int i, j, k;
			
 
				-        block = (__global int*) ((__global char *)block + offset);
			
 
				-        for(k=0; k<nz ; k++)
			
 
				-	{
			
 
				-                for(j=0; j<ny ; j++)
			
 
				-		{
			
 
				-                        for(i=0; i<nx ; i++)
			
 
				-                                block[(k*ldz)+(j*ldy)+i] = factor;
			
 
				-                }
			
 
				-        }
			
 
				+	const int idx = get_global_id(0);
			
 
				+	const int idy = get_global_id(1);
			
 
				+	const int idz = get_global_id(2);
			
 
				+	if (idx >= nx)
			
 
				+		return;
			
 
				+	if (idy >= ny)
			
 
				+		return;
			
 
				+	if (idz >= nz)
			
 
				+		return;
			
 
				+
			
 
				+	block = (__global int*) ((__global char *)block + offset);
			
 
				+	int i = idz*ldz + idy*ldy + idx;
			
 
				+	block[i] = factor;
			
 
				 }
			
--- a/tests/datawizard/interfaces/block/block_opencl.c
+++ b/tests/datawizard/interfaces/block/block_opencl.c
@@ -83,12 +83,12 @@ test_block_opencl_func(void *buffers[], void *args)
 
				 	}
			
 
				 			
			
 
				 	{
			
 
				-		size_t global = nx * ny * nz;
			
 
				+		size_t global[3] = {nx, ny, nz};
			
 
				 		err = clEnqueueNDRangeKernel(queue,
			
 
				 					     kernel,
			
 
				-					     1,
			
 
				+					     3,
			
 
				 					     NULL,
			
 
				-					     &global,
			
 
				+					     global,
			
 
				 					     NULL,
			
 
				 					     0,
			
 
				 					     NULL,
			
--- a/tests/datawizard/interfaces/block/block_opencl_kernel.cl
+++ b/tests/datawizard/interfaces/block/block_opencl_kernel.cl
@@ -18,29 +18,21 @@ __kernel void block_opencl(__global int *block,
 
				 			   int ldy, int ldz,
			
 
				 			   int factor, __global int *err)
			
 
				 {
			
 
				-        const int id = get_global_id(0);
			
 
				-	if (id > 0)
			
 
				+	const int idx = get_global_id(0);
			
 
				+	const int idy = get_global_id(1);
			
 
				+	const int idz = get_global_id(2);
			
 
				+	if (idx >= nx)
			
 
				 		return;
			
 
				+	if (idy >= ny)
			
 
				+		return;
			
 
				+	if (idz >= nz)
			
 
				+		return;
			
 
				+
			
 
				+	int val = idz*ny*nx+idy*nx+idx;
			
 
				+	int i = (idz*ldz)+(idy*ldy)+idx;
			
 
				 
			
 
				-	unsigned int i, j, k;
			
 
				-	int val = 0;
			
 
				-	for (k = 0; k < nz; k++)
			
 
				-	{
			
 
				-		for (j = 0; j < ny; j++)
			
 
				-		{
			
 
				-			for (i = 0; i < nx; i++)
			
 
				-			{
			
 
				-                                if (block[(k*ldz)+(j*ldy)+i] != factor * val)
			
 
				-				{
			
 
				-					*err = 1;
			
 
				-					return;
			
 
				-				}
			
 
				-				else
			
 
				-				{
			
 
				-					block[(k*ldz)+(j*ldy)+i] *= -1;
			
 
				-					val++;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				+	if (block[i] != factor * val)
			
 
				+		*err = 1;
			
 
				+	else
			
 
				+		block[i] *= -1;
			
 
				 }
			
--- a/tests/datawizard/interfaces/tensor/tensor_interface.c
+++ b/tests/datawizard/interfaces/tensor/tensor_interface.c
@@ -18,7 +18,7 @@
 
				 #include "../test_interfaces.h"
			
 
				 #include "../../../helper.h"
			
 
				 
			
 
				-#define NX 16
			
 
				+#define NX 4
			
 
				 #define NY NX
			
 
				 #define NZ NX
			
 
				 #define NT NX
			
--- a/tests/datawizard/interfaces/tensor/tensor_opencl.c
+++ b/tests/datawizard/interfaces/tensor/tensor_opencl.c
@@ -87,12 +87,12 @@ test_tensor_opencl_func(void *buffers[], void *args)
 
				 	}
			
 
				 			
			
 
				 	{
			
 
				-                size_t global = 1;
			
 
				+		size_t global[3] = {nx, ny, nz*nt};
			
 
				 		err = clEnqueueNDRangeKernel(queue,
			
 
				 					     kernel,
			
 
				-					     1,
			
 
				+					     3,
			
 
				 					     NULL,
			
 
				-					     &global,
			
 
				+					     global,
			
 
				 					     NULL,
			
 
				 					     0,
			
 
				 					     NULL,
			
--- a/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl
+++ b/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl
@@ -18,32 +18,24 @@ __kernel void tensor_opencl(__global int *tensor,
 
				 			   int ldy, int ldz, int ldt,
			
 
				 			   int factor, __global int *err)
			
 
				 {
			
 
				-        const int id = get_global_id(0);
			
 
				-	if (id > 0)
			
 
				+	const int idx = get_global_id(0);
			
 
				+	const int idy = get_global_id(1);
			
 
				+	const int idz = get_global_id(2) % nz;
			
 
				+	const int idt = get_global_id(2) / nz;
			
 
				+	if (idx >= nx)
			
 
				 		return;
			
 
				+	if (idy >= ny)
			
 
				+		return;
			
 
				+	if (idz >= nz)
			
 
				+		return;
			
 
				+	if (idt >= nt)
			
 
				+		return;
			
 
				+
			
 
				+	int val = idt*nz*ny*nx+idz*ny*nx+idy*nx+idx;
			
 
				+	int i = (idt*ldt)+(idz*ldz)+(idy*ldy)+idx;
			
 
				 
			
 
				-	unsigned int i, j, k, l;
			
 
				-	int val = 0;
			
 
				-	for (l = 0; l < nt; l++)
			
 
				-	{
			
 
				-	    for (k = 0; k < nz; k++)
			
 
				-	    {
			
 
				-		for (j = 0; j < ny; j++)
			
 
				-		{
			
 
				-			for (i = 0; i < nx; i++)
			
 
				-			{
			
 
				-                                if (tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val)
			
 
				-				{
			
 
				-					*err = 1;
			
 
				-					return;
			
 
				-				}
			
 
				-				else
			
 
				-				{
			
 
				-					tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1;
			
 
				-					val++;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	    }
			
 
				-	}
			
 
				+	if (tensor[i] != factor * val)
			
 
				+		*err = 1;
			
 
				+	else
			
 
				+		tensor[i] *= -1;
			
 
				 }