Browse Source

basic_examples/block: access the block as a 3D block, indexing it with the ldy and ldz parameters, instead of treating it as a flat vector

Nathalie Furmento 15 years ago
parent
commit
24b4223ed0

+ 9 - 3
examples/basic_examples/block.c

@@ -76,8 +76,8 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 int main(int argc, char **argv)
 {
 	starpu_codelet cl;
-        float *block;
-        int i, ret;
+        float *block, n=1.0;
+        int i, j, k, ret;
         int nx=3;
         int ny=2;
         int nz=4;
@@ -87,7 +87,13 @@ int main(int argc, char **argv)
 
         block = (float*)malloc(nx*ny*nz*sizeof(float));
         assert(block);
-        for(i=0 ; i<nx*ny*nz ; i++) block[i] = i+1;
+        for(k=0 ; k<nz ; k++) {
+                for(j=0 ; j<ny ; j++) {
+                        for(i=0 ; i<nx ; i++) {
+                                block[(k*nx*ny)+(j*nx)+i] = n++;
+                        }
+                }
+        }
 
         ret = execute_on(STARPU_CPU, cpu_codelet, block, nx, ny, nz, 1.0);
         if (!ret) multiplier *= 1.0;

+ 9 - 2
examples/basic_examples/block_cpu.c

@@ -22,9 +22,16 @@ void cpu_codelet(void *descr[], void *_args)
 	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
 	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
 	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
+        unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]);
+        unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
         float *multiplier = (float *)_args;
-        int i;
+        unsigned i, j, k;
 
-        for(i=0 ; i<nx*ny*nz ; i++) block[i] *= *multiplier;
+        for(k=0; k<nz ; k++) {
+                for(j=0; j<ny ; j++) {
+                        for(i=0; i<nx ; i++)
+                                block[(k*ldz)+(j*ldy)+i] *= *multiplier;
+                }
+        }
 }
 

+ 12 - 4
examples/basic_examples/block_cuda.cu

@@ -16,10 +16,15 @@
 
 #include <starpu.h>
 
-static __global__ void cuda_block(float *block, int nx, int ny, int nz, float multiplier)
+static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {
-        int i;
-        for(i=0 ; i<nx*ny*nz ; i++) block[i] *= multiplier;
+        int i, j, k;
+        for(k=0; k<nz ; k++) {
+                for(j=0; j<ny ; j++) {
+                        for(i=0; i<nx ; i++)
+                                block[(k*ldz)+(j*ldy)+i] *= multiplier;
+                }
+        }
 }
 
 extern "C" void cuda_codelet(void *descr[], void *_args)
@@ -28,7 +33,10 @@ extern "C" void cuda_codelet(void *descr[], void *_args)
 	int nx = STARPU_BLOCK_GET_NX(descr[0]);
 	int ny = STARPU_BLOCK_GET_NY(descr[0]);
 	int nz = STARPU_BLOCK_GET_NZ(descr[0]);
+        unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]);
+        unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
         float *multiplier = (float *)_args;
 
-        cuda_block<<<1,1>>>(block, nx, ny, nz, *multiplier);
+        cuda_block<<<1,1>>>(block, nx, ny, nz, ldy, ldz, *multiplier);
+	cudaThreadSynchronize();
 }

+ 6 - 2
examples/basic_examples/block_opencl.c

@@ -28,6 +28,8 @@ void opencl_codelet(void *descr[], void *_args)
 	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
 	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
 	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
+        unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]);
+        unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
         float *multiplier = (float *)_args;
 
         id = starpu_worker_get_id();
@@ -42,11 +44,13 @@ void opencl_codelet(void *descr[], void *_args)
 	err = clSetKernelArg(kernel, 1, sizeof(int), &nx);
 	err = clSetKernelArg(kernel, 2, sizeof(int), &ny);
 	err = clSetKernelArg(kernel, 3, sizeof(int), &nz);
-	err = clSetKernelArg(kernel, 4, sizeof(float), multiplier);
+	err = clSetKernelArg(kernel, 4, sizeof(ldy), &ldy);
+	err = clSetKernelArg(kernel, 5, sizeof(ldz), &ldz);
+	err = clSetKernelArg(kernel, 6, sizeof(float), multiplier);
         if (err) STARPU_OPENCL_REPORT_ERROR(err);
 
 	{
-                size_t global=1024;
+                size_t global=nx*ny*nz;
 		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
 		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 	}

+ 7 - 4
examples/basic_examples/block_opencl_kernel.cl

@@ -14,10 +14,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void block(__global float *b, int nx, int ny, int nz, float multiplier)
+__kernel void block(__global float *b, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {
-        const int i = get_global_id(0);
-        if (i < nx*ny*nz) {
-                b[i] *= multiplier;
+        int i, j, k;
+        for(k=0; k<nz ; k++) {
+                for(j=0; j<ny ; j++) {
+                        for(i=0; i<nx ; i++)
+                                b[(k*ldz)+(j*ldy)+i] *= multiplier;
+                }
         }
 }