Переглянути джерело

examples/basic_examples/block: use correct datatypes for block data and write a truly data-parallel OpenCL kernel

Nathalie Furmento 13 років тому
батько
коміт
f1c953984c

+ 6 - 6
examples/basic_examples/block_opencl.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -36,11 +36,11 @@ void opencl_codelet(void *descr[], void *_args)
 	cl_event event;
 	int id, devid, err;
 	cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]);
-	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
-	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
-	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
-        unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]);
-        unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
+	uint32_t nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
+	uint32_t ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
+	uint32_t nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
+        uint32_t ldy = STARPU_BLOCK_GET_LDY(descr[0]);
+        uint32_t ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
         float *multiplier = (float *)_args;
 
         id = starpu_worker_get_id();

+ 7 - 11
examples/basic_examples/block_opencl_kernel.cl

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,15 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void block(__global float *b, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
+#include <stdint.h>
+
+__kernel void block(__global float *b, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t ldy, uint32_t ldz, float multiplier)
 {
-        int i, j, k;
-        for(k=0; k<nz ; k++)
-	{
-                for(j=0; j<ny ; j++)
-		{
-                        for(i=0; i<nx ; i++)
-                                b[(k*ldz)+(j*ldy)+i] *= multiplier;
-                }
-        }
+     const int i = get_global_id(0);
+     if (i < (nz*ldz)+(ny*ldy)+nx)
+	  b[i] = b[i] * multiplier;
 }