life_opencl.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 Inria
  4. * Copyright (C) 2012-2013,2016-2017 CNRS
  5. * Copyright (C) 2010-2011,2013-2014,2018 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /* Heart of the stencil computation: compute a new state from an old one. */
  19. /* #define _externC extern "C" */
  20. #include <stencil.h>
  21. #define CL_TARGET_OPENCL_VERSION 100
  22. #ifdef __APPLE__
  23. #include <OpenCL/cl.h>
  24. #else
  25. #include <CL/cl.h>
  26. #endif
  27. #include <starpu.h>
  /*
   * str(x) turns its argument into a string literal.
   *
   * clsrc(t, k) expands to the OpenCL C source of the life_update kernel as a
   * single string literal.  Because t and k are handed on to str() from inside
   * this macro, they are macro-expanded first, so the kernel source starts with
   * "#define TYPE <t>" and "#define K <k>" lines carrying the expanded values.
   *
   * What the kernel text does:
   *  - each work-item starts at (get_global_id(0), get_global_id(1)) and strides
   *    by get_global_size(0) / get_global_size(1) over x and y, skipping a
   *    border of K cells on each side;
   *  - z is walked inside the kernel from iter to nz - iter (idz = 0, stepz = 1);
   *  - for every cell it sums the eight neighbours reached through +/-ldy and
   *    +/-ldz offsets, then writes 1 to newp when the sum is 3, or when the sum
   *    is 2 and the cell in old is non-zero, and 0 otherwise;
   *  - the bz argument is accepted but not referenced in the kernel body.
   *
   * The pieces below are adjacent string literals; the compiler concatenates
   * them, so indentation of this C file never leaks into the kernel source.
   */
  #define str(x) #x
  #define clsrc(t,k) \
      "__kernel void\n" \
      "#define TYPE " str(t) "\n" \
      "#define K " str(k) "\n" \
      "life_update(int bz, __global const TYPE *old, __global TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)\n" \
      "{\n" \
      "unsigned idx = get_global_id(0);\n" \
      "unsigned idy = get_global_id(1);\n" \
      "//unsigned idz = threadIdx.z + blockIdx.z * blockDim.z;\n" \
      "unsigned idz = 0;\n" \
      "unsigned stepx = get_global_size(0);\n" \
      "unsigned stepy = get_global_size(1);\n" \
      "//unsigned stepz = blockDim.z * gridDim.z;\n" \
      "unsigned stepz = 1;\n" \
      "unsigned x, y, z;\n" \
      "unsigned num, alive;\n" \
      "\n" \
      "for (z = iter + idz; z < nz - iter; z += stepz)\n" \
      "for (y = K + idy; y < ny - K; y += stepy) \n" \
      "{\n " \
      "for (x = K + idx; x < nx - K; x += stepx) " \
      "{\n " \
      "unsigned index = x + y*ldy + z*ldz;\n" \
      "num = 0\n" \
      "+ old[index+1*ldy+0*ldz]\n" \
      "+ old[index+1*ldy+1*ldz]\n" \
      "+ old[index+0*ldy+1*ldz]\n" \
      "+ old[index-1*ldy+1*ldz]\n" \
      "+ old[index-1*ldy+0*ldz]\n" \
      "+ old[index-1*ldy-1*ldz]\n" \
      "+ old[index+0*ldy-1*ldz]\n" \
      "+ old[index+1*ldy-1*ldz]\n" \
      ";\n" \
      "alive = old[index];\n" \
      "alive = (alive && num == 2) || num == 3;\n" \
      "newp[index] = alive;\n" \
      "}\n" \
      "}\n" \
      "}"
  67. static const char * src = clsrc(TYPE,K);
  68. static struct starpu_opencl_program program;
  69. void
  70. opencl_life_init(void)
  71. {
  72. starpu_opencl_load_opencl_from_string(src, &program, NULL);
  73. }
  74. void opencl_life_free(void)
  75. {
  76. int ret = starpu_opencl_unload_opencl(&program);
  77. STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
  78. }
  79. void
  80. opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
  81. {
  82. #if 0
  83. size_t dim[] = {nx, ny, nz};
  84. #else
  85. size_t dim[] = {nx, ny, 1};
  86. #endif
  87. int devid,id;
  88. cl_int err;
  89. id = starpu_worker_get_id_check();
  90. devid = starpu_worker_get_devid(id);
  91. cl_kernel kernel;
  92. cl_command_queue cq;
  93. err = starpu_opencl_load_kernel(&kernel, &cq, &program, "life_update", devid);
  94. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  95. clSetKernelArg(kernel, 0, sizeof(bz), &bz);
  96. clSetKernelArg(kernel, 1, sizeof(old), &old);
  97. clSetKernelArg(kernel, 2, sizeof(newp), &newp);
  98. clSetKernelArg(kernel, 3, sizeof(nx), &nx);
  99. clSetKernelArg(kernel, 4, sizeof(ny), &ny);
  100. clSetKernelArg(kernel, 5, sizeof(nz), &nz);
  101. clSetKernelArg(kernel, 6, sizeof(ldy), &ldy);
  102. clSetKernelArg(kernel, 7, sizeof(ldz), &ldz);
  103. clSetKernelArg(kernel, 8, sizeof(iter), &iter);
  104. err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL);
  105. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  106. }