block.c 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
#include <assert.h>
#include <errno.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <starpu.h>
#include <starpu_opencl.h>
  20. void cpu_codelet(void *descr[], __attribute__ ((unused)) void *_args)
  21. {
  22. float *block = (float *)STARPU_GET_BLOCK_PTR(descr[0]);
  23. int nx = (int)STARPU_GET_BLOCK_NX(descr[0]);
  24. int ny = (int)STARPU_GET_BLOCK_NY(descr[0]);
  25. int nz = (int)STARPU_GET_BLOCK_NZ(descr[0]);
  26. float *multiplier = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);
  27. int i;
  28. for(i=0 ; i<nx*ny*nz ; i++) block[i] *= *multiplier;
  29. }
  30. #ifdef STARPU_USE_OPENCL
  31. void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args)
  32. {
  33. cl_kernel kernel;
  34. cl_command_queue queue;
  35. int id, devid, err, n;
  36. float *block = (float *)STARPU_GET_BLOCK_PTR(descr[0]);
  37. int nx = (int)STARPU_GET_BLOCK_NX(descr[0]);
  38. int ny = (int)STARPU_GET_BLOCK_NY(descr[0]);
  39. int nz = (int)STARPU_GET_BLOCK_NZ(descr[0]);
  40. float *multiplier = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);
  41. id = starpu_worker_get_id();
  42. devid = starpu_worker_get_devid(id);
  43. err = starpu_opencl_load_kernel(&kernel, &queue,
  44. "examples/block/block_kernel.cl", "block", devid);
  45. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  46. err = 0;
  47. n=0;
  48. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &block);
  49. err = clSetKernelArg(kernel, 1, sizeof(int), &nx);
  50. err = clSetKernelArg(kernel, 2, sizeof(int), &ny);
  51. err = clSetKernelArg(kernel, 3, sizeof(int), &nz);
  52. err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &multiplier);
  53. if (err) STARPU_OPENCL_REPORT_ERROR(err);
  54. {
  55. size_t global=1024;
  56. err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
  57. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  58. }
  59. clFinish(queue);
  60. starpu_opencl_release(kernel);
  61. }
  62. #endif
  63. #ifdef STARPU_USE_CUDA
  64. extern void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args);
  65. #endif
  66. typedef void (*device_func)(void **, void *);
  67. int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny, int pnz, float multiplier)
  68. {
  69. starpu_codelet cl;
  70. starpu_data_handle block_handle;
  71. starpu_data_handle multiplier_handle;
  72. int i, j, k;
  73. starpu_block_data_register(&block_handle, 0, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float));
  74. starpu_variable_data_register(&multiplier_handle, 0, (uintptr_t)&multiplier, sizeof(float));
  75. cl.where = where;
  76. cl.cuda_func = func;
  77. cl.cpu_func = func;
  78. cl.opencl_func = func;
  79. cl.nbuffers = 2;
  80. cl.model = NULL;
  81. struct starpu_task *task = starpu_task_create();
  82. task->cl = &cl;
  83. task->callback_func = NULL;
  84. task->buffers[0].handle = block_handle;
  85. task->buffers[0].mode = STARPU_RW;
  86. task->buffers[1].handle = multiplier_handle;
  87. task->buffers[1].mode = STARPU_RW;
  88. int ret = starpu_task_submit(task);
  89. if (STARPU_UNLIKELY(ret == -ENODEV)) {
  90. fprintf(stderr, "No worker may execute this task\n");
  91. return 1;
  92. }
  93. starpu_task_wait_for_all();
  94. /* update the array in RAM */
  95. starpu_data_sync_with_mem(block_handle, STARPU_R);
  96. for(i=0 ; i<pnx*pny*pnz; i++) {
  97. fprintf(stderr, "%f ", block[i]);
  98. }
  99. fprintf(stderr, "\n");
  100. starpu_data_release_from_mem(block_handle);
  101. return 0;
  102. }
  103. int main(int argc, char **argv)
  104. {
  105. starpu_codelet cl;
  106. float *block;
  107. int i, ret;
  108. int nx=3;
  109. int ny=2;
  110. int nz=4;
  111. float multiplier=1.0;
  112. starpu_init(NULL);
  113. block = (float*)malloc(nx*ny*nz*sizeof(float));
  114. assert(block);
  115. for(i=0 ; i<nx*ny*nz ; i++) block[i] = i+1;
  116. ret = execute_on(STARPU_CPU, cpu_codelet, block, nx, ny, nz, 1.0);
  117. if (!ret) multiplier *= 1.0;
  118. #ifdef STARPU_USE_OPENCL
  119. _starpu_opencl_compile_source_to_opencl("examples/block/block_kernel.cl");
  120. ret = execute_on(STARPU_OPENCL, opencl_codelet, block, nx, ny, nz, 2.0);
  121. if (!ret) multiplier *= 2.0;
  122. #endif
  123. #ifdef STARPU_USE_CUDA
  124. ret = execute_on(STARPU_CUDA, cuda_codelet, block, nx, ny, nz, 3.0);
  125. if (!ret) multiplier *= 3.0;
  126. #endif
  127. // Check result is correct
  128. ret=1;
  129. for(i=0 ; i<nx*ny*nz ; i++) {
  130. if (block[i] != (i+1) * multiplier) {
  131. ret=0;
  132. break;
  133. }
  134. }
  135. fprintf(stderr,"TEST %s\n", ret==1?"PASSED":"FAILED");
  136. starpu_shutdown();
  137. return 0;
  138. }