cl_enqueuendrangekernel.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010,2011 University of Bordeaux
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include "socl.h"
  17. void soclEnqueueNDRangeKernel_task(void *descr[], void *args) {
  18. command_ndrange_kernel cmd = (command_ndrange_kernel)args;
  19. cl_command_queue cq;
  20. int wid;
  21. cl_int err;
  22. wid = starpu_worker_get_id();
  23. starpu_opencl_get_queue(wid, &cq);
  24. DEBUG_MSG("[worker %d] [kernel %d] Executing kernel...\n", wid, cmd->kernel->id);
  25. int range = starpu_worker_get_range();
  26. /* Set arguments */
  27. {
  28. unsigned int i;
  29. int buf = 0;
  30. for (i=0; i<cmd->num_args; i++) {
  31. switch (cmd->arg_types[i]) {
  32. case Null:
  33. err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], NULL);
  34. break;
  35. case Buffer: {
  36. cl_mem mem;
  37. mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[buf]);
  38. err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], &mem);
  39. buf++;
  40. }
  41. break;
  42. case Immediate:
  43. err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], cmd->args[i]);
  44. break;
  45. }
  46. if (err != CL_SUCCESS) {
  47. DEBUG_CL("clSetKernelArg", err);
  48. DEBUG_ERROR("Aborting\n");
  49. }
  50. }
  51. }
  52. /* Calling Kernel */
  53. cl_event event;
  54. err = clEnqueueNDRangeKernel(cq, cmd->kernel->cl_kernels[range], cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size, 0, NULL, &event);
  55. if (err != CL_SUCCESS) {
  56. ERROR_MSG("Worker[%d] Unable to Enqueue kernel (error %d)\n", wid, err);
  57. DEBUG_CL("clEnqueueNDRangeKernel", err);
  58. DEBUG_MSG("Workdim %d, global_work_offset %p, global_work_size %p, local_work_size %p\n",
  59. cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size);
  60. DEBUG_MSG("Global work size: %ld %ld %ld\n", cmd->global_work_size[0],
  61. (cmd->work_dim > 1 ? cmd->global_work_size[1] : 1), (cmd->work_dim > 2 ? cmd->global_work_size[2] : 1));
  62. if (cmd->local_work_size != NULL)
  63. DEBUG_MSG("Local work size: %ld %ld %ld\n", cmd->local_work_size[0],
  64. (cmd->work_dim > 1 ? cmd->local_work_size[1] : 1), (cmd->work_dim > 2 ? cmd->local_work_size[2] : 1));
  65. }
  66. else {
  67. /* Waiting for kernel to terminate */
  68. clWaitForEvents(1, &event);
  69. clReleaseEvent(event);
  70. }
  71. }
  72. static void cleaning_task_callback(void *args) {
  73. command_ndrange_kernel cmd = (command_ndrange_kernel)args;
  74. free(cmd->arg_sizes);
  75. free(cmd->arg_types);
  76. unsigned int i;
  77. for (i=0; i<cmd->num_args; i++) {
  78. free(cmd->args[i]);
  79. }
  80. free(cmd->args);
  81. for (i=0; i<cmd->num_buffers; i++)
  82. gc_entity_unstore(&cmd->buffers[i]);
  83. free(cmd->buffers);
  84. if (cmd->global_work_offset != NULL) {
  85. free((void*)cmd->global_work_offset);
  86. cmd->global_work_offset = NULL;
  87. }
  88. if (cmd->global_work_size != NULL) {
  89. free((void*)cmd->global_work_size);
  90. cmd->global_work_size = NULL;
  91. }
  92. if (cmd->local_work_size != NULL) {
  93. free((void*)cmd->local_work_size);
  94. cmd->local_work_size = NULL;
  95. }
  96. }
  97. /**
  98. * Real kernel enqueuing command
  99. */
  100. cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) {
  101. starpu_task task = task_create();
  102. task->cl = &cmd->codelet;
  103. task->cl->model = cmd->kernel->perfmodel;
  104. task->cl_arg = cmd;
  105. task->cl_arg_size = sizeof(cmd);
  106. /* Execute the task on a specific worker? */
  107. if (cmd->_command.cq->device != NULL) {
  108. task->execute_on_a_specific_worker = 1;
  109. task->workerid = cmd->_command.cq->device->worker_id;
  110. }
  111. struct starpu_codelet * codelet = task->cl;
  112. /* We need to detect which parameters are OpenCL's memory objects and
  113. * we retrieve their corresponding StarPU buffers */
  114. cmd->num_buffers = 0;
  115. cmd->buffers = malloc(sizeof(cl_mem) * cmd->num_args);
  116. unsigned int i;
  117. for (i=0; i<cmd->num_args; i++) {
  118. if (cmd->arg_types[i] == Buffer) {
  119. cl_mem buf = *(cl_mem*)cmd->args[i];
  120. gc_entity_store(&cmd->buffers[cmd->num_buffers], buf);
  121. task->handles[cmd->num_buffers] = buf->handle;
  122. /* Determine best StarPU buffer access mode */
  123. int mode;
  124. if (buf->mode == CL_MEM_READ_ONLY)
  125. mode = STARPU_R;
  126. else if (buf->mode == CL_MEM_WRITE_ONLY) {
  127. mode = STARPU_W;
  128. buf->scratch = 0;
  129. }
  130. else if (buf->scratch) { //RW but never accessed in RW or W mode
  131. mode = STARPU_W;
  132. buf->scratch = 0;
  133. }
  134. else {
  135. mode = STARPU_RW;
  136. buf->scratch = 0;
  137. }
  138. codelet->modes[cmd->num_buffers] = mode;
  139. cmd->num_buffers += 1;
  140. }
  141. }
  142. codelet->nbuffers = cmd->num_buffers;
  143. task_submit(task, cmd);
  144. /* Enqueue a cleaning task */
  145. //FIXME: execute this in the callback?
  146. cl_event ev = command_event_get(cmd);
  147. static struct starpu_codelet cdl = {
  148. .name = "SOCL_NDRANGE_CLEANING_TASK"
  149. };
  150. cpu_task_submit(cmd, cleaning_task_callback, cmd, 0, &cdl, 1, &ev);
  151. return CL_SUCCESS;
  152. }
  153. CL_API_ENTRY cl_int CL_API_CALL
  154. soclEnqueueNDRangeKernel(cl_command_queue cq,
  155. cl_kernel kernel,
  156. cl_uint work_dim,
  157. const size_t * global_work_offset,
  158. const size_t * global_work_size,
  159. const size_t * local_work_size,
  160. cl_uint num_events,
  161. const cl_event * events,
  162. cl_event * event) CL_API_SUFFIX__VERSION_1_1
  163. {
  164. command_ndrange_kernel cmd = command_ndrange_kernel_create(kernel, work_dim,
  165. global_work_offset, global_work_size, local_work_size);
  166. command_queue_enqueue(cq, cmd, num_events, events);
  167. RETURN_EVENT(cmd, event);
  168. return CL_SUCCESS;
  169. }