driver_opencl.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2015 Université de Bordeaux
  4. * Copyright (C) 2010 Mehdi Juhoor <mjuhoor@gmail.com>
  5. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015 Centre National de la Recherche Scientifique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <math.h>
  20. #include <starpu.h>
  21. #include <starpu_profiling.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/debug.h>
  25. #include <starpu_opencl.h>
  26. #include <drivers/driver_common/driver_common.h>
  27. #include "driver_opencl.h"
  28. #include "driver_opencl_utils.h"
  29. #include <common/utils.h>
  30. #include <datawizard/memory_manager.h>
  31. #include <datawizard/malloc.h>
  32. #ifdef STARPU_SIMGRID
  33. #include <core/simgrid.h>
  34. #endif
  35. static int nb_devices = -1; /* Number of usable OpenCL devices; -1 until _starpu_opencl_init() has run. */
  36. static int init_done = 0; /* Set to 1 once _starpu_opencl_init() has completed. */
  37. static starpu_pthread_mutex_t big_lock = STARPU_PTHREAD_MUTEX_INITIALIZER; /* Taken by _starpu_opencl_init() and by context init/deinit. */
  38. static size_t global_mem[STARPU_MAXOPENCLDEVS]; /* Per-device memory budget in bytes, filled by _starpu_opencl_limit_gpu_mem_if_needed(). */
  39. #ifdef STARPU_USE_OPENCL
  40. static cl_context contexts[STARPU_MAXOPENCLDEVS]; /* One context per device, indexed by device id. */
  41. static cl_device_id devices[STARPU_MAXOPENCLDEVS]; /* Device handles collected by _starpu_opencl_init(). */
  42. static cl_command_queue queues[STARPU_MAXOPENCLDEVS]; /* Execution queue of each device. */
  43. static cl_command_queue in_transfer_queues[STARPU_MAXOPENCLDEVS]; /* Used for RAM -> OpenCL writes. */
  44. static cl_command_queue out_transfer_queues[STARPU_MAXOPENCLDEVS]; /* Used for OpenCL -> RAM reads. */
  45. static cl_command_queue peer_transfer_queues[STARPU_MAXOPENCLDEVS]; /* Used for OpenCL -> OpenCL copies. */
  46. #ifndef STARPU_SIMGRID
  47. static cl_command_queue alloc_queues[STARPU_MAXOPENCLDEVS]; /* Used for the probing write in starpu_opencl_allocate_memory(). */
  48. static cl_event task_events[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; /* Per device and pipeline slot; polled by the driver loop for task completion. */
  49. #endif /* !STARPU_SIMGRID */
  50. #endif
  51. #ifdef STARPU_SIMGRID
  52. static unsigned task_finished[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; /* Per device and pipeline slot; reset to 0 on context init/deinit, tested by the driver loop. */
  53. static starpu_pthread_mutex_t task_mutex[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; /* Created and destroyed together with task_cond in context init/deinit. */
  54. static starpu_pthread_cond_t task_cond[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; /* Created and destroyed together with task_mutex in context init/deinit. */
  55. #endif /* STARPU_SIMGRID */
  56. void
  57. _starpu_opencl_discover_devices(struct _starpu_machine_config *config)
  58. {
  59. /* Discover the number of OpenCL devices. Fill the result in CONFIG. */
  60. /* As OpenCL must have been initialized before calling this function,
  61. * `nb_device' is ensured to be correctly set. */
  62. STARPU_ASSERT(init_done == 1);
  63. config->topology.nhwopenclgpus = nb_devices;
  64. }
  65. static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
  66. {
  67. starpu_ssize_t limit;
  68. size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0;
  69. size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0;
  70. char name[30];
  71. #ifdef STARPU_SIMGRID
  72. totalGlobalMem = _starpu_simgrid_get_memsize("OpenCL", devid);
  73. #elif defined(STARPU_USE_OPENCL)
  74. /* Request the size of the current device's memory */
  75. cl_int err;
  76. cl_ulong size;
  77. err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size), &size, NULL);
  78. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  79. STARPU_OPENCL_REPORT_ERROR(err);
  80. totalGlobalMem = size;
  81. #endif
  82. limit = starpu_get_env_number("STARPU_LIMIT_OPENCL_MEM");
  83. if (limit == -1)
  84. {
  85. sprintf(name, "STARPU_LIMIT_OPENCL_%u_MEM", devid);
  86. limit = starpu_get_env_number(name);
  87. }
  88. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  89. if (limit == -1)
  90. {
  91. /* Use 90% of the available memory by default. */
  92. limit = totalGlobalMem / (1024*1024) * 0.9;
  93. }
  94. #endif
  95. global_mem[devid] = limit * 1024*1024;
  96. #ifdef STARPU_USE_OPENCL
  97. /* How much memory to waste ? */
  98. to_waste = totalGlobalMem - global_mem[devid];
  99. #endif
  100. _STARPU_DEBUG("OpenCL device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
  101. devid, (long)to_waste/(1024*1024), (long) limit, (long)totalGlobalMem/(1024*1024),
  102. (long)(totalGlobalMem - to_waste)/(1024*1024));
  103. }
  104. #ifdef STARPU_USE_OPENCL
  105. void starpu_opencl_get_context(int devid, cl_context *context)
  106. {
  107. *context = contexts[devid];
  108. }
  109. void starpu_opencl_get_device(int devid, cl_device_id *device)
  110. {
  111. *device = devices[devid];
  112. }
  113. void starpu_opencl_get_queue(int devid, cl_command_queue *queue)
  114. {
  115. *queue = queues[devid];
  116. }
  117. void starpu_opencl_get_current_queue(cl_command_queue *queue)
  118. {
  119. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  120. STARPU_ASSERT(queue);
  121. *queue = queues[worker->devid];
  122. }
  123. void starpu_opencl_get_current_context(cl_context *context)
  124. {
  125. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  126. STARPU_ASSERT(context);
  127. *context = contexts[worker->devid];
  128. }
  129. #endif /* STARPU_USE_OPENCL */
  130. int _starpu_opencl_init_context(int devid)
  131. {
  132. #ifdef STARPU_SIMGRID
  133. int j;
  134. for (j = 0; j < STARPU_MAX_PIPELINE; j++)
  135. {
  136. task_finished[devid][j] = 0;
  137. STARPU_PTHREAD_MUTEX_INIT(&task_mutex[devid][j], NULL);
  138. STARPU_PTHREAD_COND_INIT(&task_cond[devid][j], NULL);
  139. }
  140. #else /* !STARPU_SIMGRID */
  141. cl_int err;
  142. cl_uint uint;
  143. STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
  144. _STARPU_DEBUG("Initialising context for dev %d\n", devid);
  145. // Create a compute context
  146. err = 0;
  147. contexts[devid] = clCreateContext(NULL, 1, &devices[devid], NULL, NULL, &err);
  148. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  149. err = clGetDeviceInfo(devices[devid], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), &uint, NULL);
  150. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  151. STARPU_OPENCL_REPORT_ERROR(err);
  152. starpu_malloc_set_align(uint/8);
  153. // Create execution queue for the given device
  154. queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err);
  155. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  156. // Create transfer queue for the given device
  157. cl_command_queue_properties props;
  158. err = clGetDeviceInfo(devices[devid], CL_DEVICE_QUEUE_PROPERTIES, sizeof(props), &props, NULL);
  159. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  160. STARPU_OPENCL_REPORT_ERROR(err);
  161. props &= ~CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
  162. in_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err);
  163. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  164. out_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err);
  165. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  166. peer_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err);
  167. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  168. alloc_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err);
  169. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  170. STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
  171. #endif /* !STARPU_SIMGRID */
  172. return 0;
  173. }
  174. int _starpu_opencl_deinit_context(int devid)
  175. {
  176. #ifdef STARPU_SIMGRID
  177. int j;
  178. for (j = 0; j < STARPU_MAX_PIPELINE; j++)
  179. {
  180. task_finished[devid][j] = 0;
  181. STARPU_PTHREAD_MUTEX_DESTROY(&task_mutex[devid][j]);
  182. STARPU_PTHREAD_COND_DESTROY(&task_cond[devid][j]);
  183. }
  184. #else /* !STARPU_SIMGRID */
  185. cl_int err;
  186. STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
  187. _STARPU_DEBUG("De-initialising context for dev %d\n", devid);
  188. err = clReleaseContext(contexts[devid]);
  189. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  190. err = clReleaseCommandQueue(queues[devid]);
  191. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  192. err = clReleaseCommandQueue(in_transfer_queues[devid]);
  193. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  194. err = clReleaseCommandQueue(out_transfer_queues[devid]);
  195. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  196. err = clReleaseCommandQueue(peer_transfer_queues[devid]);
  197. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  198. err = clReleaseCommandQueue(alloc_queues[devid]);
  199. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  200. contexts[devid] = NULL;
  201. STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
  202. #endif
  203. return 0;
  204. }
  205. #ifdef STARPU_USE_OPENCL
  206. cl_int starpu_opencl_allocate_memory(int devid STARPU_ATTRIBUTE_UNUSED, cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, cl_mem_flags flags STARPU_ATTRIBUTE_UNUSED)
  207. {
  208. #ifdef STARPU_SIMGRID
  209. STARPU_ABORT();
  210. #else
  211. cl_int err;
  212. cl_mem memory;
  213. memory = clCreateBuffer(contexts[devid], flags, size, NULL, &err);
  214. if (err == CL_OUT_OF_HOST_MEMORY) return err;
  215. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  216. /*
  217. * OpenCL uses lazy memory allocation: we will only know if the
  218. * allocation failed when trying to copy data onto the device. But we
  219. * want to know this __now__, so we just perform a dummy copy.
  220. */
  221. char dummy = 0;
  222. cl_event ev;
  223. err = clEnqueueWriteBuffer(alloc_queues[devid], memory, CL_TRUE,
  224. 0, sizeof(dummy), &dummy,
  225. 0, NULL, &ev);
  226. if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE)
  227. return err;
  228. if (err == CL_OUT_OF_RESOURCES)
  229. return err;
  230. if (err != CL_SUCCESS)
  231. STARPU_OPENCL_REPORT_ERROR(err);
  232. clWaitForEvents(1, &ev);
  233. clReleaseEvent(ev);
  234. *mem = memory;
  235. return CL_SUCCESS;
  236. #endif
  237. }
  238. cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
  239. {
  240. cl_int err;
  241. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  242. if (event)
  243. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  244. cl_event ev;
  245. err = clEnqueueWriteBuffer(in_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
  246. if (event)
  247. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  248. if (STARPU_LIKELY(err == CL_SUCCESS))
  249. {
  250. if (event == NULL)
  251. {
  252. /* We want a synchronous copy, let's synchronise the queue */
  253. err = clWaitForEvents(1, &ev);
  254. if (STARPU_UNLIKELY(err))
  255. STARPU_OPENCL_REPORT_ERROR(err);
  256. err = clReleaseEvent(ev);
  257. if (STARPU_UNLIKELY(err))
  258. STARPU_OPENCL_REPORT_ERROR(err);
  259. }
  260. else
  261. {
  262. *event = ev;
  263. }
  264. if (ret)
  265. {
  266. *ret = (event == NULL) ? 0 : -EAGAIN;
  267. }
  268. }
  269. return err;
  270. }
  271. cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
  272. {
  273. cl_int err;
  274. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  275. if (event)
  276. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  277. cl_event ev;
  278. err = clEnqueueReadBuffer(out_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
  279. if (event)
  280. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  281. if (STARPU_LIKELY(err == CL_SUCCESS))
  282. {
  283. if (event == NULL)
  284. {
  285. /* We want a synchronous copy, let's synchronise the queue */
  286. err = clWaitForEvents(1, &ev);
  287. if (STARPU_UNLIKELY(err))
  288. STARPU_OPENCL_REPORT_ERROR(err);
  289. err = clReleaseEvent(ev);
  290. if (STARPU_UNLIKELY(err))
  291. STARPU_OPENCL_REPORT_ERROR(err);
  292. }
  293. else
  294. {
  295. *event = ev;
  296. }
  297. if (ret)
  298. {
  299. *ret = (event == NULL) ? 0 : -EAGAIN;
  300. }
  301. }
  302. return err;
  303. }
  304. cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, size_t src_offset, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t dst_offset, size_t size, cl_event *event, int *ret)
  305. {
  306. cl_int err;
  307. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  308. if (event)
  309. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  310. cl_event ev;
  311. err = clEnqueueCopyBuffer(peer_transfer_queues[worker->devid], src, dst, src_offset, dst_offset, size, 0, NULL, &ev);
  312. if (event)
  313. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  314. if (STARPU_LIKELY(err == CL_SUCCESS))
  315. {
  316. if (event == NULL)
  317. {
  318. /* We want a synchronous copy, let's synchronise the queue */
  319. err = clWaitForEvents(1, &ev);
  320. if (STARPU_UNLIKELY(err))
  321. STARPU_OPENCL_REPORT_ERROR(err);
  322. err = clReleaseEvent(ev);
  323. if (STARPU_UNLIKELY(err))
  324. STARPU_OPENCL_REPORT_ERROR(err);
  325. }
  326. else
  327. {
  328. *event = ev;
  329. }
  330. if (ret)
  331. {
  332. *ret = (event == NULL) ? 0 : -EAGAIN;
  333. }
  334. }
  335. return err;
  336. }
  337. cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event)
  338. {
  339. enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
  340. enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
  341. cl_int err;
  342. int ret;
  343. switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind))
  344. {
  345. case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
  346. err = starpu_opencl_copy_opencl_to_ram(
  347. (cl_mem) src, src_node,
  348. (void*) (dst + dst_offset), dst_node,
  349. size, src_offset, event, &ret);
  350. if (STARPU_UNLIKELY(err))
  351. STARPU_OPENCL_REPORT_ERROR(err);
  352. return ret;
  353. case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
  354. err = starpu_opencl_copy_ram_to_opencl(
  355. (void*) (src + src_offset), src_node,
  356. (cl_mem) dst, dst_node,
  357. size, dst_offset, event, &ret);
  358. if (STARPU_UNLIKELY(err))
  359. STARPU_OPENCL_REPORT_ERROR(err);
  360. return ret;
  361. case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
  362. err = starpu_opencl_copy_opencl_to_opencl(
  363. (cl_mem) src, src_node, src_offset,
  364. (cl_mem) dst, dst_node, dst_offset,
  365. size, event, &ret);
  366. if (STARPU_UNLIKELY(err))
  367. STARPU_OPENCL_REPORT_ERROR(err);
  368. return ret;
  369. default:
  370. STARPU_ABORT();
  371. break;
  372. }
  373. }
  374. #if 0
  375. cl_int _starpu_opencl_copy_rect_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3],
  376. const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch,
  377. size_t host_row_pitch, size_t host_slice_pitch, cl_event *event)
  378. {
  379. cl_int err;
  380. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  381. cl_bool blocking;
  382. blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
  383. if (event)
  384. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  385. err = clEnqueueReadBufferRect(out_transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch,
  386. buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event);
  387. if (event)
  388. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  389. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  390. return CL_SUCCESS;
  391. }
  392. cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3],
  393. const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch,
  394. size_t host_row_pitch, size_t host_slice_pitch, cl_event *event)
  395. {
  396. cl_int err;
  397. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  398. cl_bool blocking;
  399. blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
  400. if (event)
  401. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  402. err = clEnqueueWriteBufferRect(in_transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch,
  403. buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event);
  404. if (event)
  405. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  406. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  407. return CL_SUCCESS;
  408. }
  409. #endif
  410. #endif /* STARPU_USE_OPENCL */
  411. static size_t _starpu_opencl_get_global_mem_size(int devid)
  412. {
  413. return global_mem[devid];
  414. }
  415. void _starpu_opencl_init(void)
  416. {
  417. STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
  418. if (!init_done)
  419. {
  420. #ifdef STARPU_SIMGRID
  421. nb_devices = _starpu_simgrid_get_nbhosts("OpenCL");
  422. #else /* STARPU_USE_OPENCL */
  423. cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
  424. cl_uint nb_platforms;
  425. cl_int err;
  426. int i;
  427. cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
  428. _STARPU_DEBUG("Initialising OpenCL\n");
  429. // Get Platforms
  430. if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
  431. device_type |= CL_DEVICE_TYPE_CPU;
  432. if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
  433. device_type = CL_DEVICE_TYPE_CPU;
  434. err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
  435. if (STARPU_UNLIKELY(err != CL_SUCCESS)) nb_platforms=0;
  436. _STARPU_DEBUG("Platforms detected: %u\n", nb_platforms);
  437. _STARPU_DEBUG("CPU device type: %s\n", device_type&CL_DEVICE_TYPE_CPU?"requested":"not requested");
  438. _STARPU_DEBUG("GPU device type: %s\n", device_type&CL_DEVICE_TYPE_GPU?"requested":"not requested");
  439. _STARPU_DEBUG("Accelerator device type: %s\n", device_type&CL_DEVICE_TYPE_ACCELERATOR?"requested":"not requested");
  440. // Get devices
  441. nb_devices = 0;
  442. {
  443. unsigned j;
  444. for (j=0; j<nb_platforms; j++)
  445. {
  446. cl_uint num;
  447. int platform_valid = 1;
  448. char name[1024], vendor[1024];
  449. err = clGetPlatformInfo(platform_id[j], CL_PLATFORM_NAME, 1024, name, NULL);
  450. if (err != CL_SUCCESS)
  451. {
  452. STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo NAME", err);
  453. platform_valid = 0;
  454. }
  455. else
  456. {
  457. err = clGetPlatformInfo(platform_id[j], CL_PLATFORM_VENDOR, 1024, vendor, NULL);
  458. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  459. {
  460. STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo VENDOR", err);
  461. platform_valid = 0;
  462. }
  463. }
  464. if(strcmp(name, "SOCL Platform") == 0)
  465. {
  466. platform_valid = 0;
  467. _STARPU_DEBUG("Skipping SOCL Platform\n");
  468. }
  469. #ifdef STARPU_VERBOSE
  470. if (platform_valid)
  471. _STARPU_DEBUG("Platform: %s - %s\n", name, vendor);
  472. else
  473. _STARPU_DEBUG("Platform invalid\n");
  474. #endif
  475. if (platform_valid && nb_devices <= STARPU_MAXOPENCLDEVS)
  476. {
  477. err = clGetDeviceIDs(platform_id[j], device_type, STARPU_MAXOPENCLDEVS-nb_devices, STARPU_MAXOPENCLDEVS == nb_devices ? NULL : &devices[nb_devices], &num);
  478. if (err == CL_DEVICE_NOT_FOUND)
  479. {
  480. const cl_device_type all_device_types = CL_DEVICE_TYPE_CPU|CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
  481. if (device_type != all_device_types)
  482. {
  483. _STARPU_DEBUG(" No devices of the requested type(s) subset detected on this platform\n");
  484. }
  485. else
  486. {
  487. _STARPU_DEBUG(" No devices detected on this platform\n");
  488. }
  489. }
  490. else
  491. {
  492. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  493. _STARPU_DEBUG(" %u devices detected\n", num);
  494. nb_devices += num;
  495. }
  496. }
  497. }
  498. }
  499. // Get location of OpenCl kernel source files
  500. _starpu_opencl_program_dir = getenv("STARPU_OPENCL_PROGRAM_DIR");
  501. if (nb_devices > STARPU_MAXOPENCLDEVS)
  502. {
  503. _STARPU_DISP("# Warning: %u OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices?\n", nb_devices, STARPU_MAXOPENCLDEVS);
  504. nb_devices = STARPU_MAXOPENCLDEVS;
  505. }
  506. // initialise internal structures
  507. for(i=0 ; i<nb_devices ; i++)
  508. {
  509. contexts[i] = NULL;
  510. queues[i] = NULL;
  511. in_transfer_queues[i] = NULL;
  512. out_transfer_queues[i] = NULL;
  513. peer_transfer_queues[i] = NULL;
  514. alloc_queues[i] = NULL;
  515. }
  516. #endif /* STARPU_USE_OPENCL */
  517. init_done=1;
  518. }
  519. STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
  520. }
  521. #ifndef STARPU_SIMGRID
  522. static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname);
  523. #endif
  524. static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx);
  525. static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker);
  526. static void _starpu_opencl_execute_job(struct starpu_task *task, struct _starpu_worker *worker);
  527. int _starpu_opencl_driver_init(struct _starpu_worker *worker)
  528. {
  529. int devid = worker->devid;
  530. _starpu_worker_start(worker, _STARPU_FUT_OPENCL_KEY);
  531. _starpu_opencl_init_context(devid);
  532. /* one more time to avoid hacks from third party lib :) */
  533. _starpu_bind_thread_on_cpu(worker->config, worker->bindid);
  534. _starpu_opencl_limit_gpu_mem_if_needed(devid);
  535. _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_opencl_get_global_mem_size(devid));
  536. _starpu_malloc_init(worker->memory_node);
  537. float size = (float) global_mem[devid] / (1<<30);
  538. #ifdef STARPU_SIMGRID
  539. const char *devname = "Simgrid";
  540. #else
  541. /* get the device's name */
  542. char devname[128];
  543. _starpu_opencl_get_device_name(devid, devname, 128);
  544. #endif
  545. snprintf(worker->name, sizeof(worker->name), "OpenCL %u (%s %.1f GiB)", devid, devname, size);
  546. snprintf(worker->short_name, sizeof(worker->short_name), "OpenCL %u", devid);
  547. worker->pipeline_length = starpu_get_env_number_default("STARPU_OPENCL_PIPELINE", 2);
  548. if (worker->pipeline_length > STARPU_MAX_PIPELINE)
  549. {
  550. _STARPU_DISP("Warning: STARPU_OPENCL_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u", worker->pipeline_length, STARPU_MAX_PIPELINE);
  551. worker->pipeline_length = STARPU_MAX_PIPELINE;
  552. }
  553. #if defined(STARPU_SIMGRID) && defined(STARPU_NON_BLOCKING_DRIVERS)
  554. if (worker->pipeline_length >= 1)
  555. {
  556. /* We need blocking drivers, otherwise idle drivers
  557. * would keep consuming real CPU time while just
  558. * polling for task termination */
  559. _STARPU_DISP("Warning: reducing STARPU_OPENCL_PIPELINE to 0 because simgrid is enabled and blocking drivers are not enabled\n");
  560. worker->pipeline_length = 0;
  561. }
  562. #endif
  563. #if !defined(STARPU_SIMGRID) && !defined(STARPU_NON_BLOCKING_DRIVERS)
  564. if (worker->pipeline_length >= 1)
  565. {
  566. /* We need non-blocking drivers, to poll for OPENCL task
  567. * termination */
  568. _STARPU_DISP("Warning: reducing STARPU_OPENCL_PIPELINE to 0 because blocking drivers are not enabled (and simgrid is not enabled)\n");
  569. worker->pipeline_length = 0;
  570. }
  571. #endif
  572. _STARPU_DEBUG("OpenCL (%s) dev id %d thread is ready to run on CPU %d !\n", devname, devid, worker->bindid);
  573. _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
  574. /* tell the main thread that this one is ready */
  575. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  576. worker->status = STATUS_UNKNOWN;
  577. worker->worker_is_initialized = 1;
  578. STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond);
  579. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  580. return 0;
  581. }
  582. int _starpu_opencl_driver_run_once(struct _starpu_worker *worker)
  583. {
  584. int workerid = worker->workerid;
  585. unsigned memnode = worker->memory_node;
  586. struct _starpu_job *j;
  587. struct starpu_task *task;
  588. if (worker->ntasks)
  589. {
  590. #ifndef STARPU_SIMGRID
  591. size_t size;
  592. int err;
  593. #endif
  594. /* On-going asynchronous task, check for its termination first */
  595. task = worker->current_tasks[worker->first_task];
  596. #ifdef STARPU_SIMGRID
  597. if (task_finished[worker->devid][worker->first_task])
  598. #else /* !STARPU_SIMGRID */
  599. cl_int status;
  600. err = clGetEventInfo(task_events[worker->devid][worker->first_task], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, &size);
  601. STARPU_ASSERT(size == sizeof(cl_int));
  602. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  603. if (status != CL_COMPLETE)
  604. #endif /* !STARPU_SIMGRID */
  605. {
  606. _STARPU_TRACE_START_EXECUTING();
  607. /* Not ready yet, no better thing to do than waiting */
  608. __starpu_datawizard_progress(memnode, 1, 0);
  609. return 0;
  610. }
  611. else
  612. {
  613. #ifndef STARPU_SIMGRID
  614. task_events[worker->devid][worker->first_task] = 0;
  615. #endif
  616. /* Asynchronous task completed! */
  617. _starpu_opencl_stop_job(_starpu_get_job_associated_to_task(task), worker);
  618. /* See next task if any */
  619. if (worker->ntasks)
  620. {
  621. task = worker->current_tasks[worker->first_task];
  622. j = _starpu_get_job_associated_to_task(task);
  623. if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)
  624. {
  625. /* An asynchronous task, it was already queued,
  626. * it's now running, record its start time. */
  627. _starpu_driver_start_job(worker, j, &worker->perf_arch, &j->cl_start, 0, starpu_profiling_status_get());
  628. }
  629. else
  630. {
  631. /* A synchronous task, we have finished flushing the pipeline, we can now at last execute it. */
  632. _STARPU_TRACE_END_PROGRESS(memnode);
  633. _STARPU_TRACE_EVENT("sync_task");
  634. _starpu_opencl_execute_job(task, worker);
  635. _STARPU_TRACE_EVENT("end_sync_task");
  636. _STARPU_TRACE_START_PROGRESS(memnode);
  637. worker->pipeline_stuck = 0;
  638. }
  639. }
  640. _STARPU_TRACE_END_EXECUTING();
  641. }
  642. }
  643. __starpu_datawizard_progress(memnode, 1, 1);
  644. task = _starpu_get_worker_task(worker, workerid, memnode);
  645. if (task == NULL)
  646. return 0;
  647. j = _starpu_get_job_associated_to_task(task);
  648. /* can OpenCL do that task ? */
  649. if (!_STARPU_OPENCL_MAY_PERFORM(j))
  650. {
  651. /* this is not a OpenCL task */
  652. _starpu_push_task_to_workers(task);
  653. return 0;
  654. }
  655. worker->current_tasks[(worker->first_task + worker->ntasks)%STARPU_MAX_PIPELINE] = task;
  656. worker->ntasks++;
  657. if (worker->ntasks > 1 && !(task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC))
  658. {
  659. /* We have to execute a non-asynchronous task but we
  660. * still have tasks in the pipeline... Record it to
  661. * prevent more tasks from coming, and do it later */
  662. worker->pipeline_stuck = 1;
  663. return 0;
  664. }
  665. _STARPU_TRACE_END_PROGRESS(memnode);
  666. _starpu_opencl_execute_job(task, worker);
  667. _STARPU_TRACE_START_PROGRESS(memnode);
  668. return 0;
  669. }
  670. int _starpu_opencl_driver_deinit(struct _starpu_worker *worker)
  671. {
  672. _STARPU_TRACE_WORKER_DEINIT_START;
  673. unsigned memnode = worker->memory_node;
  674. _starpu_handle_all_pending_node_data_requests(memnode);
  675. /* In case there remains some memory that was automatically
  676. * allocated by StarPU, we release it now. Note that data
  677. * coherency is not maintained anymore at that point ! */
  678. _starpu_free_all_automatically_allocated_buffers(memnode);
  679. _starpu_malloc_shutdown(memnode);
  680. unsigned devid = worker->devid;
  681. _starpu_opencl_deinit_context(devid);
  682. _STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_OPENCL_KEY);
  683. return 0;
  684. }
  685. void *_starpu_opencl_worker(void *_arg)
  686. {
  687. struct _starpu_worker* worker = _arg;
  688. _starpu_opencl_driver_init(worker);
  689. _STARPU_TRACE_START_PROGRESS(memnode);
  690. while (_starpu_machine_is_running())
  691. {
  692. _starpu_may_pause();
  693. _starpu_opencl_driver_run_once(worker);
  694. }
  695. _starpu_opencl_driver_deinit(worker);
  696. _STARPU_TRACE_END_PROGRESS(memnode);
  697. return NULL;
  698. }
  699. #ifdef STARPU_USE_OPENCL
  700. #ifndef STARPU_SIMGRID
  701. static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
  702. {
  703. int err;
  704. if (!init_done)
  705. {
  706. _starpu_opencl_init();
  707. }
  708. // Get device name
  709. err = clGetDeviceInfo(devices[dev], CL_DEVICE_NAME, lname, name, NULL);
  710. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  711. _STARPU_DEBUG("Device %d : [%s]\n", dev, name);
  712. return EXIT_SUCCESS;
  713. }
  714. #endif
  715. #endif
  716. unsigned _starpu_opencl_get_device_count(void)
  717. {
  718. if (!init_done)
  719. {
  720. _starpu_opencl_init();
  721. }
  722. return nb_devices;
  723. }
  724. #ifdef STARPU_USE_OPENCL
  725. cl_device_type _starpu_opencl_get_device_type(int devid)
  726. {
  727. int err;
  728. cl_device_type type;
  729. if (!init_done)
  730. _starpu_opencl_init();
  731. err = clGetDeviceInfo(devices[devid], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
  732. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  733. STARPU_OPENCL_REPORT_ERROR(err);
  734. return type;
  735. }
  736. #endif /* STARPU_USE_OPENCL */
  737. static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED)
  738. {
  739. int ret;
  740. STARPU_ASSERT(j);
  741. struct starpu_task *task = j->task;
  742. int profiling = starpu_profiling_status_get();
  743. STARPU_ASSERT(task);
  744. struct starpu_codelet *cl = task->cl;
  745. STARPU_ASSERT(cl);
  746. _starpu_set_current_task(j->task);
  747. ret = _starpu_fetch_task_input(j);
  748. if (ret != 0)
  749. {
  750. /* there was not enough memory, so the input of
  751. * the codelet cannot be fetched ... put the
  752. * codelet back, and try it later */
  753. return -EAGAIN;
  754. }
  755. if (worker->ntasks == 1)
  756. {
  757. /* We are alone in the pipeline, the kernel will start now, record it */
  758. _starpu_driver_start_job(worker, j, &worker->perf_arch, &j->cl_start, 0, profiling);
  759. }
  760. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
  761. STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined");
  762. if (starpu_get_env_number("STARPU_DISABLE_KERNELS") <= 0)
  763. {
  764. _STARPU_TRACE_START_EXECUTING();
  765. #ifdef STARPU_SIMGRID
  766. double length = NAN;
  767. #ifdef STARPU_OPENCL_SIMULATOR
  768. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  769. #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
  770. #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
  771. #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
  772. #else
  773. #error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT
  774. #endif
  775. #endif
  776. struct starpu_profiling_task_info *profiling_info = task->profiling_info;
  777. STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time");
  778. length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self());
  779. #endif
  780. int async = task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC;
  781. _starpu_simgrid_submit_job(worker->workerid, j, &worker->perf_arch, length,
  782. async ? &task_finished[worker->devid][pipeline_idx] : NULL,
  783. async ? &task_mutex[worker->devid][pipeline_idx] : NULL,
  784. async ? &task_cond[worker->devid][pipeline_idx] : NULL);
  785. #else
  786. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  787. #endif
  788. _STARPU_TRACE_END_EXECUTING();
  789. }
  790. return 0;
  791. }
  792. static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker)
  793. {
  794. struct timespec codelet_end;
  795. int profiling = starpu_profiling_status_get();
  796. _starpu_set_current_task(NULL);
  797. if (worker->pipeline_length)
  798. worker->current_tasks[worker->first_task] = NULL;
  799. else
  800. worker->current_task = NULL;
  801. worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE;
  802. worker->ntasks--;
  803. _starpu_driver_end_job(worker, j, &worker->perf_arch, &codelet_end, 0, profiling);
  804. struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j);
  805. STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", worker->workerid);
  806. if(!sched_ctx->sched_policy)
  807. _starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, &j->cl_start, &codelet_end, profiling);
  808. else
  809. _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, &j->cl_start, &codelet_end, profiling);
  810. _starpu_push_task_output(j);
  811. _starpu_handle_job_termination(j);
  812. }
  813. static void _starpu_opencl_execute_job(struct starpu_task *task, struct _starpu_worker *worker)
  814. {
  815. int res;
  816. struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
  817. unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE;
  818. res = _starpu_opencl_start_job(j, worker, pipeline_idx);
  819. if (res)
  820. {
  821. switch (res)
  822. {
  823. case -EAGAIN:
  824. _STARPU_DISP("ouch, OpenCL could not actually run task %p, putting it back...\n", task);
  825. _starpu_push_task_to_workers(task);
  826. STARPU_ABORT();
  827. default:
  828. STARPU_ABORT();
  829. }
  830. }
  831. if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)
  832. {
  833. /* Record event to synchronize with task termination later */
  834. #ifndef STARPU_SIMGRID
  835. cl_command_queue queue;
  836. starpu_opencl_get_queue(worker->devid, &queue);
  837. #endif
  838. if (worker->pipeline_length == 0)
  839. {
  840. #ifdef STARPU_SIMGRID
  841. _starpu_simgrid_wait_tasks(worker->workerid);
  842. #else
  843. starpu_opencl_get_queue(worker->devid, &queue);
  844. clFinish(queue);
  845. #endif
  846. _starpu_opencl_stop_job(j, worker);
  847. }
  848. else
  849. {
  850. #ifndef STARPU_SIMGRID
  851. int err;
  852. /* the function clEnqueueMarker is deprecated from
  853. * OpenCL version 1.2. We would like to use the new
  854. * function clEnqueueMarkerWithWaitList. We could do
  855. * it by checking its availability through our own
  856. * configure macro HAVE_CLENQUEUEMARKERWITHWAITLIST
  857. * and the OpenCL macro CL_VERSION_1_2. However these
  858. * 2 macros detect the function availability in the
  859. * ICD and not in the device implementation.
  860. */
  861. err = clEnqueueMarker(queue, &task_events[worker->devid][pipeline_idx]);
  862. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  863. #endif
  864. _STARPU_TRACE_START_EXECUTING();
  865. }
  866. }
  867. else
  868. /* Synchronous execution */
  869. {
  870. _starpu_opencl_stop_job(j, worker);
  871. }
  872. }
  873. #ifdef STARPU_USE_OPENCL
  874. int _starpu_run_opencl(struct _starpu_worker *workerarg)
  875. {
  876. _STARPU_DEBUG("Running OpenCL %u from the application\n", workerarg->devid);
  877. workerarg->set = NULL;
  878. workerarg->worker_is_initialized = 0;
  879. /* Let's go ! */
  880. _starpu_opencl_worker(workerarg);
  881. return 0;
  882. }
  883. #endif /* STARPU_USE_OPENCL */