driver_opencl.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010 Mehdi Juhoor <mjuhoor@gmail.com>
  5. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <math.h>
  20. #include <starpu.h>
  21. #include <starpu_profiling.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/debug.h>
  25. #include <starpu_opencl.h>
  26. #include <drivers/driver_common/driver_common.h>
  27. #include "driver_opencl.h"
  28. #include "driver_opencl_utils.h"
  29. #include <common/utils.h>
  30. #include <datawizard/memory_manager.h>
  31. #ifdef STARPU_SIMGRID
  32. #include <core/simgrid.h>
  33. #endif
  /* Number of OpenCL devices found by _starpu_opencl_init(); -1 until then. */
  34. static int nb_devices = -1;
  /* Set to 1 once _starpu_opencl_init() has completed its discovery. */
  35. static int init_done = 0;
  /* Taken by _starpu_opencl_init() and by context creation/destruction. */
  36. static starpu_pthread_mutex_t big_lock = STARPU_PTHREAD_MUTEX_INITIALIZER;
  /* Per-device memory limit in bytes, written by
   * _starpu_opencl_limit_gpu_mem_if_needed(). */
  37. static size_t global_mem[STARPU_MAXOPENCLDEVS];
  38. #ifdef STARPU_USE_OPENCL
  /* Per-device OpenCL handles, all indexed by device id. */
  39. static cl_context contexts[STARPU_MAXOPENCLDEVS];
  40. static cl_device_id devices[STARPU_MAXOPENCLDEVS];
  /* Queue handed out through starpu_opencl_get_queue()/get_current_queue(). */
  41. static cl_command_queue queues[STARPU_MAXOPENCLDEVS];
  /* Queue used by the starpu_opencl_copy_* functions. */
  42. static cl_command_queue transfer_queues[STARPU_MAXOPENCLDEVS];
  /* Queue used by starpu_opencl_allocate_memory() for its probe write. */
  43. static cl_command_queue alloc_queues[STARPU_MAXOPENCLDEVS];
  44. #endif
  45. void
  46. _starpu_opencl_discover_devices(struct _starpu_machine_config *config)
  47. {
  48. /* Discover the number of OpenCL devices. Fill the result in CONFIG. */
  49. /* As OpenCL must have been initialized before calling this function,
  50. * `nb_device' is ensured to be correctly set. */
  51. STARPU_ASSERT(init_done == 1);
  52. config->topology.nhwopenclgpus = nb_devices;
  53. }
  54. static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
  55. {
  56. starpu_ssize_t limit;
  57. size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0;
  58. size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0;
  59. char name[30];
  60. limit = starpu_get_env_number("STARPU_LIMIT_OPENCL_MEM");
  61. if (limit == -1)
  62. {
  63. sprintf(name, "STARPU_LIMIT_OPENCL_%u_MEM", devid);
  64. limit = starpu_get_env_number(name);
  65. }
  66. if (limit == -1)
  67. {
  68. return;
  69. }
  70. global_mem[devid] = limit * 1024*1024;
  71. #ifdef STARPU_USE_OPENCL
  72. /* Request the size of the current device's memory */
  73. cl_int err;
  74. err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
  75. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  76. STARPU_OPENCL_REPORT_ERROR(err);
  77. /* How much memory to waste ? */
  78. to_waste = totalGlobalMem - global_mem[devid];
  79. #endif
  80. _STARPU_DEBUG("OpenCL device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
  81. devid, (long)to_waste/(1024*1024), (long) limit, (long)totalGlobalMem/(1024*1024),
  82. (long)(totalGlobalMem - to_waste)/(1024*1024));
  83. }
  84. #ifdef STARPU_USE_OPENCL
  85. void starpu_opencl_get_context(int devid, cl_context *context)
  86. {
  87. *context = contexts[devid];
  88. }
  89. void starpu_opencl_get_device(int devid, cl_device_id *device)
  90. {
  91. *device = devices[devid];
  92. }
  93. void starpu_opencl_get_queue(int devid, cl_command_queue *queue)
  94. {
  95. *queue = queues[devid];
  96. }
  97. void starpu_opencl_get_current_queue(cl_command_queue *queue)
  98. {
  99. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  100. STARPU_ASSERT(queue);
  101. *queue = queues[worker->devid];
  102. }
  103. void starpu_opencl_get_current_context(cl_context *context)
  104. {
  105. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  106. STARPU_ASSERT(context);
  107. *context = contexts[worker->devid];
  108. }
  109. #ifndef STARPU_SIMGRID
  110. cl_int _starpu_opencl_init_context(int devid)
  111. {
  112. cl_int err;
  113. cl_uint uint;
  114. _STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
  115. _STARPU_DEBUG("Initialising context for dev %d\n", devid);
  116. // Create a compute context
  117. err = 0;
  118. contexts[devid] = clCreateContext(NULL, 1, &devices[devid], NULL, NULL, &err);
  119. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  120. err = clGetDeviceInfo(devices[devid], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), &uint, NULL);
  121. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  122. STARPU_OPENCL_REPORT_ERROR(err);
  123. starpu_malloc_set_align(uint/8);
  124. // Create execution queue for the given device
  125. queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err);
  126. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  127. // Create transfer queue for the given device
  128. cl_command_queue_properties props;
  129. err = clGetDeviceInfo(devices[devid], CL_DEVICE_QUEUE_PROPERTIES, sizeof(props), &props, NULL);
  130. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  131. STARPU_OPENCL_REPORT_ERROR(err);
  132. props &= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
  133. transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err);
  134. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  135. alloc_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err);
  136. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  137. _STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
  138. return CL_SUCCESS;
  139. }
  140. cl_int _starpu_opencl_deinit_context(int devid)
  141. {
  142. cl_int err;
  143. _STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
  144. _STARPU_DEBUG("De-initialising context for dev %d\n", devid);
  145. err = clReleaseContext(contexts[devid]);
  146. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  147. err = clReleaseCommandQueue(queues[devid]);
  148. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  149. err = clReleaseCommandQueue(transfer_queues[devid]);
  150. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  151. err = clReleaseCommandQueue(alloc_queues[devid]);
  152. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  153. contexts[devid] = NULL;
  154. _STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
  155. return CL_SUCCESS;
  156. }
  157. #endif
  158. cl_int starpu_opencl_allocate_memory(cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, cl_mem_flags flags STARPU_ATTRIBUTE_UNUSED)
  159. {
  160. #ifdef STARPU_SIMGRID
  161. STARPU_ABORT();
  162. #else
  163. cl_int err;
  164. cl_mem memory;
  165. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  166. memory = clCreateBuffer(contexts[worker->devid], flags, size, NULL, &err);
  167. if (err == CL_OUT_OF_HOST_MEMORY) return err;
  168. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  169. /*
  170. * OpenCL uses lazy memory allocation: we will only know if the
  171. * allocation failed when trying to copy data onto the device. But we
  172. * want to know this __now__, so we just perform a dummy copy.
  173. */
  174. char dummy = 0;
  175. cl_event ev;
  176. err = clEnqueueWriteBuffer(alloc_queues[worker->devid], memory, CL_TRUE,
  177. 0, sizeof(dummy), &dummy,
  178. 0, NULL, &ev);
  179. if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE)
  180. return err;
  181. if (err == CL_OUT_OF_RESOURCES)
  182. return err;
  183. if (err != CL_SUCCESS)
  184. STARPU_OPENCL_REPORT_ERROR(err);
  185. clWaitForEvents(1, &ev);
  186. clReleaseEvent(ev);
  187. *mem = memory;
  188. return CL_SUCCESS;
  189. #endif
  190. }
  191. cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
  192. {
  193. cl_int err;
  194. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  195. if (event)
  196. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  197. cl_event ev;
  198. err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
  199. if (event)
  200. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  201. if (STARPU_LIKELY(err == CL_SUCCESS))
  202. {
  203. if (event == NULL)
  204. {
  205. /* We want a synchronous copy, let's synchronise the queue */
  206. err = clWaitForEvents(1, &ev);
  207. if (STARPU_UNLIKELY(err))
  208. STARPU_OPENCL_REPORT_ERROR(err);
  209. err = clReleaseEvent(ev);
  210. if (STARPU_UNLIKELY(err))
  211. STARPU_OPENCL_REPORT_ERROR(err);
  212. }
  213. else
  214. {
  215. *event = ev;
  216. }
  217. if (ret)
  218. {
  219. *ret = (event == NULL) ? 0 : -EAGAIN;
  220. }
  221. }
  222. return err;
  223. }
  224. cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret)
  225. {
  226. cl_int err;
  227. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  228. if (event)
  229. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  230. cl_event ev;
  231. err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev);
  232. if (event)
  233. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  234. if (STARPU_LIKELY(err == CL_SUCCESS))
  235. {
  236. if (event == NULL)
  237. {
  238. /* We want a synchronous copy, let's synchronise the queue */
  239. err = clWaitForEvents(1, &ev);
  240. if (STARPU_UNLIKELY(err))
  241. STARPU_OPENCL_REPORT_ERROR(err);
  242. err = clReleaseEvent(ev);
  243. if (STARPU_UNLIKELY(err))
  244. STARPU_OPENCL_REPORT_ERROR(err);
  245. }
  246. else
  247. {
  248. *event = ev;
  249. }
  250. if (ret)
  251. {
  252. *ret = (event == NULL) ? 0 : -EAGAIN;
  253. }
  254. }
  255. return err;
  256. }
  257. cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, size_t src_offset, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t dst_offset, size_t size, cl_event *event, int *ret)
  258. {
  259. cl_int err;
  260. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  261. if (event)
  262. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  263. cl_event ev;
  264. err = clEnqueueCopyBuffer(transfer_queues[worker->devid], src, dst, src_offset, dst_offset, size, 0, NULL, &ev);
  265. if (event)
  266. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  267. if (STARPU_LIKELY(err == CL_SUCCESS))
  268. {
  269. if (event == NULL)
  270. {
  271. /* We want a synchronous copy, let's synchronise the queue */
  272. err = clWaitForEvents(1, &ev);
  273. if (STARPU_UNLIKELY(err))
  274. STARPU_OPENCL_REPORT_ERROR(err);
  275. err = clReleaseEvent(ev);
  276. if (STARPU_UNLIKELY(err))
  277. STARPU_OPENCL_REPORT_ERROR(err);
  278. }
  279. else
  280. {
  281. *event = ev;
  282. }
  283. if (ret)
  284. {
  285. *ret = (event == NULL) ? 0 : -EAGAIN;
  286. }
  287. }
  288. return err;
  289. }
  290. #ifdef STARPU_USE_OPENCL
  291. cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event)
  292. {
  293. enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
  294. enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
  295. cl_int err;
  296. int ret;
  297. switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind))
  298. {
  299. case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
  300. err = starpu_opencl_copy_opencl_to_ram(
  301. (cl_mem) src, src_node,
  302. (void*) dst + dst_offset, dst_node,
  303. size, src_offset, event, &ret);
  304. if (STARPU_UNLIKELY(err))
  305. STARPU_OPENCL_REPORT_ERROR(err);
  306. return ret;
  307. case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
  308. err = starpu_opencl_copy_ram_to_opencl(
  309. (void*) src + src_offset, src_node,
  310. (cl_mem) dst, dst_node,
  311. size, dst_offset, event, &ret);
  312. if (STARPU_UNLIKELY(err))
  313. STARPU_OPENCL_REPORT_ERROR(err);
  314. return ret;
  315. case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
  316. err = starpu_opencl_copy_opencl_to_opencl(
  317. (cl_mem) src, src_node, src_offset,
  318. (cl_mem) dst, dst_node, dst_offset,
  319. size, event, &ret);
  320. if (STARPU_UNLIKELY(err))
  321. STARPU_OPENCL_REPORT_ERROR(err);
  322. return ret;
  323. default:
  324. STARPU_ABORT();
  325. break;
  326. }
  327. }
  328. #endif
  329. #if 0
  330. cl_int _starpu_opencl_copy_rect_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3],
  331. const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch,
  332. size_t host_row_pitch, size_t host_slice_pitch, cl_event *event)
  333. {
  334. cl_int err;
  335. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  336. cl_bool blocking;
  337. blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
  338. if (event)
  339. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  340. err = clEnqueueReadBufferRect(transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch,
  341. buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event);
  342. if (event)
  343. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  344. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  345. return CL_SUCCESS;
  346. }
  347. cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3],
  348. const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch,
  349. size_t host_row_pitch, size_t host_slice_pitch, cl_event *event)
  350. {
  351. cl_int err;
  352. struct _starpu_worker *worker = _starpu_get_local_worker_key();
  353. cl_bool blocking;
  354. blocking = (event == NULL) ? CL_TRUE : CL_FALSE;
  355. if (event)
  356. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  357. err = clEnqueueWriteBufferRect(transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch,
  358. buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event);
  359. if (event)
  360. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  361. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  362. return CL_SUCCESS;
  363. }
  364. #endif
  365. #endif /* STARPU_USE_OPENCL */
  366. static size_t _starpu_opencl_get_global_mem_size(int devid)
  367. {
  368. return global_mem[devid];
  369. }
  370. void _starpu_opencl_init(void)
  371. {
  372. _STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
  373. if (!init_done)
  374. {
  375. #ifdef STARPU_SIMGRID
  376. unsigned ncuda = _starpu_simgrid_get_nbhosts("CUDA");
  377. unsigned nopencl = _starpu_simgrid_get_nbhosts("OpenCL");
  378. nb_devices = nopencl - ncuda;
  379. STARPU_ASSERT_MSG((nopencl == ncuda) || !ncuda, "Does not yet support selectively disabling OpenCL devices of NVIDIA cards.");
  380. #else /* STARPU_USE_OPENCL */
  381. cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
  382. cl_uint nb_platforms;
  383. cl_int err;
  384. int i;
  385. cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
  386. _STARPU_DEBUG("Initialising OpenCL\n");
  387. // Get Platforms
  388. if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
  389. device_type |= CL_DEVICE_TYPE_CPU;
  390. if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
  391. device_type = CL_DEVICE_TYPE_CPU;
  392. err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
  393. if (STARPU_UNLIKELY(err != CL_SUCCESS)) nb_platforms=0;
  394. _STARPU_DEBUG("Platforms detected: %u\n", nb_platforms);
  395. // Get devices
  396. nb_devices = 0;
  397. {
  398. unsigned j;
  399. for (j=0; j<nb_platforms; j++)
  400. {
  401. cl_uint num;
  402. int platform_valid = 1;
  403. char name[1024], vendor[1024];
  404. err = clGetPlatformInfo(platform_id[j], CL_PLATFORM_NAME, 1024, name, NULL);
  405. if (err != CL_SUCCESS)
  406. {
  407. STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo NAME", err);
  408. platform_valid = 0;
  409. }
  410. else
  411. {
  412. err = clGetPlatformInfo(platform_id[j], CL_PLATFORM_VENDOR, 1024, vendor, NULL);
  413. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  414. {
  415. STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo VENDOR", err);
  416. platform_valid = 0;
  417. }
  418. }
  419. if(strcmp(name, "SOCL Platform") == 0)
  420. {
  421. platform_valid = 0;
  422. _STARPU_DEBUG("Skipping SOCL Platform\n");
  423. }
  424. #ifdef STARPU_VERBOSE
  425. if (platform_valid)
  426. _STARPU_DEBUG("Platform: %s - %s\n", name, vendor);
  427. else
  428. _STARPU_DEBUG("Platform invalid\n");
  429. #endif
  430. if (platform_valid)
  431. {
  432. err = clGetDeviceIDs(platform_id[j], device_type, STARPU_MAXOPENCLDEVS-nb_devices, &devices[nb_devices], &num);
  433. if (err == CL_DEVICE_NOT_FOUND)
  434. {
  435. _STARPU_DEBUG(" No devices detected on this platform\n");
  436. }
  437. else
  438. {
  439. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  440. _STARPU_DEBUG(" %u devices detected\n", num);
  441. nb_devices += num;
  442. }
  443. }
  444. }
  445. }
  446. // Get location of OpenCl kernel source files
  447. _starpu_opencl_program_dir = getenv("STARPU_OPENCL_PROGRAM_DIR");
  448. if (nb_devices > STARPU_MAXOPENCLDEVS)
  449. {
  450. _STARPU_DISP("# Warning: %u OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices?\n", nb_devices, STARPU_MAXOPENCLDEVS);
  451. nb_devices = STARPU_MAXOPENCLDEVS;
  452. }
  453. // initialise internal structures
  454. for(i=0 ; i<nb_devices ; i++)
  455. {
  456. contexts[i] = NULL;
  457. queues[i] = NULL;
  458. transfer_queues[i] = NULL;
  459. alloc_queues[i] = NULL;
  460. }
  461. #endif /* STARPU_USE_OPENCL */
  462. init_done=1;
  463. }
  464. _STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
  465. }
  /* Forward declarations: both functions are defined later in this file but
   * are used by the driver entry points that follow. */
  466. #ifndef STARPU_SIMGRID
  467. static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname);
  468. #endif
  469. static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_worker *args);
  470. static struct _starpu_worker*
  471. _starpu_opencl_get_worker_from_driver(struct starpu_driver *d)
  472. {
  473. #ifdef STARPU_USE_OPENCL
  474. int nworkers;
  475. int workers[STARPU_MAXOPENCLDEVS];
  476. nworkers = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, STARPU_MAXOPENCLDEVS);
  477. if (nworkers == 0)
  478. return NULL;
  479. int i;
  480. for (i = 0; i < nworkers; i++)
  481. {
  482. cl_device_id device;
  483. int devid = starpu_worker_get_devid(workers[i]);
  484. starpu_opencl_get_device(devid, &device);
  485. if (device == d->id.opencl_id)
  486. break;
  487. }
  488. if (i == nworkers)
  489. return NULL;
  490. return _starpu_get_worker_struct(workers[i]);
  491. #else
  492. unsigned nworkers = starpu_worker_get_count();
  493. unsigned workerid;
  494. for (workerid = 0; workerid < nworkers; workerid++)
  495. {
  496. if (starpu_worker_get_type(workerid) == d->type)
  497. {
  498. struct _starpu_worker *worker;
  499. worker = _starpu_get_worker_struct(workerid);
  500. if (worker->devid == d->id.opencl_id)
  501. return worker;
  502. }
  503. }
  504. return NULL;
  505. #endif
  506. }
  507. int _starpu_opencl_driver_init(struct starpu_driver *d)
  508. {
  509. struct _starpu_worker* args;
  510. args = _starpu_opencl_get_worker_from_driver(d);
  511. STARPU_ASSERT(args);
  512. int devid = args->devid;
  513. _starpu_worker_init(args, _STARPU_FUT_OPENCL_KEY);
  514. #ifndef STARPU_SIMGRID
  515. _starpu_opencl_init_context(devid);
  516. #endif
  517. /* one more time to avoid hacks from third party lib :) */
  518. _starpu_bind_thread_on_cpu(args->config, args->bindid);
  519. _starpu_opencl_limit_gpu_mem_if_needed(devid);
  520. _starpu_memory_manager_set_global_memory_size(args->memory_node, _starpu_opencl_get_global_mem_size(devid));
  521. args->status = STATUS_UNKNOWN;
  522. #ifdef STARPU_SIMGRID
  523. const char *devname = "Simgrid";
  524. #else
  525. /* get the device's name */
  526. char devname[128];
  527. _starpu_opencl_get_device_name(devid, devname, 128);
  528. #endif
  529. snprintf(args->name, sizeof(args->name), "OpenCL %u (%s)", devid, devname);
  530. snprintf(args->short_name, sizeof(args->short_name), "OpenCL %u", devid);
  531. _STARPU_DEBUG("OpenCL (%s) dev id %d thread is ready to run on CPU %d !\n", devname, devid, args->bindid);
  532. _STARPU_TRACE_WORKER_INIT_END;
  533. /* tell the main thread that this one is ready */
  534. _STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
  535. args->worker_is_initialized = 1;
  536. _STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
  537. _STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
  538. return 0;
  539. }
  540. int _starpu_opencl_driver_run_once(struct starpu_driver *d)
  541. {
  542. struct _starpu_worker* args;
  543. args = _starpu_opencl_get_worker_from_driver(d);
  544. STARPU_ASSERT(args);
  545. int workerid = args->workerid;
  546. unsigned memnode = args->memory_node;
  547. struct _starpu_job *j;
  548. struct starpu_task *task;
  549. int res;
  550. _STARPU_TRACE_START_PROGRESS(memnode);
  551. _starpu_datawizard_progress(memnode, 1);
  552. _STARPU_TRACE_END_PROGRESS(memnode);
  553. task = _starpu_get_worker_task(args, workerid, memnode);
  554. if (task == NULL)
  555. return 0;
  556. j = _starpu_get_job_associated_to_task(task);
  557. /* can OpenCL do that task ? */
  558. if (!_STARPU_OPENCL_MAY_PERFORM(j))
  559. {
  560. /* this is not a OpenCL task */
  561. _starpu_push_task_to_workers(task);
  562. return 0;
  563. }
  564. _starpu_set_current_task(j->task);
  565. args->current_task = j->task;
  566. res = _starpu_opencl_execute_job(j, args);
  567. _starpu_set_current_task(NULL);
  568. args->current_task = NULL;
  569. if (res)
  570. {
  571. switch (res)
  572. {
  573. case -EAGAIN:
  574. _STARPU_DISP("ouch, put the codelet %p back ... \n", j);
  575. _starpu_push_task_to_workers(task);
  576. STARPU_ABORT();
  577. return 0;
  578. default:
  579. STARPU_ABORT();
  580. }
  581. }
  582. _starpu_handle_job_termination(j);
  583. return 0;
  584. }
  585. int _starpu_opencl_driver_deinit(struct starpu_driver *d)
  586. {
  587. _STARPU_TRACE_WORKER_DEINIT_START;
  588. struct _starpu_worker* args;
  589. args = _starpu_opencl_get_worker_from_driver(d);
  590. STARPU_ASSERT(args);
  591. unsigned memnode = args->memory_node;
  592. _starpu_handle_all_pending_node_data_requests(memnode);
  593. /* In case there remains some memory that was automatically
  594. * allocated by StarPU, we release it now. Note that data
  595. * coherency is not maintained anymore at that point ! */
  596. _starpu_free_all_automatically_allocated_buffers(memnode);
  597. #ifndef STARPU_SIMGRID
  598. unsigned devid = args->devid;
  599. _starpu_opencl_deinit_context(devid);
  600. #endif
  601. _STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_OPENCL_KEY);
  602. return 0;
  603. }
  604. void *_starpu_opencl_worker(void *arg)
  605. {
  606. struct _starpu_worker* args = arg;
  607. #ifdef STARPU_USE_OPENCL
  608. cl_device_id id;
  609. starpu_opencl_get_device(args->devid, &id);
  610. struct starpu_driver d =
  611. {
  612. .type = STARPU_OPENCL_WORKER,
  613. .id.opencl_id = id
  614. };
  615. #else
  616. struct starpu_driver d =
  617. {
  618. .type = STARPU_OPENCL_WORKER,
  619. .id.opencl_id = args->devid
  620. };
  621. #endif
  622. _starpu_opencl_driver_init(&d);
  623. while (_starpu_machine_is_running())
  624. _starpu_opencl_driver_run_once(&d);
  625. _starpu_opencl_driver_deinit(&d);
  626. return NULL;
  627. }
  628. #ifdef STARPU_USE_OPENCL
  629. #ifndef STARPU_SIMGRID
  630. static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
  631. {
  632. int err;
  633. if (!init_done)
  634. {
  635. _starpu_opencl_init();
  636. }
  637. // Get device name
  638. err = clGetDeviceInfo(devices[dev], CL_DEVICE_NAME, lname, name, NULL);
  639. if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
  640. _STARPU_DEBUG("Device %d : [%s]\n", dev, name);
  641. return EXIT_SUCCESS;
  642. }
  643. #endif
  644. #endif
  645. unsigned _starpu_opencl_get_device_count(void)
  646. {
  647. if (!init_done)
  648. {
  649. _starpu_opencl_init();
  650. }
  651. return nb_devices;
  652. }
  653. #ifdef STARPU_USE_OPENCL
  654. cl_device_type _starpu_opencl_get_device_type(int devid)
  655. {
  656. int err;
  657. cl_device_type type;
  658. if (!init_done)
  659. _starpu_opencl_init();
  660. err = clGetDeviceInfo(devices[devid], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
  661. if (STARPU_UNLIKELY(err != CL_SUCCESS))
  662. STARPU_OPENCL_REPORT_ERROR(err);
  663. return type;
  664. }
  665. #endif /* STARPU_USE_OPENCL */
  666. static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_worker *args)
  667. {
  668. int ret;
  669. uint32_t mask = 0;
  670. STARPU_ASSERT(j);
  671. struct starpu_task *task = j->task;
  672. int profiling = starpu_profiling_status_get();
  673. struct timespec codelet_start, codelet_end;
  674. STARPU_ASSERT(task);
  675. struct starpu_codelet *cl = task->cl;
  676. STARPU_ASSERT(cl);
  677. ret = _starpu_fetch_task_input(j, mask);
  678. if (ret != 0)
  679. {
  680. /* there was not enough memory, so the input of
  681. * the codelet cannot be fetched ... put the
  682. * codelet back, and try it later */
  683. return -EAGAIN;
  684. }
  685. _starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
  686. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
  687. STARPU_ASSERT(func);
  688. #ifdef STARPU_SIMGRID
  689. double length = NAN;
  690. #ifdef STARPU_OPENCL_SIMULATOR
  691. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  692. #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
  693. #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
  694. #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
  695. #else
  696. #error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT
  697. #endif
  698. #endif
  699. struct starpu_task_profiling_info *profiling_info = task->profiling_info;
  700. STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time");
  701. length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self());
  702. #endif
  703. _starpu_simgrid_execute_job(j, args->perf_arch, length);
  704. #else
  705. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  706. #endif
  707. _starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
  708. _starpu_driver_update_job_feedback(j, args, args->perf_arch,
  709. &codelet_start, &codelet_end, profiling);
  710. _starpu_push_task_output(j, mask);
  711. return EXIT_SUCCESS;
  712. }
  713. #ifdef STARPU_USE_OPENCL
  714. int _starpu_run_opencl(struct starpu_driver *d)
  715. {
  716. STARPU_ASSERT(d && d->type == STARPU_OPENCL_WORKER);
  717. int nworkers;
  718. int workers[STARPU_MAXOPENCLDEVS];
  719. nworkers = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, STARPU_MAXOPENCLDEVS);
  720. if (nworkers == 0)
  721. return -ENODEV;
  722. int i;
  723. for (i = 0; i < nworkers; i++)
  724. {
  725. cl_device_id device;
  726. int devid = starpu_worker_get_devid(workers[i]);
  727. starpu_opencl_get_device(devid, &device);
  728. if (device == d->id.opencl_id)
  729. break;
  730. }
  731. if (i == nworkers)
  732. return -ENODEV;
  733. struct _starpu_worker *workerarg = _starpu_get_worker_struct(i);
  734. _STARPU_DEBUG("Running OpenCL %u from the application\n", workerarg->devid);
  735. workerarg->set = NULL;
  736. workerarg->worker_is_initialized = 0;
  737. /* Let's go ! */
  738. _starpu_opencl_worker(workerarg);
  739. /* XXX: Should we wait for the driver to be ready, as it is done when
  740. * launching it the usual way ? Cf. the end of _starpu_launch_drivers()
  741. */
  742. return 0;
  743. }
  744. #endif /* STARPU_USE_OPENCL */