starpu_opencl.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2014,2018 Université de Bordeaux
  4. * Copyright (C) 2011,2012 Inria
  5. * Copyright (C) 2010-2013,2015-2017,2019 CNRS
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #ifndef __STARPU_OPENCL_H__
  19. #define __STARPU_OPENCL_H__
  20. #include <starpu_config.h>
  21. #ifdef STARPU_USE_OPENCL
  22. #ifndef CL_TARGET_OPENCL_VERSION
  23. #define CL_TARGET_OPENCL_VERSION 100
  24. #endif
  25. #ifdef __APPLE__
  26. #include <OpenCL/cl.h>
  27. #else
  28. #include <CL/cl.h>
  29. #endif
  30. #include <assert.h>
  31. #ifdef __cplusplus
  32. extern "C"
  33. {
  34. #endif
  35. /**
  36. @defgroup API_OpenCL_Extensions OpenCL Extensions
  37. @{
  38. */
  39. /**
  40. Store the OpenCL programs as compiled for the different OpenCL
  41. devices.
  42. */
  43. struct starpu_opencl_program
  44. {
  45. /** Store each program for each OpenCL device. */
  46. cl_program programs[STARPU_MAXOPENCLDEVS];
  47. };
  48. /**
  49. @name Writing OpenCL kernels
  50. @{
  51. */
  52. /**
  53. Return the OpenCL context of the device designated by \p devid
  54. in \p context.
  55. */
  56. void starpu_opencl_get_context(int devid, cl_context *context);
  57. /**
  58. Return the cl_device_id corresponding to \p devid in \p device.
  59. */
  60. void starpu_opencl_get_device(int devid, cl_device_id *device);
  61. /**
  62. Return the command queue of the device designated by \p devid
  63. into \p queue.
  64. */
  65. void starpu_opencl_get_queue(int devid, cl_command_queue *queue);
  66. /**
  67. Return the context of the current worker.
  68. */
  69. void starpu_opencl_get_current_context(cl_context *context);
  70. /**
  71. Return the computation kernel command queue of the current
  72. worker.
  73. */
  74. void starpu_opencl_get_current_queue(cl_command_queue *queue);
  75. /**
  76. Set the arguments of a given kernel. The list of arguments
  77. must be given as <c>(size_t size_of_the_argument, cl_mem *
  78. pointer_to_the_argument)</c>. The last argument must be 0. Return the
  79. number of arguments that were successfully set. In case of failure,
  80. return the id of the argument that could not be set and \p err is set to
  81. the error returned by OpenCL. Otherwise, return the number of
  82. arguments that were set.
  83. Here an example:
  84. \code{.c}
  85. int n;
  86. cl_int err;
  87. cl_kernel kernel;
  88. n = starpu_opencl_set_kernel_args(&err, 2, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0);
  89. if (n != 2) fprintf(stderr, "Error : %d\n", err);
  90. \endcode
  91. */
  92. int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
  93. /** @} */
  94. /**
  95. @name Compiling OpenCL kernels
  96. Source codes for OpenCL kernels can be stored in a file or in a
  97. string. StarPU provides functions to build the program executable for
  98. each available OpenCL device as a cl_program object. This program
  99. executable can then be loaded within a specific queue as explained in
  100. the next section. These are only helpers, Applications can also fill a
  101. starpu_opencl_program array by hand for more advanced use (e.g.
  102. different programs on the different OpenCL devices, for relocation
  103. purpose for instance).
  104. @{
  105. */
  106. /**
  107. Store the contents of the file \p source_file_name in the buffer
  108. \p opencl_program_source. The file \p source_file_name can be located in the
  109. current directory, or in the directory specified by the environment
  110. variable \ref STARPU_OPENCL_PROGRAM_DIR, or
  111. in the directory <c>share/starpu/opencl</c> of the installation
  112. directory of StarPU, or in the source directory of StarPU. When the
  113. file is found, \p located_file_name is the full name of the file as it
  114. has been located on the system, \p located_dir_name the directory
  115. where it has been located. Otherwise, they are both set to the empty
  116. string.
  117. */
  118. void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source);
  119. /**
  120. Similar to function starpu_opencl_load_program_source() but
  121. allocate the buffers \p located_file_name, \p located_dir_name and
  122. \p opencl_program_source.
  123. */
  124. void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source);
  125. /**
  126. Compile the OpenCL kernel stored in the file \p source_file_name
  127. with the given options \p build_options and store the result in the
  128. directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
  129. \p source_file_name. The compilation is done for every OpenCL device,
  130. and the filename is suffixed with the vendor id and the device id of
  131. the OpenCL device.
  132. */
  133. int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options);
  134. /**
  135. Compile the OpenCL kernel in the string \p opencl_program_source
  136. with the given options \p build_options and store the result in the
  137. directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
  138. file_name. The compilation is done for every OpenCL device, and the
  139. filename is suffixed with the vendor id and the device id of the
  140. OpenCL device.
  141. */
  142. int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options);
  143. /**
  144. Compile the binary OpenCL kernel identified with \p kernel_id.
  145. For every OpenCL device, the binary OpenCL kernel will be loaded from
  146. the file
  147. <c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
  148. */
  149. int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs);
  150. /**
  151. Compile an OpenCL source code stored in a file.
  152. */
  153. int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options);
  154. /**
  155. Compile an OpenCL source code stored in a string.
  156. */
  157. int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options);
  158. /**
  159. Unload an OpenCL compiled code.
  160. */
  161. int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
  162. /** @} */
  163. /**
  164. @name Loading OpenCL kernels
  165. @{
  166. */
  167. /**
  168. Create a kernel \p kernel for device \p devid, on its computation
  169. command queue returned in \p queue, using program \p opencl_programs
  170. and name \p kernel_name.
  171. */
  172. int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid);
  173. /**
  174. Release the given \p kernel, to be called after kernel execution.
  175. */
  176. int starpu_opencl_release_kernel(cl_kernel kernel);
  177. /** @} */
  178. /**
  179. @name OpenCL Statistics
  180. @{
  181. */
  182. /**
  183. Collect statistics on a kernel execution.
  184. After termination of the kernels, the OpenCL codelet should call this
  185. function with the event returned by \c clEnqueueNDRangeKernel(), to
  186. let StarPU collect statistics about the kernel execution (used cycles,
  187. consumed energy).
  188. */
  189. int starpu_opencl_collect_stats(cl_event event);
  190. /** @} */
  191. /**
  192. @name OpenCL Utilities
  193. @{
  194. */
  195. /**
  196. Return the error message in English corresponding to \p status, an OpenCL
  197. error code.
  198. */
  199. const char *starpu_opencl_error_string(cl_int status);
  200. /**
  201. Given a valid error status, print the corresponding error message on
  202. \c stdout, along with the function name \p func, the filename
  203. \p file, the line number \p line and the message \p msg.
  204. */
  205. void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status);
  206. /**
  207. Call the function starpu_opencl_display_error() with the error
  208. \p status, the current function name, current file and line number,
  209. and a empty message.
  210. */
  211. #define STARPU_OPENCL_DISPLAY_ERROR(status) starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
  212. /**
  213. Call the function starpu_opencl_display_error() and abort.
  214. */
  215. static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
  216. {
  217. starpu_opencl_display_error(func, file, line, msg, status);
  218. assert(0);
  219. }
  220. /**
  221. Call the function starpu_opencl_report_error() with the error \p
  222. status, the current function name, current file and line number,
  223. and a empty message.
  224. */
  225. #define STARPU_OPENCL_REPORT_ERROR(status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
  226. /**
  227. Call the function starpu_opencl_report_error() with \p msg
  228. and \p status, the current function name, current file and line number.
  229. */
  230. #define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status)
  231. /**
  232. Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
  233. combination of \c cl_mem_flags values.
  234. */
  235. cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags);
  236. /**
  237. Copy \p size bytes from the given \p ptr on RAM \p src_node to the
  238. given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
  239. bytes, in \p buffer. if \p event is <c>NULL</c>, the copy is
  240. synchronous, i.e the queue is synchronised before returning. If not
  241. <c>NULL</c>, \p event can be used after the call to wait for this
  242. particular copy to complete. This function returns <c>CL_SUCCESS</c>
  243. if the copy was successful, or a valid OpenCL error code otherwise.
  244. The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
  245. asynchronous launch was successful, or to 0 if \p event was
  246. <c>NULL</c>.
  247. */
  248. cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
  249. /**
  250. Copy \p size bytes asynchronously from the given \p buffer on OpenCL
  251. \p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
  252. offset, in bytes, in \p buffer. if \p event is <c>NULL</c>, the copy
  253. is synchronous, i.e the queue is synchronised before returning. If not
  254. <c>NULL</c>, \p event can be used after the call to wait for this
  255. particular copy to complete. This function returns <c>CL_SUCCESS</c>
  256. if the copy was successful, or a valid OpenCL error code otherwise.
  257. The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
  258. asynchronous launch was successful, or to 0 if \p event was
  259. <c>NULL</c>.
  260. */
  261. cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
  262. /**
  263. Copy \p size bytes asynchronously from byte offset \p src_offset of \p
  264. src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
  265. OpenCL \p dst_node. if \p event is <c>NULL</c>, the copy is
  266. synchronous, i.e. the queue is synchronised before returning. If not
  267. <c>NULL</c>, \p event can be used after the call to wait for this
  268. particular copy to complete. This function returns <c>CL_SUCCESS</c>
  269. if the copy was successful, or a valid OpenCL error code otherwise.
  270. The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
  271. asynchronous launch was successful, or to 0 if \p event was
  272. <c>NULL</c>.
  273. */
  274. cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret);
  275. /**
  276. Copy \p size bytes from byte offset \p src_offset of \p src on \p
  277. src_node to byte offset \p dst_offset of \p dst on \p dst_node. if \p
  278. event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
  279. synchronised before returning. If not <c>NULL</c>, \p event can be
  280. used after the call to wait for this particular copy to complete. The
  281. function returns <c>-EAGAIN</c> if the asynchronous launch was
  282. successfull. It returns 0 if the synchronous copy was successful, or
  283. fails otherwise.
  284. */
  285. cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event);
  286. /** @} */
  287. /** @} */
  288. #ifdef __cplusplus
  289. }
  290. #endif
  291. #endif /* STARPU_USE_OPENCL */
  292. #endif /* __STARPU_OPENCL_H__ */