advanced-api.texi 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178
  1. @c -*-texinfo-*-
  2. @c This file is part of the StarPU Handbook.
  3. @c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
  4. @c Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. @c Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
  6. @c See the file starpu.texi for copying conditions.
  7. @menu
  8. * Insert Task::
  9. * Tracing support::
  10. * MPI Interface::
  11. * Defining a new data interface::
  12. * Multiformat Data Interface::
  13. * Task Bundles::
  14. * Task Lists::
  15. * Using Parallel Tasks::
  16. * Scheduling Contexts::
  17. * Defining a new scheduling policy::
  18. * Running drivers::
  19. * Expert mode::
  20. @end menu
  21. @node Insert Task
  22. @section Insert Task
  23. @deftypefun int starpu_insert_task (struct starpu_codelet *@var{cl}, ...)
  24. Create and submit a task corresponding to @var{cl} with the following
  25. arguments. The argument list must be zero-terminated.
  26. The arguments following the codelets can be of the following types:
  27. @itemize
  28. @item
  29. @code{STARPU_R}, @code{STARPU_W}, @code{STARPU_RW}, @code{STARPU_SCRATCH}, @code{STARPU_REDUX} an access mode followed by a data handle;
  30. @item
  31. @code{STARPU_DATA_ARRAY} followed by an array of data handles and its number of elements;
  32. @item
  33. the specific values @code{STARPU_VALUE}, @code{STARPU_CALLBACK},
  34. @code{STARPU_CALLBACK_ARG}, @code{STARPU_CALLBACK_WITH_ARG},
  35. @code{STARPU_PRIORITY}, @code{STARPU_TAG}, @code{STARPU_FLOPS}, followed by the appropriate objects
  36. as defined below.
  37. @end itemize
  38. When using @code{STARPU_DATA_ARRAY}, the access mode of the data
  39. handles is not defined.
  40. Parameters to be passed to the codelet implementation are defined
  41. through the type @code{STARPU_VALUE}. The function
  42. @code{starpu_codelet_unpack_args} must be called within the codelet
  43. implementation to retrieve them.
  44. @end deftypefun
  45. @defmac STARPU_VALUE
  46. this macro is used when calling @code{starpu_insert_task}, and must be
  47. followed by a pointer to a constant value and the size of the constant
  48. @end defmac
  49. @defmac STARPU_CALLBACK
  50. this macro is used when calling @code{starpu_insert_task}, and must be
  51. followed by a pointer to a callback function
  52. @end defmac
  53. @defmac STARPU_CALLBACK_ARG
  54. this macro is used when calling @code{starpu_insert_task}, and must be
  55. followed by a pointer to be given as an argument to the callback
  56. function
  57. @end defmac
  58. @defmac STARPU_CALLBACK_WITH_ARG
  59. this macro is used when calling @code{starpu_insert_task}, and must be
  60. followed by two pointers: one to a callback function, and the other to
  61. be given as an argument to the callback function; this is equivalent
  62. to using both @code{STARPU_CALLBACK} and
  63. @code{STARPU_CALLBACK_ARG}
  64. @end defmac
  65. @defmac STARPU_PRIORITY
  66. this macro is used when calling @code{starpu_insert_task}, and must be
  67. followed by an integer defining a priority level
  68. @end defmac
  69. @defmac STARPU_TAG
  70. this macro is used when calling @code{starpu_insert_task}, and must be
  71. followed by a tag.
  72. @end defmac
  73. @defmac STARPU_FLOPS
  74. this macro is used when calling @code{starpu_insert_task}, and must be followed
  75. by an amount of floating point operations, as a double. The user may have to
  76. explicitly cast into double, otherwise parameter passing will not work.
  77. @end defmac
  78. @deftypefun void starpu_codelet_pack_args ({char **}@var{arg_buffer}, {size_t *}@var{arg_buffer_size}, ...)
  79. Pack arguments of type @code{STARPU_VALUE} into a buffer which can be
  80. given to a codelet and later unpacked with the function
  81. @code{starpu_codelet_unpack_args} defined below.
  82. @end deftypefun
  83. @deftypefun void starpu_codelet_unpack_args ({void *}@var{cl_arg}, ...)
  84. Retrieve the arguments of type @code{STARPU_VALUE} associated to a
  85. task automatically created using the function
  86. @code{starpu_insert_task} defined above.
  87. @end deftypefun
  88. @node Tracing support
  89. @section Tracing support
  90. @deftypefun void starpu_fxt_start_profiling (void)
  91. Start recording the trace. The trace is by default started from
  92. @code{starpu_init()} call, but can be paused by using
  93. @code{starpu_fxt_stop_profiling}, in which case
  94. @code{starpu_fxt_start_profiling} should be called to specify when to resume
  95. recording events.
  96. @end deftypefun
  97. @deftypefun void starpu_fxt_stop_profiling (void)
  98. Stop recording the trace. The trace is by default stopped at
  99. @code{starpu_shutdown()} call. @code{starpu_fxt_stop_profiling} can however be
  100. used to stop it earlier. @code{starpu_fxt_start_profiling} can then be called to
  101. start recording it again, etc.
  102. @end deftypefun
  103. @node MPI Interface
  104. @section MPI Interface
  105. @menu
  106. * Initialisation::
  107. * Communication::
  108. * Communication cache::
  109. @end menu
  110. @node Initialisation
  111. @subsection Initialisation
  112. @deftypefun int starpu_mpi_init (int *@var{argc}, char ***@var{argv}, int initialize_mpi)
  113. Initializes the starpumpi library. @code{initialize_mpi} indicates if
  114. MPI should be initialized or not by StarPU. If the value is not @code{0},
  115. MPI will be initialized by calling @code{MPI_Init_thread(argc, argv,
  116. MPI_THREAD_SERIALIZED, ...)}.
  117. @end deftypefun
  118. @deftypefun int starpu_mpi_initialize (void)
  119. This function is deprecated. One should instead use the
  120. function @code{starpu_mpi_init()} defined above.
  121. This function does not call @code{MPI_Init}; @code{MPI_Init} must be called beforehand.
  122. @end deftypefun
  123. @deftypefun int starpu_mpi_initialize_extended (int *@var{rank}, int *@var{world_size})
  124. This function is deprecated. One should instead use the
  125. function @code{starpu_mpi_init()} defined above.
  126. MPI will be initialized by starpumpi by calling @code{MPI_Init_thread(argc, argv,
  127. MPI_THREAD_SERIALIZED, ...)}.
  128. @end deftypefun
  129. @deftypefun int starpu_mpi_shutdown (void)
  130. Cleans the starpumpi library. This must be called between calling
  131. @code{starpu_mpi} functions and @code{starpu_shutdown()}.
  132. @code{MPI_Finalize()} will be called if StarPU-MPI has been initialized
  133. by @code{starpu_mpi_init()}.
  134. @end deftypefun
  135. @deftypefun void starpu_mpi_comm_amounts_retrieve (size_t *@var{comm_amounts})
  136. Retrieve the current amount of communications from the current node in
  137. the array @code{comm_amounts} which must have a size greater than or equal
  138. to the world size. Communications statistics must be enabled
  139. (@pxref{STARPU_COMM_STATS}).
  140. @end deftypefun
  141. @node Communication
  142. @subsection Communication
  143. @deftypefun int starpu_mpi_send (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm})
  144. Performs a standard-mode, blocking send of @var{data_handle} to the
  145. node @var{dest} using the message tag @code{mpi_tag} within the
  146. communicator @var{comm}.
  147. @end deftypefun
  148. @deftypefun int starpu_mpi_recv (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, MPI_Status *@var{status})
  149. Performs a standard-mode, blocking receive in @var{data_handle} from the
  150. node @var{source} using the message tag @code{mpi_tag} within the
  151. communicator @var{comm}.
  152. @end deftypefun
  153. @deftypefun int starpu_mpi_isend (starpu_data_handle_t @var{data_handle}, starpu_mpi_req *@var{req}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm})
  154. Posts a standard-mode, non blocking send of @var{data_handle} to the
  155. node @var{dest} using the message tag @code{mpi_tag} within the
  156. communicator @var{comm}. After the call, the pointer to the request
  157. @var{req} can be used to test or to wait for the completion of the communication.
  158. @end deftypefun
  159. @deftypefun int starpu_mpi_irecv (starpu_data_handle_t @var{data_handle}, starpu_mpi_req *@var{req}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm})
  160. Posts a nonblocking receive in @var{data_handle} from the
  161. node @var{source} using the message tag @code{mpi_tag} within the
  162. communicator @var{comm}. After the call, the pointer to the request
  163. @var{req} can be used to test or to wait for the completion of the communication.
  164. @end deftypefun
  165. @deftypefun int starpu_mpi_isend_detached (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm}, void (*@var{callback})(void *), void *@var{arg})
  166. Posts a standard-mode, non blocking send of @var{data_handle} to the
  167. node @var{dest} using the message tag @code{mpi_tag} within the
  168. communicator @var{comm}. On completion, the @var{callback} function is
  169. called with the argument @var{arg}. Similarly to the pthread detached
  170. functionality, when a detached communication completes, its resources
  171. are automatically released back to the system, there is no need to
  172. test or to wait for the completion of the request.
  173. @end deftypefun
  174. @deftypefun int starpu_mpi_irecv_detached (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, void (*@var{callback})(void *), void *@var{arg})
  175. Posts a nonblocking receive in @var{data_handle} from the
  176. node @var{source} using the message tag @code{mpi_tag} within the
  177. communicator @var{comm}. On completion, the @var{callback} function is
  178. called with the argument @var{arg}. Similarly to the pthread detached
  179. functionality, when a detached communication completes, its resources
  180. are automatically released back to the system, there is no need to
  181. test or to wait for the completion of the request.
  182. @end deftypefun
  183. @deftypefun int starpu_mpi_wait (starpu_mpi_req *@var{req}, MPI_Status *@var{status})
  184. Returns when the operation identified by request @var{req} is complete.
  185. @end deftypefun
  186. @deftypefun int starpu_mpi_test (starpu_mpi_req *@var{req}, int *@var{flag}, MPI_Status *@var{status})
  187. If the operation identified by @var{req} is complete, set @var{flag}
  188. to 1. The @var{status} object is set to contain information on the
  189. completed operation.
  190. @end deftypefun
  191. @deftypefun int starpu_mpi_barrier (MPI_Comm @var{comm})
  192. Blocks the caller until all group members of the communicator
  193. @var{comm} have called it.
  194. @end deftypefun
  195. @deftypefun int starpu_mpi_isend_detached_unlock_tag (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm}, starpu_tag_t @var{tag})
  196. Posts a standard-mode, non blocking send of @var{data_handle} to the
  197. node @var{dest} using the message tag @code{mpi_tag} within the
  198. communicator @var{comm}. On completion, @var{tag} is unlocked.
  199. @end deftypefun
  200. @deftypefun int starpu_mpi_irecv_detached_unlock_tag (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, starpu_tag_t @var{tag})
  201. Posts a nonblocking receive in @var{data_handle} from the
  202. node @var{source} using the message tag @code{mpi_tag} within the
  203. communicator @var{comm}. On completion, @var{tag} is unlocked.
  204. @end deftypefun
  205. @deftypefun int starpu_mpi_isend_array_detached_unlock_tag (unsigned @var{array_size}, starpu_data_handle_t *@var{data_handle}, int *@var{dest}, int *@var{mpi_tag}, MPI_Comm *@var{comm}, starpu_tag_t @var{tag})
  206. Posts @var{array_size} standard-mode, non blocking sends. Each post
  207. sends the n-th data of the array @var{data_handle} to the n-th node of
  208. the array @var{dest}
  209. using the n-th message tag of the array @code{mpi_tag} within the n-th
  210. communicator of the array
  211. @var{comm}. On completion of all the requests, @var{tag} is unlocked.
  212. @end deftypefun
  213. @deftypefun int starpu_mpi_irecv_array_detached_unlock_tag (unsigned @var{array_size}, starpu_data_handle_t *@var{data_handle}, int *@var{source}, int *@var{mpi_tag}, MPI_Comm *@var{comm}, starpu_tag_t @var{tag})
  214. Posts @var{array_size} nonblocking receives. Each post receives in the
  215. n-th data of the array @var{data_handle} from the n-th
  216. node of the array @var{source} using the n-th message tag of the array
  217. @code{mpi_tag} within the n-th communicator of the array @var{comm}.
  218. On completion of all the requests, @var{tag} is unlocked.
  219. @end deftypefun
  220. @node Communication cache
  221. @subsection Communication cache
  222. @deftypefun void starpu_mpi_cache_flush (MPI_Comm @var{comm}, starpu_data_handle_t @var{data_handle})
  223. Clear the send and receive communication cache for the data
  224. @var{data_handle}. The function has to be called synchronously by all
  225. the MPI nodes.
  226. The function does nothing if the cache mechanism is disabled (@pxref{STARPU_MPI_CACHE}).
  227. @end deftypefun
  228. @deftypefun void starpu_mpi_cache_flush_all_data (MPI_Comm @var{comm})
  229. Clear the send and receive communication cache for all data. The
  230. function has to be called synchronously by all the MPI nodes.
  231. The function does nothing if the cache mechanism is disabled (@pxref{STARPU_MPI_CACHE}).
  232. @end deftypefun
  233. @node Defining a new data interface
  234. @section Defining a new data interface
  235. @menu
  236. * Data Interface API:: Data Interface API
  237. * An example of data interface:: An example of data interface
  238. @end menu
  239. @node Data Interface API
  240. @subsection Data Interface API
  241. @deftp {Data Type} {struct starpu_data_interface_ops}
  242. @anchor{struct starpu_data_interface_ops}
  243. Per-interface data transfer methods.
  244. @table @asis
  245. @item @code{void (*register_data_handle)(starpu_data_handle_t handle, unsigned home_node, void *data_interface)}
  246. Register an existing interface into a data handle.
  247. @item @code{starpu_ssize_t (*allocate_data_on_node)(void *data_interface, unsigned node)}
  248. Allocate data for the interface on a given node.
  249. @item @code{ void (*free_data_on_node)(void *data_interface, unsigned node)}
  250. Free data of the interface on a given node.
  251. @item @code{ const struct starpu_data_copy_methods *copy_methods}
  252. ram/cuda/opencl synchronous and asynchronous transfer methods.
  253. @item @code{ void * (*handle_to_pointer)(starpu_data_handle_t handle, unsigned node)}
  254. Return the current pointer (if any) for the handle on the given node.
  255. @item @code{ size_t (*get_size)(starpu_data_handle_t handle)}
  256. Return an estimation of the size of data, for performance models.
  257. @item @code{ uint32_t (*footprint)(starpu_data_handle_t handle)}
  258. Return a 32bit footprint which characterizes the data size.
  259. @item @code{ int (*compare)(void *data_interface_a, void *data_interface_b)}
  260. Compare the data size of two interfaces.
  261. @item @code{ void (*display)(starpu_data_handle_t handle, FILE *f)}
  262. Dump the sizes of a handle to a file.
  263. @item @code{enum starpu_data_interface_id interfaceid}
  264. An identifier that is unique to each interface.
  265. @item @code{size_t interface_size}
  266. The size of the interface data descriptor.
  267. @item @code{int is_multiformat}
  268. todo
  269. @item @code{struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface)}
  270. todo
  271. @item @code{int (*pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count)}
  272. Pack the data handle into a contiguous buffer at the address
  273. @code{ptr} and set the size of the newly created buffer in
  274. @code{count}. If @var{ptr} is @code{NULL}, the function should not copy the data in the
  275. buffer but just set @var{count} to the size of the buffer which
  276. would have been allocated. The special value @code{-1} indicates the
  277. size is not yet known.
  278. @item @code{int (*unpack_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)}
  279. Unpack the data handle from the contiguous buffer at the address @code{ptr} of size @var{count}.
  280. @end table
  281. @end deftp
  282. @deftp {Data Type} {struct starpu_data_copy_methods}
  283. Defines the per-interface methods. If the @code{any_to_any} method is provided,
  284. it will be used by default if no more specific method is provided. It can still
  285. be useful to provide more specific methods, e.g. when particular
  286. CUDA or OpenCL support is available.
  287. @table @asis
  288. @item @code{int (*@{ram,cuda,opencl@}_to_@{ram,cuda,opencl@})(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
  289. These 9 functions define how to copy data from the @var{src_interface}
  290. interface on the @var{src_node} node to the @var{dst_interface} interface
  291. on the @var{dst_node} node. They return 0 on success.
  292. @item @code{int (*@{ram,cuda@}_to_@{ram,cuda@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
  293. These 3 functions (@code{ram_to_ram} is not among these) define how to copy
  294. data from the @var{src_interface} interface on the @var{src_node} node to the
  295. @var{dst_interface} interface on the @var{dst_node} node, using the given
  296. @var{stream}. Must return 0 if the transfer was actually completed
  297. synchronously, or -EAGAIN if at least some transfers are still ongoing and
  298. should be awaited by the core.
  299. @item @code{int (*@{ram,opencl@}_to_@{ram,opencl@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
  300. These 3 functions (@code{ram_to_ram} is not among them) define how to copy
  301. data from the @var{src_interface} interface on the @var{src_node} node to the
  302. @var{dst_interface} interface on the @var{dst_node} node, by recording in
  303. @var{event}, a pointer to a cl_event, the event of the last submitted transfer.
  304. Must return 0 if the transfer was actually completed synchronously,
  305. or -EAGAIN if at least some transfers are still ongoing and should be awaited
  306. by the core.
  307. @item @code{int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)}
  308. Define how to copy data from the @var{src_interface} interface on the
  309. @var{src_node} node to the @var{dst_interface} interface on the @var{dst_node}
  310. node. This is meant to be implemented through the @var{starpu_interface_copy}
  311. helper, to which @var{async_data} should be passed as such, and will be used to
  312. manage asynchronicity. This must return -EAGAIN if any of the
  313. @var{starpu_interface_copy} calls has returned -EAGAIN (i.e. at least some
  314. transfer is still ongoing), and return 0 otherwise.
  315. @end table
  316. @end deftp
  317. @deftypefun int starpu_interface_copy (uintptr_t @var{src}, size_t @var{src_offset}, unsigned @var{src_node}, uintptr_t @var{dst}, size_t @var{dst_offset}, unsigned @var{dst_node}, size_t @var{size}, {void *}@var{async_data})
  318. Copy @var{size} bytes from byte offset @var{src_offset} of @var{src} on
  319. @var{src_node} to byte offset @var{dst_offset} of @var{dst} on @var{dst_node}.
  320. This is to be used in the @var{any_to_any} copy method, which is provided with
  321. the @var{async_data} to be passed to @var{starpu_interface_copy}. This returns
  322. -EAGAIN if the transfer is still ongoing, or 0 if the transfer is already
  323. completed.
  324. @end deftypefun
  325. @deftypefun uint32_t starpu_crc32_be_n ({void *}@var{input}, size_t @var{n}, uint32_t @var{inputcrc})
  326. Compute the CRC of a byte buffer seeded by the inputcrc "current
  327. state". The return value should be considered as the new "current
  328. state" for future CRC computation. This is used for computing data size
  329. footprint.
  330. @end deftypefun
  331. @deftypefun uint32_t starpu_crc32_be (uint32_t @var{input}, uint32_t @var{inputcrc})
  332. Compute the CRC of a 32bit number seeded by the inputcrc "current
  333. state". The return value should be considered as the new "current
  334. state" for future CRC computation. This is used for computing data size
  335. footprint.
  336. @end deftypefun
  337. @deftypefun uint32_t starpu_crc32_string ({char *}@var{str}, uint32_t @var{inputcrc})
  338. Compute the CRC of a string seeded by the inputcrc "current state".
  339. The return value should be considered as the new "current state" for
  340. future CRC computation. This is used for computing data size footprint.
  341. @end deftypefun
  342. @node An example of data interface
  343. @subsection An example of data interface
  344. @deftypefun int starpu_data_interface_get_next_id (void)
  345. Returns the next available id for a newly created data interface.
  346. @end deftypefun
  347. Let's define a new data interface to manage complex numbers.
  348. @cartouche
  349. @smallexample
  350. /* interface for complex numbers */
  351. struct starpu_complex_interface
  352. @{
  353. double *real;
  354. double *imaginary;
  355. int nx;
  356. @};
  357. @end smallexample
  358. @end cartouche
  359. Registering such data with StarPU is easily done using the function
  360. @code{starpu_data_register} (@pxref{Basic Data Management API}). The last
  361. parameter of the function, @code{interface_complex_ops}, will be
  362. described below.
  363. @cartouche
  364. @smallexample
  365. void starpu_complex_data_register(starpu_data_handle_t *handle,
  366. unsigned home_node, double *real, double *imaginary, int nx)
  367. @{
  368. struct starpu_complex_interface complex =
  369. @{
  370. .real = real,
  371. .imaginary = imaginary,
  372. .nx = nx
  373. @};
  374. if (interface_complex_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID)
  375. @{
  376. interface_complex_ops.interfaceid = starpu_data_interface_get_next_id();
  377. @}
  378. starpu_data_register(handle, home_node, &complex, &interface_complex_ops);
  379. @}
  380. @end smallexample
  381. @end cartouche
  382. Different operations need to be defined for a data interface through
  383. the type @code{struct starpu_data_interface_ops} (@pxref{Data
  384. Interface API}). We only define here the basic operations needed to
  385. run simple applications. The source code for the different functions
  386. can be found in the file
  387. @code{examples/interface/complex_interface.c}.
  388. @cartouche
  389. @smallexample
  390. static struct starpu_data_interface_ops interface_complex_ops =
  391. @{
  392. .register_data_handle = complex_register_data_handle,
  393. .allocate_data_on_node = complex_allocate_data_on_node,
  394. .copy_methods = &complex_copy_methods,
  395. .get_size = complex_get_size,
  396. .footprint = complex_footprint,
  397. .interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
  398. .interface_size = sizeof(struct starpu_complex_interface),
  399. @};
  400. @end smallexample
  401. @end cartouche
  402. Functions need to be defined to access the different fields of the
  403. complex interface from a StarPU data handle.
  404. @cartouche
  405. @smallexample
  406. double *starpu_complex_get_real(starpu_data_handle_t handle)
  407. @{
  408. struct starpu_complex_interface *complex_interface =
  409. (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
  410. return complex_interface->real;
  411. @}
  412. double *starpu_complex_get_imaginary(starpu_data_handle_t handle);
  413. int starpu_complex_get_nx(starpu_data_handle_t handle);
  414. @end smallexample
  415. @end cartouche
  416. Similar functions need to be defined to access the different fields of the
  417. complex interface from a @code{void *} pointer to be used within codelet
  418. implementations.
  419. @cartouche
  420. @smallexample
  421. #define STARPU_COMPLEX_GET_REAL(interface) \
  422. (((struct starpu_complex_interface *)(interface))->real)
  423. #define STARPU_COMPLEX_GET_IMAGINARY(interface) \
  424. (((struct starpu_complex_interface *)(interface))->imaginary)
  425. #define STARPU_COMPLEX_GET_NX(interface) \
  426. (((struct starpu_complex_interface *)(interface))->nx)
  427. @end smallexample
  428. @end cartouche
  429. Complex data interfaces can then be registered to StarPU.
  430. @cartouche
  431. @smallexample
  432. double real = 45.0;
  433. double imaginary = 12.0;
  434. starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
  435. starpu_insert_task(&cl_display, STARPU_R, handle1, 0);
  436. @end smallexample
  437. @end cartouche
  438. and used by codelets.
  439. @cartouche
  440. @smallexample
  441. void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
  442. @{
  443. int nx = STARPU_COMPLEX_GET_NX(descr[0]);
  444. double *real = STARPU_COMPLEX_GET_REAL(descr[0]);
  445. double *imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);
  446. int i;
  447. for(i=0 ; i<nx ; i++)
  448. @{
  449. fprintf(stderr, "Complex[%d] = %3.2f + %3.2f i\n", i, real[i], imaginary[i]);
  450. @}
  451. @}
  452. @end smallexample
  453. @end cartouche
  454. The whole code for this complex data interface is available in the
  455. directory @code{examples/interface/}.
  456. @node Multiformat Data Interface
  457. @section Multiformat Data Interface
  458. @deftp {Data Type} {struct starpu_multiformat_data_interface_ops}
  459. The different fields are:
  460. @table @asis
  461. @item @code{size_t cpu_elemsize}
  462. the size of each element on CPUs,
  463. @item @code{size_t opencl_elemsize}
  464. the size of each element on OpenCL devices,
  465. @item @code{struct starpu_codelet *cpu_to_opencl_cl}
  466. pointer to a codelet which converts from CPU to OpenCL
  467. @item @code{struct starpu_codelet *opencl_to_cpu_cl}
  468. pointer to a codelet which converts from OpenCL to CPU
  469. @item @code{size_t cuda_elemsize}
  470. the size of each element on CUDA devices,
  471. @item @code{struct starpu_codelet *cpu_to_cuda_cl}
  472. pointer to a codelet which converts from CPU to CUDA
  473. @item @code{struct starpu_codelet *cuda_to_cpu_cl}
  474. pointer to a codelet which converts from CUDA to CPU
  475. @end table
  476. @end deftp
  477. @deftypefun void starpu_multiformat_data_register (starpu_data_handle_t *@var{handle}, unsigned @var{home_node}, void *@var{ptr}, uint32_t @var{nobjects}, struct starpu_multiformat_data_interface_ops *@var{format_ops})
  478. Register a piece of data that can be represented in different ways, depending upon
  479. the processing unit that manipulates it. It allows the programmer, for instance, to
  480. use an array of structures when working on a CPU, and a structure of arrays when
  481. working on a GPU.
  482. @var{nobjects} is the number of elements in the data. @var{format_ops} describes
  483. the format.
  484. @end deftypefun
  485. @defmac STARPU_MULTIFORMAT_GET_CPU_PTR ({void *}@var{interface})
  486. returns the local pointer to the data with CPU format.
  487. @end defmac
  488. @defmac STARPU_MULTIFORMAT_GET_CUDA_PTR ({void *}@var{interface})
  489. returns the local pointer to the data with CUDA format.
  490. @end defmac
  491. @defmac STARPU_MULTIFORMAT_GET_OPENCL_PTR ({void *}@var{interface})
  492. returns the local pointer to the data with OpenCL format.
  493. @end defmac
  494. @defmac STARPU_MULTIFORMAT_GET_NX ({void *}@var{interface})
  495. returns the number of elements in the data.
  496. @end defmac
  497. @node Task Bundles
  498. @section Task Bundles
  499. @deftp {Data Type} {starpu_task_bundle_t}
  500. Opaque structure describing a list of tasks that should be scheduled
  501. on the same worker whenever it's possible. It must be considered as a
  502. hint given to the scheduler as there is no guarantee that they will be
  503. executed on the same worker.
  504. @end deftp
  505. @deftypefun void starpu_task_bundle_create ({starpu_task_bundle_t *}@var{bundle})
  506. Factory function creating and initializing @var{bundle}. When the call returns, the needed memory is allocated and @var{bundle} is ready to use.
  507. @end deftypefun
  508. @deftypefun int starpu_task_bundle_insert (starpu_task_bundle_t @var{bundle}, {struct starpu_task *}@var{task})
  509. Insert @var{task} in @var{bundle}. Until @var{task} is removed from @var{bundle}, its expected length and data transfer time will be considered along with those of the other tasks of @var{bundle}.
  510. This function mustn't be called if @var{bundle} is already closed and/or @var{task} is already submitted.
  511. @end deftypefun
  512. @deftypefun int starpu_task_bundle_remove (starpu_task_bundle_t @var{bundle}, {struct starpu_task *}@var{task})
  513. Remove @var{task} from @var{bundle}.
  514. Of course @var{task} must have been previously inserted in @var{bundle}.
  515. This function mustn't be called if @var{bundle} is already closed and/or @var{task} is already submitted. Doing so would result in undefined behaviour.
  516. @end deftypefun
  517. @deftypefun void starpu_task_bundle_close (starpu_task_bundle_t @var{bundle})
  518. Inform the runtime that the user won't modify @var{bundle} anymore, i.e. no more tasks will be inserted or removed. Thus the runtime can destroy it when possible.
  519. @end deftypefun
  520. @deftypefun double starpu_task_bundle_expected_length (starpu_task_bundle_t @var{bundle}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  521. Return the expected duration of the entire task bundle in µs.
  522. @end deftypefun
  523. @deftypefun double starpu_task_bundle_expected_power (starpu_task_bundle_t @var{bundle}, enum starpu_perf_archtype @var{arch}, unsigned @var{nimpl})
  524. Return the expected power consumption of the entire task bundle in J.
  525. @end deftypefun
  526. @deftypefun double starpu_task_bundle_expected_data_transfer_time (starpu_task_bundle_t @var{bundle}, unsigned @var{memory_node})
  527. Return the time (in µs) expected to transfer all data used within the bundle.
  528. @end deftypefun
  529. @node Task Lists
  530. @section Task Lists
  531. @deftp {Data Type} {struct starpu_task_list}
  532. Stores a double-chained list of tasks
  533. @end deftp
  534. @deftypefun void starpu_task_list_init ({struct starpu_task_list *}@var{list})
  535. Initialize a list structure
  536. @end deftypefun
  537. @deftypefun void starpu_task_list_push_front ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
  538. Push a task at the front of a list
  539. @end deftypefun
  540. @deftypefun void starpu_task_list_push_back ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
  541. Push a task at the back of a list
  542. @end deftypefun
  543. @deftypefun {struct starpu_task *} starpu_task_list_front ({struct starpu_task_list *}@var{list})
  544. Get the front of the list (without removing it)
  545. @end deftypefun
  546. @deftypefun {struct starpu_task *} starpu_task_list_back ({struct starpu_task_list *}@var{list})
  547. Get the back of the list (without removing it)
  548. @end deftypefun
  549. @deftypefun int starpu_task_list_empty ({struct starpu_task_list *}@var{list})
  550. Test if a list is empty
  551. @end deftypefun
  552. @deftypefun void starpu_task_list_erase ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
  553. Remove an element from the list
  554. @end deftypefun
  555. @deftypefun {struct starpu_task *} starpu_task_list_pop_front ({struct starpu_task_list *}@var{list})
  556. Remove the element at the front of the list
  557. @end deftypefun
  558. @deftypefun {struct starpu_task *} starpu_task_list_pop_back ({struct starpu_task_list *}@var{list})
  559. Remove the element at the back of the list
  560. @end deftypefun
  561. @deftypefun {struct starpu_task *} starpu_task_list_begin ({struct starpu_task_list *}@var{list})
  562. Get the first task of the list.
  563. @end deftypefun
  564. @deftypefun {struct starpu_task *} starpu_task_list_end ({struct starpu_task_list *}@var{list})
  565. Get the end of the list.
  566. @end deftypefun
  567. @deftypefun {struct starpu_task *} starpu_task_list_next ({struct starpu_task *}@var{task})
  568. Get the next task of the list. This is not erase-safe.
  569. @end deftypefun
  570. @node Using Parallel Tasks
  571. @section Using Parallel Tasks
  572. These are used by parallel tasks:
  573. @deftypefun int starpu_combined_worker_get_size (void)
  574. Return the size of the current combined worker, i.e. the total number of cpus
  575. running the same task in the case of SPMD parallel tasks, or the total number
  576. of threads that the task is allowed to start in the case of FORKJOIN parallel
  577. tasks.
  578. @end deftypefun
  579. @deftypefun int starpu_combined_worker_get_rank (void)
  580. Return the rank of the current thread within the combined worker. Can only be
  581. used in SPMD parallel tasks, to know which part of the task to work on.
  582. @end deftypefun
  583. Most of these are used for schedulers which support parallel tasks.
  584. @deftypefun unsigned starpu_combined_worker_get_count (void)
  585. Return the number of different combined workers.
  586. @end deftypefun
  587. @deftypefun int starpu_combined_worker_get_id (void)
  588. Return the identifier of the current combined worker.
  589. @end deftypefun
  590. @deftypefun int starpu_combined_worker_assign_workerid (int @var{nworkers}, int @var{workerid_array}[])
  591. Register a new combined worker and get its identifier
  592. @end deftypefun
  593. @deftypefun int starpu_combined_worker_get_description (int @var{workerid}, {int *}@var{worker_size}, {int **}@var{combined_workerid})
  594. Get the description of a combined worker
  595. @end deftypefun
  596. @deftypefun int starpu_combined_worker_can_execute_task (unsigned @var{workerid}, {struct starpu_task *}@var{task}, unsigned @var{nimpl})
  597. Variant of starpu_worker_can_execute_task compatible with combined workers
  598. @end deftypefun
  599. @deftp {Data Type} {struct starpu_machine_topology}
  600. @table @asis
  601. @item @code{unsigned nworkers}
  602. Total number of workers.
  603. @item @code{unsigned ncombinedworkers}
  604. Total number of combined workers.
  605. @item @code{hwloc_topology_t hwtopology}
  606. Topology as detected by hwloc.
  607. To maintain ABI compatibility when hwloc is not available, the field
  608. is replaced with @code{void *dummy}
  609. @item @code{unsigned nhwcpus}
  610. Total number of CPUs, as detected by the topology code. May be different from
  611. the actual number of CPU workers.
  612. @item @code{unsigned nhwcudagpus}
  613. Total number of CUDA devices, as detected. May be different from the actual
  614. number of CUDA workers.
  615. @item @code{unsigned nhwopenclgpus}
  616. Total number of OpenCL devices, as detected. May be different from the actual
  617. number of OpenCL workers.
  618. @item @code{unsigned ncpus}
  619. Actual number of CPU workers used by StarPU.
  620. @item @code{unsigned ncudagpus}
  621. Actual number of CUDA workers used by StarPU.
  622. @item @code{unsigned nopenclgpus}
  623. Actual number of OpenCL workers used by StarPU.
  624. @item @code{unsigned workers_bindid[STARPU_NMAXWORKERS]}
  625. Indicates the successive cpu identifier that should be used to bind the
  626. workers. It is either filled according to the user's explicit
  627. parameters (from starpu_conf) or according to the STARPU_WORKERS_CPUID env.
  628. variable. Otherwise, a round-robin policy is used to distribute the workers
  629. over the cpus.
  630. @item @code{unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]}
  631. Indicates the successive CUDA device identifier that should be used by the CUDA
  632. driver. It is either filled according to the user's explicit parameters (from
  633. starpu_conf) or according to the STARPU_WORKERS_CUDAID env. variable. Otherwise,
  634. they are taken in ID order.
  635. @item @code{unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]}
  636. Indicates the successive OpenCL device identifier that should be used by the OpenCL
  637. driver. It is either filled according to the user's explicit parameters (from
  638. starpu_conf) or according to the STARPU_WORKERS_OPENCLID env. variable. Otherwise,
  639. they are taken in ID order.
  640. @end table
  641. @end deftp
  642. @node Scheduling Contexts
  643. @section Scheduling Contexts
  644. StarPU permits on one hand grouping workers in combined workers in order to execute a parallel task and on the other hand grouping tasks in bundles that will be executed by a single specified worker.
  645. In contrast when we group workers in scheduling contexts we submit starpu tasks to them and we schedule them with the policy assigned to the context.
  646. Scheduling contexts can be created, deleted and modified dynamically.
  647. @deftypefun unsigned starpu_sched_ctx_create (const char *@var{policy_name}, int *@var{workerids_ctx}, int @var{nworkers_ctx}, const char *@var{sched_ctx_name})
  648. This function creates a scheduling context which uses the scheduling policy indicated in the first argument and assigns the workers indicated in the second argument to execute the tasks submitted to it.
  649. The return value represents the identifier of the context that has just been created. It will be further used to indicate the context the tasks will be submitted to. The return value should be at most @code{STARPU_NMAX_SCHED_CTXS}.
  650. @end deftypefun
  651. @deftypefun void starpu_sched_ctx_delete (unsigned @var{sched_ctx_id})
  652. Delete scheduling context @var{sched_ctx_id} and transfer remaining workers to the inheritor scheduling context.
  653. @end deftypefun
  654. @deftypefun void starpu_sched_ctx_add_workers ({int *}@var{workerids_ctx}, int @var{nworkers_ctx}, unsigned @var{sched_ctx_id})
  655. This function adds dynamically the workers indicated in the first argument to the context indicated in the last argument. The last argument cannot be greater than @code{STARPU_NMAX_SCHED_CTXS}.
  656. @end deftypefun
  657. @deftypefun void starpu_sched_ctx_remove_workers ({int *}@var{workerids_ctx}, int @var{nworkers_ctx}, unsigned @var{sched_ctx_id})
  658. This function removes the workers indicated in the first argument from the context indicated in the last argument. The last argument cannot be greater than @code{STARPU_NMAX_SCHED_CTXS}.
  659. @end deftypefun
  660. A scheduling context manages a collection of workers that can be memorized using different data structures. Thus, a generic structure is available in order to simplify the choice of its type.
  661. Only the list data structure is currently available, but implementations of further data structures (like trees) are foreseen.
  662. @deftp {Data Type} {struct starpu_worker_collection}
  663. @table @asis
  664. @item @code{void *workerids}
  665. The workerids managed by the collection
  666. @item @code{unsigned nworkers}
  667. The number of workerids
  668. @item @code{pthread_key_t cursor_key} (optional)
  669. The cursor needed to iterate the collection (depending on the data structure)
  670. @item @code{int type}
  671. The type of structure (currently STARPU_WORKER_LIST is the only one available)
  672. @item @code{unsigned (*has_next)(struct starpu_worker_collection *workers)}
  673. Checks if there is a next worker
  674. @item @code{int (*get_next)(struct starpu_worker_collection *workers)}
  675. Gets the next worker
  676. @item @code{int (*add)(struct starpu_worker_collection *workers, int worker)}
  677. Adds a worker to the collection
  678. @item @code{int (*remove)(struct starpu_worker_collection *workers, int worker)}
  679. Removes a worker from the collection
  680. @item @code{void* (*init)(struct starpu_worker_collection *workers)}
  681. Initialize the collection
  682. @item @code{void (*deinit)(struct starpu_worker_collection *workers)}
  683. Deinitialize the collection
  684. @item @code{void (*init_cursor)(struct starpu_worker_collection *workers)} (optional)
  685. Initialize the cursor if there is one
  686. @item @code{void (*deinit_cursor)(struct starpu_worker_collection *workers)} (optional)
  687. Deinitialize the cursor if there is one
  688. @end table
  689. @end deftp
  690. @deftypefun struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection (unsigned @var{sched_ctx_id}, int @var{type})
  691. Create a worker collection of the type indicated by the last parameter for the context specified through the first parameter.
  692. @end deftypefun
  693. @deftypefun void starpu_sched_ctx_delete_worker_collection (unsigned @var{sched_ctx_id})
  694. Delete the worker collection of the specified scheduling context
  695. @end deftypefun
  696. @deftypefun struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection (unsigned @var{sched_ctx_id})
  697. Return the worker collection managed by the indicated context
  698. @end deftypefun
  699. @deftypefun pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex (unsigned @var{sched_ctx_id})
  700. TODO
  701. @end deftypefun
  702. @deftypefun void starpu_sched_ctx_set_context (unsigned *@var{sched_ctx_id})
  703. Set the scheduling context the subsequent tasks will be submitted to
  704. @end deftypefun
  705. @deftypefun unsigned starpu_sched_ctx_get_context (void)
  706. Return the scheduling context the tasks are currently submitted to
  707. @end deftypefun
  708. @deftypefun unsigned starpu_sched_ctx_get_nworkers (unsigned @var{sched_ctx_id})
  709. Return the number of workers managed by the specified context
  710. (Usually needed to verify if it manages any workers or if it should be blocked)
  711. @end deftypefun
  712. @deftypefun unsigned starpu_sched_ctx_get_nshared_workers (unsigned @var{sched_ctx_id}, unsigned @var{sched_ctx_id2})
  713. Return the number of workers shared by two contexts
  714. @end deftypefun
  715. @node Defining a new scheduling policy
  716. @section Defining a new scheduling policy
  717. TODO
  718. A full example showing how to define a new scheduling policy is available in
  719. the StarPU sources in the directory @code{examples/scheduler/}.
  720. @menu
  721. * Scheduling Policy API:: Scheduling Policy API
  722. * Source code::
  723. @end menu
  724. @node Scheduling Policy API
  725. @subsection Scheduling Policy API
  726. While StarPU comes with a variety of scheduling policies (@pxref{Task
  727. scheduling policy}), it may sometimes be desirable to implement custom
  728. policies to address specific problems. The API described below allows
  729. users to write their own scheduling policy.
  730. @deftp {Data Type} {struct starpu_sched_policy}
  731. This structure contains all the methods that implement a scheduling policy. An
  732. application may specify which scheduling strategy to use in the @code{sched_policy}
  733. field of the @code{starpu_conf} structure passed to the @code{starpu_init}
  734. function. The different fields are:
  735. @table @asis
  736. @item @code{void (*init_sched)(unsigned sched_ctx_id)}
  737. Initialize the scheduling policy.
  738. @item @code{void (*deinit_sched)(unsigned sched_ctx_id)}
  739. Cleanup the scheduling policy.
  740. @item @code{int (*push_task)(struct starpu_task *)}
  741. Insert a task into the scheduler.
  742. @item @code{void (*push_task_notify)(struct starpu_task *, int workerid)}
  743. Notify the scheduler that a task was pushed on a given worker. This method is
  744. called when a task that was explicitly assigned to a worker becomes ready and
  745. is about to be executed by the worker. This method therefore permits to keep
  746. the state of the scheduler coherent even when StarPU bypasses the scheduling
  747. strategy.
  748. @item @code{struct starpu_task *(*pop_task)(unsigned sched_ctx_id)} (optional)
  749. Get a task from the scheduler. The mutex associated to the worker is already
  750. taken when this method is called. If this method is defined as @code{NULL}, the
  751. worker will only execute tasks from its local queue. In this case, the
  752. @code{push_task} method should use the @code{starpu_push_local_task} method to
  753. assign tasks to the different workers.
  754. @item @code{struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id)}
  755. Remove all available tasks from the scheduler (tasks are chained by the means
  756. of the prev and next fields of the starpu_task structure). The mutex associated
  757. to the worker is already taken when this method is called. This is currently
  758. not used.
  759. @item @code{void (*pre_exec_hook)(struct starpu_task *)} (optional)
  760. This method is called every time a task is starting.
  761. @item @code{void (*post_exec_hook)(struct starpu_task *)} (optional)
  762. This method is called every time a task has been executed.
  763. @item @code{void (*add_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers)}
  764. Initialize scheduling structures corresponding to each worker used by the policy.
  765. @item @code{void (*remove_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers)}
  766. Deinitialize scheduling structures corresponding to each worker used by the policy.
  767. @item @code{const char *policy_name} (optional)
  768. Name of the policy.
  769. @item @code{const char *policy_description} (optional)
  770. Description of the policy.
  771. @end table
  772. @end deftp
  773. @deftypefun {struct starpu_sched_policy **} starpu_sched_get_predefined_policies ()
  774. Return a NULL-terminated array of all the predefined scheduling policies.
  775. @end deftypefun
  776. @deftypefun void starpu_sched_ctx_set_worker_mutex_and_cond (unsigned @var{sched_ctx_id}, int @var{workerid}, pthread_mutex_t *@var{sched_mutex}, {pthread_cond_t *}@var{sched_cond})
  777. This function specifies the condition variable associated to a worker per context
  778. When there is no available task for a worker, StarPU blocks this worker on a
  779. condition variable. This function specifies which condition variable (and the
  780. associated mutex) should be used to block (and to wake up) a worker. Note that
  781. multiple workers may use the same condition variable. For instance, in the case
  782. of a scheduling strategy with a single task queue, the same condition variable
  783. would be used to block and wake up all workers.
  784. The initialization method of a scheduling strategy (@code{init_sched}) must
  785. call this function once per worker.
  786. @end deftypefun
  787. @deftypefun void starpu_sched_ctx_get_worker_mutex_and_cond (unsigned @var{sched_ctx_id}, int @var{workerid}, {pthread_mutex_t **}@var{sched_mutex}, {pthread_cond_t **}@var{sched_cond})
  788. This function returns the mutex and the condition variable associated to a worker in a context.
  789. It is used in the policy to access the local queue of the worker.
  790. @end deftypefun
  791. @deftypefun void starpu_sched_ctx_set_policy_data (unsigned @var{sched_ctx_id}, {void *} @var{policy_data})
  792. Each scheduling policy uses some specific data (queues, variables, additional condition variables).
  793. It is memorized through a local structure. This function assigns it to a scheduling context.
  794. @end deftypefun
  795. @deftypefun void* starpu_sched_ctx_get_policy_data (unsigned @var{sched_ctx_id})
  796. Returns the policy data previously assigned to a context
  797. @end deftypefun
  798. @deftypefun void starpu_sched_set_min_priority (int @var{min_prio})
  799. Defines the minimum priority level supported by the scheduling policy. The
  800. default minimum priority level is the same as the default priority level which
  801. is 0 by convention. The application may access that value by calling the
  802. @code{starpu_sched_get_min_priority} function. This function should only be
  803. called from the initialization method of the scheduling policy, and should not
  804. be used directly from the application.
  805. @end deftypefun
  806. @deftypefun void starpu_sched_set_max_priority (int @var{max_prio})
  807. Defines the maximum priority level supported by the scheduling policy. The
  808. default maximum priority level is 1. The application may access that value by
  809. calling the @code{starpu_sched_get_max_priority} function. This function should
  810. only be called from the initialization method of the scheduling policy, and
  811. should not be used directly from the application.
  812. @end deftypefun
  813. @deftypefun int starpu_sched_get_min_priority (void)
  814. Returns the current minimum priority level supported by the
  815. scheduling policy
  816. @end deftypefun
  817. @deftypefun int starpu_sched_get_max_priority (void)
  818. Returns the current maximum priority level supported by the
  819. scheduling policy
  820. @end deftypefun
  821. @deftypefun int starpu_push_local_task (int @var{workerid}, {struct starpu_task} *@var{task}, int @var{back})
  822. The scheduling policy may put tasks directly into a worker's local queue so
  823. that it is not always necessary to create its own queue when the local queue
  824. is sufficient. If @var{back} is not null, @var{task} is put at the back of the queue
  825. where the worker will pop tasks first. Setting @var{back} to 0 therefore ensures
  826. a FIFO ordering.
  827. @end deftypefun
  828. @deftypefun int starpu_push_task_end ({struct starpu_task} *@var{task})
  829. This function must be called by a scheduler to notify that the given
  830. task has just been pushed.
  831. @end deftypefun
  832. @deftypefun int starpu_worker_can_execute_task (unsigned @var{workerid}, {struct starpu_task *}@var{task}, unsigned @var{nimpl})
  833. Check if the worker specified by workerid can execute the codelet. Schedulers need to call it before assigning a task to a worker, otherwise the task may fail to execute.
  834. @end deftypefun
  835. @deftypefun double starpu_timing_now (void)
  836. Return the current date in µs
  837. @end deftypefun
  838. @deftypefun uint32_t starpu_task_footprint ({struct starpu_perfmodel *}@var{model}, {struct starpu_task *} @var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  839. Returns the footprint for a given task
  840. @end deftypefun
  841. @deftypefun double starpu_task_expected_length ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  842. Returns expected task duration in µs
  843. @end deftypefun
  844. @deftypefun double starpu_worker_get_relative_speedup ({enum starpu_perf_archtype} @var{perf_archtype})
  845. Returns an estimated speedup factor relative to CPU speed
  846. @end deftypefun
  847. @deftypefun double starpu_task_expected_data_transfer_time (unsigned @var{memory_node}, {struct starpu_task *}@var{task})
  848. Returns expected data transfer time in µs
  849. @end deftypefun
  850. @deftypefun double starpu_data_expected_transfer_time (starpu_data_handle_t @var{handle}, unsigned @var{memory_node}, {enum starpu_access_mode} @var{mode})
  851. Predict the transfer time (in µs) to move a handle to a memory node
  852. @end deftypefun
  853. @deftypefun double starpu_task_expected_power ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  854. Returns expected power consumption in J
  855. @end deftypefun
  856. @deftypefun double starpu_task_expected_conversion_time ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  857. Returns expected conversion time in ms (multiformat interface only)
  858. @end deftypefun
  859. @node Source code
  860. @subsection Source code
  861. @cartouche
  862. @smallexample
  863. static struct starpu_sched_policy dummy_sched_policy = @{
  864. .init_sched = init_dummy_sched,
  865. .deinit_sched = deinit_dummy_sched,
  866. .add_workers = dummy_sched_add_workers,
  867. .remove_workers = dummy_sched_remove_workers,
  868. .push_task = push_task_dummy,
  869. .push_prio_task = NULL,
  870. .pop_task = pop_task_dummy,
  871. .post_exec_hook = NULL,
  872. .pop_every_task = NULL,
  873. .policy_name = "dummy",
  874. .policy_description = "dummy scheduling strategy"
  875. @};
  876. @end smallexample
  877. @end cartouche
  878. @node Running drivers
  879. @section Running drivers
  880. @menu
  881. * Driver API::
  882. * Example::
  883. @end menu
  884. @node Driver API
  885. @subsection Driver API
  886. @deftypefun int starpu_driver_run ({struct starpu_driver *}@var{d})
  887. Initialize the given driver, run it until it receives a request to terminate,
  888. deinitialize it and return 0 on success. It returns -EINVAL if @code{d->type}
  889. is not a valid StarPU device type (STARPU_CPU_WORKER, STARPU_CUDA_WORKER or
  890. STARPU_OPENCL_WORKER). This is the same as using the following
  891. functions: calling @code{starpu_driver_init()}, then calling
  892. @code{starpu_driver_run_once()} in a loop, and finally
  893. @code{starpu_driver_deinit()}.
  894. @end deftypefun
  895. @deftypefun int starpu_driver_init (struct starpu_driver *@var{d})
  896. Initialize the given driver. Returns 0 on success, -EINVAL if
  897. @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
  898. STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
  899. @end deftypefun
  900. @deftypefun int starpu_driver_run_once (struct starpu_driver *@var{d})
  901. Run the driver once, then return 0 on success, -EINVAL if
  902. @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
  903. STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
  904. @end deftypefun
  905. @deftypefun int starpu_driver_deinit (struct starpu_driver *@var{d})
  906. Deinitialize the given driver. Returns 0 on success, -EINVAL if
  907. @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
  908. STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
  909. @end deftypefun
  910. @deftypefun void starpu_drivers_request_termination (void)
  911. Notify all running drivers they should terminate.
  912. @end deftypefun
  913. @node Example
  914. @subsection Example
  915. @cartouche
  916. @smallexample
  917. int ret;
  918. struct starpu_driver d = @{
  919. .type = STARPU_CUDA_WORKER,
  920. .id.cuda_id = 0
  921. @};
  922. ret = starpu_driver_init(&d);
  923. if (ret != 0)
  924. error();
  925. while (some_condition) @{
  926. ret = starpu_driver_run_once(&d);
  927. if (ret != 0)
  928. error();
  929. @}
  930. ret = starpu_driver_deinit(&d);
  931. if (ret != 0)
  932. error();
  933. @end smallexample
  934. @end cartouche
  935. @node Expert mode
  936. @section Expert mode
  937. @deftypefun void starpu_wake_all_blocked_workers (void)
  938. Wake all the workers, so they can inspect data requests and task submissions
  939. again.
  940. @end deftypefun
  941. @deftypefun int starpu_progression_hook_register (unsigned (*@var{func})(void *arg), void *@var{arg})
  942. Register a progression hook, to be called when workers are idle.
  943. @end deftypefun
  944. @deftypefun void starpu_progression_hook_deregister (int @var{hook_id})
  945. Unregister a given progression hook.
  946. @end deftypefun