advanced-api.texi 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. @c -*-texinfo-*-
  2. @c This file is part of the StarPU Handbook.
  3. @c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
  4. @c Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. @c Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
  6. @c See the file starpu.texi for copying conditions.
  7. @menu
  8. * Defining a new data interface::
  9. * Multiformat Data Interface::
  10. * Task Bundles::
  11. * Task Lists::
  12. * Using Parallel Tasks::
  13. * Scheduling Contexts::
  14. * Defining a new scheduling policy::
  15. * Running drivers::
  16. * Expert mode::
  17. @end menu
  18. @node Defining a new data interface
  19. @section Defining a new data interface
  20. @menu
  21. * Data Interface API:: Data Interface API
  22. * An example of data interface:: An example of data interface
  23. @end menu
  24. @node Data Interface API
  25. @subsection Data Interface API
  26. @deftp {Data Type} {struct starpu_data_interface_ops}
  27. @anchor{struct starpu_data_interface_ops}
  28. Per-interface data transfer methods.
  29. @table @asis
  30. @item @code{void (*register_data_handle)(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)}
  31. Register an existing interface into a data handle.
  32. @item @code{starpu_ssize_t (*allocate_data_on_node)(void *data_interface, uint32_t node)}
  33. Allocate data for the interface on a given node.
  34. @item @code{ void (*free_data_on_node)(void *data_interface, uint32_t node)}
  35. Free data of the interface on a given node.
  36. @item @code{ const struct starpu_data_copy_methods *copy_methods}
  37. ram/cuda/spu/opencl synchronous and asynchronous transfer methods.
  38. @item @code{ void * (*handle_to_pointer)(starpu_data_handle_t handle, uint32_t node)}
  39. Return the current pointer (if any) for the handle on the given node.
  40. @item @code{ size_t (*get_size)(starpu_data_handle_t handle)}
  41. Return an estimation of the size of data, for performance models.
  42. @item @code{ uint32_t (*footprint)(starpu_data_handle_t handle)}
  43. Return a 32bit footprint which characterizes the data size.
  44. @item @code{ int (*compare)(void *data_interface_a, void *data_interface_b)}
  45. Compare the data size of two interfaces.
  46. @item @code{ void (*display)(starpu_data_handle_t handle, FILE *f)}
  47. Dump the sizes of a handle to a file.
  48. @item @code{ int (*convert_to_gordon)(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)}
  49. Convert the data size to the spu size format. If no SPUs are used, this field can be set to NULL.
  50. @item @code{enum starpu_data_interface_id interfaceid}
  51. An identifier that is unique to each interface.
  52. @item @code{size_t interface_size}
  53. The size of the interface data descriptor.
  54. @item @code{int is_multiformat}
  55. todo
  56. @item @code{struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface)}
  57. todo
  58. @item @code{int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr)}
  59. Pack the data handle into a contiguous buffer at the address @code{ptr}
  60. @item @code{int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr)}
  61. Unpack the data handle from the contiguous buffer at the address @code{ptr}
  62. @end table
  63. @end deftp
  64. @deftp {Data Type} {struct starpu_data_copy_methods}
  65. Defines the per-interface methods.
  66. @table @asis
  67. @item @code{int @{ram,cuda,opencl,spu@}_to_@{ram,cuda,opencl,spu@}(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
  68. These 16 functions define how to copy data from the @var{src_interface}
  69. interface on the @var{src_node} node to the @var{dst_interface} interface
  70. on the @var{dst_node} node. They return 0 on success.
  71. @item @code{int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
  72. Define how to copy data from the @var{src_interface} interface on the
  73. @var{src_node} node (in RAM) to the @var{dst_interface} interface on the
  74. @var{dst_node} node (on a CUDA device), using the given @var{stream}. Return 0
  75. on success.
  76. @item @code{int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
  77. Define how to copy data from the @var{src_interface} interface on the
  78. @var{src_node} node (on a CUDA device) to the @var{dst_interface} interface on the
  79. @var{dst_node} node (in RAM), using the given @var{stream}. Return 0
  80. on success.
  81. @item @code{int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)}
  82. Define how to copy data from the @var{src_interface} interface on the
  83. @var{src_node} node (on a CUDA device) to the @var{dst_interface} interface on
  84. the @var{dst_node} node (on another CUDA device), using the given @var{stream}.
  85. Return 0 on success.
  86. @item @code{int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
  87. Define how to copy data from the @var{src_interface} interface on the
  88. @var{src_node} node (in RAM) to the @var{dst_interface} interface on the
  89. @var{dst_node} node (on an OpenCL device), using @var{event}, a pointer to a
  90. cl_event. Return 0 on success.
  91. @item @code{int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
  92. Define how to copy data from the @var{src_interface} interface on the
  93. @var{src_node} node (on an OpenCL device) to the @var{dst_interface} interface
  94. on the @var{dst_node} node (in RAM), using the given @var{event}, a pointer to
  95. a cl_event. Return 0 on success.
  96. @item @code{int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, /* cl_event * */ void *event)}
  97. Define how to copy data from the @var{src_interface} interface on the
  98. @var{src_node} node (on an OpenCL device) to the @var{dst_interface} interface
  99. on the @var{dst_node} node (on another OpenCL device), using the given
  100. @var{event}, a pointer to a cl_event. Return 0 on success.
  101. @end table
  102. @end deftp
  103. @deftypefun uint32_t starpu_crc32_be_n ({void *}@var{input}, size_t @var{n}, uint32_t @var{inputcrc})
  104. Compute the CRC of a byte buffer seeded by the inputcrc "current
  105. state". The return value should be considered as the new "current
  106. state" for future CRC computation. This is used for computing data size
  107. footprint.
  108. @end deftypefun
  109. @deftypefun uint32_t starpu_crc32_be (uint32_t @var{input}, uint32_t @var{inputcrc})
  110. Compute the CRC of a 32bit number seeded by the inputcrc "current
  111. state". The return value should be considered as the new "current
  112. state" for future CRC computation. This is used for computing data size
  113. footprint.
  114. @end deftypefun
  115. @deftypefun uint32_t starpu_crc32_string ({char *}@var{str}, uint32_t @var{inputcrc})
  116. Compute the CRC of a string seeded by the inputcrc "current state".
  117. The return value should be considered as the new "current state" for
  118. future CRC computation. This is used for computing data size footprint.
  119. @end deftypefun
  120. @node An example of data interface
  121. @subsection An example of data interface
  122. @deftypefun int starpu_data_interface_get_next_id (void)
  123. Returns the next available id for a newly created data interface.
  124. @end deftypefun
  125. Let's define a new data interface to manage complex numbers.
  126. @cartouche
  127. @smallexample
  128. /* interface for complex numbers */
  129. struct starpu_complex_interface
  130. @{
  131. double *real;
  132. double *imaginary;
  133. int nx;
  134. @};
  135. @end smallexample
  136. @end cartouche
  137. Registering such a data to StarPU is easily done using the function
  138. @code{starpu_data_register} (@pxref{Basic Data Management API}). The last
  139. parameter of the function, @code{interface_complex_ops}, will be
  140. described below.
  141. @cartouche
  142. @smallexample
  143. void starpu_complex_data_register(starpu_data_handle_t *handle,
  144. uint32_t home_node, double *real, double *imaginary, int nx)
  145. @{
  146. struct starpu_complex_interface complex =
  147. @{
  148. .real = real,
  149. .imaginary = imaginary,
  150. .nx = nx
  151. @};
  152. if (interface_complex_ops.interfaceid == -1)
  153. @{
  154. interface_complex_ops.interfaceid = starpu_data_interface_get_next_id();
  155. @}
  156. starpu_data_register(handle, home_node, &complex, &interface_complex_ops);
  157. @}
  158. @end smallexample
  159. @end cartouche
  160. Different operations need to be defined for a data interface through
  161. the type @code{struct starpu_data_interface_ops} (@pxref{Data
  162. Interface API}). We only define here the basic operations needed to
  163. run simple applications. The source code for the different functions
  164. can be found in the file
  165. @code{examples/interface/complex_interface.c}.
  166. @cartouche
  167. @smallexample
  168. static struct starpu_data_interface_ops interface_complex_ops =
  169. @{
  170. .register_data_handle = complex_register_data_handle,
  171. .allocate_data_on_node = complex_allocate_data_on_node,
  172. .copy_methods = &complex_copy_methods,
  173. .get_size = complex_get_size,
  174. .footprint = complex_footprint,
  175. .interfaceid = -1,
  176. .interface_size = sizeof(struct starpu_complex_interface),
  177. @};
  178. @end smallexample
  179. @end cartouche
  180. Functions need to be defined to access the different fields of the
  181. complex interface from a StarPU data handle.
  182. @cartouche
  183. @smallexample
  184. double *starpu_complex_get_real(starpu_data_handle_t handle)
  185. @{
  186. struct starpu_complex_interface *complex_interface =
  187. (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, 0);
  188. return complex_interface->real;
  189. @}
  190. double *starpu_complex_get_imaginary(starpu_data_handle_t handle);
  191. int starpu_complex_get_nx(starpu_data_handle_t handle);
  192. @end smallexample
  193. @end cartouche
  194. Similar functions need to be defined to access the different fields of the
  195. complex interface from a @code{void *} pointer to be used within codelet
  196. implementations.
  197. @cartouche
  198. @smallexample
  199. #define STARPU_COMPLEX_GET_REAL(interface) \
  200. (((struct starpu_complex_interface *)(interface))->real)
  201. #define STARPU_COMPLEX_GET_IMAGINARY(interface) \
  202. (((struct starpu_complex_interface *)(interface))->imaginary)
  203. #define STARPU_COMPLEX_GET_NX(interface) \
  204. (((struct starpu_complex_interface *)(interface))->nx)
  205. @end smallexample
  206. @end cartouche
  207. Complex data interfaces can then be registered to StarPU.
  208. @cartouche
  209. @smallexample
  210. double real = 45.0;
  211. double imaginary = 12.0;
  212. starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
  213. starpu_insert_task(&cl_display, STARPU_R, handle1, 0);
  214. @end smallexample
  215. @end cartouche
  216. and used by codelets.
  217. @cartouche
  218. @smallexample
  219. void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
  220. @{
  221. int nx = STARPU_COMPLEX_GET_NX(descr[0]);
  222. double *real = STARPU_COMPLEX_GET_REAL(descr[0]);
  223. double *imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);
  224. int i;
  225. for(i=0 ; i<nx ; i++)
  226. @{
  227. fprintf(stderr, "Complex[%d] = %3.2f + %3.2f i\n", i, real[i], imaginary[i]);
  228. @}
  229. @}
  230. @end smallexample
  231. @end cartouche
  232. The whole code for this complex data interface is available in the
  233. directory @code{examples/interface/}.
  234. @node Multiformat Data Interface
  235. @section Multiformat Data Interface
  236. @deftp {Data Type} {struct starpu_multiformat_data_interface_ops}
  237. The different fields are:
  238. @table @asis
  239. @item @code{size_t cpu_elemsize}
  240. the size of each element on CPUs,
  241. @item @code{size_t opencl_elemsize}
  242. the size of each element on OpenCL devices,
  243. @item @code{struct starpu_codelet *cpu_to_opencl_cl}
  244. pointer to a codelet which converts from CPU to OpenCL
  245. @item @code{struct starpu_codelet *opencl_to_cpu_cl}
  246. pointer to a codelet which converts from OpenCL to CPU
  247. @item @code{size_t cuda_elemsize}
  248. the size of each element on CUDA devices,
  249. @item @code{struct starpu_codelet *cpu_to_cuda_cl}
  250. pointer to a codelet which converts from CPU to CUDA
  251. @item @code{struct starpu_codelet *cuda_to_cpu_cl}
  252. pointer to a codelet which converts from CUDA to CPU
  253. @end table
  254. @end deftp
  255. @deftypefun void starpu_multiformat_data_register (starpu_data_handle_t *@var{handle}, uint32_t @var{home_node}, void *@var{ptr}, uint32_t @var{nobjects}, struct starpu_multiformat_data_interface_ops *@var{format_ops})
  256. Register a piece of data that can be represented in different ways, depending upon
  257. the processing unit that manipulates it. It allows the programmer, for instance, to
  258. use an array of structures when working on a CPU, and a structure of arrays when
  259. working on a GPU.
  260. @var{nobjects} is the number of elements in the data. @var{format_ops} describes
  261. the format.
  262. @end deftypefun
  263. @defmac STARPU_MULTIFORMAT_GET_CPU_PTR ({void *}@var{interface})
  264. returns the local pointer to the data with CPU format.
  265. @end defmac
  266. @defmac STARPU_MULTIFORMAT_GET_CUDA_PTR ({void *}@var{interface})
  267. returns the local pointer to the data with CUDA format.
  268. @end defmac
  269. @defmac STARPU_MULTIFORMAT_GET_OPENCL_PTR ({void *}@var{interface})
  270. returns the local pointer to the data with OpenCL format.
  271. @end defmac
  272. @defmac STARPU_MULTIFORMAT_GET_NX ({void *}@var{interface})
  273. returns the number of elements in the data.
  274. @end defmac
  275. @node Task Bundles
  276. @section Task Bundles
  277. @deftp {Data Type} {starpu_task_bundle_t}
  278. Opaque structure describing a list of tasks that should be scheduled
  279. on the same worker whenever it's possible. It must be considered as a
  280. hint given to the scheduler as there is no guarantee that they will be
  281. executed on the same worker.
  282. @end deftp
  283. @deftypefun void starpu_task_bundle_create ({starpu_task_bundle_t *}@var{bundle})
  284. Factory function creating and initializing @var{bundle}. When the call returns, the needed memory is allocated and @var{bundle} is ready to use.
  285. @end deftypefun
  286. @deftypefun int starpu_task_bundle_insert (starpu_task_bundle_t @var{bundle}, {struct starpu_task *}@var{task})
  287. Insert @var{task} in @var{bundle}. Until @var{task} is removed from @var{bundle} its expected length and data transfer time will be considered along those of the other tasks of @var{bundle}.
  288. This function mustn't be called if @var{bundle} is already closed and/or @var{task} is already submitted.
  289. @end deftypefun
  290. @deftypefun int starpu_task_bundle_remove (starpu_task_bundle_t @var{bundle}, {struct starpu_task *}@var{task})
  291. Remove @var{task} from @var{bundle}.
  292. Of course @var{task} must have been previously inserted in @var{bundle}.
  293. This function mustn't be called if @var{bundle} is already closed and/or @var{task} is already submitted. Doing so would result in undefined behaviour.
  294. @end deftypefun
  295. @deftypefun void starpu_task_bundle_close (starpu_task_bundle_t @var{bundle})
  296. Inform the runtime that the user won't modify @var{bundle} anymore, i.e. no more tasks will be inserted or removed. Thus the runtime can destroy it when possible.
  297. @end deftypefun
  298. @deftypefun double starpu_task_bundle_expected_length (starpu_task_bundle_t @var{bundle}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  299. Return the expected duration of the entire task bundle in µs.
  300. @end deftypefun
  301. @deftypefun double starpu_task_bundle_expected_power (starpu_task_bundle_t @var{bundle}, enum starpu_perf_archtype @var{arch}, unsigned @var{nimpl})
  302. Return the expected power consumption of the entire task bundle in J.
  303. @end deftypefun
  304. @deftypefun double starpu_task_bundle_expected_data_transfer_time (starpu_task_bundle_t @var{bundle}, unsigned @var{memory_node})
  305. Return the time (in µs) expected to transfer all data used within the bundle.
  306. @end deftypefun
  307. @node Task Lists
  308. @section Task Lists
  309. @deftp {Data Type} {struct starpu_task_list}
  310. Stores a doubly-linked list of tasks.
  311. @end deftp
  312. @deftypefun void starpu_task_list_init ({struct starpu_task_list *}@var{list})
  313. Initialize a list structure
  314. @end deftypefun
  315. @deftypefun void starpu_task_list_push_front ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
  316. Push a task at the front of a list
  317. @end deftypefun
  318. @deftypefun void starpu_task_list_push_back ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
  319. Push a task at the back of a list
  320. @end deftypefun
  321. @deftypefun {struct starpu_task *} starpu_task_list_front ({struct starpu_task_list *}@var{list})
  322. Get the front of the list (without removing it)
  323. @end deftypefun
  324. @deftypefun {struct starpu_task *} starpu_task_list_back ({struct starpu_task_list *}@var{list})
  325. Get the back of the list (without removing it)
  326. @end deftypefun
  327. @deftypefun int starpu_task_list_empty ({struct starpu_task_list *}@var{list})
  328. Test if a list is empty
  329. @end deftypefun
  330. @deftypefun void starpu_task_list_erase ({struct starpu_task_list *}@var{list}, {struct starpu_task *}@var{task})
  331. Remove an element from the list
  332. @end deftypefun
  333. @deftypefun {struct starpu_task *} starpu_task_list_pop_front ({struct starpu_task_list *}@var{list})
  334. Remove the element at the front of the list
  335. @end deftypefun
  336. @deftypefun {struct starpu_task *} starpu_task_list_pop_back ({struct starpu_task_list *}@var{list})
  337. Remove the element at the back of the list
  338. @end deftypefun
  339. @deftypefun {struct starpu_task *} starpu_task_list_begin ({struct starpu_task_list *}@var{list})
  340. Get the first task of the list.
  341. @end deftypefun
  342. @deftypefun {struct starpu_task *} starpu_task_list_end ({struct starpu_task_list *}@var{list})
  343. Get the end of the list.
  344. @end deftypefun
  345. @deftypefun {struct starpu_task *} starpu_task_list_next ({struct starpu_task *}@var{task})
  346. Get the next task of the list. This is not erase-safe.
  347. @end deftypefun
  348. @node Using Parallel Tasks
  349. @section Using Parallel Tasks
  350. Workers are grouped considering the topology of the machine in order to permit the opaque execution of parallel tasks.
  351. @deftp {Data Type} {struct starpu_machine_topology}
  352. @table @asis
  353. @item @code{unsigned nworkers}
  354. Total number of workers.
  355. @item @code{unsigned ncombinedworkers}
  356. Total number of combined workers.
  357. @item @code{hwloc_topology_t hwtopology}
  358. Topology as detected by hwloc.
  359. To maintain ABI compatibility when hwloc is not available, the field
  360. is replaced with @code{void *dummy}
  361. @item @code{unsigned nhwcpus}
  362. Total number of CPUs, as detected by the topology code. May be different from
  363. the actual number of CPU workers.
  364. @item @code{unsigned nhwcudagpus}
  365. Total number of CUDA devices, as detected. May be different from the actual
  366. number of CUDA workers.
  367. @item @code{unsigned nhwopenclgpus}
  368. Total number of OpenCL devices, as detected. May be different from the actual
  369. number of OpenCL workers.
  370. @item @code{unsigned ncpus}
  371. Actual number of CPU workers used by StarPU.
  372. @item @code{unsigned ncudagpus}
  373. Actual number of CUDA workers used by StarPU.
  374. @item @code{unsigned nopenclgpus}
  375. Actual number of OpenCL workers used by StarPU.
  376. @item @code{unsigned ngordon_spus}
  377. Actual number of Gordon workers used by StarPU.
  378. @item @code{unsigned workers_bindid[STARPU_NMAXWORKERS]}
  379. Indicates the successive cpu identifier that should be used to bind the
  380. workers. It is either filled according to the user's explicit
  381. parameters (from starpu_conf) or according to the STARPU_WORKERS_CPUID env.
  382. variable. Otherwise, a round-robin policy is used to distribute the workers
  383. over the cpus.
  384. @item @code{unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]}
  385. Indicates the successive CUDA device identifiers that should be used by the CUDA
  386. driver. It is either filled according to the user's explicit parameters (from
  387. starpu_conf) or according to the STARPU_WORKERS_CUDAID env. variable. Otherwise,
  388. they are taken in ID order.
  389. @item @code{unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]}
  390. Indicates the successive OpenCL device identifiers that should be used by the OpenCL
  391. driver. It is either filled according to the user's explicit parameters (from
  392. starpu_conf) or according to the STARPU_WORKERS_OPENCLID env. variable. Otherwise,
  393. they are taken in ID order.
  394. @end table
  395. @end deftp
  396. @deftypefun int starpu_combined_worker_get_size (void)
  397. Return the size of the current combined worker, i.e. the total number of cpus
  398. running the same task in the case of SPMD parallel tasks, or the total number
  399. of threads that the task is allowed to start in the case of FORKJOIN parallel
  400. tasks.
  401. @end deftypefun
  402. @deftypefun int starpu_combined_worker_get_rank (void)
  403. Return the rank of the current thread within the combined worker. Can only be
  404. used in FORKJOIN parallel tasks, to know which part of the task to work on.
  405. @end deftypefun
  406. Most of these are used for schedulers which support parallel tasks.
  407. @deftypefun unsigned starpu_combined_worker_get_count (void)
  408. Return the number of different combined workers.
  409. @end deftypefun
  410. @deftypefun int starpu_combined_worker_get_id (void)
  411. Return the identifier of the current combined worker.
  412. @end deftypefun
  413. @deftypefun int starpu_combined_worker_assign_workerid (int @var{nworkers}, int @var{workerid_array}[])
  414. Register a new combined worker and get its identifier
  415. @end deftypefun
  416. @deftypefun int starpu_combined_worker_get_description (int @var{workerid}, {int *}@var{worker_size}, {int **}@var{combined_workerid})
  417. Get the description of a combined worker
  418. @end deftypefun
  419. @deftypefun int starpu_combined_worker_can_execute_task (unsigned @var{workerid}, {struct starpu_task *}@var{task}, unsigned @var{nimpl})
  420. Variant of starpu_worker_can_execute_task compatible with combined workers
  421. @end deftypefun
  422. @node Scheduling Contexts
  423. @section Scheduling Contexts
  424. StarPU permits on one hand grouping workers in combined workers in order to execute a parallel task and on the other hand grouping tasks in bundles that will be executed by a single specified worker.
  425. In contrast when we group workers in scheduling contexts we submit starpu tasks to them and we schedule them with the policy assigned to the context.
  426. Scheduling contexts can be created, deleted and modified dynamically.
  427. @deftypefun unsigned starpu_create_sched_ctx (const char *@var{policy_name}, int *@var{workerids_ctx}, int @var{nworkers_ctx}, const char *@var{sched_ctx_name})
  428. This function creates a scheduling context which uses the scheduling policy indicated in the first argument and assigns the workers indicated in the second argument to execute the tasks submitted to it.
  429. The return value represents the identifier of the context that has just been created. It will be further used to indicate the context the tasks will be submitted to. The return value should be at most @code{STARPU_NMAX_SCHED_CTXS}.
  430. @end deftypefun
  431. @deftypefun void starpu_delete_sched_ctx (unsigned @var{sched_ctx_id}, unsigned @var{inheritor_sched_ctx_id})
  432. Delete scheduling context @var{sched_ctx_id} and let scheduling context @var{inheritor_sched_ctx_id} take over its workers.
  433. @end deftypefun
  434. @deftypefun void starpu_add_workers_to_sched_ctx ({int *}@var{workerids_ctx}, int @var{nworkers_ctx}, unsigned @var{sched_ctx})
  435. This function adds dynamically the workers indicated in the first argument to the context indicated in the last argument. The last argument cannot be greater than @code{STARPU_NMAX_SCHED_CTXS}.
  436. @end deftypefun
  437. @deftypefun void starpu_remove_workers_from_sched_ctx ({int *}@var{workerids_ctx}, int @var{nworkers_ctx}, unsigned @var{sched_ctx})
  438. This function removes the workers indicated in the first argument from the context indicated in the last argument. The last argument cannot be greater than @code{STARPU_NMAX_SCHED_CTXS}.
  439. @end deftypefun
  440. A scheduling context manages a collection of workers that can be memorized using different data structures. Thus, a generic structure is available in order to simplify the choice of its type.
  441. Only the list data structure is currently available, but implementations of further data structures (such as trees) are foreseen.
  442. @deftp {Data Type} {struct worker_collection}
  443. @table @asis
  444. @item @code{void *workerids}
  445. The workerids managed by the collection
  446. @item @code{unsigned nworkers}
  447. The number of workerids
  448. @item @code{pthread_key_t cursor_key} (optional)
  449. The cursor needed to iterate the collection (depending on the data structure)
  450. @item @code{int type}
  451. The type of structure (currently WORKER_LIST is the only one available)
  452. @item @code{unsigned (*has_next)(struct worker_collection *workers)}
  453. Checks if there is a next worker
  454. @item @code{int (*get_next)(struct worker_collection *workers)}
  455. Gets the next worker
  456. @item @code{int (*add)(struct worker_collection *workers, int worker)}
  457. Adds a worker to the collection
  458. @item @code{int (*remove)(struct worker_collection *workers, int worker)}
  459. Removes a worker from the collection
  460. @item @code{void* (*init)(struct worker_collection *workers)}
  461. Initialize the collection
  462. @item @code{void (*deinit)(struct worker_collection *workers)}
  463. Deinitialize the collection
  464. @item @code{void (*init_cursor)(struct worker_collection *workers)} (optional)
  465. Initialize the cursor if there is one
  466. @item @code{void (*deinit_cursor)(struct worker_collection *workers)} (optional)
  467. Deinitialize the cursor if there is one
  468. @end table
  469. @end deftp
  470. @deftypefun struct worker_collection* starpu_create_worker_collection_for_sched_ctx (unsigned @var{sched_ctx_id}, int @var{type})
  471. Creates a worker collection of the type indicated by the last parameter for the context specified through the first parameter.
  472. @end deftypefun
  473. @deftypefun void starpu_delete_worker_collection_for_sched_ctx (unsigned @var{sched_ctx_id})
  474. Deletes the worker collection of the specified scheduling context
  475. @end deftypefun
  476. @deftypefun struct worker_collection* starpu_get_worker_collection_of_sched_ctx (unsigned @var{sched_ctx_id})
  477. Returns the worker collection managed by the indicated context
  478. @end deftypefun
  479. @deftypefun pthread_mutex_t* starpu_get_changing_ctx_mutex (unsigned @var{sched_ctx_id})
  480. @end deftypefun
  481. @deftypefun void starpu_set_sched_ctx (unsigned *@var{sched_ctx})
  482. Sets the scheduling context the task will be submitted to
  483. @end deftypefun
  484. @deftypefun unsigned starpu_get_sched_ctx (void)
  485. Returns the scheduling context the tasks are currently submitted to
  486. @end deftypefun
  487. @deftypefun unsigned starpu_get_nworkers_of_sched_ctx (unsigned @var{sched_ctx})
  488. Returns the number of workers managed by the specified context
  489. (Usually needed to verify if it manages any workers or if it should be blocked)
  490. @end deftypefun
  491. @deftypefun unsigned starpu_get_nshared_workers (unsigned @var{sched_ctx_id}, unsigned @var{sched_ctx_id2})
  492. Returns the number of workers shared by two contexts
  493. @end deftypefun
  494. @node Defining a new scheduling policy
  495. @section Defining a new scheduling policy
  496. TODO
  497. A full example showing how to define a new scheduling policy is available in
  498. the StarPU sources in the directory @code{examples/scheduler/}.
  499. @menu
  500. * Scheduling Policy API:: Scheduling Policy API
  501. * Source code::
  502. @end menu
  503. @node Scheduling Policy API
  504. @subsection Scheduling Policy API
  505. While StarPU comes with a variety of scheduling policies (@pxref{Task
  506. scheduling policy}), it may sometimes be desirable to implement custom
  507. policies to address specific problems. The API described below allows
  508. users to write their own scheduling policy.
  509. @deftp {Data Type} {struct starpu_sched_policy}
  510. This structure contains all the methods that implement a scheduling policy. An
  511. application may specify which scheduling strategy to use in the @code{sched_policy}
  512. field of the @code{starpu_conf} structure passed to the @code{starpu_init}
  513. function. The different fields are:
  514. @table @asis
  515. @item @code{void (*init_sched)(unsigned sched_ctx_id)}
  516. Initialize the scheduling policy.
  517. @item @code{void (*deinit_sched)(unsigned sched_ctx_id)}
  518. Cleanup the scheduling policy.
  519. @item @code{int (*push_task)(struct starpu_task *)}
  520. Insert a task into the scheduler.
  521. @item @code{void (*push_task_notify)(struct starpu_task *, int workerid)}
  522. Notify the scheduler that a task was pushed on a given worker. This method is
  523. called when a task that was explicitly assigned to a worker becomes ready and
  524. is about to be executed by the worker. This method therefore makes it possible to keep
  525. the state of the scheduler coherent even when StarPU bypasses the scheduling
  526. strategy.
  527. @item @code{struct starpu_task *(*pop_task)(unsigned sched_ctx_id)} (optional)
  528. Get a task from the scheduler. The mutex associated to the worker is already
  529. taken when this method is called. If this method is defined as @code{NULL}, the
  530. worker will only execute tasks from its local queue. In this case, the
  531. @code{push_task} method should use the @code{starpu_push_local_task} method to
  532. assign tasks to the different workers.
  533. @item @code{struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id)}
  534. Remove all available tasks from the scheduler (tasks are chained by the means
  535. of the prev and next fields of the starpu_task structure). The mutex associated
  536. to the worker is already taken when this method is called. This is currently
  537. only used by the Gordon driver.
  538. @item @code{void (*pre_exec_hook)(struct starpu_task *)} (optional)
  539. This method is called every time a task is starting.
  540. @item @code{void (*post_exec_hook)(struct starpu_task *)} (optional)
  541. This method is called every time a task has been executed.
  542. @item @code{void (*add_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers)}
  543. Initialize scheduling structures corresponding to each worker used by the policy.
  544. @item @code{void (*remove_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers)}
  545. Deinitialize scheduling structures corresponding to each worker used by the policy.
  546. @item @code{const char *policy_name} (optional)
  547. Name of the policy.
  548. @item @code{const char *policy_description} (optional)
  549. Description of the policy.
  550. @end table
  551. @end deftp
  552. @deftypefun void starpu_worker_set_sched_condition (unsigned @var{sched_ctx_id}, int @var{workerid}, {pthread_cond_t *}@var{sched_cond}, pthread_mutex_t *@var{sched_mutex})
  553. This function specifies the condition variable associated to a worker per context.
  554. When there is no available task for a worker, StarPU blocks this worker on a
  555. condition variable. This function specifies which condition variable (and the
  556. associated mutex) should be used to block (and to wake up) a worker. Note that
  557. multiple workers may use the same condition variable. For instance, in the case
  558. of a scheduling strategy with a single task queue, the same condition variable
  559. would be used to block and wake up all workers.
  560. The initialization method of a scheduling strategy (@code{init_sched}) must
  561. call this function once per worker.
  562. @end deftypefun
  563. @deftypefun void starpu_worker_get_sched_condition (unsigned @var{sched_ctx_id}, int @var{workerid}, {pthread_cond_t **}@var{sched_cond}, {pthread_mutex_t **}@var{sched_mutex})
  564. This function returns the condition variable and mutex associated to a worker in a context.
  565. It is used in the policy to access the local queue of the worker.
  566. @end deftypefun
  567. @deftypefun void starpu_set_sched_ctx_policy_data (unsigned @var{sched_ctx}, {void*} @var{policy_data})
  568. Each scheduling policy uses some specific data (queues, variables, additional condition variables).
  569. It is stored in a local structure. This function assigns it to a scheduling context.
  570. @end deftypefun
  571. @deftypefun void* starpu_get_sched_ctx_policy_data (unsigned @var{sched_ctx})
  572. Returns the policy data previously assigned to a context
  573. @end deftypefun
  574. @deftypefun void starpu_sched_set_min_priority (int @var{min_prio})
  575. Defines the minimum priority level supported by the scheduling policy. The
  576. default minimum priority level is the same as the default priority level which
  577. is 0 by convention. The application may access that value by calling the
  578. @code{starpu_sched_get_min_priority} function. This function should only be
  579. called from the initialization method of the scheduling policy, and should not
  580. be used directly from the application.
  581. @end deftypefun
  582. @deftypefun void starpu_sched_set_max_priority (int @var{max_prio})
  583. Defines the maximum priority level supported by the scheduling policy. The
  584. default maximum priority level is 1. The application may access that value by
  585. calling the @code{starpu_sched_get_max_priority} function. This function should
  586. only be called from the initialization method of the scheduling policy, and
  587. should not be used directly from the application.
  588. @end deftypefun
  589. @deftypefun int starpu_sched_get_min_priority (void)
  590. Returns the current minimum priority level supported by the
  591. scheduling policy
  592. @end deftypefun
  593. @deftypefun int starpu_sched_get_max_priority (void)
  594. Returns the current maximum priority level supported by the
  595. scheduling policy
  596. @end deftypefun
  597. @deftypefun int starpu_push_local_task (int @var{workerid}, {struct starpu_task} *@var{task}, int @var{back})
  598. The scheduling policy may put tasks directly into a worker's local queue so
  599. that it is not always necessary to create its own queue when the local queue
  600. is sufficient. If @var{back} is not null, @var{task} is put at the back of the queue
  601. where the worker will pop tasks first. Setting @var{back} to 0 therefore ensures
  602. a FIFO ordering.
  603. @end deftypefun
  604. @deftypefun int starpu_worker_can_execute_task (unsigned @var{workerid}, {struct starpu_task *}@var{task}, unsigned @var{nimpl})
  605. Check if the worker specified by workerid can execute the codelet. Schedulers need to call it before assigning a task to a worker, otherwise the task may fail to execute.
  606. @end deftypefun
  607. @deftypefun double starpu_timing_now (void)
  608. Return the current date in µs
  609. @end deftypefun
  610. @deftypefun double starpu_task_expected_length ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  611. Returns expected task duration in µs
  612. @end deftypefun
  613. @deftypefun double starpu_worker_get_relative_speedup ({enum starpu_perf_archtype} @var{perf_archtype})
  614. Returns an estimated speedup factor relative to CPU speed
  615. @end deftypefun
  616. @deftypefun double starpu_task_expected_data_transfer_time (uint32_t @var{memory_node}, {struct starpu_task *}@var{task})
  617. Returns expected data transfer time in µs
  618. @end deftypefun
  619. @deftypefun double starpu_data_expected_transfer_time (starpu_data_handle_t @var{handle}, unsigned @var{memory_node}, {enum starpu_access_mode} @var{mode})
  620. Predict the transfer time (in µs) to move a handle to a memory node
  621. @end deftypefun
  622. @deftypefun double starpu_task_expected_power ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  623. Returns expected power consumption in J
  624. @end deftypefun
  625. @deftypefun double starpu_task_expected_conversion_time ({struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl})
  626. Returns expected conversion time in ms (multiformat interface only)
  627. @end deftypefun
  628. @node Source code
  629. @subsection Source code
  630. @cartouche
  631. @smallexample
  632. static struct starpu_sched_policy dummy_sched_policy = @{
  633. .init_sched = init_dummy_sched,
  634. .deinit_sched = deinit_dummy_sched,
  635. .add_workers = dummy_sched_add_workers,
  636. .remove_workers = dummy_sched_remove_workers,
  637. .push_task = push_task_dummy,
  638. .push_prio_task = NULL,
  639. .pop_task = pop_task_dummy,
  640. .post_exec_hook = NULL,
  641. .pop_every_task = NULL,
  642. .policy_name = "dummy",
  643. .policy_description = "dummy scheduling strategy"
  644. @};
  645. @end smallexample
  646. @end cartouche
  648. @node Running drivers
  649. @section Running drivers
  650. @menu
  651. * Driver API::
  652. * Example::
  653. @end menu
  654. @node Driver API
  655. @subsection Driver API
  656. @deftypefun int starpu_driver_run ({struct starpu_driver *}@var{d})
  657. Initialize the given driver, run it until it receives a request to terminate,
  658. deinitialize it and return 0 on success. It returns -EINVAL if @code{d->type}
  659. is not a valid StarPU device type (STARPU_CPU_WORKER, STARPU_CUDA_WORKER or
  660. STARPU_OPENCL_WORKER). This is the same as using the following
  661. functions: calling @code{starpu_driver_init()}, then calling
  662. @code{starpu_driver_run_once()} in a loop, and finally
  663. @code{starpu_driver_deinit()}.
  664. @end deftypefun
  665. @deftypefun int starpu_driver_init (struct starpu_driver *@var{d})
  666. Initialize the given driver. Returns 0 on success, -EINVAL if
  667. @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
  668. STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
  669. @end deftypefun
  670. @deftypefun int starpu_driver_run_once (struct starpu_driver *@var{d})
  671. Runs the driver once, then returns 0 on success, -EINVAL if
  672. @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
  673. STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
  674. @end deftypefun
  675. @deftypefun int starpu_driver_deinit (struct starpu_driver *@var{d})
  676. Deinitialize the given driver. Returns 0 on success, -EINVAL if
  677. @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
  678. STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
  679. @end deftypefun
  680. @deftypefun void starpu_drivers_request_termination (void)
  681. Notify all running drivers they should terminate.
  682. @end deftypefun
  683. @node Example
  684. @subsection Example
  685. @cartouche
  686. @smallexample
  687. int ret;
  688. struct starpu_driver d = @{
  689. .type = STARPU_CUDA_WORKER,
  690. .id.cuda_id = 0
  691. @};
  692. ret = starpu_driver_init(&d);
  693. if (ret != 0)
  694. error();
  695. while (some_condition) @{
  696. ret = starpu_driver_run_once(&d);
  697. if (ret != 0)
  698. error();
  699. @}
  700. ret = starpu_driver_deinit(&d);
  701. if (ret != 0)
  702. error();
  703. @end smallexample
  704. @end cartouche
  749. @node Expert mode
  750. @section Expert mode
  751. @deftypefun void starpu_wake_all_blocked_workers (void)
  752. Wake all the workers, so they can inspect data requests and task submissions
  753. again.
  754. @end deftypefun
  755. @deftypefun int starpu_progression_hook_register (unsigned (*@var{func})(void *arg), void *@var{arg})
  756. Register a progression hook, to be called when workers are idle.
  757. @end deftypefun
  758. @deftypefun void starpu_progression_hook_deregister (int @var{hook_id})
  759. Unregister a given progression hook.
  760. @end deftypefun