starpu_worker.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2013 Thibaut Lambert
  5. * Copyright (C) 2016 Uppsala University
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #ifndef __STARPU_WORKER_H__
  19. #define __STARPU_WORKER_H__
  20. #include <stdlib.h>
  21. #include <starpu_config.h>
  22. #include <starpu_thread.h>
  23. #include <starpu_task.h>
  24. #ifdef STARPU_HAVE_HWLOC
  25. #include <hwloc.h>
  26. #endif
  27. #ifdef __cplusplus
  28. extern "C"
  29. {
  30. #endif
  31. /**
  32. @defgroup API_Workers_Properties Workers’ Properties
  33. @{
  34. */
  35. /**
  36. Memory node type: the kind of memory a node represents, as returned by starpu_node_get_kind().
  37. */
  38. enum starpu_node_kind
  39. {
  40. STARPU_UNUSED=0, /**< NOTE(review): presumably marks a node that is not in use -- confirm */
  41. STARPU_CPU_RAM=1,
  42. STARPU_CUDA_RAM=2,
  43. STARPU_OPENCL_RAM=3,
  44. STARPU_DISK_RAM=4,
  45. STARPU_MPI_MS_RAM=5,
  46. STARPU_MAX_RAM=5 /**< Same value as ::STARPU_MPI_MS_RAM, the highest kind defined here; presumably an upper bound to keep in sync when a kind is added (TODO confirm) */
  47. };
  48. /**
  49. Worker architecture type.
  50. The value 4, formerly used by the SCC driver, is left unassigned:
  51. renumbering the worker types would make old performance model
  52. files unusable. The value 3 is not assigned in this enum either.
  53. */
  54. enum starpu_worker_archtype
  55. {
  56. STARPU_CPU_WORKER=0, /**< CPU core */
  57. STARPU_CUDA_WORKER=1, /**< NVIDIA CUDA device */
  58. STARPU_OPENCL_WORKER=2, /**< OpenCL device */
  59. STARPU_MPI_MS_WORKER=5, /**< MPI Slave device */
  60. STARPU_NARCH = 6, /**< Number of arch types: one past the highest assigned value (5), so the unassigned values 3 and 4 are counted too */
  61. STARPU_ANY_WORKER=255 /**< any worker, used in the hypervisor */
  62. };
  63. /**
  64. Iteration state needed to walk a struct starpu_worker_collection; it is the \c it argument of the collection's has_next, get_next and init_iterator callbacks.
  65. */
  66. struct starpu_sched_ctx_iterator
  67. {
  68. /**
  69. The index of the current worker in the collection, needed
  70. when iterating on the collection.
  71. */
  72. int cursor;
  73. void *value; /**< NOTE(review): opaque pointer, not documented in this header; presumably implementation-specific iteration state -- confirm */
  74. void *possible_value; /**< NOTE(review): opaque pointer, not documented in this header -- confirm its role against the collection implementations */
  75. char visited[STARPU_NMAXWORKERS]; /**< STARPU_NMAXWORKERS entries; presumably indexed by worker id to flag workers already visited -- TODO confirm */
  76. int possibly_parallel; /**< NOTE(review): presumably non-zero when iterating on behalf of a parallel task -- confirm */
  77. };
  78. /**
  79. Types of structures the worker collection can implement; stored in the \c type field of struct starpu_worker_collection.
  80. */
  81. enum starpu_worker_collection_type
  82. {
  83. STARPU_WORKER_TREE, /**< The collection is a tree */
  84. STARPU_WORKER_LIST /**< The collection is an array */
  85. };
  86. /**
  87. A scheduling context manages a collection of workers that can be
  88. stored using different data structures. Thus, a generic
  89. structure is available in order to simplify the choice of its type.
  90. The \c type field records which structure is used; both a list
  91. (array) and a tree are enumerated by ::starpu_worker_collection_type.
  92. */
  93. struct starpu_worker_collection
  94. {
  95. /**
  96. The workerids managed by the collection
  97. */
  98. int *workerids;
  99. void *collection_private; /**< Opaque pointer; presumably private data of the collection implementation -- TODO confirm */
  100. /**
  101. The number of workers in the collection
  102. */
  103. unsigned nworkers;
  104. void *unblocked_workers; /**< NOTE(review): undocumented; presumably the unblocked workers of the collection -- confirm */
  105. unsigned nunblocked_workers; /**< NOTE(review): presumably the number of entries behind unblocked_workers -- confirm */
  106. void *masters; /**< NOTE(review): undocumented; presumably the master workers of the collection -- confirm */
  107. unsigned nmasters; /**< NOTE(review): presumably the number of entries behind masters -- confirm */
  108. char present[STARPU_NMAXWORKERS]; /**< STARPU_NMAXWORKERS entries; presumably a per-worker-id membership flag -- TODO confirm */
  109. char is_unblocked[STARPU_NMAXWORKERS]; /**< STARPU_NMAXWORKERS entries; presumably a per-worker-id "unblocked" flag -- TODO confirm */
  110. char is_master[STARPU_NMAXWORKERS]; /**< STARPU_NMAXWORKERS entries; presumably a per-worker-id "master" flag -- TODO confirm */
  111. /**
  112. The type of structure
  113. */
  114. enum starpu_worker_collection_type type;
  115. /**
  116. Check if there is another element in the collection
  117. */
  118. unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
  119. /**
  120. Return the next element in the collection
  121. */
  122. int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
  123. /**
  124. Add a new element in the collection
  125. */
  126. int (*add)(struct starpu_worker_collection *workers, int worker);
  127. /**
  128. Remove an element from the collection
  129. */
  130. int (*remove)(struct starpu_worker_collection *workers, int worker);
  131. /**
  132. Initialize the collection
  133. */
  134. void (*init)(struct starpu_worker_collection *workers);
  135. /**
  136. Deinitialize the collection
  137. */
  138. void (*deinit)(struct starpu_worker_collection *workers);
  139. /**
  140. Initialize the cursor if there is one
  141. */
  142. void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
  143. void (*init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task); /**< Takes the same arguments as init_iterator plus the \c task concerned; NOTE(review): exact semantics are not documented in this header */
  144. };
  145. extern struct starpu_worker_collection starpu_worker_list;
  146. extern struct starpu_worker_collection starpu_worker_tree;
  147. /**
  148. Return true if type matches one of StarPU's defined worker architectures
  149. */
  150. unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type);
  151. /**
  152. Convert a mask of architectures to a worker archtype
  153. */
  154. enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask);
  155. /**
  156. Return the number of workers (i.e. processing units executing
  157. StarPU tasks). The return value should be at most \ref
  158. STARPU_NMAXWORKERS.
  159. */
  160. unsigned starpu_worker_get_count(void);
  161. /**
  162. Return the number of CPUs controlled by StarPU. The return value
  163. should be at most \ref STARPU_MAXCPUS.
  164. */
  165. unsigned starpu_cpu_worker_get_count(void);
  166. /**
  167. Return the number of CUDA devices controlled by StarPU. The return
  168. value should be at most \ref STARPU_MAXCUDADEVS.
  169. */
  170. unsigned starpu_cuda_worker_get_count(void);
  171. /**
  172. Return the number of OpenCL devices controlled by StarPU. The
  173. return value should be at most \ref STARPU_MAXOPENCLDEVS.
  174. */
  175. unsigned starpu_opencl_worker_get_count(void);
  176. /**
  177. Return the number of MPI Master Slave workers controlled by StarPU.
  178. */
  179. unsigned starpu_mpi_ms_worker_get_count(void);
  180. /**
  181. Return the identifier of the current worker, i.e. the one associated
  182. to the calling thread. The return value is either \c -1 if the
  183. current context is not a StarPU worker (i.e. when called from the
  184. application outside a task or a callback), or an integer between \c
  185. 0 and starpu_worker_get_count() - \c 1.
  186. */
  187. int starpu_worker_get_id(void);
  188. unsigned _starpu_worker_get_id_check(const char *f, int l);
  189. /**
  190. Similar to starpu_worker_get_id(), but abort when called from
  191. outside a worker (i.e. when starpu_worker_get_id() would return \c
  192. -1).
  193. */
  194. unsigned starpu_worker_get_id_check(void);
  195. #define starpu_worker_get_id_check() _starpu_worker_get_id_check(__FILE__, __LINE__)
  196. int starpu_worker_get_bindid(int workerid);
  197. void starpu_sched_find_all_worker_combinations(void);
  198. /**
  199. Return the type of processing unit associated to the worker \p id.
  200. The worker identifier is a value returned by the function
  201. starpu_worker_get_id(). The return value indicates the
  202. architecture of the worker: ::STARPU_CPU_WORKER for a CPU core,
  203. ::STARPU_CUDA_WORKER for a CUDA device, and ::STARPU_OPENCL_WORKER
  204. for an OpenCL device. The return value for an invalid identifier is
  205. unspecified.
  206. */
  207. enum starpu_worker_archtype starpu_worker_get_type(int id);
  208. /**
  209. Return the number of workers of \p type. A non-negative value is
  210. returned in case of success; <c>-EINVAL</c> is returned if
  211. \p type is not valid.
  212. */
  213. int starpu_worker_get_count_by_type(enum starpu_worker_archtype type);
  214. /**
  215. Get the list of identifiers of workers of \p type. Fill the array
  216. \p workerids with the identifiers of the workers. The argument
  217. \p maxsize indicates the size of the array \p workerids. The return
  218. value gives the number of identifiers that were put in the array.
  219. <c>-ERANGE</c> is returned if \p maxsize is lower than the number
  220. of workers with the appropriate type: in that case, the array is
  221. filled with the \p maxsize first elements. To avoid such overflows,
  222. the value of \p maxsize can be chosen by the means of the function
  223. starpu_worker_get_count_by_type(), or by passing a value greater or
  224. equal to \ref STARPU_NMAXWORKERS. Note that the return type is unsigned, so <c>-ERANGE</c> reaches the caller converted to unsigned.
  225. */
  226. unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize);
  227. /**
  228. Return the identifier of the \p num -th worker that has the
  229. specified \p type. If there is no such worker, -1 is returned.
  230. */
  231. int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num);
  232. /**
  233. Return the identifier of the worker that has the specified \p type
  234. and device id \p devid (which may not be the n-th, if some devices
  235. are skipped for instance). If there is no such worker, \c -1 is
  236. returned.
  237. */
  238. int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
  239. /**
  240. Return true if task has a codelet for this worker type
  241. */
  242. unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task);
  243. /**
  244. Get the name of the worker \p id. StarPU associates a unique human
  245. readable string to each processing unit. This function copies at
  246. most the \p maxlen first bytes of the unique string associated to
  247. the worker \p id into the \p dst buffer. The caller is responsible
  248. for ensuring that \p dst is a valid pointer to a buffer of \p
  249. maxlen bytes at least. Calling this function on an invalid
  250. identifier results in an unspecified behaviour.
  251. */
  252. void starpu_worker_get_name(int id, char *dst, size_t maxlen);
  253. /**
  254. Display on \p output the list (if any) of all the workers of the
  255. given \p type.
  256. */
  257. void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type);
  258. /**
  259. Return the device id of the worker \p id. The worker should be
  260. identified with the value returned by the starpu_worker_get_id()
  261. function. In the case of a CUDA worker, this device identifier is
  262. the logical device identifier exposed by CUDA (used by the function
  263. \c cudaGetDevice() for instance). The device identifier of a CPU
  264. worker is the logical identifier of the core on which the worker
  265. was bound; this identifier is either provided by the OS or by the
  266. library <c>hwloc</c> in case it is available.
  267. */
  268. int starpu_worker_get_devid(int id);
  269. int starpu_worker_get_mp_nodeid(int id);
  270. struct starpu_tree* starpu_workers_get_tree(void);
  271. unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
  272. /**
  273. Return whether worker \p workerid is currently blocked in a parallel task.
  274. */
  275. unsigned starpu_worker_is_blocked_in_parallel(int workerid);
  276. unsigned starpu_worker_is_slave_somewhere(int workerid);
  277. /**
  278. Return worker \p type as a string.
  279. */
  280. const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
  281. /**
  282. Return worker \p type as a string suitable for environment variable names (CPU, CUDA, etc.)
  283. */
  284. const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type);
  285. int starpu_bindid_get_workerids(int bindid, int **workerids);
  286. int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num);
  287. int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type);
  288. unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid);
  289. #ifdef STARPU_HAVE_HWLOC
  290. /**
  291. If StarPU was compiled with \c hwloc support, return a duplicate of
  292. the \c hwloc cpuset associated with the worker \p workerid. The
  293. returned cpuset is obtained from a \c hwloc_bitmap_dup() function
  294. call. It must be freed by the caller using \c hwloc_bitmap_free().
  295. */
  296. hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
  297. /**
  298. If StarPU was compiled with \c hwloc support, return the \c hwloc
  299. object corresponding to the worker \p workerid.
  300. */
  301. hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
  302. #endif
  303. int starpu_memory_node_get_devid(unsigned node);
  304. /**
  305. Return the memory node associated to the current worker
  306. */
  307. unsigned starpu_worker_get_local_memory_node(void);
  308. /**
  309. Return the identifier of the memory node associated to the worker
  310. identified by \p workerid.
  311. */
  312. unsigned starpu_worker_get_memory_node(unsigned workerid);
  313. /**
  314. Return the number of memory nodes
  315. */
  316. unsigned starpu_memory_nodes_get_count(void);
  317. /**
  318. Return in \p name the name of a memory node (NUMA 0, CUDA 0, etc.)
  319. \p size is the size of the \p name array.
  320. */
  321. int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
  322. /**
  323. Return the number of NUMA nodes used by StarPU
  324. */
  325. int starpu_memory_nodes_get_numa_count(void);
  326. /**
  327. Return the identifier of the memory node associated to the NUMA
  328. node identified by \p osid by the Operating System.
  329. */
  330. int starpu_memory_nodes_numa_id_to_devid(int osid);
  331. /**
  332. Return the Operating System identifier of the memory node whose
  333. StarPU identifier is \p id.
  334. */
  335. int starpu_memory_nodes_numa_devid_to_id(unsigned id);
  336. /**
  337. Return the type of \p node as defined by ::starpu_node_kind. For
  338. example, when defining a new data interface, this function should
  339. be used in the allocation function to determine on which device the
  340. memory needs to be allocated.
  341. */
  342. enum starpu_node_kind starpu_node_get_kind(unsigned node);
  343. /**
  344. Return the type of worker which operates on memory node kind \p node_kind
  345. */
  346. enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind);
  347. /**
  348. Return the type of memory node that arch type \p type operates on
  349. */
  350. enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type);
  351. /**
  352. @name Scheduling operations
  353. @{
  354. */
  355. /**
  356. Return \c !0 if current worker has a scheduling operation in
  357. progress, and \c 0 otherwise.
  358. */
  359. int starpu_worker_sched_op_pending(void);
  360. /**
  361. Allow other threads and workers to temporarily observe the current
  362. worker state, even though it is performing a scheduling operation.
  363. Must be called by a worker before performing a potentially blocking
  364. call such as acquiring a mutex other than its own sched_mutex. This
  365. function increases \c state_relax_refcnt from the current worker.
  366. No more than <c>UINT_MAX-1</c> nested starpu_worker_relax_on()
  367. calls should be performed on the same worker. This function is
  368. automatically called by starpu_worker_lock() to relax the caller
  369. worker state while attempting to lock the target worker.
  370. */
  371. void starpu_worker_relax_on(void);
  372. /**
  373. Must be called after a potentially blocking call is complete, to
  374. restore the relax state in place before the corresponding
  375. starpu_worker_relax_on(). Decreases \c state_relax_refcnt. Calls to
  376. starpu_worker_relax_on() and starpu_worker_relax_off() must be
  377. properly paired. This function is automatically called by
  378. starpu_worker_unlock() after the target worker has been unlocked.
  379. */
  380. void starpu_worker_relax_off(void);
  381. /**
  382. Return \c !0 if the current worker \c state_relax_refcnt!=0 and \c
  383. 0 otherwise.
  384. */
  385. int starpu_worker_get_relax_state(void);
  386. /**
  387. Acquire the sched mutex of \p workerid. If the caller is a worker,
  388. distinct from \p workerid, the caller worker automatically enters a
  389. relax state while acquiring the target worker lock.
  390. */
  391. void starpu_worker_lock(int workerid);
  392. /**
  393. Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if
  394. successful, \c !0 if \p workerid sched mutex is held or the
  395. corresponding worker is not in a relax state. If the caller is a
  396. worker, distinct from \p workerid, the caller worker automatically
  397. enters relax state if successfully acquiring the target worker lock.
  398. */
  399. int starpu_worker_trylock(int workerid);
  400. /**
  401. Release the previously acquired sched mutex of \p workerid. Restore
  402. the relax state of the caller worker if needed.
  403. */
  404. void starpu_worker_unlock(int workerid);
  405. /**
  406. Acquire the current worker sched mutex.
  407. */
  408. void starpu_worker_lock_self(void);
  409. /**
  410. Release the current worker sched mutex.
  411. */
  412. void starpu_worker_unlock_self(void);
  413. #ifdef STARPU_WORKER_CALLBACKS
  414. /**
  415. If StarPU was compiled with blocking drivers support and worker
  416. callbacks support enabled, allow specifying an external resource
  417. manager callback to be notified about workers going to sleep.
  418. */
  419. void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid));
  420. /**
  421. If StarPU was compiled with blocking drivers support and worker
  422. callbacks support enabled, allow specifying an external resource
  423. manager callback to be notified about workers waking-up.
  424. */
  425. void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid));
  426. #endif
  427. /** @} */
  428. /** @} */
  429. /**
  430. @defgroup API_Parallel_Tasks Parallel Tasks
  431. @{
  432. */
  433. /**
  434. Return the number of different combined workers.
  435. */
  436. unsigned starpu_combined_worker_get_count(void);
  437. unsigned starpu_worker_is_combined_worker(int id);
  438. /**
  439. Return the identifier of the current combined worker.
  440. */
  441. int starpu_combined_worker_get_id(void);
  442. /**
  443. Return the size of the current combined worker, i.e. the total
  444. number of CPUs running the same task in the case of ::STARPU_SPMD
  445. parallel tasks, or the total number of threads that the task is
  446. allowed to start in the case of ::STARPU_FORKJOIN parallel tasks.
  447. */
  448. int starpu_combined_worker_get_size(void);
  449. /**
  450. Return the rank of the current thread within the combined worker.
  451. Can only be used in ::STARPU_SPMD parallel tasks, to know which
  452. part of the task to work on.
  453. */
  454. int starpu_combined_worker_get_rank(void);
  455. /**
  456. Register a new combined worker and get its identifier
  457. */
  458. int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]);
  459. /**
  460. Get the description of a combined worker
  461. */
  462. int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid);
  463. /**
  464. Variant of starpu_worker_can_execute_task() compatible with
  465. combined workers
  466. */
  467. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
  468. /**
  469. Initialise the barrier for the parallel task, and dispatch the task
  470. between the different workers of the given combined worker.
  471. */
  472. void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
  473. /**
  474. Initialise the barrier for the parallel task, to be pushed to \p
  475. worker_size workers (without having to specify a given combined
  476. worker).
  477. */
  478. void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
  479. /** @} */
  480. #ifdef __cplusplus
  481. }
  482. #endif
  483. #endif /* __STARPU_WORKER_H__ */