workers.c 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2011 Télécom-SudParis
  5. * Copyright (C) 2013 Thibaut Lambert
  6. * Copyright (C) 2016 Uppsala University
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #ifdef __linux__
  22. #include <sys/utsname.h>
  23. #endif
  24. #include <common/config.h>
  25. #include <common/utils.h>
  26. #include <common/graph.h>
  27. #include <core/progress_hook.h>
  28. #include <core/idle_hook.h>
  29. #include <core/workers.h>
  30. #include <core/debug.h>
  31. #include <core/disk.h>
  32. #include <core/task.h>
  33. #include <core/detect_combined_workers.h>
  34. #include <datawizard/malloc.h>
  35. #include <profiling/profiling.h>
  36. #include <profiling/bound.h>
  37. #include <sched_policies/sched_component.h>
  38. #include <datawizard/memory_nodes.h>
  39. #include <common/knobs.h>
  40. #include <drivers/mp_common/sink_common.h>
  41. #include <drivers/mpi/driver_mpi_common.h>
  42. #include <drivers/cpu/driver_cpu.h>
  43. #include <drivers/cuda/driver_cuda.h>
  44. #include <drivers/opencl/driver_opencl.h>
  45. #include <drivers/mpi/driver_mpi_source.h>
  46. #include <drivers/disk/driver_disk.h>
  47. #ifdef STARPU_SIMGRID
  48. #include <core/simgrid.h>
  49. #endif
  50. #if defined(_WIN32) && !defined(__CYGWIN__)
  51. #include <windows.h>
  52. #endif
  53. /* global knobs */
  54. static int __g_calibrate_knob;
  55. static int __g_enable_catch_signal_knob;
  56. /* per-worker knobs */
  57. static int __w_bind_to_pu_knob;
  58. static int __w_enable_worker_knob;
  59. static struct starpu_perf_knob_group * __kg_starpu_global;
  60. static struct starpu_perf_knob_group * __kg_starpu_worker__per_worker;
  61. static void global_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value)
  62. {
  63. /* context is not used for global knobs */
  64. STARPU_ASSERT(context == NULL);
  65. (void)context;
  66. if (knob->id == __g_calibrate_knob)
  67. {
  68. _starpu_set_calibrate_flag((unsigned)value->val_int32_t);
  69. }
  70. else if (knob->id == __g_enable_catch_signal_knob)
  71. {
  72. _starpu_set_catch_signals(!!value->val_int32_t);
  73. }
  74. else
  75. {
  76. STARPU_ASSERT(0);
  77. abort();
  78. }
  79. }
  80. static void global_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)
  81. {
  82. /* context is not used for global knobs */
  83. STARPU_ASSERT(context == NULL);
  84. (void)context;
  85. if (knob->id == __g_calibrate_knob)
  86. {
  87. value->val_int32_t = (int32_t)_starpu_get_calibrate_flag();
  88. }
  89. else if (knob->id == __g_enable_catch_signal_knob)
  90. {
  91. value->val_int32_t = _starpu_get_catch_signals();
  92. }
  93. else
  94. {
  95. STARPU_ASSERT(0);
  96. abort();
  97. }
  98. }
  99. static void worker_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value)
  100. {
  101. const unsigned workerid = *(unsigned *)context;
  102. struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid);
  103. if (knob->id == __w_bind_to_pu_knob)
  104. {
  105. STARPU_ASSERT(value->val_int32_t >= 0);
  106. worker->bindid_requested = value->val_int32_t;
  107. }
  108. else if (knob->id == __w_enable_worker_knob)
  109. {
  110. worker->enable_knob = !!value->val_int32_t;
  111. }
  112. else
  113. {
  114. STARPU_ASSERT(0);
  115. abort();
  116. }
  117. }
  118. static void worker_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)
  119. {
  120. const unsigned workerid = *(unsigned *)context;
  121. struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid);
  122. if (knob->id == __w_bind_to_pu_knob)
  123. {
  124. value->val_int32_t = worker->bindid;
  125. }
  126. else if (knob->id == __w_enable_worker_knob)
  127. {
  128. value->val_int32_t = worker->enable_knob;
  129. }
  130. else
  131. {
  132. STARPU_ASSERT(0);
  133. abort();
  134. }
  135. }
  136. void _starpu__workers_c__register_knobs(void)
  137. {
  138. {
  139. const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_global;
  140. __kg_starpu_global = _starpu_perf_knob_group_register(scope, global_knobs__set, global_knobs__get);
  141. __STARPU_PERF_KNOB_REG("starpu.global", __kg_starpu_global, g_calibrate_knob, int32, "enable or disable performance models calibration (override STARPU_CALIBRATE env var)");
  142. __STARPU_PERF_KNOB_REG("starpu.global", __kg_starpu_global, g_enable_catch_signal_knob, int32, "enable or disable signal catching (override STARPU_CATCH_SIGNALS env var)");
  143. }
  144. {
  145. const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_worker;
  146. __kg_starpu_worker__per_worker = _starpu_perf_knob_group_register(scope, worker_knobs__set, worker_knobs__get);
  147. __STARPU_PERF_KNOB_REG("starpu.worker", __kg_starpu_worker__per_worker, w_bind_to_pu_knob, int32, "bind worker to PU (PU logical number, override StarPU binding env vars)");
  148. __STARPU_PERF_KNOB_REG("starpu.worker", __kg_starpu_worker__per_worker, w_enable_worker_knob, int32, "enable assigning task to that worker (1:Enabled | [0:Disabled])");
  149. }
  150. #if 0
  151. {
  152. const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler;
  153. __kg_starpu_worker__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get);
  154. }
  155. #endif
  156. }
  157. void _starpu__workers_c__unregister_knobs(void)
  158. {
  159. _starpu_perf_knob_group_unregister(__kg_starpu_global);
  160. _starpu_perf_knob_group_unregister(__kg_starpu_worker__per_worker);
  161. __kg_starpu_global = NULL;
  162. __kg_starpu_worker__per_worker = NULL;
  163. }
  164. /* acquire/release semantic for concurrent initialization/de-initialization */
  165. static starpu_pthread_mutex_t init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  166. static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER;
  167. static int init_count = 0;
  168. static enum initialization initialized = UNINITIALIZED;
  169. int _starpu_keys_initialized;
  170. starpu_pthread_key_t _starpu_worker_key;
  171. starpu_pthread_key_t _starpu_worker_set_key;
  172. struct _starpu_machine_config _starpu_config;
  173. static int check_entire_platform;
  174. int _starpu_worker_parallel_blocks;
  175. /* Pointers to argc and argv
  176. */
  177. static int *my_argc = 0;
  178. static char ***my_argv = NULL;
  179. void _starpu__workers_c__register_kobs(void)
  180. {
  181. /* TODO */
  182. }
  183. struct _starpu_driver_info starpu_driver_info[STARPU_NARCH];
  184. void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info)
  185. {
  186. starpu_driver_info[archtype] = *info;
  187. }
  188. struct _starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1];
  189. void _starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct _starpu_memory_driver_info *info)
  190. {
  191. starpu_memory_driver_info[kind] = *info;
  192. }
  193. /* Initialize value of static argc and argv, called when the process begins
  194. */
  195. void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
  196. {
  197. my_argc = argc_param;
  198. my_argv = argv_param;
  199. }
  200. int *_starpu_get_argc()
  201. {
  202. return my_argc;
  203. }
  204. char ***_starpu_get_argv()
  205. {
  206. return my_argv;
  207. }
  208. int starpu_is_initialized(void)
  209. {
  210. return initialized != UNINITIALIZED;
  211. }
  212. void starpu_wait_initialized(void)
  213. {
  214. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  215. while (initialized != INITIALIZED)
  216. STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  217. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  218. }
  219. /* Makes sure that at least one of the workers of type <arch> can execute
  220. * <task>, for at least one of its implementations. */
  221. static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
  222. enum starpu_worker_archtype arch)
  223. {
  224. _starpu_codelet_check_deprecated_fields(task->cl);
  225. /* make sure there is a worker on the machine able to execute the
  226. task, independent of the sched_ctx, this latter may receive latter on
  227. the necessary worker - the user or the hypervisor should take care this happens */
  228. struct _starpu_sched_ctx *sched_ctx = check_entire_platform == 1 ? _starpu_get_initial_sched_ctx() : _starpu_get_sched_ctx_struct(task->sched_ctx);
  229. struct starpu_worker_collection *workers = sched_ctx->workers;
  230. struct starpu_sched_ctx_iterator it;
  231. workers->init_iterator(workers, &it);
  232. while(workers->has_next(workers, &it))
  233. {
  234. int i = workers->get_next(workers, &it);
  235. if (starpu_worker_get_type(i) != arch)
  236. continue;
  237. unsigned impl;
  238. for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
  239. {
  240. /* We could call task->cl->can_execute(i, task, impl)
  241. here, it would definitely work. It is probably
  242. cheaper to check whether it is necessary in order to
  243. avoid a useless function call, though. */
  244. unsigned test_implementation = 0;
  245. switch (arch)
  246. {
  247. case STARPU_CPU_WORKER:
  248. if (task->cl->cpu_funcs[impl] != NULL)
  249. test_implementation = 1;
  250. break;
  251. case STARPU_CUDA_WORKER:
  252. if (task->cl->cuda_funcs[impl] != NULL)
  253. test_implementation = 1;
  254. break;
  255. case STARPU_OPENCL_WORKER:
  256. if (task->cl->opencl_funcs[impl] != NULL)
  257. test_implementation = 1;
  258. break;
  259. case STARPU_MPI_MS_WORKER:
  260. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mpi_ms_funcs[impl] != NULL)
  261. test_implementation = 1;
  262. break;
  263. default:
  264. STARPU_ABORT();
  265. }
  266. if (!test_implementation)
  267. continue;
  268. if (task->cl->can_execute)
  269. return task->cl->can_execute(i, task, impl);
  270. if(test_implementation)
  271. return 1;
  272. }
  273. }
  274. return 0;
  275. }
  276. /* in case a task is submitted, we may check whether there exists a worker
  277. that may execute the task or not */
  278. uint32_t _starpu_worker_exists(struct starpu_task *task)
  279. {
  280. _starpu_codelet_check_deprecated_fields(task->cl);
  281. if (task->where == STARPU_NOWHERE)
  282. return 1;
  283. /* if the task belongs to the init context we can
  284. check out all the worker mask of the machine
  285. if not we should iterate on the workers of the ctx
  286. and verify if it exists a worker able to exec the task */
  287. if(task->sched_ctx == 0)
  288. {
  289. if (!(task->where & _starpu_config.worker_mask))
  290. return 0;
  291. if (!task->cl->can_execute)
  292. return 1;
  293. }
  294. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  295. if ((task->where & STARPU_CPU) &&
  296. _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
  297. return 1;
  298. #endif
  299. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  300. if ((task->where & STARPU_CUDA) &&
  301. _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
  302. return 1;
  303. #endif
  304. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  305. if ((task->where & STARPU_OPENCL) &&
  306. _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
  307. return 1;
  308. #endif
  309. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  310. if ((task->where & STARPU_MPI_MS) &&
  311. _starpu_worker_exists_and_can_execute(task, STARPU_MPI_MS_WORKER))
  312. return 1;
  313. #endif
  314. return 0;
  315. }
  316. uint32_t _starpu_can_submit_cuda_task(void)
  317. {
  318. return STARPU_CUDA & _starpu_config.worker_mask;
  319. }
  320. uint32_t _starpu_can_submit_cpu_task(void)
  321. {
  322. return STARPU_CPU & _starpu_config.worker_mask;
  323. }
  324. uint32_t _starpu_can_submit_opencl_task(void)
  325. {
  326. return STARPU_OPENCL & _starpu_config.worker_mask;
  327. }
  328. static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
  329. {
  330. switch(arch)
  331. {
  332. case STARPU_ANY_WORKER:
  333. {
  334. int cpu_func_enabled=1, cuda_func_enabled=1, opencl_func_enabled=1;
  335. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  336. starpu_cpu_func_t cpu_func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  337. cpu_func_enabled = cpu_func != NULL && starpu_cpu_worker_get_count();
  338. #endif
  339. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  340. starpu_cuda_func_t cuda_func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  341. cuda_func_enabled = cuda_func != NULL && starpu_cuda_worker_get_count();
  342. #endif
  343. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  344. starpu_opencl_func_t opencl_func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  345. opencl_func_enabled = opencl_func != NULL && starpu_opencl_worker_get_count();
  346. #endif
  347. return cpu_func_enabled && cuda_func_enabled && opencl_func_enabled;
  348. }
  349. case STARPU_CPU_WORKER:
  350. {
  351. starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  352. return func != NULL;
  353. }
  354. case STARPU_CUDA_WORKER:
  355. {
  356. starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  357. return func != NULL;
  358. }
  359. case STARPU_OPENCL_WORKER:
  360. {
  361. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  362. return func != NULL;
  363. }
  364. case STARPU_MPI_MS_WORKER:
  365. {
  366. starpu_mpi_ms_func_t func = _starpu_task_get_mpi_ms_nth_implementation(cl, nimpl);
  367. const char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  368. return func != NULL || func_name != NULL;
  369. }
  370. default:
  371. STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch);
  372. }
  373. return 0;
  374. }
  375. /* Test if this task can be processed on this worker, regardless of the implementation */
  376. /* must be called with sched_mutex locked to protect state_blocked */
  377. static inline int _starpu_can_execute_task_any_impl(unsigned workerid, struct starpu_task *task)
  378. {
  379. if (!_starpu_config.workers[workerid].enable_knob)
  380. return 0;
  381. if (task->workerids_len)
  382. {
  383. size_t div = sizeof(*task->workerids) * 8;
  384. if (workerid / div >= task->workerids_len || ! (task->workerids[workerid / div] & (1UL << workerid % div)))
  385. return 0;
  386. }
  387. /* if the worker is blocked in a parallel ctx don't submit tasks on it */
  388. #ifdef STARPU_DEVEL
  389. #warning FIXME: this is very expensive, while can_execute is supposed to be not very costly so schedulers can call it a lot
  390. #endif
  391. if(starpu_worker_is_blocked_in_parallel(workerid))
  392. return 0;
  393. if (!(task->where & _starpu_config.workers[workerid].worker_mask))
  394. return 0;
  395. return 1;
  396. }
  397. /* must be called with sched_mutex locked to protect state_blocked_in_parallel */
  398. int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  399. {
  400. /* TODO: check that the task operand sizes will fit on that device */
  401. return _starpu_can_execute_task_any_impl(workerid, task) &&
  402. _starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) &&
  403. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
  404. }
  405. /* must be called with sched_mutex locked to protect state_blocked_in_parallel */
  406. int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask)
  407. {
  408. if (!_starpu_can_execute_task_any_impl(workerid, task))
  409. return 0;
  410. unsigned mask;
  411. int i;
  412. enum starpu_worker_archtype arch;
  413. struct starpu_codelet *cl;
  414. /* TODO: check that the task operand sizes will fit on that device */
  415. cl = task->cl;
  416. mask = 0;
  417. arch = _starpu_config.workers[workerid].arch;
  418. if (!task->cl->can_execute)
  419. {
  420. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  421. if (_starpu_can_use_nth_implementation(arch, cl, i))
  422. {
  423. mask |= 1U << i;
  424. if (!impl_mask)
  425. break;
  426. }
  427. }
  428. else
  429. {
  430. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  431. if (_starpu_can_use_nth_implementation(arch, cl, i)
  432. && (!task->cl->can_execute || task->cl->can_execute(workerid, task, i)))
  433. {
  434. mask |= 1U << i;
  435. if (!impl_mask)
  436. break;
  437. }
  438. }
  439. if (impl_mask)
  440. *impl_mask = mask;
  441. return mask != 0;
  442. }
  443. /* must be called with sched_mutex locked to protect state_blocked */
  444. int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl)
  445. {
  446. if (!_starpu_can_execute_task_any_impl(workerid, task))
  447. return 0;
  448. int i;
  449. enum starpu_worker_archtype arch;
  450. struct starpu_codelet *cl;
  451. /* TODO: check that the task operand sizes will fit on that device */
  452. cl = task->cl;
  453. arch = _starpu_config.workers[workerid].arch;
  454. if (!task->cl->can_execute)
  455. {
  456. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  457. if (_starpu_can_use_nth_implementation(arch, cl, i))
  458. {
  459. if (nimpl)
  460. *nimpl = i;
  461. return 1;
  462. }
  463. }
  464. else
  465. {
  466. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  467. if (_starpu_can_use_nth_implementation(arch, cl, i)
  468. && (task->cl->can_execute(workerid, task, i)))
  469. {
  470. if (nimpl)
  471. *nimpl = i;
  472. return 1;
  473. }
  474. }
  475. return 0;
  476. }
  477. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  478. {
  479. /* TODO: check that the task operand sizes will fit on that device */
  480. struct starpu_codelet *cl = task->cl;
  481. unsigned nworkers = _starpu_config.topology.nworkers;
  482. /* Is this a parallel worker ? */
  483. if (workerid < nworkers)
  484. {
  485. if (!_starpu_config.workers[workerid].enable_knob)
  486. return 0;
  487. return !!((task->where & _starpu_config.workers[workerid].worker_mask) &&
  488. _starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) &&
  489. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
  490. }
  491. else
  492. {
  493. if (cl->type == STARPU_SPMD
  494. #ifdef STARPU_HAVE_HWLOC
  495. || cl->type == STARPU_FORKJOIN
  496. #else
  497. #ifdef __GLIBC__
  498. || cl->type == STARPU_FORKJOIN
  499. #endif
  500. #endif
  501. )
  502. {
  503. /* TODO we should add other types of constraints */
  504. /* Is the worker larger than requested ? */
  505. int worker_size = (int)_starpu_config.combined_workers[workerid - nworkers].worker_size;
  506. int worker0 = _starpu_config.combined_workers[workerid - nworkers].combined_workerid[0];
  507. return !!((worker_size <= task->cl->max_parallelism) &&
  508. _starpu_can_use_nth_implementation(_starpu_config.workers[worker0].arch, task->cl, nimpl) &&
  509. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
  510. }
  511. else
  512. {
  513. /* We have a sequential task but a parallel worker */
  514. return 0;
  515. }
  516. }
  517. }
  518. /*
  519. * Runtime initialization methods
  520. */
  521. static void _starpu_init_worker_queue(struct _starpu_worker *worker)
  522. {
  523. _starpu_memory_node_register_condition(worker, &worker->sched_cond, worker->memory_node);
  524. }
  525. /*
  526. * Returns 0 if the given driver is one of the drivers that must be launched by
  527. * the application itself, and not by StarPU, 1 otherwise.
  528. */
  529. static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
  530. struct starpu_driver *d)
  531. {
  532. if (conf->n_not_launched_drivers == 0 || conf->not_launched_drivers == NULL)
  533. return 1;
  534. /* Is <d> in conf->not_launched_drivers ? */
  535. unsigned i;
  536. for (i = 0; i < conf->n_not_launched_drivers; i++)
  537. {
  538. if (d->type != conf->not_launched_drivers[i].type)
  539. continue;
  540. switch (d->type)
  541. {
  542. case STARPU_CPU_WORKER:
  543. if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
  544. return 0;
  545. break;
  546. case STARPU_CUDA_WORKER:
  547. if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
  548. return 0;
  549. break;
  550. #ifdef STARPU_USE_OPENCL
  551. case STARPU_OPENCL_WORKER:
  552. if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
  553. return 0;
  554. break;
  555. #endif
  556. default:
  557. STARPU_ABORT();
  558. }
  559. }
  560. return 1;
  561. }
  562. #ifdef STARPU_PERF_DEBUG
  563. struct itimerval prof_itimer;
  564. #endif
  565. void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig)
  566. {
  567. workerarg->config = pconfig;
  568. STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
  569. /* arch initialized by topology.c */
  570. /* worker_mask initialized by topology.c */
  571. /* perf_arch initialized by topology.c */
  572. /* worker_thread initialized by _starpu_launch_drivers */
  573. /* devid initialized by topology.c */
  574. /* subworkerid initialized by topology.c */
  575. /* bindid initialized by topology.c */
  576. /* workerid initialized by topology.c */
  577. workerarg->combined_workerid = workerarg->workerid;
  578. workerarg->current_rank = 0;
  579. workerarg->worker_size = 1;
  580. STARPU_PTHREAD_COND_INIT(&workerarg->started_cond, NULL);
  581. STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
  582. /* memory_node initialized by topology.c */
  583. STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
  584. STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
  585. starpu_task_prio_list_init(&workerarg->local_tasks);
  586. _starpu_ctx_change_list_init(&workerarg->ctx_change_list);
  587. workerarg->local_ordered_tasks = NULL;
  588. workerarg->local_ordered_tasks_size = 0;
  589. workerarg->current_ordered_task = 0;
  590. workerarg->current_ordered_task_order = 1;
  591. workerarg->current_task = NULL;
  592. #ifdef STARPU_SIMGRID
  593. starpu_pthread_wait_init(&workerarg->wait);
  594. starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]);
  595. #endif
  596. workerarg->task_transferring = NULL;
  597. workerarg->nb_buffers_transferred = 0;
  598. workerarg->nb_buffers_totransfer = 0;
  599. workerarg->first_task = 0;
  600. workerarg->ntasks = 0;
  601. /* set initialized by topology.c */
  602. workerarg->pipeline_length = 0;
  603. workerarg->pipeline_stuck = 0;
  604. workerarg->worker_is_running = 0;
  605. workerarg->worker_is_initialized = 0;
  606. workerarg->wait_for_worker_initialization = 0;
  607. workerarg->status = STATUS_INITIALIZING;
  608. workerarg->state_keep_awake = 0;
  609. /* name initialized by driver */
  610. /* short_name initialized by driver */
  611. workerarg->run_by_starpu = 1;
  612. workerarg->driver_ops = NULL;
  613. workerarg->sched_ctx_list = NULL;
  614. workerarg->tmp_sched_ctx = -1;
  615. workerarg->nsched_ctxs = 0;
  616. _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);
  617. workerarg->has_prev_init = 0;
  618. int ctx;
  619. for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
  620. workerarg->removed_from_ctx[ctx] = 0;
  621. workerarg->spinning_backoff = 1;
  622. for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
  623. {
  624. workerarg->shares_tasks_lists[ctx] = 0;
  625. workerarg->poped_in_ctx[ctx] = 0;
  626. }
  627. workerarg->reverse_phase[0] = 0;
  628. workerarg->reverse_phase[1] = 0;
  629. workerarg->pop_ctx_priority = 1;
  630. workerarg->is_slave_somewhere = 0;
  631. workerarg->state_relax_refcnt = 1;
  632. #ifdef STARPU_SPINLOCK_CHECK
  633. workerarg->relax_on_file = __FILE__;
  634. workerarg->relax_on_line = __LINE__;
  635. workerarg->relax_on_func = __starpu_func__;
  636. workerarg->relax_off_file = NULL;
  637. workerarg->relax_off_line = 0;
  638. workerarg->relax_off_func = NULL;
  639. #endif
  640. workerarg->state_sched_op_pending = 0;
  641. workerarg->state_changing_ctx_waiting = 0;
  642. workerarg->state_changing_ctx_notice = 0;
  643. workerarg->state_blocked_in_parallel_observed = 0;
  644. workerarg->state_blocked_in_parallel = 0;
  645. workerarg->state_block_in_parallel_req = 0;
  646. workerarg->state_block_in_parallel_ack = 0;
  647. workerarg->state_unblock_in_parallel_req = 0;
  648. workerarg->state_unblock_in_parallel_ack = 0;
  649. workerarg->block_in_parallel_ref_count = 0;
  650. _starpu_perf_counter_sample_init(&workerarg->perf_counter_sample, starpu_perf_counter_scope_per_worker);
  651. workerarg->enable_knob = 1;
  652. workerarg->bindid_requested = -1;
  653. /* cpu_set/hwloc_cpu_set/hwloc_obj initialized in topology.c */
  654. }
  655. static void _starpu_worker_deinit(struct _starpu_worker *workerarg)
  656. {
  657. (void) workerarg;
  658. #ifdef STARPU_SIMGRID
  659. starpu_pthread_queue_unregister(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]);
  660. starpu_pthread_wait_destroy(&workerarg->wait);
  661. #endif
  662. _starpu_perf_counter_sample_exit(&workerarg->perf_counter_sample);
  663. }
  664. #ifdef STARPU_USE_FXT
  665. void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync)
  666. {
  667. unsigned devid = worker->devid;
  668. unsigned memnode = worker->memory_node;
  669. _STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync);
  670. }
  671. #endif
  672. void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED)
  673. {
  674. (void) archtype;
  675. int devid = worker->devid;
  676. (void) devid;
  677. #ifdef STARPU_USE_FXT
  678. _STARPU_TRACE_REGISTER_THREAD(worker->bindid);
  679. _starpu_worker_start(worker, archtype, sync);
  680. #endif
  681. _starpu_set_local_worker_key(worker);
  682. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  683. worker->worker_is_running = 1;
  684. STARPU_PTHREAD_COND_SIGNAL(&worker->started_cond);
  685. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  686. _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL);
  687. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  688. setitimer(ITIMER_PROF, &prof_itimer, NULL);
  689. #endif
  690. _STARPU_DEBUG("worker %p %d for dev %d is ready on logical cpu %d\n", worker, worker->workerid, devid, worker->bindid);
  691. #ifdef STARPU_HAVE_HWLOC
  692. _STARPU_DEBUG("worker %p %d cpuset start at %d\n", worker, worker->workerid, hwloc_bitmap_first(worker->hwloc_cpu_set));
  693. #endif
  694. }
  695. static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
  696. {
  697. pconfig->running = 1;
  698. pconfig->pause_depth = 0;
  699. pconfig->submitting = 1;
  700. STARPU_HG_DISABLE_CHECKING(pconfig->watchdog_ok);
  701. unsigned nworkers = pconfig->topology.nworkers;
  702. unsigned worker;
  703. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  704. /* Get itimer of the main thread, to set it for the worker threads */
  705. getitimer(ITIMER_PROF, &prof_itimer);
  706. #endif
  707. STARPU_AYU_INIT();
  708. /* Launch workers asynchronously */
  709. for (worker = 0; worker < nworkers; worker++)
  710. {
  711. struct _starpu_worker *workerarg = &pconfig->workers[worker];
  712. workerarg->wait_for_worker_initialization = 0;
  713. _STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers);
  714. _starpu_init_worker_queue(workerarg);
  715. switch (workerarg->arch)
  716. {
  717. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  718. case STARPU_CPU_WORKER:
  719. {
  720. struct starpu_driver driver;
  721. driver.type = workerarg->arch;
  722. driver.id.cpu_id = workerarg->devid;
  723. workerarg->driver_ops = &_starpu_driver_cpu_ops;
  724. workerarg->wait_for_worker_initialization = 1;
  725. if (_starpu_may_launch_driver(&pconfig->conf, &driver))
  726. {
  727. STARPU_PTHREAD_CREATE_ON(
  728. "CPU",
  729. &workerarg->worker_thread,
  730. NULL,
  731. _starpu_cpu_worker,
  732. workerarg,
  733. _starpu_simgrid_get_host_by_worker(workerarg));
  734. }
  735. else
  736. {
  737. workerarg->run_by_starpu = 0;
  738. }
  739. break;
  740. }
  741. #endif
  742. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  743. case STARPU_CUDA_WORKER:
  744. {
  745. struct starpu_driver driver;
  746. driver.type = workerarg->arch;
  747. driver.id.cuda_id = workerarg->devid;
  748. workerarg->driver_ops = &_starpu_driver_cuda_ops;
  749. struct _starpu_worker_set *worker_set = workerarg->set;
  750. if (worker_set->workers != workerarg)
  751. /* We are not the first worker of the
  752. * set, don't start a thread for it. */
  753. break;
  754. worker_set->set_is_initialized = 0;
  755. worker_set->wait_for_set_initialization = 1;
  756. workerarg->wait_for_worker_initialization = 0;
  757. if (_starpu_may_launch_driver(&pconfig->conf, &driver))
  758. {
  759. STARPU_PTHREAD_CREATE_ON(
  760. "CUDA",
  761. &worker_set->worker_thread,
  762. NULL,
  763. _starpu_cuda_worker,
  764. worker_set,
  765. _starpu_simgrid_get_host_by_worker(workerarg));
  766. }
  767. else
  768. {
  769. workerarg->run_by_starpu = 0;
  770. }
  771. break;
  772. }
  773. #endif
  774. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  775. case STARPU_OPENCL_WORKER:
  776. {
  777. #ifndef STARPU_SIMGRID
  778. struct starpu_driver driver;
  779. driver.type = workerarg->arch;
  780. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  781. workerarg->driver_ops = &_starpu_driver_opencl_ops;
  782. workerarg->wait_for_worker_initialization = 1;
  783. if (_starpu_may_launch_driver(&pconfig->conf, &driver))
  784. {
  785. STARPU_PTHREAD_CREATE_ON(
  786. "OpenCL",
  787. &workerarg->worker_thread,
  788. NULL,
  789. _starpu_opencl_worker,
  790. workerarg,
  791. _starpu_simgrid_get_host_by_worker(workerarg));
  792. }
  793. else
  794. {
  795. workerarg->run_by_starpu = 0;
  796. }
  797. #endif
  798. break;
  799. }
  800. #endif
  801. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  802. case STARPU_MPI_MS_WORKER:
  803. {
  804. /* We spawn only one thread
  805. * per MPI device, which will control all MPI
  806. * workers of this device. (by using a worker set). */
  807. struct _starpu_worker_set *worker_set = workerarg->set;
  808. if (worker_set->workers != workerarg)
  809. break;
  810. worker_set->set_is_initialized = 0;
  811. worker_set->wait_for_set_initialization = 1;
  812. workerarg->wait_for_worker_initialization = 0;
  813. #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
  814. /* if MPI has multiple threads supports
  815. * we launch 1 thread per device
  816. * else
  817. * we launch one thread for all devices
  818. */
  819. STARPU_PTHREAD_CREATE_ON(
  820. "MPI MS",
  821. &worker_set->worker_thread,
  822. NULL,
  823. _starpu_mpi_src_worker,
  824. worker_set,
  825. _starpu_simgrid_get_host_by_worker(workerarg));
  826. #endif /* STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD */
  827. break;
  828. }
  829. #endif /* STARPU_USE_MPI_MASTER_SLAVE */
  830. default:
  831. STARPU_ABORT();
  832. }
  833. #ifdef STARPU_USE_FXT
  834. /* In tracing mode, make sure the thread is really started
  835. * before starting another one, to make sure they appear in
  836. * order in the trace.
  837. */
  838. if ((!workerarg->set || workerarg->set->workers == workerarg)
  839. && workerarg->run_by_starpu == 1 && workerarg->arch != STARPU_MPI_MS_WORKER)
  840. {
  841. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  842. while (!workerarg->worker_is_running)
  843. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  844. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  845. }
  846. #endif
  847. }
  848. #if defined(STARPU_USE_MPI_MASTER_SLAVE) && !defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
  849. if (pconfig->topology.ndevices[STARPU_MPI_MS_WORKER] > 0)
  850. {
  851. struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
  852. struct _starpu_worker * worker_zero STARPU_ATTRIBUTE_UNUSED = &worker_set_zero->workers[0];
  853. STARPU_PTHREAD_CREATE_ON(
  854. "zero",
  855. &worker_set_zero->worker_thread,
  856. NULL,
  857. _starpu_mpi_src_worker,
  858. &mpi_worker_set,
  859. _starpu_simgrid_get_host_by_worker(worker_zero));
  860. /* We use the first worker to know if everything are finished */
  861. #ifdef STARPU_USE_FXT
  862. STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
  863. while (!worker_zero->worker_is_running)
  864. STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
  865. STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
  866. #endif
  867. STARPU_PTHREAD_MUTEX_LOCK(&worker_set_zero->mutex);
  868. while (!worker_set_zero->set_is_initialized)
  869. STARPU_PTHREAD_COND_WAIT(&worker_set_zero->ready_cond,
  870. &worker_set_zero->mutex);
  871. STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set_zero->mutex);
  872. worker_set_zero->started = 1;
  873. worker_set_zero->worker_thread = mpi_worker_set[0].worker_thread;
  874. }
  875. #endif
  876. for (worker = 0; worker < nworkers; worker++)
  877. {
  878. struct _starpu_worker *workerarg = &pconfig->workers[worker];
  879. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  880. if (!workerarg->run_by_starpu)
  881. break;
  882. struct _starpu_worker_set *worker_set = workerarg->set;
  883. if (worker_set && worker_set->wait_for_set_initialization == 1)
  884. {
  885. STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
  886. while (!worker_set->set_is_initialized)
  887. STARPU_PTHREAD_COND_WAIT(&worker_set->ready_cond,
  888. &worker_set->mutex);
  889. STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
  890. worker_set->started = 1;
  891. worker_set->wait_for_set_initialization = 0;
  892. }
  893. else if (workerarg->wait_for_worker_initialization == 1)
  894. {
  895. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  896. while (!workerarg->worker_is_initialized)
  897. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  898. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  899. workerarg->wait_for_worker_initialization = 0;
  900. }
  901. }
  902. _STARPU_DEBUG("finished launching drivers\n");
  903. }
  904. /* Initialize the starpu_conf with default values */
  905. int starpu_conf_init(struct starpu_conf *conf)
  906. {
  907. if (!conf)
  908. return -EINVAL;
  909. memset(conf, 0, sizeof(*conf));
  910. conf->magic = 42;
  911. conf->will_use_mpi = 0;
  912. conf->sched_policy_name = starpu_getenv("STARPU_SCHED");
  913. conf->sched_policy = NULL;
  914. conf->global_sched_ctx_min_priority = starpu_get_env_number("STARPU_MIN_PRIO");
  915. conf->global_sched_ctx_max_priority = starpu_get_env_number("STARPU_MAX_PRIO");
  916. conf->catch_signals = starpu_get_env_number_default("STARPU_CATCH_SIGNALS", 1);
  917. /* Note that starpu_get_env_number returns -1 in case the variable is
  918. * not defined */
  919. /* Backward compatibility: check the value of STARPU_NCPUS if
  920. * STARPU_NCPU is not set. */
  921. conf->ncpus = starpu_get_env_number("STARPU_NCPU");
  922. if (conf->ncpus == -1)
  923. conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
  924. conf->reserve_ncpus = starpu_get_env_number("STARPU_RESERVE_NCPU");
  925. int main_thread_bind = starpu_get_env_number_default("STARPU_MAIN_THREAD_BIND", 0);
  926. if (main_thread_bind)
  927. conf->reserve_ncpus++;
  928. conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
  929. conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
  930. conf->nmpi_ms = starpu_get_env_number("STARPU_NMPI_MS");
  931. conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
  932. conf->bus_calibrate = starpu_get_env_number("STARPU_BUS_CALIBRATE");
  933. if (conf->calibrate == -1)
  934. conf->calibrate = 0;
  935. if (conf->bus_calibrate == -1)
  936. conf->bus_calibrate = 0;
  937. conf->use_explicit_workers_bindid = 0; /* TODO */
  938. conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
  939. conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
  940. conf->use_explicit_workers_mpi_ms_deviceid = 0; /* TODO */
  941. conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
  942. if (conf->single_combined_worker == -1)
  943. conf->single_combined_worker = 0;
  944. #if defined(STARPU_DISABLE_ASYNCHRONOUS_COPY)
  945. conf->disable_asynchronous_copy = 1;
  946. #else
  947. conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
  948. if (conf->disable_asynchronous_copy == -1)
  949. conf->disable_asynchronous_copy = 0;
  950. #endif
  951. #if defined(STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY)
  952. conf->disable_asynchronous_cuda_copy = 1;
  953. #else
  954. conf->disable_asynchronous_cuda_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY");
  955. if (conf->disable_asynchronous_cuda_copy == -1)
  956. conf->disable_asynchronous_cuda_copy = 0;
  957. #endif
  958. #if defined(STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY)
  959. conf->disable_asynchronous_opencl_copy = 1;
  960. #else
  961. conf->disable_asynchronous_opencl_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY");
  962. if (conf->disable_asynchronous_opencl_copy == -1)
  963. conf->disable_asynchronous_opencl_copy = 0;
  964. #endif
  965. #if defined(STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY)
  966. conf->disable_asynchronous_mpi_ms_copy = 1;
  967. #else
  968. conf->disable_asynchronous_mpi_ms_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY");
  969. if(conf->disable_asynchronous_mpi_ms_copy == -1)
  970. conf->disable_asynchronous_mpi_ms_copy = 0;
  971. #endif
  972. /* 64MiB by default */
  973. conf->trace_buffer_size = ((uint64_t) starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20;
  974. conf->driver_spinning_backoff_min = (unsigned) starpu_get_env_number_default("STARPU_BACKOFF_MIN", 1);
  975. conf->driver_spinning_backoff_max = (unsigned) starpu_get_env_number_default("STARPU_BACKOFF_MAX", 32);
  976. /* Do not start performance counter collection by default */
  977. conf->start_perf_counter_collection = 0;
  978. conf->cuda_only_fast_alloc_other_memnodes = starpu_get_env_number_default("STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES", 0);
  979. return 0;
  980. }
  981. int starpu_conf_noworker(struct starpu_conf *conf)
  982. {
  983. conf->ncpus = 0;
  984. conf->ncuda = 0;
  985. conf->nopencl = 0;
  986. conf->nmpi_ms = 0;
  987. return 0;
  988. }
  989. static void _starpu_conf_set_value_against_environment(char *name, int *value, int precedence_over_env)
  990. {
  991. if (precedence_over_env == 0)
  992. {
  993. int number;
  994. number = starpu_get_env_number(name);
  995. if (number != -1)
  996. {
  997. *value = number;
  998. }
  999. }
  1000. }
  1001. void _starpu_conf_check_environment(struct starpu_conf *conf)
  1002. {
  1003. char *sched = starpu_getenv("STARPU_SCHED");
  1004. if (sched)
  1005. {
  1006. conf->sched_policy_name = sched;
  1007. }
  1008. _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus, conf->precedence_over_environment_variables);
  1009. _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus, conf->precedence_over_environment_variables);
  1010. _starpu_conf_set_value_against_environment("STARPU_RESERVE_NCPU", &conf->reserve_ncpus, conf->precedence_over_environment_variables);
  1011. int main_thread_bind = starpu_get_env_number_default("STARPU_MAIN_THREAD_BIND", 0);
  1012. if (main_thread_bind)
  1013. conf->reserve_ncpus++;
  1014. _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables);
  1015. _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables);
  1016. _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables);
  1017. _starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables);
  1018. #ifdef STARPU_SIMGRID
  1019. if (conf->calibrate == 2)
  1020. {
  1021. _STARPU_DISP("Warning: History will be cleared due to calibrate or STARPU_CALIBRATE being set to 2. This will prevent simgrid from having task simulation times!");
  1022. }
  1023. if (conf->bus_calibrate)
  1024. {
  1025. _STARPU_DISP("Warning: Bus calibration will be cleared due to bus_calibrate or STARPU_BUS_CALIBRATE being set. This will prevent simgrid from having data transfer simulation times!");
  1026. }
  1027. #endif
  1028. _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker, conf->precedence_over_environment_variables);
  1029. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables);
  1030. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables);
  1031. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables);
  1032. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables);
  1033. _starpu_conf_set_value_against_environment("STARPU_MIN_PRIO", &conf->global_sched_ctx_min_priority, conf->precedence_over_environment_variables);
  1034. _starpu_conf_set_value_against_environment("STARPU_MAX_PRIO", &conf->global_sched_ctx_max_priority, conf->precedence_over_environment_variables);
  1035. _starpu_conf_set_value_against_environment("STARPU_CATCH_SIGNALS", &conf->catch_signals, conf->precedence_over_environment_variables);
  1036. }
  1037. struct starpu_tree* starpu_workers_get_tree(void)
  1038. {
  1039. return _starpu_config.topology.tree;
  1040. }
/* NORMAL_CHILD(obj): whether hwloc object <obj> should be followed when
 * walking obj->children in _fill_tree(). With hwloc API >= 2.0 every child
 * qualifies; with older versions only objects whose type is below
 * HWLOC_OBJ_BRIDGE do (the others are treated as I/O objects). */
#if HWLOC_API_VERSION >= 0x20000
#define NORMAL_CHILD(obj) 1
#else
#define NORMAL_CHILD(obj) ((obj)->type < HWLOC_OBJ_BRIDGE)
#endif
  1046. #ifdef STARPU_HAVE_HWLOC
  1047. static void _fill_tree(struct starpu_tree *tree, hwloc_obj_t curr_obj, unsigned depth, hwloc_topology_t topology, struct starpu_tree *father)
  1048. {
  1049. unsigned i, j;
  1050. unsigned arity;
  1051. #if HWLOC_API_VERSION >= 0x20000
  1052. arity = curr_obj->arity;
  1053. #else
  1054. arity = 0;
  1055. for(i = 0; i < curr_obj->arity; i++)
  1056. {
  1057. if (!NORMAL_CHILD(curr_obj->children[i]))
  1058. /* I/O stuff, stop caring */
  1059. break;
  1060. arity++;
  1061. }
  1062. #endif
  1063. if (arity == 1)
  1064. {
  1065. /* Nothing interestin here, skip level */
  1066. _fill_tree(tree, curr_obj->children[0], depth+1, topology, father);
  1067. return;
  1068. }
  1069. starpu_tree_insert(tree, curr_obj->logical_index, depth, curr_obj->type == HWLOC_OBJ_PU, arity, father);
  1070. starpu_tree_prepare_children(arity, tree);
  1071. j = 0;
  1072. for(i = 0; i < arity; i++)
  1073. {
  1074. hwloc_obj_t child = curr_obj->children[i];
  1075. if (!NORMAL_CHILD(child))
  1076. /* I/O stuff, stop caring (shouldn't happen, though) */
  1077. break;
  1078. #if 0
  1079. char string[128];
  1080. hwloc_obj_snprintf(string, sizeof(string), topology, child, "#", 0);
  1081. printf("%*s%s %d is_pu %d \n", 0, "", string, child->logical_index, child->type == HWLOC_OBJ_PU);
  1082. #endif
  1083. _fill_tree(&tree->nodes[j], child, depth+1, topology, tree);
  1084. j++;
  1085. }
  1086. }
  1087. #endif
  1088. static void _starpu_build_tree(void)
  1089. {
  1090. #ifdef STARPU_HAVE_HWLOC
  1091. struct starpu_tree *tree;
  1092. _STARPU_MALLOC(tree, sizeof(struct starpu_tree));
  1093. _starpu_config.topology.tree = tree;
  1094. hwloc_obj_t root = hwloc_get_root_obj(_starpu_config.topology.hwtopology);
  1095. #if 0
  1096. char string[128];
  1097. hwloc_obj_snprintf(string, sizeof(string), topology, root, "#", 0);
  1098. printf("%*s%s %d is_pu = %d \n", 0, "", string, root->logical_index, root->type == HWLOC_OBJ_PU);
  1099. #endif
  1100. /* level, is_pu, is in the tree (it will be true only after add) */
  1101. _fill_tree(tree, root, 0, _starpu_config.topology.hwtopology, NULL);
  1102. #endif
  1103. }
/* Held by _starpu_set_catch_signals() while it changes the catch_signals
 * setting and (un)installs the handlers. */
static starpu_pthread_mutex_t sig_handlers_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
/* Handlers that were in place before _starpu_catch_signals() installed
 * _starpu_handler for SIGINT, SIGSEGV and SIGTRAP respectively; NULL when
 * nothing was saved. _starpu_handler() reinstalls them (or SIG_DFL when NULL)
 * before re-raising the signal. */
static void (*act_sigint)(int);
static void (*act_sigsegv)(int);
static void (*act_sigtrap)(int);
  1108. void _starpu_handler(int sig)
  1109. {
  1110. #ifdef STARPU_VERBOSE
  1111. _STARPU_MSG("Catching signal '%d'\n", sig);
  1112. #endif
  1113. #ifdef STARPU_USE_FXT
  1114. _starpu_fxt_dump_file();
  1115. #endif
  1116. if (sig == SIGINT)
  1117. {
  1118. void (*sig_act)(int) = act_sigint;
  1119. if (sig_act == NULL)
  1120. sig_act = SIG_DFL;
  1121. signal(SIGINT, sig_act);
  1122. }
  1123. if (sig == SIGSEGV)
  1124. {
  1125. void (*sig_act)(int) = act_sigsegv;
  1126. if (sig_act == NULL)
  1127. sig_act = SIG_DFL;
  1128. signal(SIGSEGV, sig_act);
  1129. }
  1130. #ifdef SIGTRAP
  1131. if (sig == SIGTRAP)
  1132. {
  1133. void (*sig_act)(int) = act_sigtrap;
  1134. if (sig_act == NULL)
  1135. sig_act = SIG_DFL;
  1136. signal(SIGTRAP, sig_act);
  1137. }
  1138. #endif
  1139. #ifdef STARPU_VERBOSE
  1140. _STARPU_MSG("Rearming signal '%d'\n", sig);
  1141. #endif
  1142. raise(sig);
  1143. }
  1144. void _starpu_catch_signals(void)
  1145. {
  1146. if (_starpu_config.conf.catch_signals == 1)
  1147. {
  1148. static void (*old_sig_act)(int);
  1149. old_sig_act = signal(SIGINT, _starpu_handler);
  1150. if (old_sig_act != _starpu_handler)
  1151. act_sigint = old_sig_act;
  1152. old_sig_act = signal(SIGSEGV, _starpu_handler);
  1153. if (old_sig_act != _starpu_handler)
  1154. act_sigsegv = old_sig_act;
  1155. #ifdef SIGTRAP
  1156. old_sig_act = signal(SIGTRAP, _starpu_handler);
  1157. if (old_sig_act != _starpu_handler)
  1158. act_sigtrap = old_sig_act;
  1159. #endif
  1160. }
  1161. else
  1162. {
  1163. if (act_sigint != NULL)
  1164. {
  1165. signal(SIGINT, act_sigint);
  1166. act_sigint = NULL;
  1167. }
  1168. if (act_sigsegv != NULL)
  1169. {
  1170. signal(SIGSEGV, act_sigsegv);
  1171. act_sigsegv = NULL;
  1172. }
  1173. #ifdef SIGTRAP
  1174. if (act_sigtrap != NULL)
  1175. {
  1176. signal(SIGTRAP, act_sigtrap);
  1177. act_sigtrap = NULL;
  1178. }
  1179. #endif
  1180. }
  1181. }
  1182. void _starpu_set_catch_signals(int do_catch_signal)
  1183. {
  1184. STARPU_PTHREAD_MUTEX_LOCK(&sig_handlers_mutex);
  1185. _starpu_config.conf.catch_signals = do_catch_signal;
  1186. _starpu_catch_signals();
  1187. STARPU_PTHREAD_MUTEX_UNLOCK(&sig_handlers_mutex);
  1188. }
  1189. int _starpu_get_catch_signals(void)
  1190. {
  1191. return _starpu_config.conf.catch_signals;
  1192. }
/* Call the pre-initialization hook of each driver, in this order: CPU, CUDA,
 * OpenCL, MPI master-slave, disk. starpu_initialize() calls this to let
 * drivers register themselves. */
void starpu_drivers_preinit(void)
{
	_starpu_cpu_preinit();
	_starpu_cuda_preinit();
	_starpu_opencl_preinit();
	_starpu_mpi_ms_preinit();
	_starpu_disk_preinit();
}
  1201. int starpu_init(struct starpu_conf *user_conf)
  1202. {
  1203. return starpu_initialize(user_conf, NULL, NULL);
  1204. }
  1205. int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
  1206. {
  1207. int is_a_sink = 0; /* Always defined. If the MP infrastructure is not
  1208. * used, we cannot be a sink. */
  1209. unsigned worker;
  1210. #if !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MP)
  1211. (void)argc;
  1212. (void)argv;
  1213. #endif
  1214. /* This initializes _starpu_silent, thus needs to be early */
  1215. _starpu_util_init();
  1216. STARPU_HG_DISABLE_CHECKING(_starpu_worker_parallel_blocks);
  1217. #ifdef STARPU_SIMGRID
  1218. /* This initializes the simgrid thread library, thus needs to be early */
  1219. _starpu_simgrid_init_early(argc, argv);
  1220. #endif
  1221. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1222. while (initialized == CHANGING)
  1223. /* Wait for the other one changing it */
  1224. STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  1225. init_count++;
  1226. if (initialized == INITIALIZED)
  1227. {
  1228. /* He initialized it, don't do it again, and let the others get the mutex */
  1229. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1230. return 0;
  1231. }
  1232. /* initialized == UNINITIALIZED */
  1233. initialized = CHANGING;
  1234. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1235. #ifdef STARPU_USE_MP
  1236. _starpu_set_argc_argv(argc, argv);
  1237. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1238. if (_starpu_mpi_common_mp_init() == -ENODEV)
  1239. {
  1240. initialized = UNINITIALIZED;
  1241. return -ENODEV;
  1242. }
  1243. /* In MPI case we look at the rank to know if we are a sink */
  1244. if (!_starpu_mpi_common_is_src_node())
  1245. setenv("STARPU_SINK", "STARPU_MPI_MS", 1);
  1246. # endif
  1247. /* If StarPU was configured to use MP sinks, we have to control the
  1248. * kind on node we are running on : host or sink ? */
  1249. if (starpu_getenv("STARPU_SINK"))
  1250. is_a_sink = 1;
  1251. #endif /* STARPU_USE_MP */
  1252. int ret;
  1253. #ifdef STARPU_OPENMP
  1254. _starpu_omp_dummy_init();
  1255. #endif
  1256. #ifdef STARPU_SIMGRID
  1257. /* Warn when the lots of stacks malloc()-ated by simgrid for transfer
  1258. * processes will take a long time to get initialized */
  1259. char *perturb = starpu_getenv("MALLOC_PERTURB_");
  1260. if (perturb && perturb[0] && atoi(perturb) != 0)
  1261. _STARPU_DISP("Warning: MALLOC_PERTURB_ is set to non-zero, this makes simgrid run very slow\n");
  1262. #else
  1263. #ifdef __GNUC__
  1264. #ifndef __OPTIMIZE__
  1265. _STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
  1266. #endif
  1267. #endif
  1268. #ifdef STARPU_SPINLOCK_CHECK
  1269. _STARPU_DISP("Warning: StarPU was configured with --enable-spinlock-check, which slows down a bit\n");
  1270. #endif
  1271. #if 0
  1272. #ifndef STARPU_NO_ASSERT
  1273. _STARPU_DISP("Warning: StarPU was configured without --enable-fast\n");
  1274. #endif
  1275. #endif
  1276. #ifdef STARPU_MEMORY_STATS
  1277. _STARPU_DISP("Warning: StarPU was configured with --enable-memory-stats, which slows down a bit\n");
  1278. #endif
  1279. #ifdef STARPU_VERBOSE
  1280. _STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
  1281. #endif
  1282. #ifdef STARPU_USE_FXT
  1283. _STARPU_DISP("Warning: StarPU was configured with --with-fxt, which slows down a bit, limits scalability and makes worker initialization sequential\n");
  1284. #endif
  1285. #ifdef STARPU_FXT_LOCK_TRACES
  1286. _STARPU_DISP("Warning: StarPU was configured with --enable-fxt-lock, which slows down things a huge lot, and is really only meant for StarPU insides debugging. Did you really want to enable that?\n");
  1287. #endif
  1288. #ifdef STARPU_PERF_DEBUG
  1289. _STARPU_DISP("Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
  1290. #endif
  1291. #ifdef STARPU_MODEL_DEBUG
  1292. _STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
  1293. #endif
  1294. #ifdef __linux__
  1295. {
  1296. struct utsname buf;
  1297. if (uname(&buf) == 0
  1298. && (!strncmp(buf.release, "4.7.", 4)
  1299. || !strncmp(buf.release, "4.8.", 4)))
  1300. _STARPU_DISP("Warning: This system is running a 4.7 or 4.8 kernel. These have a severe scheduling performance regression issue, please upgrade to at least 4.9.\n");
  1301. }
  1302. #endif
  1303. #endif
  1304. if (starpu_getenv("STARPU_ENABLE_STATS"))
  1305. {
  1306. _STARPU_DISP("Warning: STARPU_ENABLE_STATS is enabled, which slows down a bit\n");
  1307. }
  1308. #ifndef STARPU_SIMGRID
  1309. if (starpu_get_env_number_default("STARPU_SIMGRID", 0))
  1310. {
  1311. _STARPU_DISP("Simulation mode requested, but this libstarpu was built without simgrid support, please recompile\n");
  1312. return -EINVAL;
  1313. }
  1314. #endif
  1315. #if defined(_WIN32) && !defined(__CYGWIN__)
  1316. WSADATA wsadata;
  1317. WSAStartup(MAKEWORD(1,0), &wsadata);
  1318. #endif
  1319. STARPU_AYU_PREINIT();
  1320. /* store the pointer to the user explicit configuration during the
  1321. * initialization */
  1322. if (user_conf == NULL)
  1323. starpu_conf_init(&_starpu_config.conf);
  1324. else
  1325. {
  1326. if (user_conf->magic != 42)
  1327. {
  1328. _STARPU_DISP("starpu_conf structure needs to be initialized with starpu_conf_init\n");
  1329. return -EINVAL;
  1330. }
  1331. _starpu_config.conf = *user_conf;
  1332. }
  1333. _starpu_conf_check_environment(&_starpu_config.conf);
  1334. /* Make a copy of arrays */
  1335. if (_starpu_config.conf.sched_policy_name)
  1336. _starpu_config.conf.sched_policy_name = strdup(_starpu_config.conf.sched_policy_name);
  1337. if (_starpu_config.conf.n_cuda_opengl_interoperability)
  1338. {
  1339. size_t size = _starpu_config.conf.n_cuda_opengl_interoperability * sizeof(*_starpu_config.conf.cuda_opengl_interoperability);
  1340. unsigned *copy;
  1341. _STARPU_MALLOC(copy, size);
  1342. memcpy(copy, _starpu_config.conf.cuda_opengl_interoperability, size);
  1343. _starpu_config.conf.cuda_opengl_interoperability = copy;
  1344. }
  1345. if (_starpu_config.conf.n_not_launched_drivers)
  1346. {
  1347. size_t size = _starpu_config.conf.n_not_launched_drivers * sizeof(*_starpu_config.conf.not_launched_drivers);
  1348. struct starpu_driver *copy;
  1349. _STARPU_MALLOC(copy, size);
  1350. memcpy(copy, _starpu_config.conf.not_launched_drivers, size);
  1351. _starpu_config.conf.not_launched_drivers = copy;
  1352. }
  1353. _starpu_sched_init();
  1354. _starpu_job_init();
  1355. _starpu_graph_init();
  1356. _starpu_init_all_sched_ctxs(&_starpu_config);
  1357. _starpu_init_progression_hooks();
  1358. _starpu_init_idle_hooks();
  1359. _starpu_init_tags();
  1360. #ifdef STARPU_USE_FXT
  1361. _starpu_fxt_init_profiling(_starpu_config.conf.trace_buffer_size);
  1362. #endif
  1363. _starpu_open_debug_logfile();
  1364. _starpu_data_interface_init();
  1365. _starpu_timing_init();
  1366. _starpu_load_bus_performance_files();
  1367. /* Let drivers register themselves */
  1368. starpu_drivers_preinit();
  1369. /* Note: nothing before here should be allocating anything, in case we
  1370. * actually return ENODEV here */
  1371. /* Depending on whether we are a MP sink or not, we must build the
  1372. * topology with MP nodes or not. */
  1373. ret = _starpu_build_topology(&_starpu_config, is_a_sink);
  1374. /* a sink does not exit even if no worker was discovered */
  1375. if (ret && !is_a_sink)
  1376. {
  1377. starpu_perfmodel_free_sampling();
  1378. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1379. init_count--;
  1380. _starpu_destroy_machine_config(&_starpu_config);
  1381. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1382. if (_starpu_mpi_common_is_mp_initialized())
  1383. _starpu_mpi_common_mp_deinit();
  1384. #endif
  1385. initialized = UNINITIALIZED;
  1386. /* Let somebody else try to do it */
  1387. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  1388. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1389. #ifdef STARPU_USE_FXT
  1390. _starpu_stop_fxt_profiling();
  1391. #endif
  1392. return ret;
  1393. }
  1394. _starpu_profiling_init();
  1395. _starpu_task_init();
  1396. for (worker = 0; worker < _starpu_config.topology.nworkers; worker++)
  1397. _starpu_worker_init(&_starpu_config.workers[worker], &_starpu_config);
  1398. //FIXME: find out if the variable STARPU_CHECK_ENTIRE_PLATFORM is really needed, for now, just set 1 as a default value
  1399. check_entire_platform = 1;//starpu_get_env_number("STARPU_CHECK_ENTIRE_PLATFORM");
  1400. _starpu_config.disable_kernels = starpu_get_env_number("STARPU_DISABLE_KERNELS");
  1401. STARPU_PTHREAD_KEY_CREATE(&_starpu_worker_key, NULL);
  1402. STARPU_PTHREAD_KEY_CREATE(&_starpu_worker_set_key, NULL);
  1403. _starpu_keys_initialized = 1;
  1404. STARPU_WMB();
  1405. _starpu_build_tree();
  1406. if (!is_a_sink)
  1407. {
  1408. struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&_starpu_config, _starpu_config.conf.sched_policy_name);
  1409. _starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init", (_starpu_config.conf.global_sched_ctx_min_priority != -1), _starpu_config.conf.global_sched_ctx_min_priority, (_starpu_config.conf.global_sched_ctx_max_priority != -1), _starpu_config.conf.global_sched_ctx_max_priority, 1, _starpu_config.conf.sched_policy_callback, NULL, 0, NULL, 0);
  1410. }
  1411. _starpu_initialize_registered_performance_models();
  1412. _starpu_perf_counter_init(&_starpu_config);
  1413. _starpu_perf_knob_init();
  1414. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  1415. _starpu_cuda_init();
  1416. #endif
  1417. #ifdef STARPU_SIMGRID
  1418. _starpu_simgrid_init();
  1419. #endif
  1420. /* Launch "basic" workers (ie. non-combined workers) */
  1421. if (!is_a_sink)
  1422. _starpu_launch_drivers(&_starpu_config);
  1423. /* Allocate swap, if any */
  1424. if (!is_a_sink)
  1425. _starpu_swap_init();
  1426. _starpu_watchdog_init();
  1427. _starpu_profiling_start();
  1428. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1429. initialized = INITIALIZED;
  1430. /* Tell everybody that we initialized */
  1431. STARPU_PTHREAD_COND_BROADCAST(&init_cond);
  1432. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1433. int main_thread_cpuid = starpu_get_env_number_default("STARPU_MAIN_THREAD_CPUID", -1);
  1434. int main_thread_coreid = starpu_get_env_number_default("STARPU_MAIN_THREAD_COREID", -1);
  1435. if (main_thread_cpuid >= 0 && main_thread_coreid >= 0)
  1436. {
  1437. _STARPU_DISP("Warning: STARPU_MAIN_THREAD_CPUID and STARPU_MAIN_THREAD_COREID cannot be set at the same time. STARPU_MAIN_THREAD_CPUID will be used.\n");
  1438. }
  1439. if (main_thread_cpuid == -1 && main_thread_coreid >= 0)
  1440. {
  1441. main_thread_cpuid = main_thread_coreid * _starpu_get_nhyperthreads();
  1442. }
  1443. int main_thread_bind = starpu_get_env_number_default("STARPU_MAIN_THREAD_BIND", 0);
  1444. int main_thread_activity = STARPU_NONACTIVETHREAD;
  1445. if (main_thread_bind)
  1446. {
  1447. main_thread_activity = STARPU_ACTIVETHREAD;
  1448. if (main_thread_cpuid == -1)
  1449. main_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0);
  1450. }
  1451. if (main_thread_cpuid >= 0)
  1452. _starpu_bind_thread_on_cpu(main_thread_cpuid, main_thread_activity, "main");
  1453. _STARPU_DEBUG("Initialisation finished\n");
  1454. #ifdef STARPU_USE_MP
  1455. /* Finally, if we are a MP sink, we never leave this function. Else,
  1456. * we enter an infinite event loop which listen for MP commands from
  1457. * the source. */
  1458. if (is_a_sink)
  1459. {
  1460. _starpu_sink_common_worker();
  1461. /* We should normally never leave the loop as we don't want to
  1462. * really initialize STARPU */
  1463. STARPU_ASSERT(0);
  1464. }
  1465. #endif
  1466. _starpu_catch_signals();
  1467. /* if MPI is enabled, binding display will be done later, after MPI initialization */
  1468. if (!_starpu_config.conf.will_use_mpi && starpu_get_env_number_default("STARPU_DISPLAY_BINDINGS", 0))
  1469. {
  1470. fprintf(stdout, "== Binding ==\n");
  1471. starpu_display_bindings();
  1472. fprintf(stdout, "== End of binding ==\n");
  1473. fflush(stdout);
  1474. }
  1475. return 0;
  1476. }
  1477. /*
  1478. * Handle runtime termination
  1479. */
  1480. static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
  1481. {
  1482. int status = 0;
  1483. unsigned workerid;
  1484. unsigned n;
  1485. starpu_wake_all_blocked_workers();
  1486. for (workerid = 0; workerid < pconfig->topology.nworkers; workerid++)
  1487. {
  1488. _STARPU_DEBUG("wait for worker %u\n", workerid);
  1489. struct _starpu_worker_set *set = pconfig->workers[workerid].set;
  1490. struct _starpu_worker *worker = &pconfig->workers[workerid];
  1491. /* in case StarPU termination code is called from a callback,
  1492. * we have to check if starpu_pthread_self() is the worker itself */
  1493. if (set && set->nworkers > 0)
  1494. {
  1495. if (set->started)
  1496. {
  1497. if (!starpu_pthread_equal(starpu_pthread_self(), set->worker_thread))
  1498. status = starpu_pthread_join(set->worker_thread, NULL);
  1499. if (status)
  1500. {
  1501. #ifdef STARPU_VERBOSE
  1502. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  1503. #endif
  1504. }
  1505. set->started = 0;
  1506. }
  1507. }
  1508. else
  1509. {
  1510. if (!worker->run_by_starpu)
  1511. goto out;
  1512. if (!starpu_pthread_equal(starpu_pthread_self(), worker->worker_thread))
  1513. status = starpu_pthread_join(worker->worker_thread, NULL);
  1514. if (status)
  1515. {
  1516. #ifdef STARPU_VERBOSE
  1517. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  1518. #endif
  1519. }
  1520. }
  1521. out:
  1522. STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks));
  1523. for (n = 0; n < worker->local_ordered_tasks_size; n++)
  1524. STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
  1525. _starpu_sched_ctx_list_delete(&worker->sched_ctx_list);
  1526. free(worker->local_ordered_tasks);
  1527. STARPU_ASSERT(_starpu_ctx_change_list_empty(&worker->ctx_change_list));
  1528. }
  1529. }
  1530. /* Condition variable and mutex used to pause/resume. */
  1531. static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER;
  1532. static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  1533. void _starpu_may_pause(void)
  1534. {
  1535. /* pause_depth is just protected by a memory barrier */
  1536. STARPU_RMB();
  1537. if (STARPU_UNLIKELY(_starpu_config.pause_depth > 0))
  1538. {
  1539. STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex);
  1540. if (_starpu_config.pause_depth > 0)
  1541. {
  1542. STARPU_PTHREAD_COND_WAIT(&pause_cond, &pause_mutex);
  1543. }
  1544. STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
  1545. }
  1546. }
  1547. void starpu_pause()
  1548. {
  1549. STARPU_HG_DISABLE_CHECKING(_starpu_config.pause_depth);
  1550. _starpu_config.pause_depth += 1;
  1551. starpu_fxt_trace_user_event_string("starpu_pause");
  1552. }
  1553. void starpu_resume()
  1554. {
  1555. STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex);
  1556. _starpu_config.pause_depth -= 1;
  1557. if (!_starpu_config.pause_depth)
  1558. {
  1559. STARPU_PTHREAD_COND_BROADCAST(&pause_cond);
  1560. }
  1561. STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
  1562. starpu_fxt_trace_user_event_string("starpu_resume");
  1563. }
  1564. int starpu_is_paused()
  1565. {
  1566. STARPU_RMB();
  1567. return _starpu_config.pause_depth > 0;
  1568. }
  1569. unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED)
  1570. {
  1571. #ifdef STARPU_NON_BLOCKING_DRIVERS
  1572. return 0;
  1573. #else
  1574. /* do not block if a sched_ctx change operation is pending */
  1575. if (worker->state_changing_ctx_notice)
  1576. return 0;
  1577. unsigned can_block = 1;
  1578. struct starpu_driver driver;
  1579. driver.type = worker->arch;
  1580. switch (driver.type)
  1581. {
  1582. case STARPU_CPU_WORKER:
  1583. driver.id.cpu_id = worker->devid;
  1584. break;
  1585. case STARPU_CUDA_WORKER:
  1586. driver.id.cuda_id = worker->devid;
  1587. break;
  1588. #ifdef STARPU_USE_OPENCL
  1589. case STARPU_OPENCL_WORKER:
  1590. starpu_opencl_get_device(worker->devid, &driver.id.opencl_id);
  1591. break;
  1592. #endif
  1593. default:
  1594. goto always_launch;
  1595. }
  1596. if (!_starpu_may_launch_driver(&_starpu_config.conf, &driver))
  1597. return 0;
  1598. always_launch:
  1599. #ifndef STARPU_SIMGRID
  1600. if (!_starpu_check_that_no_data_request_exists(memnode))
  1601. can_block = 0;
  1602. #endif
  1603. if (!_starpu_machine_is_running())
  1604. can_block = 0;
  1605. if (!_starpu_execute_registered_progression_hooks())
  1606. can_block = 0;
  1607. return can_block;
  1608. #endif
  1609. }
  1610. static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig)
  1611. {
  1612. /* set the flag which will tell workers to stop */
  1613. ANNOTATE_HAPPENS_AFTER(&_starpu_config.running);
  1614. pconfig->running = 0;
  1615. /* running is just protected by a memory barrier */
  1616. ANNOTATE_HAPPENS_BEFORE(&_starpu_config.running);
  1617. STARPU_WMB();
  1618. starpu_wake_all_blocked_workers();
  1619. }
  /* Display the bus profiling summary followed by the worker profiling summary. */
  void starpu_display_stats()
  {
      /* Bus first, then workers. */
      starpu_profiling_bus_helper_display_summary();
      starpu_profiling_worker_helper_display_summary();
  }
  1625. void starpu_shutdown(void)
  1626. {
  1627. unsigned worker;
  1628. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1629. init_count--;
  1630. STARPU_ASSERT_MSG(init_count >= 0, "Number of calls to starpu_shutdown() can not be higher than the number of calls to starpu_init()\n");
  1631. if (init_count)
  1632. {
  1633. _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
  1634. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1635. return;
  1636. }
  1637. /* We're last */
  1638. initialized = CHANGING;
  1639. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1640. /* If the workers are frozen, no progress can be made. */
  1641. STARPU_ASSERT_MSG(_starpu_config.pause_depth <= 0, "Did you forget to call starpu_resume before starpu_shutdown?");
  1642. starpu_task_wait_for_no_ready();
  1643. /* tell all workers to shutdown */
  1644. _starpu_kill_all_workers(&_starpu_config);
  1645. unsigned i;
  1646. unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count();
  1647. for (i=0; i<nb_numa_nodes; i++)
  1648. {
  1649. _starpu_free_all_automatically_allocated_buffers(i);
  1650. }
  1651. {
  1652. int stats = starpu_get_env_number("STARPU_STATS");
  1653. if (stats != 0)
  1654. {
  1655. _starpu_display_msi_stats(stderr);
  1656. _starpu_display_alloc_cache_stats(stderr);
  1657. }
  1658. }
  1659. starpu_profiling_bus_helper_display_summary();
  1660. starpu_profiling_worker_helper_display_summary();
  1661. starpu_bound_clear();
  1662. _starpu_deinitialize_registered_performance_models();
  1663. _starpu_watchdog_shutdown();
  1664. /* wait for their termination */
  1665. _starpu_terminate_workers(&_starpu_config);
  1666. {
  1667. int stats = starpu_get_env_number("STARPU_MEMORY_STATS");
  1668. if (stats != 0)
  1669. {
  1670. // Display statistics on data which have not been unregistered
  1671. starpu_data_display_memory_stats();
  1672. }
  1673. }
  1674. _starpu_delete_all_sched_ctxs();
  1675. _starpu_sched_component_workers_destroy();
  1676. for (worker = 0; worker < _starpu_config.topology.nworkers; worker++)
  1677. _starpu_worker_deinit(&_starpu_config.workers[worker]);
  1678. _starpu_profiling_terminate();
  1679. _starpu_disk_unregister();
  1680. #ifdef STARPU_HAVE_HWLOC
  1681. starpu_tree_free(_starpu_config.topology.tree);
  1682. free(_starpu_config.topology.tree);
  1683. #endif
  1684. _starpu_destroy_topology(&_starpu_config);
  1685. _starpu_initialized_combined_workers = 0;
  1686. #ifdef STARPU_USE_FXT
  1687. _starpu_stop_fxt_profiling();
  1688. #endif
  1689. _starpu_data_interface_shutdown();
  1690. _starpu_job_fini();
  1691. /* Drop all remaining tags */
  1692. _starpu_tag_clear();
  1693. #ifdef STARPU_OPENMP
  1694. _starpu_omp_dummy_shutdown();
  1695. #endif
  1696. _starpu_perf_knob_exit();
  1697. _starpu_perf_counter_exit();
  1698. _starpu_close_debug_logfile();
  1699. _starpu_keys_initialized = 0;
  1700. STARPU_PTHREAD_KEY_DELETE(_starpu_worker_key);
  1701. STARPU_PTHREAD_KEY_DELETE(_starpu_worker_set_key);
  1702. _starpu_task_deinit();
  1703. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1704. initialized = UNINITIALIZED;
  1705. /* Let someone else that wants to initialize it again do it */
  1706. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  1707. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1708. /* Clear memory */
  1709. free((char*) _starpu_config.conf.sched_policy_name);
  1710. if (_starpu_config.conf.n_cuda_opengl_interoperability)
  1711. free(_starpu_config.conf.cuda_opengl_interoperability);
  1712. if (_starpu_config.conf.n_not_launched_drivers)
  1713. free(_starpu_config.conf.not_launched_drivers);
  1714. STARPU_AYU_FINISH();
  1715. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  1716. if (_starpu_mpi_common_is_mp_initialized())
  1717. _starpu_mpi_common_mp_deinit();
  1718. #endif
  1719. _starpu_print_idle_time();
  1720. _STARPU_DEBUG("Shutdown finished\n");
  1721. #ifdef STARPU_SIMGRID
  1722. /* This finalizes the simgrid thread library, thus needs to be late */
  1723. _starpu_simgrid_deinit();
  1724. #endif
  1725. }
  1726. #undef starpu_worker_get_count
  1727. unsigned starpu_worker_get_count(void)
  1728. {
  1729. return _starpu_config.topology.nworkers;
  1730. }
  1731. unsigned starpu_worker_is_blocked_in_parallel(int workerid)
  1732. {
  1733. if (!_starpu_worker_parallel_blocks)
  1734. return 0;
  1735. int relax_own_observation_state = 0;
  1736. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  1737. STARPU_ASSERT(worker != NULL);
  1738. STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
  1739. struct _starpu_worker *cur_worker = NULL;
  1740. int cur_workerid = starpu_worker_get_id();
  1741. if (workerid != cur_workerid)
  1742. {
  1743. /* in order to observe the 'blocked' state of a worker from
  1744. * another worker, we must avoid race conditions between
  1745. * 'blocked' state changes and state observations. This is the
  1746. * purpose of this 'if' block. */
  1747. cur_worker = cur_workerid >= 0 ? _starpu_get_worker_struct(cur_workerid) : NULL;
  1748. relax_own_observation_state = (cur_worker != NULL) && (cur_worker->state_relax_refcnt == 0);
  1749. if (relax_own_observation_state && !worker->state_relax_refcnt)
  1750. {
  1751. /* moreover, when a worker (cur_worker != NULL)
  1752. * observes another worker, we need to take special
  1753. * care to avoid live locks, thus the observing worker
  1754. * must enter the relaxed state (if not relaxed
  1755. * already) before doing the observation in mutual
  1756. * exclusion */
  1757. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
  1758. STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cur_worker->sched_mutex);
  1759. cur_worker->state_relax_refcnt = 1;
  1760. STARPU_PTHREAD_COND_BROADCAST(&cur_worker->sched_cond);
  1761. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex);
  1762. STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
  1763. }
  1764. /* the observer waits for a safe window to observe the state,
  1765. * and also waits for any pending blocking state change
  1766. * requests to be processed, in order to not obtain an
  1767. * ephemeral information */
  1768. while (!worker->state_relax_refcnt
  1769. || worker->state_block_in_parallel_req
  1770. || worker->state_unblock_in_parallel_req)
  1771. {
  1772. STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
  1773. }
  1774. }
  1775. unsigned ret = _starpu_config.workers[workerid].state_blocked_in_parallel;
  1776. /* once a worker state has been observed, the worker is 'tainted' for the next one full sched_op,
  1777. * to avoid changing the observed worker state - on which the observer
  1778. * made a scheduling decision - after the fact. */
  1779. worker->state_blocked_in_parallel_observed = 1;
  1780. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
  1781. if (relax_own_observation_state)
  1782. {
  1783. STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cur_worker->sched_mutex);
  1784. cur_worker->state_relax_refcnt = 0;
  1785. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex);
  1786. }
  1787. return ret;
  1788. }
  1789. unsigned starpu_worker_is_slave_somewhere(int workerid)
  1790. {
  1791. starpu_worker_lock(workerid);
  1792. unsigned ret = _starpu_config.workers[workerid].is_slave_somewhere;
  1793. starpu_worker_unlock(workerid);
  1794. return ret;
  1795. }
  1796. int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
  1797. {
  1798. unsigned n = 0;
  1799. if (type != STARPU_ANY_WORKER)
  1800. {
  1801. if (type >= STARPU_NARCH)
  1802. return -EINVAL;
  1803. unsigned i;
  1804. for (i = 0; i < _starpu_config.topology.ndevices[type]; i++)
  1805. n += _starpu_config.topology.nworker[type][i];
  1806. return n;
  1807. }
  1808. for (type = 0; type < STARPU_NARCH; type++)
  1809. n += starpu_worker_get_count_by_type(type);
  1810. return n;
  1811. }
  1812. unsigned starpu_combined_worker_get_count(void)
  1813. {
  1814. return _starpu_config.topology.ncombinedworkers;
  1815. }
  1816. unsigned starpu_cpu_worker_get_count(void)
  1817. {
  1818. return starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
  1819. }
  1820. unsigned starpu_cuda_worker_get_count(void)
  1821. {
  1822. return starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
  1823. }
  1824. unsigned starpu_opencl_worker_get_count(void)
  1825. {
  1826. return starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER);
  1827. }
  1828. int starpu_asynchronous_copy_disabled(void)
  1829. {
  1830. return _starpu_config.conf.disable_asynchronous_copy;
  1831. }
  1832. int starpu_asynchronous_cuda_copy_disabled(void)
  1833. {
  1834. return _starpu_config.conf.disable_asynchronous_cuda_copy;
  1835. }
  1836. int starpu_asynchronous_opencl_copy_disabled(void)
  1837. {
  1838. return _starpu_config.conf.disable_asynchronous_opencl_copy;
  1839. }
  1840. int starpu_asynchronous_mpi_ms_copy_disabled(void)
  1841. {
  1842. return _starpu_config.conf.disable_asynchronous_mpi_ms_copy;
  1843. }
  1844. unsigned starpu_mpi_ms_worker_get_count(void)
  1845. {
  1846. return starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER);
  1847. }
  1848. /* When analyzing performance, it is useful to see what is the processing unit
  1849. * that actually performed the task. This function returns the id of the
  1850. * processing unit actually executing it, therefore it makes no sense to use it
  1851. * within the callbacks of SPU functions for instance. If called by some thread
  1852. * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
  1853. #undef starpu_worker_get_id
  1854. int starpu_worker_get_id(void)
  1855. {
  1856. struct _starpu_worker * worker;
  1857. worker = _starpu_get_local_worker_key();
  1858. if (worker)
  1859. {
  1860. return worker->workerid;
  1861. }
  1862. else
  1863. {
  1864. /* there is no worker associated to that thread, perhaps it is
  1865. * a thread from the application or this is some SPU worker */
  1866. return -1;
  1867. }
  1868. }
  1869. #define starpu_worker_get_id _starpu_worker_get_id
  #undef _starpu_worker_get_id_check
  /* Return the id of the calling worker, asserting that the caller actually
   * is a worker.  F and L are the file name and line number of the call site,
   * reported in the assertion message. */
  unsigned _starpu_worker_get_id_check(const char *f, int l)
  {
      int workerid;

      /* F and L may end up unused depending on how the assertion expands. */
      (void) f;
      (void) l;

      workerid = _starpu_worker_get_id();
      STARPU_ASSERT_MSG(workerid>=0, "%s:%d Cannot be called from outside a worker\n", f, l);
      return workerid;
  }
  1879. int starpu_combined_worker_get_id(void)
  1880. {
  1881. struct _starpu_worker *worker;
  1882. worker = _starpu_get_local_worker_key();
  1883. if (worker)
  1884. {
  1885. return worker->combined_workerid;
  1886. }
  1887. else
  1888. {
  1889. /* there is no worker associated to that thread, perhaps it is
  1890. * a thread from the application or this is some SPU worker */
  1891. return -1;
  1892. }
  1893. }
  1894. int starpu_combined_worker_get_size(void)
  1895. {
  1896. struct _starpu_worker *worker;
  1897. worker = _starpu_get_local_worker_key();
  1898. if (worker)
  1899. {
  1900. return worker->worker_size;
  1901. }
  1902. else
  1903. {
  1904. /* there is no worker associated to that thread, perhaps it is
  1905. * a thread from the application or this is some SPU worker */
  1906. return -1;
  1907. }
  1908. }
  1909. int starpu_combined_worker_get_rank(void)
  1910. {
  1911. struct _starpu_worker *worker;
  1912. worker = _starpu_get_local_worker_key();
  1913. if (worker)
  1914. {
  1915. return worker->current_rank;
  1916. }
  1917. else
  1918. {
  1919. /* there is no worker associated to that thread, perhaps it is
  1920. * a thread from the application or this is some SPU worker */
  1921. return -1;
  1922. }
  1923. }
  1924. int starpu_worker_get_subworkerid(int id)
  1925. {
  1926. return _starpu_config.workers[id].subworkerid;
  1927. }
  1928. int starpu_worker_get_devid(int id)
  1929. {
  1930. return _starpu_config.workers[id].devid;
  1931. }
  1932. unsigned starpu_worker_is_combined_worker(int id)
  1933. {
  1934. return id >= (int)_starpu_config.topology.nworkers;
  1935. }
  1936. struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
  1937. {
  1938. unsigned basic_worker_count = starpu_worker_get_count();
  1939. //_STARPU_DEBUG("basic_worker_count:%d\n",basic_worker_count);
  1940. STARPU_ASSERT(id >= basic_worker_count);
  1941. return &_starpu_config.combined_workers[id - basic_worker_count];
  1942. }
  1943. enum starpu_worker_archtype starpu_worker_get_type(int id)
  1944. {
  1945. return _starpu_config.workers[id].arch;
  1946. }
  1947. unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize)
  1948. {
  1949. unsigned nworkers = starpu_worker_get_count();
  1950. unsigned cnt = 0;
  1951. unsigned id;
  1952. for (id = 0; id < nworkers; id++)
  1953. {
  1954. if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
  1955. {
  1956. /* Perhaps the array is too small ? */
  1957. if (cnt >= maxsize)
  1958. return -ERANGE;
  1959. workerids[cnt++] = id;
  1960. }
  1961. }
  1962. return cnt;
  1963. }
  1964. int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
  1965. {
  1966. unsigned nworkers = starpu_worker_get_count();
  1967. int cnt = 0;
  1968. unsigned id;
  1969. for (id = 0; id < nworkers; id++)
  1970. {
  1971. if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
  1972. {
  1973. if (num == cnt)
  1974. return id;
  1975. cnt++;
  1976. }
  1977. }
  1978. /* Not found */
  1979. return -1;
  1980. }
  1981. int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid)
  1982. {
  1983. unsigned nworkers = starpu_worker_get_count();
  1984. unsigned id;
  1985. for (id = 0; id < nworkers; id++)
  1986. if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid)
  1987. return id;
  1988. /* Not found */
  1989. return -1;
  1990. }
  1991. int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num)
  1992. {
  1993. unsigned nworkers = starpu_worker_get_count();
  1994. int workerids[nworkers];
  1995. unsigned ndevice_workers = starpu_worker_get_ids_by_type(type, workerids, nworkers);
  1996. unsigned ndevids = 0;
  1997. if(ndevice_workers > 0)
  1998. {
  1999. unsigned id, devid;
  2000. int cnt = 0;
  2001. unsigned found = 0;
  2002. for(id = 0; id < ndevice_workers; id++)
  2003. {
  2004. int curr_devid;
  2005. curr_devid = _starpu_config.workers[workerids[id]].devid;
  2006. for(devid = 0; devid < ndevids; devid++)
  2007. {
  2008. if(curr_devid == devids[devid])
  2009. {
  2010. found = 1;
  2011. break;
  2012. }
  2013. }
  2014. if(!found)
  2015. {
  2016. devids[ndevids++] = curr_devid;
  2017. cnt++;
  2018. }
  2019. else
  2020. found = 0;
  2021. if(cnt == num)
  2022. break;
  2023. }
  2024. }
  2025. return ndevids;
  2026. }
  2027. void starpu_worker_get_name(int id, char *dst, size_t maxlen)
  2028. {
  2029. char *name = _starpu_config.workers[id].name;
  2030. snprintf(dst, maxlen, "%s", name);
  2031. }
  2032. int starpu_worker_get_bindid(int workerid)
  2033. {
  2034. return _starpu_config.workers[workerid].bindid;
  2035. }
  2036. int starpu_bindid_get_workerids(int bindid, int **workerids)
  2037. {
  2038. if (bindid >= (int) _starpu_config.nbindid)
  2039. return 0;
  2040. *workerids = _starpu_config.bindid_workers[bindid].workerids;
  2041. return _starpu_config.bindid_workers[bindid].nworkers;
  2042. }
  2043. int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type)
  2044. {
  2045. unsigned nworkers = starpu_worker_get_count();
  2046. int nw = 0;
  2047. unsigned id;
  2048. for (id = 0; id < nworkers; id++)
  2049. {
  2050. if (_starpu_config.workers[id].devid == devid &&
  2051. (type == STARPU_ANY_WORKER || _starpu_config.workers[id].arch == type))
  2052. workerids[nw++] = id;
  2053. }
  2054. return nw;
  2055. }
  2056. void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)
  2057. {
  2058. STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
  2059. *sched_cond = &_starpu_config.workers[workerid].sched_cond;
  2060. *sched_mutex = &_starpu_config.workers[workerid].sched_mutex;
  2061. }
  2062. /* returns 1 if the call results in initiating a transition of worker WORKERID
  2063. * from sleeping state to awake
  2064. * returns 0 if worker WORKERID is not sleeping or the wake-up transition
  2065. * already has been initiated
  2066. */
  2067. static int starpu_wakeup_worker_locked(int workerid, starpu_pthread_cond_t *sched_cond, starpu_pthread_mutex_t *mutex STARPU_ATTRIBUTE_UNUSED)
  2068. {
  2069. #ifdef STARPU_SIMGRID
  2070. starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[workerid]);
  2071. #endif
  2072. if (_starpu_config.workers[workerid].status == STATUS_SCHEDULING || _starpu_config.workers[workerid].status == STATUS_SLEEPING_SCHEDULING)
  2073. {
  2074. _starpu_config.workers[workerid].state_keep_awake = 1;
  2075. return 0;
  2076. }
  2077. else if (_starpu_config.workers[workerid].status == STATUS_SLEEPING)
  2078. {
  2079. int ret = 0;
  2080. if (_starpu_config.workers[workerid].state_keep_awake != 1)
  2081. {
  2082. _starpu_config.workers[workerid].state_keep_awake = 1;
  2083. ret = 1;
  2084. }
  2085. /* cond_broadcast is required over cond_signal since
  2086. * the condition is share for multiple purpose */
  2087. STARPU_PTHREAD_COND_BROADCAST(sched_cond);
  2088. return ret;
  2089. }
  2090. return 0;
  2091. }
  2092. static int starpu_wakeup_worker_no_relax(int workerid, starpu_pthread_cond_t *sched_cond, starpu_pthread_mutex_t *sched_mutex)
  2093. {
  2094. int success;
  2095. STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex);
  2096. success = starpu_wakeup_worker_locked(workerid, sched_cond, sched_mutex);
  2097. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex);
  2098. return success;
  2099. }
  2100. int starpu_wake_worker_locked(int workerid)
  2101. {
  2102. starpu_pthread_mutex_t *sched_mutex;
  2103. starpu_pthread_cond_t *sched_cond;
  2104. starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
  2105. return starpu_wakeup_worker_locked(workerid, sched_cond, sched_mutex);
  2106. }
  2107. int starpu_wake_worker_no_relax(int workerid)
  2108. {
  2109. starpu_pthread_mutex_t *sched_mutex;
  2110. starpu_pthread_cond_t *sched_cond;
  2111. starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
  2112. return starpu_wakeup_worker_no_relax(workerid, sched_cond, sched_mutex);
  2113. }
  2114. int _starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  2115. {
  2116. unsigned nworkers = starpu_worker_get_count();
  2117. int cnt = 0;
  2118. unsigned id;
  2119. for (id = 0; id < nworkers; id++)
  2120. {
  2121. if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
  2122. {
  2123. /* Perhaps the array is too small ? */
  2124. if (cnt >= maxsize)
  2125. return cnt;
  2126. workerids[cnt++] = id;
  2127. }
  2128. }
  2129. return cnt;
  2130. }
  2131. int _starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  2132. {
  2133. unsigned nworkers = starpu_worker_get_count();
  2134. int cnt = 0;
  2135. unsigned id;
  2136. for (id = 0; id < nworkers; id++)
  2137. {
  2138. if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type)
  2139. {
  2140. /* Perhaps the array is too small ? */
  2141. if (cnt >= maxsize)
  2142. return cnt;
  2143. unsigned found = 0;
  2144. int s;
  2145. for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++)
  2146. {
  2147. if(_starpu_config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
  2148. {
  2149. struct starpu_worker_collection *workers = _starpu_config.sched_ctxs[s].workers;
  2150. struct starpu_sched_ctx_iterator it;
  2151. workers->init_iterator(workers, &it);
  2152. while(workers->has_next(workers, &it))
  2153. {
  2154. unsigned worker = workers->get_next(workers, &it);
  2155. if(worker == id)
  2156. {
  2157. found = 1;
  2158. break;
  2159. }
  2160. }
  2161. if(found)
  2162. break;
  2163. }
  2164. }
  2165. if(!found)
  2166. workerids[cnt++] = id;
  2167. }
  2168. }
  2169. return cnt;
  2170. }
  2171. void starpu_get_version(int *major, int *minor, int *release)
  2172. {
  2173. *major = STARPU_MAJOR_VERSION;
  2174. *minor = STARPU_MINOR_VERSION;
  2175. *release = STARPU_RELEASE_VERSION;
  2176. }
  2177. unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
  2178. {
  2179. unsigned s = 0;
  2180. unsigned nsched_ctxs = _starpu_worker_get_nsched_ctxs(workerid);
  2181. _STARPU_MALLOC(*sched_ctxs, nsched_ctxs*sizeof(unsigned));
  2182. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  2183. struct _starpu_sched_ctx_elt *e = NULL;
  2184. struct _starpu_sched_ctx_list_iterator list_it;
  2185. _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it);
  2186. while (_starpu_sched_ctx_list_iterator_has_next(&list_it))
  2187. {
  2188. e = _starpu_sched_ctx_list_iterator_get_next(&list_it);
  2189. (*sched_ctxs)[s++] = e->sched_ctx;
  2190. }
  2191. return nsched_ctxs;
  2192. }
  2193. const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
  2194. {
  2195. const char *ret = starpu_driver_info[type].name_upper;
  2196. if (!ret)
  2197. ret = "unknown";
  2198. return ret;
  2199. }
  2200. const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
  2201. {
  2202. const char *ret = starpu_driver_info[type].name_var;
  2203. if (!ret)
  2204. ret = "UNKNOWN";
  2205. return ret;
  2206. }
  2207. void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx)
  2208. {
  2209. STARPU_ASSERT(workerid < starpu_worker_get_count());
  2210. struct _starpu_worker *w = _starpu_get_worker_struct(workerid);
  2211. w->stream_ctx = sched_ctx;
  2212. }
  2213. struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid)
  2214. {
  2215. if (stream_workerid >= starpu_worker_get_count())
  2216. return NULL;
  2217. struct _starpu_worker *w = _starpu_get_worker_struct(stream_workerid);
  2218. return w->stream_ctx;
  2219. }
  2220. unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid)
  2221. {
  2222. if (stream_workerid >= starpu_worker_get_count())
  2223. return STARPU_NMAX_SCHED_CTXS;
  2224. struct _starpu_worker *w = _starpu_get_worker_struct(stream_workerid);
  2225. return w->stream_ctx != NULL ? w->stream_ctx->id : STARPU_NMAX_SCHED_CTXS;
  2226. }
  2227. void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type)
  2228. {
  2229. int nworkers = starpu_worker_get_count_by_type(type);
  2230. if (nworkers <= 0)
  2231. {
  2232. fprintf(output, "No %s worker\n", starpu_worker_get_type_as_string(type));
  2233. }
  2234. else
  2235. {
  2236. int i, ids[nworkers];
  2237. starpu_worker_get_ids_by_type(type, ids, nworkers);
  2238. fprintf(output, "%d %s worker%s:\n", nworkers, starpu_worker_get_type_as_string(type), nworkers==1?"":"s");
  2239. for(i = 0; i < nworkers; i++)
  2240. {
  2241. char name[256];
  2242. starpu_worker_get_name(ids[i], name, 256);
  2243. fprintf(output, "\t%s\n", name);
  2244. }
  2245. }
  2246. }
  2247. void _starpu_worker_refuse_task(struct _starpu_worker *worker, struct starpu_task *task)
  2248. {
  2249. if (worker->pipeline_length || worker->arch == STARPU_OPENCL_WORKER)
  2250. {
  2251. int j;
  2252. for (j = 0; j < worker->ntasks; j++)
  2253. {
  2254. const int j_mod = (j+worker->first_task)%STARPU_MAX_PIPELINE;
  2255. if (task == worker->current_tasks[j_mod])
  2256. {
  2257. worker->current_tasks[j_mod] = NULL;
  2258. if (j == 0)
  2259. {
  2260. worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE;
  2261. worker->current_task = NULL;
  2262. _starpu_set_current_task(NULL);
  2263. }
  2264. break;
  2265. }
  2266. }
  2267. STARPU_ASSERT(j<worker->ntasks);
  2268. }
  2269. else
  2270. {
  2271. worker->current_task = NULL;
  2272. _starpu_set_current_task(NULL);
  2273. }
  2274. worker->ntasks--;
  2275. task->prefetched = 0;
  2276. int res = _starpu_push_task_to_workers(task);
  2277. STARPU_ASSERT_MSG(res == 0, "_starpu_push_task_to_workers() unexpectedly returned = %d\n", res);
  2278. }
  /* Public entry point forwarding to _starpu_worker_sched_op_pending() and
   * returning its result unchanged. */
  int starpu_worker_sched_op_pending(void)
  {
      const int pending = _starpu_worker_sched_op_pending();

      return pending;
  }
  #undef starpu_worker_relax_on
  /* Function form of starpu_worker_relax_on: forwards to
   * _starpu_worker_relax_on(). The preceding #undef presumably removes a
   * macro of the same name from a header so a real symbol can be defined. */
  void starpu_worker_relax_on(void)
  {
      _starpu_worker_relax_on();
  }
  #undef starpu_worker_relax_off
  /* Function form of starpu_worker_relax_off: forwards to
   * _starpu_worker_relax_off(). The preceding #undef presumably removes a
   * macro of the same name from a header so a real symbol can be defined. */
  void starpu_worker_relax_off(void)
  {
      _starpu_worker_relax_off();
  }
  #undef starpu_worker_get_relax_state
  /* Function form of starpu_worker_get_relax_state: returns the value of
   * _starpu_worker_get_relax_state() unchanged. */
  int starpu_worker_get_relax_state(void)
  {
      const int state = _starpu_worker_get_relax_state();

      return state;
  }
  #undef starpu_worker_lock
  /* Function form of starpu_worker_lock: forwards `workerid` to
   * _starpu_worker_lock(). */
  void starpu_worker_lock(int workerid)
  {
      _starpu_worker_lock(workerid);
  }
  #undef starpu_worker_trylock
  /* Function form of starpu_worker_trylock: forwards `workerid` to
   * _starpu_worker_trylock() and returns its result unchanged. */
  int starpu_worker_trylock(int workerid)
  {
      const int rc = _starpu_worker_trylock(workerid);

      return rc;
  }
  #undef starpu_worker_unlock
  /* Function form of starpu_worker_unlock: forwards `workerid` to
   * _starpu_worker_unlock(). */
  void starpu_worker_unlock(int workerid)
  {
      _starpu_worker_unlock(workerid);
  }
  #undef starpu_worker_lock_self
  /* Function form of starpu_worker_lock_self: forwards to
   * _starpu_worker_lock_self(). */
  void starpu_worker_lock_self(void)
  {
      _starpu_worker_lock_self();
  }
  #undef starpu_worker_unlock_self
  /* Function form of starpu_worker_unlock_self: forwards to
   * _starpu_worker_unlock_self(). */
  void starpu_worker_unlock_self(void)
  {
      _starpu_worker_unlock_self();
  }
  #undef starpu_wake_worker_relax
  /* Function form of starpu_wake_worker_relax: forwards `workerid` to
   * _starpu_wake_worker_relax() and returns its result unchanged. */
  int starpu_wake_worker_relax(int workerid)
  {
      const int rc = _starpu_wake_worker_relax(workerid);

      return rc;
  }
  2328. #ifdef STARPU_HAVE_HWLOC
  2329. hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid)
  2330. {
  2331. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  2332. return hwloc_bitmap_dup(worker->hwloc_cpu_set);
  2333. }
  2334. hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid)
  2335. {
  2336. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  2337. return worker->hwloc_obj;
  2338. }
  2339. #endif
  2340. /* Light version of _starpu_wake_worker_relax, which, when possible,
  2341. * speculatively sets keep_awake on the target worker without waiting that
  2342. * worker to enter the relaxed state.
  2343. */
  2344. int starpu_wake_worker_relax_light(int workerid)
  2345. {
  2346. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  2347. STARPU_ASSERT(worker != NULL);
  2348. int cur_workerid = starpu_worker_get_id();
  2349. if (workerid != cur_workerid)
  2350. {
  2351. starpu_worker_relax_on();
  2352. STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
  2353. while (!worker->state_relax_refcnt)
  2354. {
  2355. /* Attempt a fast path if the worker is not really asleep */
  2356. if (_starpu_config.workers[workerid].status == STATUS_SCHEDULING
  2357. || _starpu_config.workers[workerid].status == STATUS_SLEEPING_SCHEDULING)
  2358. {
  2359. _starpu_config.workers[workerid].state_keep_awake = 1;
  2360. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
  2361. starpu_worker_relax_off();
  2362. return 1;
  2363. }
  2364. STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
  2365. }
  2366. }
  2367. else
  2368. {
  2369. STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex);
  2370. }
  2371. int ret = starpu_wake_worker_locked(workerid);
  2372. STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
  2373. if (workerid != cur_workerid)
  2374. {
  2375. starpu_worker_relax_off();
  2376. }
  2377. return ret;
  2378. }
  2379. #ifdef STARPU_WORKER_CALLBACKS
  2380. void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid))
  2381. {
  2382. STARPU_ASSERT(_starpu_config.conf.callback_worker_going_to_sleep);
  2383. _starpu_config.conf.callback_worker_going_to_sleep = callback;
  2384. }
  2385. void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
  2386. {
  2387. STARPU_ASSERT(_starpu_config.conf.callback_worker_waking_up);
  2388. _starpu_config.conf.callback_worker_waking_up = callback;
  2389. }
  2390. #endif
  2391. enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
  2392. {
  2393. enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
  2394. STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
  2395. return kind;
  2396. }