  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2014 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2010, 2011 Institut National de Recherche en Informatique et Automatique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. * Copyright (C) 2011-2012 INRIA
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/progress_hook.h>
  25. #include <core/workers.h>
  26. #include <core/debug.h>
  27. #include <core/disk.h>
  28. #include <core/task.h>
  29. #include <datawizard/malloc.h>
  30. #include <profiling/profiling.h>
  31. #include <starpu_task_list.h>
  32. #include <drivers/mp_common/sink_common.h>
  33. #include <drivers/scc/driver_scc_common.h>
  34. #include <drivers/cpu/driver_cpu.h>
  35. #include <drivers/cuda/driver_cuda.h>
  36. #include <drivers/opencl/driver_opencl.h>
  37. #ifdef STARPU_SIMGRID
  38. #include <msg/msg.h>
  39. #include <core/simgrid.h>
  40. #endif
  41. #if defined(_WIN32) && !defined(__CYGWIN__)
  42. #include <windows.h>
  43. #endif
  44. /* acquire/release semantic for concurrent initialization/de-initialization */
  45. static starpu_pthread_mutex_t init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  46. static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER;
  47. static int init_count = 0;
  48. static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
  49. static starpu_pthread_key_t worker_key;
  50. static struct _starpu_machine_config config;
/* Pointers to argc and argv */
static int *my_argc = NULL;
static char ***my_argv = NULL;
/* Record the addresses of argc and argv; called when the process begins */
  57. void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
  58. {
  59. my_argc = argc_param;
  60. my_argv = argv_param;
  61. }
  62. int *_starpu_get_argc()
  63. {
  64. return my_argc;
  65. }
  66. char ***_starpu_get_argv()
  67. {
  68. return my_argv;
  69. }
  70. int _starpu_is_initialized(void)
  71. {
  72. return initialized == INITIALIZED;
  73. }
  74. struct _starpu_machine_config *_starpu_get_machine_config(void)
  75. {
  76. return &config;
  77. }
/* Checks whether at least one of the workers of type <arch> can execute
* <task>, for at least one of its implementations. */
  80. static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
  81. enum starpu_worker_archtype arch)
  82. {
  83. int i;
  84. _starpu_codelet_check_deprecated_fields(task->cl);
  85. struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
  86. struct starpu_worker_collection *workers = sched_ctx->workers;
  87. struct starpu_sched_ctx_iterator it;
  88. workers->init_iterator(workers, &it);
  89. while(workers->has_next(workers, &it))
  90. {
  91. i = workers->get_next(workers, &it);
  92. if (starpu_worker_get_type(i) != arch)
  93. continue;
  94. unsigned impl;
  95. for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
  96. {
  97. /* We could call task->cl->can_execute(i, task, impl)
  98. here, it would definitely work. It is probably
  99. cheaper to check whether it is necessary in order to
  100. avoid a useless function call, though. */
  101. unsigned test_implementation = 0;
  102. switch (arch)
  103. {
  104. case STARPU_CPU_WORKER:
  105. if (task->cl->cpu_funcs[impl] != NULL)
  106. test_implementation = 1;
  107. break;
  108. case STARPU_CUDA_WORKER:
  109. if (task->cl->cuda_funcs[impl] != NULL)
  110. test_implementation = 1;
  111. break;
  112. case STARPU_OPENCL_WORKER:
  113. if (task->cl->opencl_funcs[impl] != NULL)
  114. test_implementation = 1;
  115. break;
  116. case STARPU_MIC_WORKER:
  117. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mic_funcs[impl] != NULL)
  118. test_implementation = 1;
  119. break;
  120. case STARPU_SCC_WORKER:
  121. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->scc_funcs[impl] != NULL)
  122. test_implementation = 1;
  123. break;
  124. default:
  125. STARPU_ABORT();
  126. }
if (!test_implementation)
break;
/* Only report success for this worker if the can_execute hook (when the
* codelet provides one) accepts this worker/implementation pair; otherwise
* keep searching the remaining workers and implementations. */
if (task->cl->can_execute)
{
if (task->cl->can_execute(i, task, impl))
return 1;
}
else
return 1;
}
  134. }
  135. return 0;
  136. }
/* Called when a task is submitted, to check whether some worker
may execute the task or not */
  139. uint32_t _starpu_worker_exists(struct starpu_task *task)
  140. {
  141. _starpu_codelet_check_deprecated_fields(task->cl);
/* If the task belongs to the initial context, we can simply check the
worker mask of the whole machine; otherwise we have to iterate over the
workers of the context and check whether one of them can execute the task */
  146. if(task->sched_ctx == 0)
  147. {
  148. if (!(task->cl->where & config.worker_mask))
  149. return 0;
  150. if (!task->cl->can_execute)
  151. return 1;
  152. }
  153. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  154. if ((task->cl->where & STARPU_CPU) &&
  155. _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
  156. return 1;
  157. #endif
  158. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  159. if ((task->cl->where & STARPU_CUDA) &&
  160. _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
  161. return 1;
  162. #endif
  163. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  164. if ((task->cl->where & STARPU_OPENCL) &&
  165. _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
  166. return 1;
  167. #endif
  168. #ifdef STARPU_USE_MIC
  169. if ((task->cl->where & STARPU_MIC) &&
  170. _starpu_worker_exists_and_can_execute(task, STARPU_MIC_WORKER))
  171. return 1;
  172. #endif
  173. #ifdef STARPU_USE_SCC
  174. if ((task->cl->where & STARPU_SCC) &&
  175. _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
  176. return 1;
  177. #endif
  178. return 0;
  179. }
  180. uint32_t _starpu_can_submit_cuda_task(void)
  181. {
  182. return (STARPU_CUDA & config.worker_mask);
  183. }
  184. uint32_t _starpu_can_submit_cpu_task(void)
  185. {
  186. return (STARPU_CPU & config.worker_mask);
  187. }
  188. uint32_t _starpu_can_submit_opencl_task(void)
  189. {
  190. return (STARPU_OPENCL & config.worker_mask);
  191. }
  192. uint32_t _starpu_can_submit_scc_task(void)
  193. {
  194. return (STARPU_SCC & config.worker_mask);
  195. }
  196. static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
  197. {
  198. switch(arch)
  199. {
  200. case STARPU_ANY_WORKER:
  201. {
  202. int cpu_func_enabled=1, cuda_func_enabled=1, opencl_func_enabled=1;
  203. /* TODO: MIC/SCC */
  204. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  205. starpu_cpu_func_t cpu_func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  206. cpu_func_enabled = cpu_func != NULL && starpu_cpu_worker_get_count();
  207. #endif
  208. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  209. starpu_cuda_func_t cuda_func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  210. cuda_func_enabled = cuda_func != NULL && starpu_cuda_worker_get_count();
  211. #endif
  212. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  213. starpu_opencl_func_t opencl_func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  214. opencl_func_enabled = opencl_func != NULL && starpu_opencl_worker_get_count();
  215. #endif
  216. return (cpu_func_enabled && cuda_func_enabled && opencl_func_enabled);
  217. }
  218. case STARPU_CPU_WORKER:
  219. {
  220. starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  221. return func != NULL;
  222. }
  223. case STARPU_CUDA_WORKER:
  224. {
  225. starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  226. return func != NULL;
  227. }
  228. case STARPU_OPENCL_WORKER:
  229. {
  230. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  231. return func != NULL;
  232. }
  233. case STARPU_MIC_WORKER:
  234. {
  235. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
  236. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  237. return func != NULL || func_name != NULL;
  238. }
  239. case STARPU_SCC_WORKER:
  240. {
  241. starpu_scc_func_t func = _starpu_task_get_scc_nth_implementation(cl, nimpl);
  242. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  243. return func != NULL || func_name != NULL;
  244. }
  245. default:
  246. STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch);
  247. }
  248. return 0;
  249. }
  250. int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  251. {
  252. struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
/* if the worker is master of a child (parallel) context, only hand it
tasks that can actually be run in parallel */
unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx->id);
if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS)
if(!task->possibly_parallel) return 0;
  257. /* if the worker is blocked in a parallel ctx don't submit tasks on it */
  258. if(sched_ctx->parallel_sect[workerid] ) return 0;
  259. /* TODO: check that the task operand sizes will fit on that device */
  260. return (task->cl->where & config.workers[workerid].worker_mask) &&
  261. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
  262. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
  263. }
  264. int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask)
  265. {
  266. struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
  267. unsigned mask;
  268. int i;
  269. enum starpu_worker_archtype arch;
  270. struct starpu_codelet *cl;
  271. if(sched_ctx->parallel_sect[workerid]) return 0;
  272. /* TODO: check that the task operand sizes will fit on that device */
  273. cl = task->cl;
  274. if (!(cl->where & config.workers[workerid].worker_mask)) return 0;
  275. mask = 0;
  276. arch = config.workers[workerid].arch;
  277. if (!task->cl->can_execute)
  278. {
  279. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  280. if (_starpu_can_use_nth_implementation(arch, cl, i)) {
  281. mask |= 1U << i;
  282. if (!impl_mask)
  283. break;
  284. }
  285. } else {
  286. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  287. if (_starpu_can_use_nth_implementation(arch, cl, i)
  288. && (!task->cl->can_execute || task->cl->can_execute(workerid, task, i))) {
  289. mask |= 1U << i;
  290. if (!impl_mask)
  291. break;
  292. }
  293. }
  294. if (impl_mask)
  295. *impl_mask = mask;
  296. return mask != 0;
  297. }
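/* Illustrative usage sketch (not part of this file): a scheduling policy can
 * call starpu_worker_can_execute_task_impl() to obtain, in a single call, the
 * set of implementations a given worker may run for a task:
 *
 *   unsigned impl_mask;
 *   if (starpu_worker_can_execute_task_impl(workerid, task, &impl_mask))
 *   {
 *       unsigned impl;
 *       for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
 *           if (impl_mask & (1U << impl))
 *               ; // this implementation is eligible on that worker
 *   }
 */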
  298. int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl)
  299. {
  300. struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
  301. int i;
  302. enum starpu_worker_archtype arch;
  303. struct starpu_codelet *cl;
  304. if(sched_ctx->parallel_sect[workerid]) return 0;
  305. /* TODO: check that the task operand sizes will fit on that device */
  306. cl = task->cl;
  307. if (!(cl->where & config.workers[workerid].worker_mask)) return 0;
  308. arch = config.workers[workerid].arch;
  309. if (!task->cl->can_execute)
  310. {
  311. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  312. if (_starpu_can_use_nth_implementation(arch, cl, i)) {
  313. if (nimpl)
  314. *nimpl = i;
  315. return 1;
  316. }
  317. } else {
  318. for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
  319. if (_starpu_can_use_nth_implementation(arch, cl, i)
  320. && (!task->cl->can_execute || task->cl->can_execute(workerid, task, i))) {
  321. if (nimpl)
  322. *nimpl = i;
  323. return 1;
  324. }
  325. }
  326. return 0;
  327. }
  328. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  329. {
  330. /* TODO: check that the task operand sizes will fit on that device */
  331. struct starpu_codelet *cl = task->cl;
  332. unsigned nworkers = config.topology.nworkers;
  333. /* Is this a parallel worker ? */
  334. if (workerid < nworkers)
  335. {
  336. return !!((task->cl->where & config.workers[workerid].worker_mask) &&
  337. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
  338. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
  339. }
  340. else
  341. {
  342. if ((cl->type == STARPU_SPMD)
  343. #ifdef STARPU_HAVE_HWLOC
  344. || (cl->type == STARPU_FORKJOIN)
  345. #else
  346. #ifdef __GLIBC__
  347. || (cl->type == STARPU_FORKJOIN)
  348. #endif
  349. #endif
  350. )
  351. {
  352. /* TODO we should add other types of constraints */
  353. /* Is the worker larger than requested ? */
  354. int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
  355. int worker0 = config.combined_workers[workerid - nworkers].combined_workerid[0];
  356. return !!((worker_size <= task->cl->max_parallelism) &&
  357. _starpu_can_use_nth_implementation(config.workers[worker0].arch, task->cl, nimpl) &&
  358. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)));
  359. }
  360. else
  361. {
  362. /* We have a sequential task but a parallel worker */
  363. return 0;
  364. }
  365. }
  366. }
  367. /*
  368. * Runtime initialization methods
  369. */
  370. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  371. static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS];
  372. #endif
  373. #ifdef STARPU_USE_MIC
  374. static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
  375. #endif
  376. static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
  377. {
  378. starpu_pthread_cond_t *cond = &workerarg->sched_cond;
  379. starpu_pthread_mutex_t *mutex = &workerarg->sched_mutex;
  380. unsigned memory_node = workerarg->memory_node;
  381. _starpu_memory_node_register_condition(cond, mutex, memory_node);
  382. }
  383. /*
  384. * Returns 0 if the given driver is one of the drivers that must be launched by
  385. * the application itself, and not by StarPU, 1 otherwise.
  386. */
  387. static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
  388. struct starpu_driver *d)
  389. {
  390. if (conf->n_not_launched_drivers == 0 ||
  391. conf->not_launched_drivers == NULL)
  392. return 1;
  393. /* Is <d> in conf->not_launched_drivers ? */
  394. unsigned i;
  395. for (i = 0; i < conf->n_not_launched_drivers; i++)
  396. {
  397. if (d->type != conf->not_launched_drivers[i].type)
  398. continue;
  399. switch (d->type)
  400. {
case STARPU_CPU_WORKER:
if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
return 0;
break;
case STARPU_CUDA_WORKER:
  405. if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
  406. return 0;
  407. break;
  408. #ifdef STARPU_USE_OPENCL
  409. case STARPU_OPENCL_WORKER:
  410. if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
  411. return 0;
  412. break;
  413. #endif
  414. default:
  415. STARPU_ABORT();
  416. }
  417. }
  418. return 1;
  419. }
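/* Usage sketch (application side, hedged): to run a driver itself, an
 * application lists it in conf.not_launched_drivers before starpu_init() and
 * then drives it through the starpu_driver API; starpu_driver_run() is the
 * assumed entry point for that loop:
 *
 *   struct starpu_driver drivers[] = { { .type = STARPU_CUDA_WORKER, .id.cuda_id = 0 } };
 *   conf.not_launched_drivers = drivers;
 *   conf.n_not_launched_drivers = 1;
 *   starpu_init(&conf);
 *   starpu_driver_run(&drivers[0]); // runs the driver loop until termination is requested
 */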
  420. #ifdef STARPU_PERF_DEBUG
  421. struct itimerval prof_itimer;
  422. #endif
  423. static void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig)
  424. {
  425. workerarg->config = pconfig;
  426. STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
  427. /* arch initialized by topology.c */
  428. /* worker_mask initialized by topology.c */
  429. /* perf_arch initialized by topology.c */
  430. /* worker_thread initialized by _starpu_launch_drivers */
  431. /* devid initialized by topology.c */
  432. /* subworkerid initialized by topology.c */
  433. /* bindid initialized by topology.c */
  434. /* workerid initialized by topology.c */
  435. workerarg->combined_workerid = workerarg->workerid;
  436. workerarg->current_rank = 0;
  437. workerarg->worker_size = 1;
  438. STARPU_PTHREAD_COND_INIT(&workerarg->started_cond, NULL);
  439. STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
  440. /* memory_node initialized by topology.c */
  441. STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
  442. STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
  443. starpu_task_list_init(&workerarg->local_tasks);
  444. workerarg->local_ordered_tasks = NULL;
  445. workerarg->local_ordered_tasks_size = 0;
  446. workerarg->current_ordered_task = 0;
  447. workerarg->current_ordered_task_order = 1;
  448. workerarg->current_task = NULL;
  449. workerarg->first_task = 0;
  450. workerarg->ntasks = 0;
  451. workerarg->pipeline_length = 0;
  452. workerarg->pipeline_stuck = 0;
  453. workerarg->set = NULL;
  454. /* if some codelet's termination cannot be handled directly :
  455. * for instance in the Gordon driver, Gordon tasks' callbacks
  456. * may be executed by another thread than that of the Gordon
  457. * driver so that we cannot call the push_codelet_output method
  458. * directly */
  459. workerarg->terminated_jobs = _starpu_job_list_new();
  460. workerarg->worker_is_running = 0;
  461. workerarg->worker_is_initialized = 0;
  462. workerarg->status = STATUS_INITIALIZING;
  463. /* name initialized by driver */
  464. /* short_name initialized by driver */
  465. workerarg->run_by_starpu = 1;
  466. workerarg->sched_ctx_list = NULL;
  467. workerarg->tmp_sched_ctx = -1;
  468. workerarg->nsched_ctxs = 0;
  469. _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);
  470. workerarg->has_prev_init = 0;
  471. int ctx;
  472. for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
  473. workerarg->removed_from_ctx[ctx] = 0;
  474. workerarg->spinning_backoff = 1;
  475. for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
  476. {
  477. workerarg->shares_tasks_lists[ctx] = 0;
  478. workerarg->poped_in_ctx[ctx] = 0;
  479. }
  480. workerarg->reverse_phase[0] = 0;
  481. workerarg->reverse_phase[1] = 0;
  482. workerarg->pop_ctx_priority = 1;
  483. workerarg->sched_mutex_locked = 0;
  484. workerarg->slave = 0;
  485. /* cpu_set/hwloc_cpu_set initialized in topology.c */
  486. }
  487. void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key)
  488. {
  489. (void) fut_key;
  490. int devid = worker->devid;
  491. (void) devid;
  492. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  493. setitimer(ITIMER_PROF, &prof_itimer, NULL);
  494. #endif
  495. #ifdef STARPU_USE_FXT
  496. _starpu_fxt_register_thread(worker->bindid);
  497. unsigned memnode = worker->memory_node;
  498. _STARPU_TRACE_WORKER_INIT_START(fut_key, worker->workerid, devid, memnode);
  499. #endif
  500. _starpu_bind_thread_on_cpu(worker->config, worker->bindid);
  501. _STARPU_DEBUG("worker %p %d for dev %d is ready on logical cpu %d\n", worker, worker->workerid, devid, worker->bindid);
  502. #ifdef STARPU_HAVE_HWLOC
  503. _STARPU_DEBUG("worker %p %d cpuset start at %d\n", worker, worker->workerid, hwloc_bitmap_first(worker->hwloc_cpu_set));
  504. #endif
  505. _starpu_memory_node_set_local_key(&worker->memory_node);
  506. _starpu_set_local_worker_key(worker);
  507. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  508. worker->worker_is_running = 1;
  509. STARPU_PTHREAD_COND_SIGNAL(&worker->started_cond);
  510. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  511. }
  512. static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
  513. {
  514. pconfig->running = 1;
  515. pconfig->pause_depth = 0;
  516. pconfig->submitting = 1;
  517. STARPU_HG_DISABLE_CHECKING(pconfig->watchdog_ok);
  518. unsigned nworkers = pconfig->topology.nworkers;
  519. /* Launch workers asynchronously */
  520. unsigned worker, i;
  521. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  522. /* Get itimer of the main thread, to set it for the worker threads */
  523. getitimer(ITIMER_PROF, &prof_itimer);
  524. #endif
  525. #ifdef HAVE_AYUDAME_H
  526. if (AYU_event) AYU_event(AYU_INIT, 0, NULL);
  527. #endif
  528. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  529. for (i = 0; i < sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0]); i++)
  530. cuda_worker_set[i].workers = NULL;
  531. #endif
  532. #ifdef STARPU_USE_MIC
  533. for (i = 0; i < sizeof(mic_worker_set)/sizeof(mic_worker_set[0]); i++)
  534. mic_worker_set[i].workers = NULL;
  535. #endif
  536. for (worker = 0; worker < nworkers; worker++)
  537. {
  538. struct _starpu_worker *workerarg = &pconfig->workers[worker];
  539. #if defined(STARPU_USE_MIC) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  540. unsigned devid = workerarg->devid;
  541. #endif
  542. _STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers);
  543. _starpu_init_worker_queue(workerarg);
  544. struct starpu_driver driver;
  545. driver.type = workerarg->arch;
  546. switch (workerarg->arch)
  547. {
  548. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  549. case STARPU_CPU_WORKER:
  550. driver.id.cpu_id = workerarg->devid;
  551. if (_starpu_may_launch_driver(pconfig->conf, &driver))
  552. {
  553. STARPU_PTHREAD_CREATE_ON(
  554. workerarg->name,
  555. &workerarg->worker_thread,
  556. NULL,
  557. _starpu_cpu_worker,
  558. workerarg,
  559. worker+1);
  560. #ifdef STARPU_USE_FXT
  561. /* In tracing mode, make sure the
  562. * thread is really started before
  563. * starting another one, to make sure
  564. * they appear in order in the trace.
  565. */
  566. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  567. while (!workerarg->worker_is_running)
  568. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  569. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  570. #endif
  571. }
  572. else
  573. {
  574. workerarg->run_by_starpu = 0;
  575. }
  576. break;
  577. #endif
  578. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  579. case STARPU_CUDA_WORKER:
  580. driver.id.cuda_id = workerarg->devid;
  581. workerarg->set = &cuda_worker_set[devid];
  582. /* We spawn only one thread per CUDA device,
  583. * which will control all CUDA workers of this
  584. * device. (by using a worker set). */
  585. if (cuda_worker_set[devid].workers)
  586. break;
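/* STARPU_NWORKER_PER_CUDA (default 1) sets how many workers of this
per-device set feed the same CUDA device. */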
  587. cuda_worker_set[devid].nworkers = starpu_get_env_number_default("STARPU_NWORKER_PER_CUDA", 1);
  588. cuda_worker_set[devid].workers = workerarg;
  589. cuda_worker_set[devid].set_is_initialized = 0;
  590. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  591. {
  592. workerarg->run_by_starpu = 0;
  593. break;
  594. }
  595. STARPU_PTHREAD_CREATE_ON(
  596. workerarg->name,
  597. &cuda_worker_set[devid].worker_thread,
  598. NULL,
  599. _starpu_cuda_worker,
  600. &cuda_worker_set[devid],
  601. worker+1);
  602. #ifdef STARPU_USE_FXT
  603. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  604. while (!workerarg->worker_is_running)
  605. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  606. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  607. #endif
  608. STARPU_PTHREAD_MUTEX_LOCK(&cuda_worker_set[devid].mutex);
  609. while (!cuda_worker_set[devid].set_is_initialized)
  610. STARPU_PTHREAD_COND_WAIT(&cuda_worker_set[devid].ready_cond,
  611. &cuda_worker_set[devid].mutex);
  612. STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_worker_set[devid].mutex);
  613. cuda_worker_set[devid].started = 1;
  614. break;
  615. #endif
  616. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  617. case STARPU_OPENCL_WORKER:
  618. #ifndef STARPU_SIMGRID
  619. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  620. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  621. {
  622. workerarg->run_by_starpu = 0;
  623. break;
  624. }
  625. #endif
  626. STARPU_PTHREAD_CREATE_ON(
  627. workerarg->name,
  628. &workerarg->worker_thread,
  629. NULL,
  630. _starpu_opencl_worker,
  631. workerarg,
  632. worker+1);
  633. #ifdef STARPU_USE_FXT
  634. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  635. while (!workerarg->worker_is_running)
  636. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  637. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  638. #endif
  639. break;
  640. #endif
  641. #ifdef STARPU_USE_MIC
  642. case STARPU_MIC_WORKER:
  643. workerarg->set = &mic_worker_set[devid];
  644. /* We spawn only one thread
  645. * per MIC device, which will control all MIC
  646. * workers of this device. (by using a worker set). */
  647. if (mic_worker_set[devid].workers)
  648. break;
  649. mic_worker_set[devid].nworkers = pconfig->topology.nmiccores[devid];
  650. /* We assume all MIC workers of a given MIC
  651. * device are contiguous so that we can
  652. * address them with the first one only. */
  653. mic_worker_set[devid].workers = workerarg;
  654. mic_worker_set[devid].set_is_initialized = 0;
  655. STARPU_PTHREAD_CREATE_ON(
  656. workerarg->name,
  657. &mic_worker_set[devid].worker_thread,
  658. NULL,
  659. _starpu_mic_src_worker,
  660. &mic_worker_set[devid],
  661. worker+1);
  662. #ifdef STARPU_USE_FXT
  663. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  664. while (!workerarg->worker_is_running)
  665. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  666. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  667. #endif
  668. STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[devid].mutex);
  669. while (!mic_worker_set[devid].set_is_initialized)
  670. STARPU_PTHREAD_COND_WAIT(&mic_worker_set[devid].ready_cond,
  671. &mic_worker_set[devid].mutex);
  672. STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[devid].mutex);
  673. mic_worker_set[devid].started = 1;
  674. break;
  675. #endif /* STARPU_USE_MIC */
  676. #ifdef STARPU_USE_SCC
  677. case STARPU_SCC_WORKER:
  678. workerarg->worker_is_initialized = 0;
  679. STARPU_PTHREAD_CREATE_ON(
  680. workerarg->name,
  681. &workerarg->worker_thread,
  682. NULL,
  683. _starpu_scc_src_worker,
  684. workerarg,
  685. worker+1);
  686. #ifdef STARPU_USE_FXT
  687. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  688. while (!workerarg->worker_is_running)
  689. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  690. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  691. #endif
  692. break;
  693. #endif
  694. default:
  695. STARPU_ABORT();
  696. }
  697. }
  698. for (worker = 0; worker < nworkers; worker++)
  699. {
  700. struct _starpu_worker *workerarg = &pconfig->workers[worker];
  701. struct starpu_driver driver;
  702. driver.type = workerarg->arch;
  703. switch (workerarg->arch)
  704. {
  705. case STARPU_CPU_WORKER:
  706. driver.id.cpu_id = workerarg->devid;
  707. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  708. break;
  709. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  710. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  711. while (!workerarg->worker_is_initialized)
  712. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  713. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  714. break;
  715. case STARPU_CUDA_WORKER:
  716. /* Already waited above */
  717. break;
  718. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  719. case STARPU_OPENCL_WORKER:
  720. #ifndef STARPU_SIMGRID
  721. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  722. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  723. break;
  724. #endif
  725. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  726. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  727. while (!workerarg->worker_is_initialized)
  728. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  729. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  730. break;
  731. #endif
  732. case STARPU_MIC_WORKER:
  733. /* Already waited above */
  734. break;
  735. case STARPU_SCC_WORKER:
  736. /* TODO: implement may_launch? */
  737. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  738. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  739. while (!workerarg->worker_is_initialized)
  740. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  741. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  742. break;
  743. default:
  744. STARPU_ABORT();
  745. }
  746. }
  747. _STARPU_DEBUG("finished launching drivers\n");
  748. }
  749. void _starpu_set_local_worker_key(struct _starpu_worker *worker)
  750. {
  751. STARPU_PTHREAD_SETSPECIFIC(worker_key, worker);
  752. }
  753. struct _starpu_worker *_starpu_get_local_worker_key(void)
  754. {
  755. return (struct _starpu_worker *) STARPU_PTHREAD_GETSPECIFIC(worker_key);
  756. }
  757. /* Initialize the starpu_conf with default values */
  758. int starpu_conf_init(struct starpu_conf *conf)
  759. {
  760. if (!conf)
  761. return -EINVAL;
  762. memset(conf, 0, sizeof(*conf));
  763. conf->magic = 42;
  764. conf->sched_policy_name = getenv("STARPU_SCHED");
  765. conf->sched_policy = NULL;
  766. /* Note that starpu_get_env_number returns -1 in case the variable is
  767. * not defined */
  768. /* Backward compatibility: check the value of STARPU_NCPUS if
  769. * STARPU_NCPU is not set. */
  770. conf->ncpus = starpu_get_env_number("STARPU_NCPU");
  771. if (conf->ncpus == -1)
  772. conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
  773. conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
  774. conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
  775. conf->nmic = starpu_get_env_number("STARPU_NMIC");
  776. conf->nscc = starpu_get_env_number("STARPU_NSCC");
  777. conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
  778. conf->bus_calibrate = starpu_get_env_number("STARPU_BUS_CALIBRATE");
  779. conf->mic_sink_program_path = getenv("STARPU_MIC_PROGRAM_PATH");
  780. if (conf->calibrate == -1)
  781. conf->calibrate = 0;
  782. if (conf->bus_calibrate == -1)
  783. conf->bus_calibrate = 0;
  784. conf->use_explicit_workers_bindid = 0; /* TODO */
  785. conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
  786. conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
  787. conf->use_explicit_workers_mic_deviceid = 0; /* TODO */
  788. conf->use_explicit_workers_scc_deviceid = 0; /* TODO */
  789. conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
  790. if (conf->single_combined_worker == -1)
  791. conf->single_combined_worker = 0;
  792. #if defined(STARPU_DISABLE_ASYNCHRONOUS_COPY)
  793. conf->disable_asynchronous_copy = 1;
  794. #else
  795. conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
  796. if (conf->disable_asynchronous_copy == -1)
  797. conf->disable_asynchronous_copy = 0;
  798. #endif
  799. #if defined(STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY)
  800. conf->disable_asynchronous_cuda_copy = 1;
  801. #else
  802. conf->disable_asynchronous_cuda_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY");
  803. if (conf->disable_asynchronous_cuda_copy == -1)
  804. conf->disable_asynchronous_cuda_copy = 0;
  805. #endif
  806. #if defined(STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY)
  807. conf->disable_asynchronous_opencl_copy = 1;
  808. #else
  809. conf->disable_asynchronous_opencl_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY");
  810. if (conf->disable_asynchronous_opencl_copy == -1)
  811. conf->disable_asynchronous_opencl_copy = 0;
  812. #endif
  813. #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
  814. conf->disable_asynchronous_mic_copy = 1;
  815. #else
  816. conf->disable_asynchronous_mic_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY");
  817. if (conf->disable_asynchronous_mic_copy == -1)
  818. conf->disable_asynchronous_mic_copy = 0;
  819. #endif
  820. /* 64MiB by default */
  821. conf->trace_buffer_size = starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64) << 20;
  822. return 0;
  823. }
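/* Minimal usage sketch (illustrative, not part of this file): applications are
 * expected to initialize a starpu_conf with starpu_conf_init() before
 * overriding selected fields and passing it to starpu_init():
 *
 *   struct starpu_conf conf;
 *   starpu_conf_init(&conf);
 *   conf.ncpus = 4;                   // e.g. cap the number of CPU workers
 *   conf.sched_policy_name = "eager"; // pick a scheduling policy by name
 *   if (starpu_init(&conf) != 0)
 *       ; // handle the initialization error
 *   ...
 *   starpu_shutdown();
 */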
  824. static void _starpu_conf_set_value_against_environment(char *name, int *value)
  825. {
  826. int number;
  827. number = starpu_get_env_number(name);
  828. if (number != -1)
  829. {
  830. *value = number;
  831. }
  832. }
  833. void _starpu_conf_check_environment(struct starpu_conf *conf)
  834. {
  835. char *sched = getenv("STARPU_SCHED");
  836. if (sched)
  837. {
  838. conf->sched_policy_name = sched;
  839. }
  840. _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus);
  841. _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus);
  842. _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda);
  843. _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl);
  844. _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate);
  845. _starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate);
  846. _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
  847. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
  848. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy);
  849. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy);
  850. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy);
  851. }
  852. struct starpu_tree* starpu_workers_get_tree(void)
  853. {
  854. return config.topology.tree;
  855. }
  856. #ifdef STARPU_HAVE_HWLOC
  857. static void _fill_tree(struct starpu_tree *tree, hwloc_obj_t curr_obj, unsigned depth, hwloc_topology_t topology)
  858. {
  859. unsigned i;
  860. for(i = 0; i < curr_obj->arity; i++)
  861. {
  862. starpu_tree_insert(tree->nodes[i], curr_obj->children[i]->logical_index, depth, curr_obj->children[i]->type == HWLOC_OBJ_PU, curr_obj->children[i]->arity, tree);
  863. /* char string[128]; */
  864. /* hwloc_obj_snprintf(string, sizeof(string), topology, curr_obj->children[i], "#", 0); */
  865. /* printf("%*s%s %d is_pu %d \n", 0, "", string, curr_obj->children[i]->logical_index, curr_obj->children[i]->type == HWLOC_OBJ_PU); */
  866. _fill_tree(tree->nodes[i], curr_obj->children[i], depth+1, topology);
  867. }
  868. }
  869. #endif
  870. static void _starpu_build_tree(void)
  871. {
  872. #ifdef STARPU_HAVE_HWLOC
  873. struct starpu_tree* tree = (struct starpu_tree*)malloc(sizeof(struct starpu_tree));
  874. config.topology.tree = tree;
  875. hwloc_obj_t root = hwloc_get_root_obj(config.topology.hwtopology);
  876. /* char string[128]; */
  877. /* hwloc_obj_snprintf(string, sizeof(string), topology, root, "#", 0); */
  878. /* printf("%*s%s %d is_pu = %d \n", 0, "", string, root->logical_index, root->type == HWLOC_OBJ_PU); */
  879. /* level, is_pu, is in the tree (it will be true only after add*/
  880. starpu_tree_insert(tree, root->logical_index, 0,root->type == HWLOC_OBJ_PU, root->arity, NULL);
  881. _fill_tree(tree, root, 1, config.topology.hwtopology);
  882. #endif
  883. }
  884. int starpu_init(struct starpu_conf *user_conf)
  885. {
  886. return starpu_initialize(user_conf, NULL, NULL);
  887. }
  888. int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
  889. {
  890. int is_a_sink = 0; /* Always defined. If the MP infrastructure is not
  891. * used, we cannot be a sink. */
  892. unsigned worker;
  893. #ifdef STARPU_USE_MP
  894. _starpu_set_argc_argv(argc, argv);
  895. # ifdef STARPU_USE_SCC
  896. /* In SCC case we look at the rank to know if we are a sink */
  897. if (_starpu_scc_common_mp_init() && !_starpu_scc_common_is_src_node())
  898. setenv("STARPU_SINK", "STARPU_SCC", 1);
  899. # endif
/* If StarPU was configured to use MP sinks, we have to determine which
* kind of node we are running on: host or sink. */
  902. if (getenv("STARPU_SINK"))
  903. is_a_sink = 1;
  904. #else
  905. (void)argc;
  906. (void)argv;
  907. #endif /* STARPU_USE_MP */
  908. int ret;
  909. #ifdef STARPU_OPENMP
  910. _starpu_omp_dummy_init();
  911. #endif
  912. #ifdef STARPU_SIMGRID
  913. _starpu_simgrid_init();
/* Warn when the many stacks malloc()ed by SimGrid for transfer
* processes will take a long time to get initialized */
  916. if (getenv("MALLOC_PERTURB_"))
  917. _STARPU_DISP("Warning: MALLOC_PERTURB_ is set, this makes simgrid runs very slow\n");
  918. #else
  919. #ifdef __GNUC__
  920. #ifndef __OPTIMIZE__
  921. _STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
  922. #endif
  923. #endif
  924. #ifdef STARPU_SPINLOCK_CHECK
  925. _STARPU_DISP("Warning: StarPU was configured with --enable-spinlock-check, which slows down a bit\n");
  926. #endif
  927. #if 0
  928. #ifndef STARPU_NO_ASSERT
  929. _STARPU_DISP("Warning: StarPU was configured without --enable-fast\n");
  930. #endif
  931. #endif
  932. #ifdef STARPU_MEMORY_STATS
  933. _STARPU_DISP("Warning: StarPU was configured with --enable-memory-stats, which slows down a bit\n");
  934. #endif
  935. #ifdef STARPU_VERBOSE
  936. _STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
  937. #endif
  938. #ifdef STARPU_USE_FXT
  939. _STARPU_DISP("Warning: StarPU was configured with --with-fxt, which slows down a bit\n");
  940. #endif
  941. #ifdef STARPU_PERF_DEBUG
  942. _STARPU_DISP("Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
  943. #endif
  944. #ifdef STARPU_MODEL_DEBUG
  945. _STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
  946. #endif
  947. #ifdef STARPU_ENABLE_STATS
  948. _STARPU_DISP("Warning: StarPU was configured with --enable-stats, which slows down a bit\n");
  949. #endif
  950. #endif
  951. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  952. while (initialized == CHANGING)
  953. /* Wait for the other one changing it */
  954. STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  955. init_count++;
  956. if (initialized == INITIALIZED)
  957. {
/* Somebody else already initialized it; don't do it again, and let the others get the mutex */
  959. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  960. return 0;
  961. }
  962. /* initialized == UNINITIALIZED */
  963. initialized = CHANGING;
  964. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  965. #if defined(_WIN32) && !defined(__CYGWIN__)
  966. WSADATA wsadata;
  967. WSAStartup(MAKEWORD(1,0), &wsadata);
  968. #endif
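/* Fixed seed so that rand()-based behaviour is reproducible from one run to
the next. */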
  969. srand(2008);
  970. #ifdef HAVE_AYUDAME_H
  971. #ifndef AYU_RT_STARPU
  972. /* Dumb value for now */
  973. #define AYU_RT_STARPU 32
  974. #endif
  975. if (AYU_event)
  976. {
  977. enum ayu_runtime_t ayu_rt = AYU_RT_STARPU;
  978. AYU_event(AYU_PREINIT, 0, (void*) &ayu_rt);
  979. }
  980. #endif
  981. /* store the pointer to the user explicit configuration during the
  982. * initialization */
  983. if (user_conf == NULL)
  984. {
  985. struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
  986. starpu_conf_init(conf);
  987. config.conf = conf;
  988. config.default_conf = 1;
  989. }
  990. else
  991. {
  992. if (user_conf->magic != 42)
  993. {
  994. _STARPU_DISP("starpu_conf structure needs to be initialized with starpu_conf_init\n");
  995. return -EINVAL;
  996. }
  997. config.conf = user_conf;
  998. config.default_conf = 0;
  999. }
  1000. _starpu_conf_check_environment(config.conf);
  1001. _starpu_init_all_sched_ctxs(&config);
  1002. _starpu_init_progression_hooks();
  1003. _starpu_init_tags();
  1004. #ifdef STARPU_USE_FXT
  1005. _starpu_init_fxt_profiling(config.conf->trace_buffer_size);
  1006. #endif
  1007. _starpu_open_debug_logfile();
  1008. _starpu_data_interface_init();
  1009. _starpu_timing_init();
  1010. _starpu_profiling_init();
  1011. _starpu_load_bus_performance_files();
  1012. /* Depending on whether we are a MP sink or not, we must build the
  1013. * topology with MP nodes or not. */
  1014. ret = _starpu_build_topology(&config, is_a_sink);
  1015. if (ret)
  1016. {
  1017. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1018. init_count--;
  1019. #ifdef STARPU_USE_SCC
  1020. if (_starpu_scc_common_is_mp_initialized())
  1021. _starpu_scc_src_mp_deinit();
  1022. #endif
  1023. initialized = UNINITIALIZED;
  1024. /* Let somebody else try to do it */
  1025. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  1026. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1027. return ret;
  1028. }
  1029. /* We need to store the current task handled by the different
  1030. * threads */
  1031. _starpu_initialize_current_task_key();
  1032. for (worker = 0; worker < config.topology.nworkers; worker++)
  1033. _starpu_worker_init(&config.workers[worker], &config);
  1034. STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);
  1035. _starpu_build_tree();
  1036. if (!is_a_sink)
  1037. {
  1038. struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&config, config.conf->sched_policy_name);
  1039. _starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init", 0, 0, 0, 0, 1);
  1040. }
  1041. _starpu_initialize_registered_performance_models();
  1042. /* Launch "basic" workers (ie. non-combined workers) */
  1043. if (!is_a_sink)
  1044. _starpu_launch_drivers(&config);
  1045. _starpu_watchdog_init();
  1046. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1047. initialized = INITIALIZED;
  1048. /* Tell everybody that we initialized */
  1049. STARPU_PTHREAD_COND_BROADCAST(&init_cond);
  1050. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1051. _STARPU_DEBUG("Initialisation finished\n");
  1052. #ifdef STARPU_USE_MP
/* Finally, if we are an MP sink, we never leave this function: we enter
* an infinite event loop which listens for MP commands coming from the
* source. */
  1056. if (is_a_sink) {
  1057. _starpu_sink_common_worker();
  1058. /* We should normally never leave the loop as we don't want to
  1059. * really initialize STARPU */
  1060. STARPU_ASSERT(0);
  1061. }
  1062. #endif
  1063. return 0;
  1064. }
  1065. /*
  1066. * Handle runtime termination
  1067. */
  1068. static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
  1069. {
  1070. int status = 0;
  1071. unsigned workerid;
  1072. unsigned n;
  1073. for (workerid = 0; workerid < pconfig->topology.nworkers; workerid++)
  1074. {
  1075. starpu_wake_all_blocked_workers();
  1076. _STARPU_DEBUG("wait for worker %u\n", workerid);
  1077. struct _starpu_worker_set *set = pconfig->workers[workerid].set;
  1078. struct _starpu_worker *worker = &pconfig->workers[workerid];
  1079. /* in case StarPU termination code is called from a callback,
  1080. * we have to check if pthread_self() is the worker itself */
  1081. if (set)
  1082. {
  1083. if (set->started)
  1084. {
  1085. #ifdef STARPU_SIMGRID
  1086. status = starpu_pthread_join(set->worker_thread, NULL);
  1087. #else
  1088. if (!pthread_equal(pthread_self(), set->worker_thread))
  1089. status = starpu_pthread_join(set->worker_thread, NULL);
  1090. #endif
  1091. if (status)
  1092. {
  1093. #ifdef STARPU_VERBOSE
  1094. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  1095. #endif
  1096. }
  1097. set->started = 0;
  1098. }
  1099. }
  1100. else
  1101. {
  1102. if (!worker->run_by_starpu)
  1103. goto out;
  1104. #ifdef STARPU_SIMGRID
  1105. status = starpu_pthread_join(worker->worker_thread, NULL);
  1106. #else
  1107. if (!pthread_equal(pthread_self(), worker->worker_thread))
  1108. status = starpu_pthread_join(worker->worker_thread, NULL);
  1109. #endif
  1110. if (status)
  1111. {
  1112. #ifdef STARPU_VERBOSE
  1113. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  1114. #endif
  1115. }
  1116. }
  1117. out:
  1118. STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
  1119. for (n = 0; n < worker->local_ordered_tasks_size; n++)
  1120. STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL);
  1121. _starpu_sched_ctx_list_delete(&worker->sched_ctx_list);
  1122. _starpu_job_list_delete(worker->terminated_jobs);
  1123. free(worker->local_ordered_tasks);
  1124. }
  1125. }
  1126. /* Condition variable and mutex used to pause/resume. */
  1127. static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER;
  1128. static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  1129. void _starpu_may_pause(void)
  1130. {
  1131. /* pause_depth is just protected by a memory barrier */
  1132. STARPU_RMB();
  1133. if (STARPU_UNLIKELY(config.pause_depth > 0)) {
  1134. STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex);
  1135. if (config.pause_depth > 0) {
  1136. STARPU_PTHREAD_COND_WAIT(&pause_cond, &pause_mutex);
  1137. }
  1138. STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
  1139. }
  1140. }
  1141. unsigned _starpu_machine_is_running(void)
  1142. {
  1143. unsigned ret;
  1144. /* running is just protected by a memory barrier */
  1145. STARPU_RMB();
  1146. ANNOTATE_HAPPENS_AFTER(&config.running);
  1147. ret = config.running;
  1148. ANNOTATE_HAPPENS_BEFORE(&config.running);
  1149. return ret;
  1150. }
  1151. void starpu_pause()
  1152. {
  1153. STARPU_HG_DISABLE_CHECKING(config.pause_depth);
  1154. config.pause_depth += 1;
  1155. }
  1156. void starpu_resume()
  1157. {
  1158. STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex);
  1159. config.pause_depth -= 1;
  1160. if (!config.pause_depth) {
  1161. STARPU_PTHREAD_COND_BROADCAST(&pause_cond);
  1162. }
  1163. STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
  1164. }
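/* Usage sketch (application side): starpu_pause()/starpu_resume() are
 * reference counted, so nested calls only resume execution once every pause
 * has been matched:
 *
 *   starpu_pause();  // workers stop picking up new tasks
 *   ...              // e.g. submit a large batch of tasks without them starting
 *   starpu_resume(); // workers resume execution
 */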
  1165. unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED)
  1166. {
  1167. #ifdef STARPU_NON_BLOCKING_DRIVERS
  1168. return 0;
  1169. #else
  1170. unsigned can_block = 1;
  1171. struct starpu_driver driver;
  1172. driver.type = worker->arch;
  1173. switch (driver.type)
  1174. {
  1175. case STARPU_CPU_WORKER:
  1176. driver.id.cpu_id = worker->devid;
  1177. break;
  1178. case STARPU_CUDA_WORKER:
  1179. driver.id.cuda_id = worker->devid;
  1180. break;
  1181. case STARPU_OPENCL_WORKER:
  1182. starpu_opencl_get_device(worker->devid, &driver.id.opencl_id);
  1183. break;
  1184. default:
  1185. goto always_launch;
  1186. }
  1187. if (!_starpu_may_launch_driver(config.conf, &driver))
  1188. return 0;
  1189. always_launch:
  1190. #ifndef STARPU_SIMGRID
  1191. if (!_starpu_check_that_no_data_request_exists(memnode))
  1192. can_block = 0;
  1193. #endif
  1194. if (!_starpu_machine_is_running())
  1195. can_block = 0;
  1196. if (!_starpu_execute_registered_progression_hooks())
  1197. can_block = 0;
  1198. return can_block;
  1199. #endif
  1200. }
  1201. static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig)
  1202. {
  1203. /* set the flag which will tell workers to stop */
  1204. ANNOTATE_HAPPENS_AFTER(&config.running);
  1205. pconfig->running = 0;
  1206. /* running is just protected by a memory barrier */
  1207. ANNOTATE_HAPPENS_BEFORE(&config.running);
  1208. STARPU_WMB();
  1209. starpu_wake_all_blocked_workers();
  1210. }
  1211. void starpu_display_stats()
  1212. {
  1213. starpu_profiling_bus_helper_display_summary();
  1214. starpu_profiling_worker_helper_display_summary();
  1215. }
void starpu_shutdown(void)
{
	STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
	init_count--;
	if (init_count)
	{
		_STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
		STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
		return;
	}

	/* We're last */
	initialized = CHANGING;
	STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);

	/* If the workers are frozen, no progress can be made. */
	STARPU_ASSERT(config.pause_depth <= 0);

	starpu_task_wait_for_no_ready();

	/* tell all workers to shut down */
	_starpu_kill_all_workers(&config);

	{
		int stats = starpu_get_env_number("STARPU_STATS");
		if (stats != 0)
		{
			_starpu_display_msi_stats();
			_starpu_display_alloc_cache_stats();
			_starpu_display_comm_amounts();
		}
	}

	starpu_profiling_bus_helper_display_summary();
	starpu_profiling_worker_helper_display_summary();

	_starpu_deinitialize_registered_performance_models();

	_starpu_watchdog_shutdown();

	/* wait for their termination */
	_starpu_terminate_workers(&config);

	{
		int stats = starpu_get_env_number("STARPU_MEMORY_STATS");
		if (stats != 0)
		{
			/* Display statistics on data which have not been unregistered */
			starpu_data_display_memory_stats();
		}
	}

	_starpu_delete_all_sched_ctxs();

	_starpu_disk_unregister();
#ifdef STARPU_HAVE_HWLOC
	starpu_tree_free(config.topology.tree);
#endif
	_starpu_destroy_topology(&config);
#ifdef STARPU_USE_FXT
	_starpu_stop_fxt_profiling();
#endif
	_starpu_data_interface_shutdown();

	/* Drop all remaining tags */
	_starpu_tag_clear();

#ifdef STARPU_OPENMP
	_starpu_omp_dummy_shutdown();
#endif
	_starpu_close_debug_logfile();

	STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
	initialized = UNINITIALIZED;
	/* Let someone else who wants to initialize it again do it */
	STARPU_PTHREAD_COND_SIGNAL(&init_cond);
	STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);

	/* Clear memory if it was allocated by StarPU */
	if (config.default_conf)
		free(config.conf);

#ifdef HAVE_AYUDAME_H
	if (AYU_event) AYU_event(AYU_FINISH, 0, NULL);
#endif

#ifdef STARPU_USE_SCC
	if (_starpu_scc_common_is_mp_initialized())
		_starpu_scc_src_mp_deinit();
#endif

	_starpu_print_idle_time();
	_STARPU_DEBUG("Shutdown finished\n");
}
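/* Usage sketch (not part of workers.c): starpu_shutdown() must balance a
 * successful starpu_init().  Setting STARPU_STATS=1 in the environment makes
 * the block above print MSI, allocation-cache and communication statistics at
 * shutdown time.
 *
 *	int ret = starpu_init(NULL);
 *	if (ret != 0)
 *		return ret;
 *	... submit tasks, starpu_task_wait_for_all() ...
 *	starpu_shutdown();
 */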
unsigned starpu_worker_get_count(void)
{
	return config.topology.nworkers;
}

unsigned starpu_worker_is_slave(int workerid)
{
	return config.workers[workerid].slave;
}

int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
{
	switch (type)
	{
	case STARPU_CPU_WORKER:
		return config.topology.ncpus;
	case STARPU_CUDA_WORKER:
		return config.topology.ncudagpus;
	case STARPU_OPENCL_WORKER:
		return config.topology.nopenclgpus;
	case STARPU_MIC_WORKER:
		return config.topology.nmicdevices;
	case STARPU_SCC_WORKER:
		return config.topology.nsccdevices;
	default:
		return -EINVAL;
	}
}
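/* Usage sketch (not part of workers.c): an application can query how many
 * workers of a given kind were detected before deciding which codelet
 * implementations to provide, e.g.:
 *
 *	if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) <= 0)
 *		fprintf(stderr, "no CUDA worker available, falling back to CPU\n");
 *
 * A negative return value (-EINVAL) means the archtype itself is unknown.
 */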
unsigned starpu_combined_worker_get_count(void)
{
	return config.topology.ncombinedworkers;
}

unsigned starpu_cpu_worker_get_count(void)
{
	return config.topology.ncpus;
}

unsigned starpu_cuda_worker_get_count(void)
{
	return config.topology.ncudagpus;
}

unsigned starpu_opencl_worker_get_count(void)
{
	return config.topology.nopenclgpus;
}

int starpu_asynchronous_copy_disabled(void)
{
	return config.conf->disable_asynchronous_copy;
}

int starpu_asynchronous_cuda_copy_disabled(void)
{
	return config.conf->disable_asynchronous_cuda_copy;
}

int starpu_asynchronous_opencl_copy_disabled(void)
{
	return config.conf->disable_asynchronous_opencl_copy;
}

int starpu_asynchronous_mic_copy_disabled(void)
{
	return config.conf->disable_asynchronous_mic_copy;
}

unsigned starpu_mic_worker_get_count(void)
{
	int i = 0, count = 0;

	for (i = 0; i < STARPU_MAXMICDEVS; i++)
		count += config.topology.nmiccores[i];

	return count;
}

unsigned starpu_scc_worker_get_count(void)
{
	return config.topology.nsccdevices;
}
/* When analyzing performance, it is useful to see which processing unit
 * actually performed a task. This function returns the id of the processing
 * unit currently executing, so it makes no sense to call it from within the
 * callbacks of SPU functions for instance. If called from a thread that is
 * not controlled by StarPU, starpu_worker_get_id returns -1. */
int starpu_worker_get_id(void)
{
	struct _starpu_worker *worker;

	worker = _starpu_get_local_worker_key();
	if (worker)
	{
		return worker->workerid;
	}
	else
	{
		/* there is no worker associated with that thread, perhaps it is
		 * a thread from the application or some SPU worker */
		return -1;
	}
}
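/* Usage sketch (not part of workers.c): inside a codelet implementation the
 * id of the executing worker can be queried, e.g. to index per-worker scratch
 * buffers.  "do_work" and "per_worker_buffer" are hypothetical application
 * code and data.
 *
 *	static void cpu_func(void *buffers[], void *cl_arg)
 *	{
 *		int workerid = starpu_worker_get_id();
 *		do_work(per_worker_buffer[workerid], buffers, cl_arg);
 *	}
 */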
int starpu_combined_worker_get_id(void)
{
	struct _starpu_worker *worker;

	worker = _starpu_get_local_worker_key();
	if (worker)
	{
		return worker->combined_workerid;
	}
	else
	{
		/* there is no worker associated with that thread, perhaps it is
		 * a thread from the application or some SPU worker */
		return -1;
	}
}

int starpu_combined_worker_get_size(void)
{
	struct _starpu_worker *worker;

	worker = _starpu_get_local_worker_key();
	if (worker)
	{
		return worker->worker_size;
	}
	else
	{
		/* there is no worker associated with that thread, perhaps it is
		 * a thread from the application or some SPU worker */
		return -1;
	}
}

int starpu_combined_worker_get_rank(void)
{
	struct _starpu_worker *worker;

	worker = _starpu_get_local_worker_key();
	if (worker)
	{
		return worker->current_rank;
	}
	else
	{
		/* there is no worker associated with that thread, perhaps it is
		 * a thread from the application or some SPU worker */
		return -1;
	}
}
int starpu_worker_get_subworkerid(int id)
{
	return config.workers[id].subworkerid;
}

int starpu_worker_get_devid(int id)
{
	return config.workers[id].devid;
}

struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
{
	return &config.workers[id];
}

unsigned starpu_worker_is_combined_worker(int id)
{
	return id >= (int)config.topology.nworkers;
}

struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
{
	if (id == STARPU_NMAX_SCHED_CTXS) return NULL;
	return &config.sched_ctxs[id];
}

struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
{
	unsigned basic_worker_count = starpu_worker_get_count();

	//_STARPU_DEBUG("basic_worker_count:%d\n",basic_worker_count);

	STARPU_ASSERT(id >= basic_worker_count);
	return &config.combined_workers[id - basic_worker_count];
}

enum starpu_worker_archtype starpu_worker_get_type(int id)
{
	return config.workers[id].arch;
}
int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
{
	unsigned nworkers = starpu_worker_get_count();

	int cnt = 0;

	unsigned id;
	for (id = 0; id < nworkers; id++)
	{
		if (starpu_worker_get_type(id) == type)
		{
			/* Perhaps the array is too small? */
			if (cnt >= maxsize)
				return -ERANGE;

			workerids[cnt++] = id;
		}
	}

	return cnt;
}
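/* Usage sketch (not part of workers.c): collect the ids of all CPU workers.
 * STARPU_NMAXWORKERS bounds the total number of workers, so the call cannot
 * return -ERANGE here.
 *
 *	int ids[STARPU_NMAXWORKERS];
 *	int ncpu = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, ids,
 *						 STARPU_NMAXWORKERS);
 *	for (i = 0; i < ncpu; i++)
 *		printf("CPU worker %d has id %d\n", i, ids[i]);
 */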
int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
{
	unsigned nworkers = starpu_worker_get_count();

	int cnt = 0;

	unsigned id;
	for (id = 0; id < nworkers; id++)
	{
		if (starpu_worker_get_type(id) == type)
		{
			if (num == cnt)
				return id;
			cnt++;
		}
	}

	/* Not found */
	return -1;
}

int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid)
{
	unsigned nworkers = starpu_worker_get_count();

	unsigned id;
	for (id = 0; id < nworkers; id++)
		if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid)
			return id;

	/* Not found */
	return -1;
}

void starpu_worker_get_name(int id, char *dst, size_t maxlen)
{
	char *name = config.workers[id].name;

	snprintf(dst, maxlen, "%s", name);
}
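/* Usage sketch (not part of workers.c): print the name and type of every
 * worker, e.g. at application start-up.
 *
 *	unsigned w, n = starpu_worker_get_count();
 *	char name[128];
 *	for (w = 0; w < n; w++)
 *	{
 *		starpu_worker_get_name(w, name, sizeof(name));
 *		printf("worker %u: %s (%s)\n", w, name,
 *		       starpu_worker_get_type_as_string(starpu_worker_get_type(w)));
 *	}
 */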
int starpu_worker_get_bindid(int workerid)
{
	return config.workers[workerid].bindid;
}

int _starpu_worker_get_workerids(int bindid, int *workerids)
{
	unsigned nworkers = starpu_worker_get_count();
	int nw = 0;
	unsigned id;
	for (id = 0; id < nworkers; id++)
		if (config.workers[id].bindid == bindid)
			workerids[nw++] = id;
	return nw;
}

/* Retrieve the status which indicates what the worker is currently doing. */
enum _starpu_worker_status _starpu_worker_get_status(int workerid)
{
	return config.workers[workerid].status;
}

/* Change the status of the worker which indicates what the worker is currently
 * doing (e.g. executing a callback). */
void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
{
	config.workers[workerid].status = status;
}
void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)
{
	*sched_cond = &config.workers[workerid].sched_cond;
	*sched_mutex = &config.workers[workerid].sched_mutex;
}

int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
{
	int success = 0;

	STARPU_PTHREAD_MUTEX_LOCK(mutex);
	if (config.workers[workerid].status == STATUS_SLEEPING)
	{
		config.workers[workerid].status = STATUS_WAKING_UP;
		STARPU_PTHREAD_COND_SIGNAL(cond);
		success = 1;
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
	return success;
}
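/* Usage sketch (not part of workers.c): a scheduling policy that just pushed a
 * task to a worker can wake that worker if it went to sleep.  This is only a
 * sketch, assuming the policy already knows the target workerid.
 *
 *	starpu_pthread_mutex_t *sched_mutex;
 *	starpu_pthread_cond_t *sched_cond;
 *	starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
 *	starpu_wakeup_worker(workerid, sched_cond, sched_mutex);
 *
 * starpu_wakeup_worker() returns 1 only if the worker was actually sleeping.
 */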
int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
{
	unsigned nworkers = starpu_worker_get_count();

	int cnt = 0;

	unsigned id;
	for (id = 0; id < nworkers; id++)
	{
		if (starpu_worker_get_type(id) == type)
		{
			/* Perhaps the array is too small? */
			if (cnt >= maxsize)
				return cnt;

			workerids[cnt++] = id;
		}
	}

	return cnt;
}
int starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
{
	unsigned nworkers = starpu_worker_get_count();
	int cnt = 0;
	unsigned id, worker;
	unsigned found = 0;

	for (id = 0; id < nworkers; id++)
	{
		found = 0;
		if (starpu_worker_get_type(id) == type)
		{
			/* Perhaps the array is too small? */
			if (cnt >= maxsize)
				return cnt;

			int s;
			for (s = 1; s < STARPU_NMAX_SCHED_CTXS; s++)
			{
				if (config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
				{
					struct starpu_worker_collection *workers = config.sched_ctxs[s].workers;
					struct starpu_sched_ctx_iterator it;

					workers->init_iterator(workers, &it);
					while (workers->has_next(workers, &it))
					{
						worker = workers->get_next(workers, &it);
						if (worker == id)
						{
							found = 1;
							break;
						}
					}
					if (found) break;
				}
			}
			if (!found)
				workerids[cnt++] = id;
		}
	}
	return cnt;
}
struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
{
	return &config.sched_ctxs[STARPU_GLOBAL_SCHED_CTX];
}

int _starpu_worker_get_nsched_ctxs(int workerid)
{
	return config.workers[workerid].nsched_ctxs;
}

static void *
_starpu_get_worker_from_driver(struct starpu_driver *d)
{
	unsigned nworkers = starpu_worker_get_count();
	unsigned workerid;

#ifdef STARPU_USE_CUDA
	if (d->type == STARPU_CUDA_WORKER)
		return &cuda_worker_set[d->id.cuda_id];
#endif

	for (workerid = 0; workerid < nworkers; workerid++)
	{
		if (starpu_worker_get_type(workerid) == d->type)
		{
			struct _starpu_worker *worker;
			worker = _starpu_get_worker_struct(workerid);
			switch (d->type)
			{
#ifdef STARPU_USE_CPU
			case STARPU_CPU_WORKER:
				if (worker->devid == d->id.cpu_id)
					return worker;
				break;
#endif
#ifdef STARPU_USE_OPENCL
			case STARPU_OPENCL_WORKER:
			{
				cl_device_id device;
				starpu_opencl_get_device(worker->devid, &device);
				if (device == d->id.opencl_id)
					return worker;
				break;
			}
#endif
			default:
				_STARPU_DEBUG("Invalid device type\n");
				return NULL;
			}
		}
	}

	return NULL;
}
int
starpu_driver_run(struct starpu_driver *d)
{
	if (!d)
	{
		_STARPU_DEBUG("Invalid argument\n");
		return -EINVAL;
	}

	void *worker = _starpu_get_worker_from_driver(d);

	switch (d->type)
	{
#ifdef STARPU_USE_CPU
	case STARPU_CPU_WORKER:
		return _starpu_run_cpu(worker);
#endif
#ifdef STARPU_USE_CUDA
	case STARPU_CUDA_WORKER:
		return _starpu_run_cuda(worker);
#endif
#ifdef STARPU_USE_OPENCL
	case STARPU_OPENCL_WORKER:
		return _starpu_run_opencl(worker);
#endif
	default:
		_STARPU_DEBUG("Invalid device type\n");
		return -EINVAL;
	}
}

int
starpu_driver_init(struct starpu_driver *d)
{
	STARPU_ASSERT(d);
	void *worker = _starpu_get_worker_from_driver(d);

	switch (d->type)
	{
#ifdef STARPU_USE_CPU
	case STARPU_CPU_WORKER:
		return _starpu_cpu_driver_init(worker);
#endif
#ifdef STARPU_USE_CUDA
	case STARPU_CUDA_WORKER:
		return _starpu_cuda_driver_init(worker);
#endif
#ifdef STARPU_USE_OPENCL
	case STARPU_OPENCL_WORKER:
		return _starpu_opencl_driver_init(worker);
#endif
	default:
		return -EINVAL;
	}
}

int
starpu_driver_run_once(struct starpu_driver *d)
{
	STARPU_ASSERT(d);
	void *worker = _starpu_get_worker_from_driver(d);

	switch (d->type)
	{
#ifdef STARPU_USE_CPU
	case STARPU_CPU_WORKER:
		return _starpu_cpu_driver_run_once(worker);
#endif
#ifdef STARPU_USE_CUDA
	case STARPU_CUDA_WORKER:
		return _starpu_cuda_driver_run_once(worker);
#endif
#ifdef STARPU_USE_OPENCL
	case STARPU_OPENCL_WORKER:
		return _starpu_opencl_driver_run_once(worker);
#endif
	default:
		return -EINVAL;
	}
}

int
starpu_driver_deinit(struct starpu_driver *d)
{
	STARPU_ASSERT(d);
	void *worker = _starpu_get_worker_from_driver(d);

	switch (d->type)
	{
#ifdef STARPU_USE_CPU
	case STARPU_CPU_WORKER:
		return _starpu_cpu_driver_deinit(worker);
#endif
#ifdef STARPU_USE_CUDA
	case STARPU_CUDA_WORKER:
		return _starpu_cuda_driver_deinit(worker);
#endif
#ifdef STARPU_USE_OPENCL
	case STARPU_OPENCL_WORKER:
		return _starpu_opencl_driver_deinit(worker);
#endif
	default:
		return -EINVAL;
	}
}
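/* Usage sketch (not part of workers.c): an application that asked StarPU not
 * to launch a given driver itself (via the not_launched_drivers field of
 * struct starpu_conf) can drive it by hand.  starpu_driver_run(&d) is the
 * one-call equivalent of the init/run_once/deinit loop below;
 * "application_is_done()" is a hypothetical termination predicate.
 *
 *	struct starpu_driver d =
 *	{
 *		.type = STARPU_CPU_WORKER,
 *		.id.cpu_id = 0
 *	};
 *	starpu_driver_init(&d);
 *	while (!application_is_done())
 *		starpu_driver_run_once(&d);
 *	starpu_driver_deinit(&d);
 */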
void starpu_get_version(int *major, int *minor, int *release)
{
	*major = STARPU_MAJOR_VERSION;
	*minor = STARPU_MINOR_VERSION;
	*release = STARPU_RELEASE_VERSION;
}
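/* Usage sketch (not part of workers.c): query the library version at runtime,
 * e.g. for a diagnostic banner.
 *
 *	int major, minor, release;
 *	starpu_get_version(&major, &minor, &release);
 *	printf("running on StarPU %d.%d.%d\n", major, minor, release);
 */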
void _starpu_unlock_mutex_if_prev_locked()
{
	int workerid = starpu_worker_get_id();
	if (workerid != -1)
	{
		struct _starpu_worker *w = _starpu_get_worker_struct(workerid);
		if (w->sched_mutex_locked)
		{
			STARPU_PTHREAD_MUTEX_UNLOCK(&w->sched_mutex);
			_starpu_worker_set_flag_sched_mutex_locked(workerid, 1);
		}
	}
	return;
}

void _starpu_relock_mutex_if_prev_locked()
{
	int workerid = starpu_worker_get_id();
	if (workerid != -1)
	{
		struct _starpu_worker *w = _starpu_get_worker_struct(workerid);
		if (w->sched_mutex_locked)
			STARPU_PTHREAD_MUTEX_LOCK(&w->sched_mutex);
	}
	return;
}

void _starpu_worker_set_flag_sched_mutex_locked(int workerid, unsigned flag)
{
	struct _starpu_worker *w = _starpu_get_worker_struct(workerid);
	w->sched_mutex_locked = flag;
}

unsigned _starpu_worker_mutex_is_sched_mutex(int workerid, starpu_pthread_mutex_t *mutex)
{
	struct _starpu_worker *w = _starpu_get_worker_struct(workerid);
	return &w->sched_mutex == mutex;
}
unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
{
	unsigned s = 0;
	unsigned nsched_ctxs = _starpu_worker_get_nsched_ctxs(workerid);
	*sched_ctxs = (unsigned*)malloc(nsched_ctxs*sizeof(unsigned));
	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
	struct _starpu_sched_ctx_list *l = NULL;
	for (l = worker->sched_ctx_list; l; l = l->next)
	{
		(*sched_ctxs)[s++] = l->sched_ctx;
	}
	return nsched_ctxs;
}
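/* Note: the array returned through *sched_ctxs is allocated with malloc() by
 * the function above, so the caller is responsible for freeing it, e.g.:
 *
 *	unsigned *ctxs;
 *	unsigned nctxs = starpu_worker_get_sched_ctx_list(workerid, &ctxs);
 *	... use ctxs[0 .. nctxs-1] ...
 *	free(ctxs);
 */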
char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
{
	if (type == STARPU_CPU_WORKER) return "STARPU_CPU_WORKER";
	if (type == STARPU_CUDA_WORKER) return "STARPU_CUDA_WORKER";
	if (type == STARPU_OPENCL_WORKER) return "STARPU_OPENCL_WORKER";
	if (type == STARPU_MIC_WORKER) return "STARPU_MIC_WORKER";
	if (type == STARPU_SCC_WORKER) return "STARPU_SCC_WORKER";
	if (type == STARPU_ANY_WORKER) return "STARPU_ANY_WORKER";
	return "STARPU_unknown_WORKER";
}