workers.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2010, 2011 Institut National de Recherche en Informatique et Automatique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. * Copyright (C) 2011-2012 INRIA
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/progress_hook.h>
  25. #include <core/workers.h>
  26. #include <core/debug.h>
  27. #include <core/disk.h>
  28. #include <core/task.h>
  29. #include <profiling/profiling.h>
  30. #include <starpu_task_list.h>
  31. #include <drivers/mp_common/sink_common.h>
  32. #include <drivers/scc/driver_scc_common.h>
  33. #include <drivers/cpu/driver_cpu.h>
  34. #include <drivers/cuda/driver_cuda.h>
  35. #include <drivers/opencl/driver_opencl.h>
  36. #ifdef STARPU_SIMGRID
  37. #include <msg/msg.h>
  38. #endif
  39. #ifdef __MINGW32__
  40. #include <windows.h>
  41. #endif
/* acquire/release semantic for concurrent initialization/de-initialization:
 * init_mutex and init_cond accompany the initialization state kept below. */
static starpu_pthread_mutex_t init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER;
/* NOTE(review): presumably the number of outstanding initialization requests;
 * its users are not visible in this part of the file -- confirm. */
static int init_count = 0;
/* Initialization state of the runtime; _starpu_is_initialized() reports
 * whether it currently equals INITIALIZED. */
static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
/* Thread-specific key created by _starpu_launch_drivers(); it is written by
 * _starpu_set_local_worker_key() and read by _starpu_get_local_worker_key(). */
static starpu_pthread_key_t worker_key;
/* The machine configuration returned by _starpu_get_machine_config(). */
static struct _starpu_machine_config config;
  49. /* Pointers to argc and argv
  50. */
  51. static int *my_argc = 0;
  52. static char ***my_argv = NULL;
  53. /* Initialize value of static argc and argv, called when the process begins
  54. */
  55. void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
  56. {
  57. my_argc = argc_param;
  58. my_argv = argv_param;
  59. }
  60. int *_starpu_get_argc()
  61. {
  62. return my_argc;
  63. }
  64. char ***_starpu_get_argv()
  65. {
  66. return my_argv;
  67. }
  68. int _starpu_is_initialized(void)
  69. {
  70. return initialized == INITIALIZED;
  71. }
  72. struct _starpu_machine_config *_starpu_get_machine_config(void)
  73. {
  74. return &config;
  75. }
  76. /* Makes sure that at least one of the workers of type <arch> can execute
  77. * <task>, for at least one of its implementations. */
  78. static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
  79. enum starpu_worker_archtype arch)
  80. {
  81. int i;
  82. int nworkers = starpu_worker_get_count();
  83. _starpu_codelet_check_deprecated_fields(task->cl);
  84. for (i = 0; i < nworkers; i++)
  85. {
  86. if (starpu_worker_get_type(i) != arch)
  87. continue;
  88. unsigned impl;
  89. for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
  90. {
  91. /* We could call task->cl->can_execute(i, task, impl)
  92. here, it would definitely work. It is probably
  93. cheaper to check whether it is necessary in order to
  94. avoid a useless function call, though. */
  95. unsigned test_implementation = 0;
  96. switch (arch)
  97. {
  98. case STARPU_CPU_WORKER:
  99. if (task->cl->cpu_funcs[impl] != NULL)
  100. test_implementation = 1;
  101. break;
  102. case STARPU_CUDA_WORKER:
  103. if (task->cl->cuda_funcs[impl] != NULL)
  104. test_implementation = 1;
  105. break;
  106. case STARPU_OPENCL_WORKER:
  107. if (task->cl->opencl_funcs[impl] != NULL)
  108. test_implementation = 1;
  109. break;
  110. case STARPU_MIC_WORKER:
  111. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mic_funcs[impl] != NULL)
  112. test_implementation = 1;
  113. break;
  114. case STARPU_SCC_WORKER:
  115. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->scc_funcs[impl] != NULL)
  116. test_implementation = 1;
  117. break;
  118. default:
  119. STARPU_ABORT();
  120. }
  121. if (!test_implementation)
  122. break;
  123. if (task->cl->can_execute(i, task, impl))
  124. return 1;
  125. }
  126. }
  127. return 0;
  128. }
  129. /* in case a task is submitted, we may check whether there exists a worker
  130. that may execute the task or not */
  131. uint32_t _starpu_worker_exists(struct starpu_task *task)
  132. {
  133. _starpu_codelet_check_deprecated_fields(task->cl);
  134. if (!(task->cl->where & config.worker_mask))
  135. return 0;
  136. if (!task->cl->can_execute)
  137. return 1;
  138. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  139. if ((task->cl->where & STARPU_CPU) &&
  140. _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
  141. return 1;
  142. #endif
  143. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  144. if ((task->cl->where & STARPU_CUDA) &&
  145. _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
  146. return 1;
  147. #endif
  148. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  149. if ((task->cl->where & STARPU_OPENCL) &&
  150. _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
  151. return 1;
  152. #endif
  153. #ifdef STARPU_USE_MIC
  154. if ((task->cl->where & STARPU_MIC) &&
  155. _starpu_worker_exists_and_can_execute(task, STARPU_MIC_WORKER))
  156. return 1;
  157. #endif
  158. #ifdef STARPU_USE_SCC
  159. if ((task->cl->where & STARPU_SCC) &&
  160. _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
  161. return 1;
  162. #endif
  163. return 0;
  164. }
  165. uint32_t _starpu_can_submit_cuda_task(void)
  166. {
  167. return (STARPU_CUDA & config.worker_mask);
  168. }
  169. uint32_t _starpu_can_submit_cpu_task(void)
  170. {
  171. return (STARPU_CPU & config.worker_mask);
  172. }
  173. uint32_t _starpu_can_submit_opencl_task(void)
  174. {
  175. return (STARPU_OPENCL & config.worker_mask);
  176. }
  177. uint32_t _starpu_can_submit_scc_task(void)
  178. {
  179. return (STARPU_SCC & config.worker_mask);
  180. }
  181. static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
  182. {
  183. switch(arch)
  184. {
  185. case STARPU_ANY_WORKER:
  186. {
  187. int cpu_func_enabled=1, cuda_func_enabled=1, opencl_func_enabled=1;
  188. /* TODO: MIC/SCC */
  189. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  190. starpu_cpu_func_t cpu_func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  191. cpu_func_enabled = cpu_func != NULL && starpu_cpu_worker_get_count();
  192. #endif
  193. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  194. starpu_cuda_func_t cuda_func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  195. cuda_func_enabled = cuda_func != NULL && starpu_cuda_worker_get_count();
  196. #endif
  197. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  198. starpu_opencl_func_t opencl_func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  199. opencl_func_enabled = opencl_func != NULL && starpu_opencl_worker_get_count();
  200. #endif
  201. return (cpu_func_enabled && cuda_func_enabled && opencl_func_enabled);
  202. }
  203. case STARPU_CPU_WORKER:
  204. {
  205. starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  206. return func != NULL;
  207. }
  208. case STARPU_CUDA_WORKER:
  209. {
  210. starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  211. return func != NULL;
  212. }
  213. case STARPU_OPENCL_WORKER:
  214. {
  215. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  216. return func != NULL;
  217. }
  218. case STARPU_MIC_WORKER:
  219. {
  220. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
  221. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  222. return func != NULL || func_name != NULL;
  223. }
  224. case STARPU_SCC_WORKER:
  225. {
  226. starpu_scc_func_t func = _starpu_task_get_scc_nth_implementation(cl, nimpl);
  227. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  228. return func != NULL || func_name != NULL;
  229. }
  230. default:
  231. STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch);
  232. }
  233. return 0;
  234. }
  235. int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  236. {
  237. /* TODO: check that the task operand sizes will fit on that device */
  238. return (task->cl->where & config.workers[workerid].worker_mask) &&
  239. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
  240. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
  241. }
  242. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  243. {
  244. /* TODO: check that the task operand sizes will fit on that device */
  245. /* TODO: call application-provided function for various cases like
  246. * double support, shared memory size limit, etc. */
  247. struct starpu_codelet *cl = task->cl;
  248. unsigned nworkers = config.topology.nworkers;
  249. /* Is this a parallel worker ? */
  250. if (workerid < nworkers)
  251. {
  252. return !!((task->cl->where & config.workers[workerid].worker_mask) &&
  253. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
  254. }
  255. else
  256. {
  257. if ((cl->type == STARPU_SPMD)
  258. #ifdef STARPU_HAVE_HWLOC
  259. || (cl->type == STARPU_FORKJOIN)
  260. #else
  261. #ifdef __GLIBC__
  262. || (cl->type == STARPU_FORKJOIN)
  263. #endif
  264. #endif
  265. )
  266. {
  267. /* TODO we should add other types of constraints */
  268. /* Is the worker larger than requested ? */
  269. int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
  270. int worker0 = config.combined_workers[workerid - nworkers].combined_workerid[0];
  271. return !!((worker_size <= task->cl->max_parallelism) &&
  272. _starpu_can_use_nth_implementation(config.workers[worker0].arch, task->cl, nimpl));
  273. }
  274. else
  275. {
  276. /* We have a sequential task but a parallel worker */
  277. return 0;
  278. }
  279. }
  280. }
  281. /*
  282. * Runtime initialization methods
  283. */
#ifdef STARPU_USE_MIC
/* One worker set per MIC device, indexed by the mp_nodeid of the workers; the
 * entries are filled in by the STARPU_MIC_WORKER case of
 * _starpu_launch_drivers(). */
static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
#endif
  287. static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
  288. {
  289. starpu_pthread_cond_t *cond = &workerarg->sched_cond;
  290. starpu_pthread_mutex_t *mutex = &workerarg->sched_mutex;
  291. unsigned memory_node = workerarg->memory_node;
  292. _starpu_memory_node_register_condition(cond, mutex, memory_node);
  293. }
  294. /*
  295. * Returns 0 if the given driver is one of the drivers that must be launched by
  296. * the application itself, and not by StarPU, 1 otherwise.
  297. */
  298. static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
  299. struct starpu_driver *d)
  300. {
  301. if (conf->n_not_launched_drivers == 0 ||
  302. conf->not_launched_drivers == NULL)
  303. return 1;
  304. /* Is <d> in conf->not_launched_drivers ? */
  305. unsigned i;
  306. for (i = 0; i < conf->n_not_launched_drivers; i++)
  307. {
  308. if (d->type != conf->not_launched_drivers[i].type)
  309. continue;
  310. switch (d->type)
  311. {
  312. case STARPU_CPU_WORKER:
  313. if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
  314. return 0;
  315. case STARPU_CUDA_WORKER:
  316. if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
  317. return 0;
  318. break;
  319. #ifdef STARPU_USE_OPENCL
  320. case STARPU_OPENCL_WORKER:
  321. if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
  322. return 0;
  323. break;
  324. #endif
  325. default:
  326. STARPU_ABORT();
  327. }
  328. }
  329. return 1;
  330. }
#ifdef STARPU_PERF_DEBUG
/* Profiling interval timer: read from the main thread with getitimer() in
 * _starpu_launch_drivers() and installed with setitimer() in
 * _starpu_worker_init(). */
struct itimerval prof_itimer;
#endif
  334. void _starpu_worker_init(struct _starpu_worker *worker, unsigned fut_key)
  335. {
  336. (void) fut_key;
  337. int devid = worker->devid;
  338. (void) devid;
  339. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  340. setitimer(ITIMER_PROF, &prof_itimer, NULL);
  341. #endif
  342. #ifdef STARPU_USE_FXT
  343. _starpu_fxt_register_thread(worker->bindid);
  344. unsigned memnode = worker->memory_node;
  345. _STARPU_TRACE_WORKER_INIT_START(fut_key, worker->workerid, devid, memnode);
  346. #endif
  347. _starpu_bind_thread_on_cpu(worker->config, worker->bindid);
  348. _STARPU_DEBUG("worker %d is ready on logical cpu %d\n", devid, worker->bindid);
  349. #ifdef STARPU_HAVE_HWLOC
  350. _STARPU_DEBUG("worker %d cpuset start at %d\n", devid, hwloc_bitmap_first(worker->hwloc_cpu_set));
  351. #endif
  352. _starpu_memory_node_set_local_key(&worker->memory_node);
  353. _starpu_set_local_worker_key(worker);
  354. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  355. worker->worker_is_running = 1;
  356. STARPU_PTHREAD_COND_SIGNAL(&worker->started_cond);
  357. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  358. }
/* Initialize every worker of <pconfig> and start the driver threads.
 *
 * The function marks the configuration as running and submitting, creates the
 * thread-specific worker key, then proceeds in two passes over
 * pconfig->workers:
 *  - first pass: reset the per-worker state, register the worker's scheduling
 *    condition, and create the driver thread matching the worker's
 *    architecture, unless _starpu_may_launch_driver() says the application
 *    launches that driver itself (run_by_starpu is then cleared);
 *  - second pass: for each worker that was launched here, wait on ready_cond
 *    until worker_is_initialized is set.
 * Aborts on a worker architecture that is not handled. */
static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
{
	pconfig->running = 1;
	pconfig->submitting = 1;

	STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);

	unsigned nworkers = pconfig->topology.nworkers;

	/* Launch workers asynchronously */
	/* cpu and cuda count the CPU and CUDA workers met so far; they are
	 * the driver ids handed to _starpu_may_launch_driver() */
	unsigned cpu = 0, cuda = 0;
	unsigned worker;

#if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
	/* Get itimer of the main thread, to set it for the worker threads */
	getitimer(ITIMER_PROF, &prof_itimer);
#endif

#ifdef HAVE_AYUDAME_H
	if (AYU_event) AYU_event(AYU_INIT, 0, NULL);
#endif

	/* First pass: initialize the workers and create their threads */
	for (worker = 0; worker < nworkers; worker++)
	{
		struct _starpu_worker *workerarg = &pconfig->workers[worker];
#ifdef STARPU_USE_MIC
		unsigned mp_nodeid = workerarg->mp_nodeid;
#endif

		workerarg->config = pconfig;

		_starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);

		STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
		STARPU_PTHREAD_COND_INIT(&workerarg->started_cond, NULL);
		STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);

		/* A basic worker is its own combined worker, of size 1 */
		workerarg->worker_size = 1;
		workerarg->combined_workerid = workerarg->workerid;
		workerarg->current_rank = 0;
		workerarg->has_prev_init = 0;
		/* mutex + cond only for the local list */
		/* we have a single local list */
		/* afterwards there would be a mutex + cond for the list of each strategy */
		workerarg->run_by_starpu = 1;
		workerarg->worker_is_running = 0;
		workerarg->worker_is_initialized = 0;
		workerarg->set = NULL;

		int ctx;
		for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
		{
			workerarg->removed_from_ctx[ctx] = 0;
			workerarg->shares_tasks_lists[ctx] = 0;
		}

		STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
		STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);

		STARPU_PTHREAD_MUTEX_INIT(&workerarg->parallel_sect_mutex, NULL);
		STARPU_PTHREAD_COND_INIT(&workerarg->parallel_sect_cond, NULL);
		workerarg->parallel_sect = 0;

		/* if some codelet's termination cannot be handled directly :
		 * for instance in the Gordon driver, Gordon tasks' callbacks
		 * may be executed by another thread than that of the Gordon
		 * driver so that we cannot call the push_codelet_output method
		 * directly */
		workerarg->terminated_jobs = _starpu_job_list_new();

		starpu_task_list_init(&workerarg->local_tasks);

		workerarg->status = STATUS_INITIALIZING;

		_STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers);

		_starpu_init_worker_queue(workerarg);

		/* Description of this worker's driver, as matched against
		 * conf->not_launched_drivers */
		struct starpu_driver driver;
		driver.type = workerarg->arch;

		switch (workerarg->arch)
		{
#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
		case STARPU_CPU_WORKER:
			driver.id.cpu_id = cpu;
			if (_starpu_may_launch_driver(pconfig->conf, &driver))
			{
				STARPU_PTHREAD_CREATE_ON(
					workerarg->name,
					&workerarg->worker_thread,
					NULL,
					_starpu_cpu_worker,
					workerarg,
					worker+1);
#ifdef STARPU_USE_FXT
				/* In tracing mode, make sure the
				 * thread is really started before
				 * starting another one, to make sure
				 * they appear in order in the trace.
				 */
				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
				while (!workerarg->worker_is_running)
					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
#endif
			}
			else
			{
				/* The application launches this driver itself */
				workerarg->run_by_starpu = 0;
			}
			cpu++;
			break;
#endif
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
		case STARPU_CUDA_WORKER:
			driver.id.cuda_id = cuda;
			if (_starpu_may_launch_driver(pconfig->conf, &driver))
			{
				STARPU_PTHREAD_CREATE_ON(
					workerarg->name,
					&workerarg->worker_thread,
					NULL,
					_starpu_cuda_worker,
					workerarg,
					worker+1);
#ifdef STARPU_USE_FXT
				/* Same ordering constraint as for CPU workers */
				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
				while (!workerarg->worker_is_running)
					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
#endif
			}
			else
			{
				/* The application launches this driver itself */
				workerarg->run_by_starpu = 0;
			}
			cuda++;
			break;
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
		case STARPU_OPENCL_WORKER:
#ifndef STARPU_SIMGRID
			/* OpenCL drivers are identified by their device handle */
			starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
			if (!_starpu_may_launch_driver(pconfig->conf, &driver))
			{
				workerarg->run_by_starpu = 0;
				break;
			}
#endif
			STARPU_PTHREAD_CREATE_ON(
				workerarg->name,
				&workerarg->worker_thread,
				NULL,
				_starpu_opencl_worker,
				workerarg,
				worker+1);
#ifdef STARPU_USE_FXT
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_running)
				STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
#endif
			break;
#endif
#ifdef STARPU_USE_MIC
		case STARPU_MIC_WORKER:
			/* We use the Gordon approach for the MIC,
			 * which consists in spawning only one thread
			 * per MIC device, which will control all MIC
			 * workers of this device. (by using a worker set). */
			/* The set of this device already has its thread:
			 * only attach the worker to it */
			if (mic_worker_set[mp_nodeid].started)
				goto worker_set_initialized;

			mic_worker_set[mp_nodeid].nworkers = pconfig->topology.nmiccores[mp_nodeid];

			/* We assume all MIC workers of a given MIC
			 * device are contiguous so that we can
			 * address them with the first one only. */
			mic_worker_set[mp_nodeid].workers = workerarg;
			mic_worker_set[mp_nodeid].set_is_initialized = 0;

			STARPU_PTHREAD_CREATE_ON(
				workerarg->name,
				&mic_worker_set[mp_nodeid].worker_thread,
				NULL,
				_starpu_mic_src_worker,
				&mic_worker_set[mp_nodeid],
				worker+1);

#ifdef STARPU_USE_FXT
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_running)
				STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
#endif

			/* Wait right away for the whole set to be initialized;
			 * the second pass has nothing left to do for MIC */
			STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[mp_nodeid].mutex);
			while (!mic_worker_set[mp_nodeid].set_is_initialized)
				STARPU_PTHREAD_COND_WAIT(&mic_worker_set[mp_nodeid].ready_cond,
							 &mic_worker_set[mp_nodeid].mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[mp_nodeid].mutex);

		worker_set_initialized:
			workerarg->set = &mic_worker_set[mp_nodeid];
			mic_worker_set[mp_nodeid].started = 1;

#ifdef STARPU_USE_FXT
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_running)
				STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
#endif

			break;
#endif /* STARPU_USE_MIC */
#ifdef STARPU_USE_SCC
		case STARPU_SCC_WORKER:
			workerarg->worker_is_initialized = 0;
			STARPU_PTHREAD_CREATE_ON(
				workerarg->name,
				&workerarg->worker_thread,
				NULL,
				_starpu_scc_src_worker,
				workerarg,
				worker+1);

#ifdef STARPU_USE_FXT
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_running)
				STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
#endif
			break;
#endif
		default:
			STARPU_ABORT();
		}
	}

	/* Second pass: wait for the launched workers to be initialized. The
	 * counters restart from 0 so that the same driver ids as in the first
	 * pass are rebuilt. */
	cpu = 0;
	cuda = 0;
	for (worker = 0; worker < nworkers; worker++)
	{
		struct _starpu_worker *workerarg = &pconfig->workers[worker];
		struct starpu_driver driver;
		driver.type = workerarg->arch;

		switch (workerarg->arch)
		{
		case STARPU_CPU_WORKER:
			driver.id.cpu_id = cpu;
			/* Not launched by StarPU: nothing to wait for */
			if (!_starpu_may_launch_driver(pconfig->conf, &driver))
			{
				cpu++;
				break;
			}
			_STARPU_DEBUG("waiting for worker %u initialization\n", worker);
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_initialized)
				STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			cpu++;
			break;
		case STARPU_CUDA_WORKER:
			driver.id.cuda_id = cuda;
			/* Not launched by StarPU: nothing to wait for */
			if (!_starpu_may_launch_driver(pconfig->conf, &driver))
			{
				cuda++;
				break;
			}
			_STARPU_DEBUG("waiting for worker %u initialization\n", worker);
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_initialized)
				STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			cuda++;
			break;
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
		case STARPU_OPENCL_WORKER:
#ifndef STARPU_SIMGRID
			starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
			/* Not launched by StarPU: nothing to wait for */
			if (!_starpu_may_launch_driver(pconfig->conf, &driver))
				break;
#endif
			_STARPU_DEBUG("waiting for worker %u initialization\n", worker);
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_initialized)
				STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			break;
#endif
		case STARPU_MIC_WORKER:
			/* Already waited above */
			break;
		case STARPU_SCC_WORKER:
			/* TODO: implement may_launch? */
			_STARPU_DEBUG("waiting for worker %u initialization\n", worker);
			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			while (!workerarg->worker_is_initialized)
				STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
			break;
		default:
			STARPU_ABORT();
		}
	}

	_STARPU_DEBUG("finished launching drivers\n");
}
  637. void _starpu_set_local_worker_key(struct _starpu_worker *worker)
  638. {
  639. STARPU_PTHREAD_SETSPECIFIC(worker_key, worker);
  640. }
  641. struct _starpu_worker *_starpu_get_local_worker_key(void)
  642. {
  643. return (struct _starpu_worker *) STARPU_PTHREAD_GETSPECIFIC(worker_key);
  644. }
  645. /* Initialize the starpu_conf with default values */
  646. int starpu_conf_init(struct starpu_conf *conf)
  647. {
  648. if (!conf)
  649. return -EINVAL;
  650. memset(conf, 0, sizeof(*conf));
  651. conf->magic = 42;
  652. conf->sched_policy_name = getenv("STARPU_SCHED");
  653. conf->sched_policy = NULL;
  654. /* Note that starpu_get_env_number returns -1 in case the variable is
  655. * not defined */
  656. /* Backward compatibility: check the value of STARPU_NCPUS if
  657. * STARPU_NCPU is not set. */
  658. conf->ncpus = starpu_get_env_number("STARPU_NCPU");
  659. if (conf->ncpus == -1)
  660. conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
  661. conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
  662. conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
  663. conf->nmic = starpu_get_env_number("STARPU_NMIC");
  664. conf->nscc = starpu_get_env_number("STARPU_NSCC");
  665. conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
  666. conf->bus_calibrate = starpu_get_env_number("STARPU_BUS_CALIBRATE");
  667. conf->mic_sink_program_path = getenv("STARPU_MIC_PROGRAM_PATH");
  668. if (conf->calibrate == -1)
  669. conf->calibrate = 0;
  670. if (conf->bus_calibrate == -1)
  671. conf->bus_calibrate = 0;
  672. conf->use_explicit_workers_bindid = 0; /* TODO */
  673. conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
  674. conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
  675. conf->use_explicit_workers_mic_deviceid = 0; /* TODO */
  676. conf->use_explicit_workers_scc_deviceid = 0; /* TODO */
  677. conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
  678. if (conf->single_combined_worker == -1)
  679. conf->single_combined_worker = 0;
  680. #if defined(STARPU_DISABLE_ASYNCHRONOUS_COPY)
  681. conf->disable_asynchronous_copy = 1;
  682. #else
  683. conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
  684. if (conf->disable_asynchronous_copy == -1)
  685. conf->disable_asynchronous_copy = 0;
  686. #endif
  687. #if defined(STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY)
  688. conf->disable_asynchronous_cuda_copy = 1;
  689. #else
  690. conf->disable_asynchronous_cuda_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY");
  691. if (conf->disable_asynchronous_cuda_copy == -1)
  692. conf->disable_asynchronous_cuda_copy = 0;
  693. #endif
  694. #if defined(STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY)
  695. conf->disable_asynchronous_opencl_copy = 1;
  696. #else
  697. conf->disable_asynchronous_opencl_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY");
  698. if (conf->disable_asynchronous_opencl_copy == -1)
  699. conf->disable_asynchronous_opencl_copy = 0;
  700. #endif
  701. #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
  702. conf->disable_asynchronous_mic_copy = 1;
  703. #else
  704. conf->disable_asynchronous_mic_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY");
  705. if (conf->disable_asynchronous_mic_copy == -1)
  706. conf->disable_asynchronous_mic_copy = 0;
  707. #endif
  708. /* 64MiB by default */
  709. conf->trace_buffer_size = 64<<20;
  710. return 0;
  711. }
  /* Overwrite *value with the integer held by environment variable `name`.
   * *value is left untouched when starpu_get_env_number() returns -1. */
  static void _starpu_conf_set_value_against_environment(char *name, int *value)
  {
      const int from_env = starpu_get_env_number(name);

      if (from_env == -1)
          return;

      *value = from_env;
  }
  721. void _starpu_conf_check_environment(struct starpu_conf *conf)
  722. {
  723. char *sched = getenv("STARPU_SCHED");
  724. if (sched)
  725. {
  726. conf->sched_policy_name = sched;
  727. }
  728. _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus);
  729. _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus);
  730. _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda);
  731. _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl);
  732. _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate);
  733. _starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate);
  734. _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
  735. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
  736. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy);
  737. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy);
  738. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy);
  739. }
  /* Initialize StarPU without handing over the program's command line:
   * forwards to starpu_initialize() with NULL argc and argv and returns its
   * result. */
  int starpu_init(struct starpu_conf *user_conf)
  {
      int ret = starpu_initialize(user_conf, NULL, NULL);

      return ret;
  }
  744. int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
  745. {
  746. int is_a_sink = 0; /* Always defined. If the MP infrastructure is not
  747. * used, we cannot be a sink. */
  748. #ifdef STARPU_USE_MP
  749. _starpu_set_argc_argv(argc, argv);
  750. # ifdef STARPU_USE_SCC
  751. /* In SCC case we look at the rank to know if we are a sink */
  752. if (_starpu_scc_common_mp_init() && !_starpu_scc_common_is_src_node())
  753. setenv("STARPU_SINK", "STARPU_SCC", 1);
  754. # endif
  755. /* If StarPU was configured to use MP sinks, we have to control the
  756. * kind on node we are running on : host or sink ? */
  757. if (getenv("STARPU_SINK"))
  758. is_a_sink = 1;
  759. #else
  760. (void)argc;
  761. (void)argv;
  762. #endif /* STARPU_USE_MP */
  763. int ret;
  764. #ifndef STARPU_SIMGRID
  765. #ifdef __GNUC__
  766. #ifndef __OPTIMIZE__
  767. _STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
  768. #endif
  769. #endif
  770. #if 0
  771. #ifndef STARPU_NO_ASSERT
  772. _STARPU_DISP("Warning: StarPU was configured without --enable-fast\n");
  773. #endif
  774. #endif
  775. #ifdef STARPU_MEMORY_STATS
  776. _STARPU_DISP("Warning: StarPU was configured with --enable-memory-stats, which slows down a bit\n");
  777. #endif
  778. #ifdef STARPU_VERBOSE
  779. _STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
  780. #endif
  781. #ifdef STARPU_USE_FXT
  782. _STARPU_DISP("Warning: StarPU was configured with --with-fxt, which slows down a bit\n");
  783. #endif
  784. #ifdef STARPU_PERF_DEBUG
  785. _STARPU_DISP("Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
  786. #endif
  787. #ifdef STARPU_MODEL_DEBUG
  788. _STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
  789. #endif
  790. #ifdef STARPU_ENABLE_STATS
  791. _STARPU_DISP("Warning: StarPU was configured with --enable-stats, which slows down a bit\n");
  792. #endif
  793. #endif
  794. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  795. while (initialized == CHANGING)
  796. /* Wait for the other one changing it */
  797. STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  798. init_count++;
  799. if (initialized == INITIALIZED)
  800. {
  801. /* He initialized it, don't do it again, and let the others get the mutex */
  802. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  803. return 0;
  804. }
  805. /* initialized == UNINITIALIZED */
  806. initialized = CHANGING;
  807. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  808. #ifdef __MINGW32__
  809. WSADATA wsadata;
  810. WSAStartup(MAKEWORD(1,0), &wsadata);
  811. #endif
  812. srand(2008);
  813. #ifdef HAVE_AYUDAME_H
  814. #ifndef AYU_RT_STARPU
  815. /* Dumb value for now */
  816. #define AYU_RT_STARPU 32
  817. #endif
  818. if (AYU_event)
  819. {
  820. enum ayu_runtime_t ayu_rt = AYU_RT_STARPU;
  821. AYU_event(AYU_PREINIT, 0, (void*) &ayu_rt);
  822. }
  823. #endif
  824. /* store the pointer to the user explicit configuration during the
  825. * initialization */
  826. if (user_conf == NULL)
  827. {
  828. struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
  829. starpu_conf_init(conf);
  830. config.conf = conf;
  831. config.default_conf = 1;
  832. }
  833. else
  834. {
  835. if (user_conf->magic != 42)
  836. {
  837. _STARPU_DISP("starpu_conf structure needs to be initialized with starpu_conf_init\n");
  838. return -EINVAL;
  839. }
  840. config.conf = user_conf;
  841. config.default_conf = 0;
  842. }
  843. _starpu_conf_check_environment(config.conf);
  844. _starpu_init_all_sched_ctxs(&config);
  845. _starpu_init_progression_hooks();
  846. _starpu_init_tags();
  847. #ifdef STARPU_USE_FXT
  848. _starpu_init_fxt_profiling(config.conf->trace_buffer_size);
  849. #endif
  850. _starpu_open_debug_logfile();
  851. _starpu_data_interface_init();
  852. _starpu_timing_init();
  853. _starpu_profiling_init();
  854. _starpu_load_bus_performance_files();
  855. /* Depending on whether we are a MP sink or not, we must build the
  856. * topology with MP nodes or not. */
  857. ret = _starpu_build_topology(&config, is_a_sink ? 1 : 0);
  858. if (ret)
  859. {
  860. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  861. init_count--;
  862. #ifdef STARPU_USE_SCC
  863. if (_starpu_scc_common_is_mp_initialized())
  864. _starpu_scc_src_mp_deinit();
  865. #endif
  866. initialized = UNINITIALIZED;
  867. /* Let somebody else try to do it */
  868. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  869. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  870. return ret;
  871. }
  872. /* We need to store the current task handled by the different
  873. * threads */
  874. _starpu_initialize_current_task_key();
  875. if (!is_a_sink)
  876. {
  877. struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&config, config.conf->sched_policy_name);
  878. _starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init");
  879. }
  880. _starpu_initialize_registered_performance_models();
  881. /* Launch "basic" workers (ie. non-combined workers) */
  882. if (!is_a_sink)
  883. _starpu_launch_drivers(&config);
  884. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  885. initialized = INITIALIZED;
  886. /* Tell everybody that we initialized */
  887. STARPU_PTHREAD_COND_BROADCAST(&init_cond);
  888. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  889. _STARPU_DEBUG("Initialisation finished\n");
  890. #ifdef STARPU_USE_MP
  891. /* Finally, if we are a MP sink, we never leave this function. Else,
  892. * we enter an infinite event loop which listen for MP commands from
  893. * the source. */
  894. if (is_a_sink) {
  895. _starpu_sink_common_worker();
  896. /* We should normally never leave the loop as we don't want to
  897. * really initialize STARPU */
  898. STARPU_ASSERT(0);
  899. }
  900. #endif
  901. return 0;
  902. }
  903. /*
  904. * Handle runtime termination
  905. */
  906. static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
  907. {
  908. int status = 0;
  909. unsigned workerid;
  910. for (workerid = 0; workerid < pconfig->topology.nworkers; workerid++)
  911. {
  912. starpu_wake_all_blocked_workers();
  913. _STARPU_DEBUG("wait for worker %u\n", workerid);
  914. struct _starpu_worker_set *set = pconfig->workers[workerid].set;
  915. struct _starpu_worker *worker = &pconfig->workers[workerid];
  916. /* in case StarPU termination code is called from a callback,
  917. * we have to check if pthread_self() is the worker itself */
  918. if (set)
  919. {
  920. if (set->started)
  921. {
  922. #ifdef STARPU_SIMGRID
  923. status = starpu_pthread_join(set->worker_thread, NULL);
  924. #else
  925. if (!pthread_equal(pthread_self(), set->worker_thread))
  926. status = starpu_pthread_join(set->worker_thread, NULL);
  927. #endif
  928. #ifdef STARPU_VERBOSE
  929. if (status)
  930. {
  931. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  932. }
  933. #endif
  934. set->started = 0;
  935. }
  936. }
  937. else
  938. {
  939. if (!worker->run_by_starpu)
  940. goto out;
  941. #ifdef STARPU_SIMGRID
  942. status = starpu_pthread_join(worker->worker_thread, NULL);
  943. #else
  944. if (!pthread_equal(pthread_self(), worker->worker_thread))
  945. status = starpu_pthread_join(worker->worker_thread, NULL);
  946. #endif
  947. #ifdef STARPU_VERBOSE
  948. if (status)
  949. {
  950. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  951. }
  952. #endif
  953. }
  954. out:
  955. STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
  956. _starpu_sched_ctx_list_delete(&worker->sched_ctx_list);
  957. _starpu_job_list_delete(worker->terminated_jobs);
  958. }
  959. }
  960. unsigned _starpu_machine_is_running(void)
  961. {
  962. unsigned ret;
  963. /* running is just protected by a memory barrier */
  964. STARPU_RMB();
  965. ANNOTATE_HAPPENS_AFTER(&config.running);
  966. ret = config.running;
  967. ANNOTATE_HAPPENS_BEFORE(&config.running);
  968. return ret;
  969. }
  970. unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED)
  971. {
  972. #ifdef STARPU_NON_BLOCKING_DRIVERS
  973. return 0;
  974. #else
  975. unsigned can_block = 1;
  976. #ifndef STARPU_SIMGRID
  977. if (!_starpu_check_that_no_data_request_exists(memnode))
  978. can_block = 0;
  979. #endif
  980. if (!_starpu_machine_is_running())
  981. can_block = 0;
  982. if (!_starpu_execute_registered_progression_hooks())
  983. can_block = 0;
  984. return can_block;
  985. #endif
  986. }
  987. static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig)
  988. {
  989. /* set the flag which will tell workers to stop */
  990. ANNOTATE_HAPPENS_AFTER(&config.running);
  991. pconfig->running = 0;
  992. /* running is just protected by a memory barrier */
  993. ANNOTATE_HAPPENS_BEFORE(&config.running);
  994. STARPU_WMB();
  995. starpu_wake_all_blocked_workers();
  996. }
  /* Display the bus profiling summary followed by the worker profiling
   * summary. */
  void starpu_display_stats()
  {
      /* bus first, workers second */
      starpu_profiling_bus_helper_display_summary();

      starpu_profiling_worker_helper_display_summary();
  }
  1002. void starpu_shutdown(void)
  1003. {
  1004. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1005. init_count--;
  1006. if (init_count)
  1007. {
  1008. _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
  1009. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1010. return;
  1011. }
  1012. /* We're last */
  1013. initialized = CHANGING;
  1014. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1015. starpu_task_wait_for_no_ready();
  1016. /* tell all workers to shutdown */
  1017. _starpu_kill_all_workers(&config);
  1018. {
  1019. int stats = starpu_get_env_number("STARPU_STATS");
  1020. if (stats != 0)
  1021. {
  1022. _starpu_display_msi_stats();
  1023. _starpu_display_alloc_cache_stats();
  1024. _starpu_display_comm_amounts();
  1025. }
  1026. }
  1027. starpu_profiling_bus_helper_display_summary();
  1028. starpu_profiling_worker_helper_display_summary();
  1029. _starpu_deinitialize_registered_performance_models();
  1030. /* wait for their termination */
  1031. _starpu_terminate_workers(&config);
  1032. {
  1033. int stats = starpu_get_env_number("STARPU_MEMORY_STATS");
  1034. if (stats != 0)
  1035. {
  1036. // Display statistics on data which have not been unregistered
  1037. starpu_data_display_memory_stats();
  1038. }
  1039. }
  1040. _starpu_delete_all_sched_ctxs();
  1041. _starpu_disk_unregister();
  1042. _starpu_destroy_topology(&config);
  1043. #ifdef STARPU_USE_FXT
  1044. _starpu_stop_fxt_profiling();
  1045. #endif
  1046. _starpu_data_interface_shutdown();
  1047. /* Drop all remaining tags */
  1048. _starpu_tag_clear();
  1049. _starpu_close_debug_logfile();
  1050. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1051. initialized = UNINITIALIZED;
  1052. /* Let someone else that wants to initialize it again do it */
  1053. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  1054. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1055. /* Clear memory if it was allocated by StarPU */
  1056. if (config.default_conf)
  1057. free(config.conf);
  1058. #ifdef HAVE_AYUDAME_H
  1059. if (AYU_event) AYU_event(AYU_FINISH, 0, NULL);
  1060. #endif
  1061. #ifdef STARPU_USE_SCC
  1062. if (_starpu_scc_common_is_mp_initialized())
  1063. _starpu_scc_src_mp_deinit();
  1064. #endif
  1065. _STARPU_DEBUG("Shutdown finished\n");
  1066. }
  1067. unsigned starpu_worker_get_count(void)
  1068. {
  1069. return config.topology.nworkers;
  1070. }
  1071. int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
  1072. {
  1073. switch (type)
  1074. {
  1075. case STARPU_CPU_WORKER:
  1076. return config.topology.ncpus;
  1077. case STARPU_CUDA_WORKER:
  1078. return config.topology.ncudagpus;
  1079. case STARPU_OPENCL_WORKER:
  1080. return config.topology.nopenclgpus;
  1081. case STARPU_MIC_WORKER:
  1082. return config.topology.nmicdevices;
  1083. case STARPU_SCC_WORKER:
  1084. return config.topology.nsccdevices;
  1085. default:
  1086. return -EINVAL;
  1087. }
  1088. }
  1089. unsigned starpu_combined_worker_get_count(void)
  1090. {
  1091. return config.topology.ncombinedworkers;
  1092. }
  1093. unsigned starpu_cpu_worker_get_count(void)
  1094. {
  1095. return config.topology.ncpus;
  1096. }
  1097. unsigned starpu_cuda_worker_get_count(void)
  1098. {
  1099. return config.topology.ncudagpus;
  1100. }
  1101. unsigned starpu_opencl_worker_get_count(void)
  1102. {
  1103. return config.topology.nopenclgpus;
  1104. }
  1105. int starpu_asynchronous_copy_disabled(void)
  1106. {
  1107. return config.conf->disable_asynchronous_copy;
  1108. }
  1109. int starpu_asynchronous_cuda_copy_disabled(void)
  1110. {
  1111. return config.conf->disable_asynchronous_cuda_copy;
  1112. }
  1113. int starpu_asynchronous_opencl_copy_disabled(void)
  1114. {
  1115. return config.conf->disable_asynchronous_opencl_copy;
  1116. }
  1117. int starpu_asynchronous_mic_copy_disabled(void)
  1118. {
  1119. return config.conf->disable_asynchronous_mic_copy;
  1120. }
  1121. unsigned starpu_mic_worker_get_count(void)
  1122. {
  1123. int i = 0, count = 0;
  1124. for (i = 0; i < STARPU_MAXMICDEVS; i++)
  1125. count += config.topology.nmiccores[i];
  1126. return count;
  1127. }
  1128. unsigned starpu_scc_worker_get_count(void)
  1129. {
  1130. return config.topology.nsccdevices;
  1131. }
  1132. /* When analyzing performance, it is useful to see what is the processing unit
  1133. * that actually performed the task. This function returns the id of the
  1134. * processing unit actually executing it, therefore it makes no sense to use it
  1135. * within the callbacks of SPU functions for instance. If called by some thread
  1136. * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
  1137. int starpu_worker_get_id(void)
  1138. {
  1139. struct _starpu_worker * worker;
  1140. worker = _starpu_get_local_worker_key();
  1141. if (worker)
  1142. {
  1143. return worker->workerid;
  1144. }
  1145. else
  1146. {
  1147. /* there is no worker associated to that thread, perhaps it is
  1148. * a thread from the application or this is some SPU worker */
  1149. return -1;
  1150. }
  1151. }
  1152. int starpu_combined_worker_get_id(void)
  1153. {
  1154. struct _starpu_worker *worker;
  1155. worker = _starpu_get_local_worker_key();
  1156. if (worker)
  1157. {
  1158. return worker->combined_workerid;
  1159. }
  1160. else
  1161. {
  1162. /* there is no worker associated to that thread, perhaps it is
  1163. * a thread from the application or this is some SPU worker */
  1164. return -1;
  1165. }
  1166. }
  1167. int starpu_combined_worker_get_size(void)
  1168. {
  1169. struct _starpu_worker *worker;
  1170. worker = _starpu_get_local_worker_key();
  1171. if (worker)
  1172. {
  1173. return worker->worker_size;
  1174. }
  1175. else
  1176. {
  1177. /* there is no worker associated to that thread, perhaps it is
  1178. * a thread from the application or this is some SPU worker */
  1179. return -1;
  1180. }
  1181. }
  1182. int starpu_combined_worker_get_rank(void)
  1183. {
  1184. struct _starpu_worker *worker;
  1185. worker = _starpu_get_local_worker_key();
  1186. if (worker)
  1187. {
  1188. return worker->current_rank;
  1189. }
  1190. else
  1191. {
  1192. /* there is no worker associated to that thread, perhaps it is
  1193. * a thread from the application or this is some SPU worker */
  1194. return -1;
  1195. }
  1196. }
  1197. int starpu_worker_get_mp_nodeid(int id)
  1198. {
  1199. return config.workers[id].mp_nodeid;
  1200. }
  1201. int starpu_worker_get_devid(int id)
  1202. {
  1203. return config.workers[id].devid;
  1204. }
  1205. struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
  1206. {
  1207. return &config.workers[id];
  1208. }
  1209. unsigned starpu_worker_is_combined_worker(int id)
  1210. {
  1211. return id >= (int)config.topology.nworkers;
  1212. }
  1213. struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
  1214. {
  1215. if(id == STARPU_NMAX_SCHED_CTXS) return NULL;
  1216. return &config.sched_ctxs[id];
  1217. }
  1218. struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
  1219. {
  1220. unsigned basic_worker_count = starpu_worker_get_count();
  1221. //_STARPU_DEBUG("basic_worker_count:%d\n",basic_worker_count);
  1222. STARPU_ASSERT(id >= basic_worker_count);
  1223. return &config.combined_workers[id - basic_worker_count];
  1224. }
  1225. enum starpu_worker_archtype starpu_worker_get_type(int id)
  1226. {
  1227. return config.workers[id].arch;
  1228. }
  1229. int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  1230. {
  1231. unsigned nworkers = starpu_worker_get_count();
  1232. int cnt = 0;
  1233. unsigned id;
  1234. for (id = 0; id < nworkers; id++)
  1235. {
  1236. if (starpu_worker_get_type(id) == type)
  1237. {
  1238. /* Perhaps the array is too small ? */
  1239. if (cnt >= maxsize)
  1240. return -ERANGE;
  1241. workerids[cnt++] = id;
  1242. }
  1243. }
  1244. return cnt;
  1245. }
  1246. int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
  1247. {
  1248. unsigned nworkers = starpu_worker_get_count();
  1249. int cnt = 0;
  1250. unsigned id;
  1251. for (id = 0; id < nworkers; id++)
  1252. {
  1253. if (starpu_worker_get_type(id) == type)
  1254. {
  1255. if (num == cnt)
  1256. return id;
  1257. cnt++;
  1258. }
  1259. }
  1260. /* Not found */
  1261. return -1;
  1262. }
  1263. int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid)
  1264. {
  1265. unsigned nworkers = starpu_worker_get_count();
  1266. unsigned id;
  1267. for (id = 0; id < nworkers; id++)
  1268. if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid)
  1269. return id;
  1270. /* Not found */
  1271. return -1;
  1272. }
  1273. void starpu_worker_get_name(int id, char *dst, size_t maxlen)
  1274. {
  1275. char *name = config.workers[id].name;
  1276. snprintf(dst, maxlen, "%s", name);
  1277. }
  1278. /* Retrieve the status which indicates what the worker is currently doing. */
  1279. enum _starpu_worker_status _starpu_worker_get_status(int workerid)
  1280. {
  1281. return config.workers[workerid].status;
  1282. }
  1283. /* Change the status of the worker which indicates what the worker is currently
  1284. * doing (eg. executing a callback). */
  1285. void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
  1286. {
  1287. config.workers[workerid].status = status;
  1288. }
  1289. void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)
  1290. {
  1291. *sched_cond = &config.workers[workerid].sched_cond;
  1292. *sched_mutex = &config.workers[workerid].sched_mutex;
  1293. }
  1294. int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
  1295. {
  1296. int success = 0;
  1297. STARPU_PTHREAD_MUTEX_LOCK(mutex);
  1298. if (config.workers[workerid].status == STATUS_SLEEPING)
  1299. {
  1300. config.workers[workerid].status = STATUS_WAKING_UP;
  1301. STARPU_PTHREAD_COND_SIGNAL(cond);
  1302. success = 1;
  1303. }
  1304. STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
  1305. return success;
  1306. }
  1307. int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  1308. {
  1309. unsigned nworkers = starpu_worker_get_count();
  1310. int cnt = 0;
  1311. unsigned id;
  1312. for (id = 0; id < nworkers; id++)
  1313. {
  1314. if (starpu_worker_get_type(id) == type)
  1315. {
  1316. /* Perhaps the array is too small ? */
  1317. if (cnt >= maxsize)
  1318. return cnt;
  1319. workerids[cnt++] = id;
  1320. }
  1321. }
  1322. return cnt;
  1323. }
  1324. int starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  1325. {
  1326. unsigned nworkers = starpu_worker_get_count();
  1327. int cnt = 0;
  1328. unsigned id, worker;
  1329. unsigned found = 0;
  1330. for (id = 0; id < nworkers; id++)
  1331. {
  1332. found = 0;
  1333. if (starpu_worker_get_type(id) == type)
  1334. {
  1335. /* Perhaps the array is too small ? */
  1336. if (cnt >= maxsize)
  1337. return cnt;
  1338. int s;
  1339. for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++)
  1340. {
  1341. if(config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
  1342. {
  1343. struct starpu_worker_collection *workers = config.sched_ctxs[s].workers;
  1344. struct starpu_sched_ctx_iterator it;
  1345. if(workers->init_iterator)
  1346. workers->init_iterator(workers, &it);
  1347. while(workers->has_next(workers, &it))
  1348. {
  1349. worker = workers->get_next(workers, &it);
  1350. if(worker == id)
  1351. {
  1352. found = 1;
  1353. break;
  1354. }
  1355. }
  1356. if(found) break;
  1357. }
  1358. }
  1359. if(!found)
  1360. workerids[cnt++] = id;
  1361. }
  1362. }
  1363. return cnt;
  1364. }
  1365. struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
  1366. {
  1367. return &config.sched_ctxs[STARPU_GLOBAL_SCHED_CTX];
  1368. }
  1369. int
  1370. starpu_driver_run(struct starpu_driver *d)
  1371. {
  1372. if (!d)
  1373. {
  1374. _STARPU_DEBUG("Invalid argument\n");
  1375. return -EINVAL;
  1376. }
  1377. switch (d->type)
  1378. {
  1379. #ifdef STARPU_USE_CPU
  1380. case STARPU_CPU_WORKER:
  1381. return _starpu_run_cpu(d);
  1382. #endif
  1383. #ifdef STARPU_USE_CUDA
  1384. case STARPU_CUDA_WORKER:
  1385. return _starpu_run_cuda(d);
  1386. #endif
  1387. #ifdef STARPU_USE_OPENCL
  1388. case STARPU_OPENCL_WORKER:
  1389. return _starpu_run_opencl(d);
  1390. #endif
  1391. default:
  1392. {
  1393. _STARPU_DEBUG("Invalid device type\n");
  1394. return -EINVAL;
  1395. }
  1396. }
  1397. }
  1398. int
  1399. starpu_driver_init(struct starpu_driver *d)
  1400. {
  1401. STARPU_ASSERT(d);
  1402. switch (d->type)
  1403. {
  1404. #ifdef STARPU_USE_CPU
  1405. case STARPU_CPU_WORKER:
  1406. return _starpu_cpu_driver_init(d);
  1407. #endif
  1408. #ifdef STARPU_USE_CUDA
  1409. case STARPU_CUDA_WORKER:
  1410. return _starpu_cuda_driver_init(d);
  1411. #endif
  1412. #ifdef STARPU_USE_OPENCL
  1413. case STARPU_OPENCL_WORKER:
  1414. return _starpu_opencl_driver_init(d);
  1415. #endif
  1416. default:
  1417. return -EINVAL;
  1418. }
  1419. }
  1420. int
  1421. starpu_driver_run_once(struct starpu_driver *d)
  1422. {
  1423. STARPU_ASSERT(d);
  1424. switch (d->type)
  1425. {
  1426. #ifdef STARPU_USE_CPU
  1427. case STARPU_CPU_WORKER:
  1428. return _starpu_cpu_driver_run_once(d);
  1429. #endif
  1430. #ifdef STARPU_USE_CUDA
  1431. case STARPU_CUDA_WORKER:
  1432. return _starpu_cuda_driver_run_once(d);
  1433. #endif
  1434. #ifdef STARPU_USE_OPENCL
  1435. case STARPU_OPENCL_WORKER:
  1436. return _starpu_opencl_driver_run_once(d);
  1437. #endif
  1438. default:
  1439. return -EINVAL;
  1440. }
  1441. }
  1442. int
  1443. starpu_driver_deinit(struct starpu_driver *d)
  1444. {
  1445. STARPU_ASSERT(d);
  1446. switch (d->type)
  1447. {
  1448. #ifdef STARPU_USE_CPU
  1449. case STARPU_CPU_WORKER:
  1450. return _starpu_cpu_driver_deinit(d);
  1451. #endif
  1452. #ifdef STARPU_USE_CUDA
  1453. case STARPU_CUDA_WORKER:
  1454. return _starpu_cuda_driver_deinit(d);
  1455. #endif
  1456. #ifdef STARPU_USE_OPENCL
  1457. case STARPU_OPENCL_WORKER:
  1458. return _starpu_opencl_driver_deinit(d);
  1459. #endif
  1460. default:
  1461. return -EINVAL;
  1462. }
  1463. }
  1464. void starpu_get_version(int *major, int *minor, int *release)
  1465. {
  1466. *major = STARPU_MAJOR_VERSION;
  1467. *minor = STARPU_MINOR_VERSION;
  1468. *release = STARPU_RELEASE_VERSION;
  1469. }