workers.c 44 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2010, 2011 Institut National de Recherche en Informatique et Automatique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. * Copyright (C) 2011-2012 INRIA
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/progress_hook.h>
  25. #include <core/workers.h>
  26. #include <core/debug.h>
  27. #include <core/disk.h>
  28. #include <core/task.h>
  29. #include <profiling/profiling.h>
  30. #include <starpu_task_list.h>
  31. #include <drivers/mp_common/sink_common.h>
  32. #include <drivers/scc/driver_scc_common.h>
  33. #include <drivers/cpu/driver_cpu.h>
  34. #include <drivers/cuda/driver_cuda.h>
  35. #include <drivers/opencl/driver_opencl.h>
  36. #ifdef STARPU_SIMGRID
  37. #include <msg/msg.h>
  38. #endif
  39. #ifdef __MINGW32__
  40. #include <windows.h>
  41. #endif
  42. /* acquire/release semantic for concurrent initialization/de-initialization */
  43. static starpu_pthread_mutex_t init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  44. static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER;
  45. static int init_count = 0;
  46. static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
  47. static starpu_pthread_key_t worker_key;
  48. static struct _starpu_machine_config config;
  49. /* Pointers to argc and argv
  50. */
  51. static int *my_argc = 0;
  52. static char ***my_argv = NULL;
  53. /* Initialize value of static argc and argv, called when the process begins
  54. */
  55. void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
  56. {
  57. my_argc = argc_param;
  58. my_argv = argv_param;
  59. }
  60. int *_starpu_get_argc()
  61. {
  62. return my_argc;
  63. }
  64. char ***_starpu_get_argv()
  65. {
  66. return my_argv;
  67. }
  68. int _starpu_is_initialized(void)
  69. {
  70. return initialized == INITIALIZED;
  71. }
  72. struct _starpu_machine_config *_starpu_get_machine_config(void)
  73. {
  74. return &config;
  75. }
  76. /* Makes sure that at least one of the workers of type <arch> can execute
  77. * <task>, for at least one of its implementations. */
  78. static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
  79. enum starpu_worker_archtype arch)
  80. {
  81. int i;
  82. int nworkers = starpu_worker_get_count();
  83. _starpu_codelet_check_deprecated_fields(task->cl);
  84. for (i = 0; i < nworkers; i++)
  85. {
  86. if (starpu_worker_get_type(i) != arch)
  87. continue;
  88. unsigned impl;
  89. for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++)
  90. {
  91. /* We could call task->cl->can_execute(i, task, impl)
  92. here, it would definitely work. It is probably
  93. cheaper to check whether it is necessary in order to
  94. avoid a useless function call, though. */
  95. unsigned test_implementation = 0;
  96. switch (arch)
  97. {
  98. case STARPU_CPU_WORKER:
  99. if (task->cl->cpu_funcs[impl] != NULL)
  100. test_implementation = 1;
  101. break;
  102. case STARPU_CUDA_WORKER:
  103. if (task->cl->cuda_funcs[impl] != NULL)
  104. test_implementation = 1;
  105. break;
  106. case STARPU_OPENCL_WORKER:
  107. if (task->cl->opencl_funcs[impl] != NULL)
  108. test_implementation = 1;
  109. break;
  110. case STARPU_MIC_WORKER:
  111. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mic_funcs[impl] != NULL)
  112. test_implementation = 1;
  113. break;
  114. case STARPU_SCC_WORKER:
  115. if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->scc_funcs[impl] != NULL)
  116. test_implementation = 1;
  117. break;
  118. default:
  119. STARPU_ABORT();
  120. }
  121. if (!test_implementation)
  122. break;
  123. if (task->cl->can_execute(i, task, impl))
  124. return 1;
  125. }
  126. }
  127. return 0;
  128. }
  129. /* in case a task is submitted, we may check whether there exists a worker
  130. that may execute the task or not */
  131. uint32_t _starpu_worker_exists(struct starpu_task *task)
  132. {
  133. _starpu_codelet_check_deprecated_fields(task->cl);
  134. if (!(task->cl->where & config.worker_mask))
  135. return 0;
  136. if (!task->cl->can_execute)
  137. return 1;
  138. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  139. if ((task->cl->where & STARPU_CPU) &&
  140. _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
  141. return 1;
  142. #endif
  143. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  144. if ((task->cl->where & STARPU_CUDA) &&
  145. _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
  146. return 1;
  147. #endif
  148. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  149. if ((task->cl->where & STARPU_OPENCL) &&
  150. _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
  151. return 1;
  152. #endif
  153. #ifdef STARPU_USE_MIC
  154. if ((task->cl->where & STARPU_MIC) &&
  155. _starpu_worker_exists_and_can_execute(task, STARPU_MIC_WORKER))
  156. return 1;
  157. #endif
  158. #ifdef STARPU_USE_SCC
  159. if ((task->cl->where & STARPU_SCC) &&
  160. _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
  161. return 1;
  162. #endif
  163. return 0;
  164. }
  165. uint32_t _starpu_can_submit_cuda_task(void)
  166. {
  167. return (STARPU_CUDA & config.worker_mask);
  168. }
  169. uint32_t _starpu_can_submit_cpu_task(void)
  170. {
  171. return (STARPU_CPU & config.worker_mask);
  172. }
  173. uint32_t _starpu_can_submit_opencl_task(void)
  174. {
  175. return (STARPU_OPENCL & config.worker_mask);
  176. }
  177. uint32_t _starpu_can_submit_scc_task(void)
  178. {
  179. return (STARPU_SCC & config.worker_mask);
  180. }
  181. static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
  182. {
  183. switch(arch)
  184. {
  185. case STARPU_ANY_WORKER:
  186. {
  187. int cpu_func_enabled=1, cuda_func_enabled=1, opencl_func_enabled=1;
  188. /* TODO: MIC/SCC */
  189. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  190. starpu_cpu_func_t cpu_func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  191. cpu_func_enabled = cpu_func != NULL && starpu_cpu_worker_get_count();
  192. #endif
  193. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  194. starpu_cuda_func_t cuda_func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  195. cuda_func_enabled = cuda_func != NULL && starpu_cuda_worker_get_count();
  196. #endif
  197. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  198. starpu_opencl_func_t opencl_func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  199. opencl_func_enabled = opencl_func != NULL && starpu_opencl_worker_get_count();
  200. #endif
  201. return (cpu_func_enabled && cuda_func_enabled && opencl_func_enabled);
  202. }
  203. case STARPU_CPU_WORKER:
  204. {
  205. starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  206. return func != NULL;
  207. }
  208. case STARPU_CUDA_WORKER:
  209. {
  210. starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  211. return func != NULL;
  212. }
  213. case STARPU_OPENCL_WORKER:
  214. {
  215. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  216. return func != NULL;
  217. }
  218. case STARPU_MIC_WORKER:
  219. {
  220. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
  221. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  222. return func != NULL || func_name != NULL;
  223. }
  224. case STARPU_SCC_WORKER:
  225. {
  226. starpu_scc_func_t func = _starpu_task_get_scc_nth_implementation(cl, nimpl);
  227. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  228. return func != NULL || func_name != NULL;
  229. }
  230. default:
  231. STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch);
  232. }
  233. return 0;
  234. }
  235. int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  236. {
  237. /* TODO: check that the task operand sizes will fit on that device */
  238. return (task->cl->where & config.workers[workerid].worker_mask) &&
  239. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
  240. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
  241. }
  242. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  243. {
  244. /* TODO: check that the task operand sizes will fit on that device */
  245. /* TODO: call application-provided function for various cases like
  246. * double support, shared memory size limit, etc. */
  247. struct starpu_codelet *cl = task->cl;
  248. unsigned nworkers = config.topology.nworkers;
  249. /* Is this a parallel worker ? */
  250. if (workerid < nworkers)
  251. {
  252. return !!((task->cl->where & config.workers[workerid].worker_mask) &&
  253. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
  254. }
  255. else
  256. {
  257. if ((cl->type == STARPU_SPMD)
  258. #ifdef STARPU_HAVE_HWLOC
  259. || (cl->type == STARPU_FORKJOIN)
  260. #else
  261. #ifdef __GLIBC__
  262. || (cl->type == STARPU_FORKJOIN)
  263. #endif
  264. #endif
  265. )
  266. {
  267. /* TODO we should add other types of constraints */
  268. /* Is the worker larger than requested ? */
  269. int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
  270. int worker0 = config.combined_workers[workerid - nworkers].combined_workerid[0];
  271. return !!((worker_size <= task->cl->max_parallelism) &&
  272. _starpu_can_use_nth_implementation(config.workers[worker0].arch, task->cl, nimpl));
  273. }
  274. else
  275. {
  276. /* We have a sequential task but a parallel worker */
  277. return 0;
  278. }
  279. }
  280. }
  281. /*
  282. * Runtime initialization methods
  283. */
  284. #ifdef STARPU_USE_MIC
  285. static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
  286. #endif
  287. static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
  288. {
  289. starpu_pthread_cond_t *cond = &workerarg->sched_cond;
  290. starpu_pthread_mutex_t *mutex = &workerarg->sched_mutex;
  291. unsigned memory_node = workerarg->memory_node;
  292. _starpu_memory_node_register_condition(cond, mutex, memory_node);
  293. }
  294. /*
  295. * Returns 0 if the given driver is one of the drivers that must be launched by
  296. * the application itself, and not by StarPU, 1 otherwise.
  297. */
  298. static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
  299. struct starpu_driver *d)
  300. {
  301. if (conf->n_not_launched_drivers == 0 ||
  302. conf->not_launched_drivers == NULL)
  303. return 1;
  304. /* Is <d> in conf->not_launched_drivers ? */
  305. unsigned i;
  306. for (i = 0; i < conf->n_not_launched_drivers; i++)
  307. {
  308. if (d->type != conf->not_launched_drivers[i].type)
  309. continue;
  310. switch (d->type)
  311. {
  312. case STARPU_CPU_WORKER:
  313. if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
  314. return 0;
  315. case STARPU_CUDA_WORKER:
  316. if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
  317. return 0;
  318. break;
  319. #ifdef STARPU_USE_OPENCL
  320. case STARPU_OPENCL_WORKER:
  321. if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
  322. return 0;
  323. break;
  324. #endif
  325. default:
  326. STARPU_ABORT();
  327. }
  328. }
  329. return 1;
  330. }
  331. #ifdef STARPU_PERF_DEBUG
  332. struct itimerval prof_itimer;
  333. #endif
  334. void _starpu_worker_init(struct _starpu_worker *worker, unsigned fut_key)
  335. {
  336. (void) fut_key;
  337. int devid = worker->devid;
  338. (void) devid;
  339. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  340. setitimer(ITIMER_PROF, &prof_itimer, NULL);
  341. #endif
  342. #ifdef STARPU_USE_FXT
  343. _starpu_fxt_register_thread(worker->bindid);
  344. unsigned memnode = worker->memory_node;
  345. _STARPU_TRACE_WORKER_INIT_START(fut_key, worker->workerid, devid, memnode);
  346. #endif
  347. _starpu_bind_thread_on_cpu(worker->config, worker->bindid);
  348. _STARPU_DEBUG("worker %d is ready on logical cpu %d\n", devid, worker->bindid);
  349. #ifdef STARPU_HAVE_HWLOC
  350. _STARPU_DEBUG("worker %d cpuset start at %d\n", devid, hwloc_bitmap_first(worker->hwloc_cpu_set));
  351. #endif
  352. _starpu_memory_node_set_local_key(&worker->memory_node);
  353. _starpu_set_local_worker_key(worker);
  354. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  355. worker->worker_is_running = 1;
  356. STARPU_PTHREAD_COND_SIGNAL(&worker->started_cond);
  357. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  358. worker->spinning_backoff = 1;
  359. }
  360. static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
  361. {
  362. pconfig->running = 1;
  363. pconfig->submitting = 1;
  364. STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);
  365. unsigned nworkers = pconfig->topology.nworkers;
  366. /* Launch workers asynchronously */
  367. unsigned cpu = 0, cuda = 0;
  368. unsigned worker;
  369. #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID)
  370. /* Get itimer of the main thread, to set it for the worker threads */
  371. getitimer(ITIMER_PROF, &prof_itimer);
  372. #endif
  373. #ifdef HAVE_AYUDAME_H
  374. if (AYU_event) AYU_event(AYU_INIT, 0, NULL);
  375. #endif
  376. for (worker = 0; worker < nworkers; worker++)
  377. {
  378. struct _starpu_worker *workerarg = &pconfig->workers[worker];
  379. #ifdef STARPU_USE_MIC
  380. unsigned mp_nodeid = workerarg->mp_nodeid;
  381. #endif
  382. workerarg->config = pconfig;
  383. _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);
  384. STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
  385. STARPU_PTHREAD_COND_INIT(&workerarg->started_cond, NULL);
  386. STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
  387. workerarg->worker_size = 1;
  388. workerarg->combined_workerid = workerarg->workerid;
  389. workerarg->current_rank = 0;
  390. workerarg->has_prev_init = 0;
  391. /* mutex + cond only for the local list */
  392. /* we have a single local list */
  393. /* afterwards there would be a mutex + cond for the list of each strategy */
  394. workerarg->run_by_starpu = 1;
  395. workerarg->worker_is_running = 0;
  396. workerarg->worker_is_initialized = 0;
  397. workerarg->set = NULL;
  398. int ctx;
  399. for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
  400. {
  401. workerarg->removed_from_ctx[ctx] = 0;
  402. workerarg->shares_tasks_lists[ctx] = 0;
  403. }
  404. STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
  405. STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
  406. STARPU_PTHREAD_MUTEX_INIT(&workerarg->parallel_sect_mutex, NULL);
  407. STARPU_PTHREAD_COND_INIT(&workerarg->parallel_sect_cond, NULL);
  408. workerarg->parallel_sect = 0;
  409. /* if some codelet's termination cannot be handled directly :
  410. * for instance in the Gordon driver, Gordon tasks' callbacks
  411. * may be executed by another thread than that of the Gordon
  412. * driver so that we cannot call the push_codelet_output method
  413. * directly */
  414. workerarg->terminated_jobs = _starpu_job_list_new();
  415. starpu_task_list_init(&workerarg->local_tasks);
  416. workerarg->status = STATUS_INITIALIZING;
  417. _STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers);
  418. _starpu_init_worker_queue(workerarg);
  419. struct starpu_driver driver;
  420. driver.type = workerarg->arch;
  421. switch (workerarg->arch)
  422. {
  423. #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
  424. case STARPU_CPU_WORKER:
  425. driver.id.cpu_id = cpu;
  426. if (_starpu_may_launch_driver(pconfig->conf, &driver))
  427. {
  428. STARPU_PTHREAD_CREATE_ON(
  429. workerarg->name,
  430. &workerarg->worker_thread,
  431. NULL,
  432. _starpu_cpu_worker,
  433. workerarg,
  434. worker+1);
  435. #ifdef STARPU_USE_FXT
  436. /* In tracing mode, make sure the
  437. * thread is really started before
  438. * starting another one, to make sure
  439. * they appear in order in the trace.
  440. */
  441. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  442. while (!workerarg->worker_is_running)
  443. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  444. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  445. #endif
  446. }
  447. else
  448. {
  449. workerarg->run_by_starpu = 0;
  450. }
  451. cpu++;
  452. break;
  453. #endif
  454. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  455. case STARPU_CUDA_WORKER:
  456. driver.id.cuda_id = cuda;
  457. if (_starpu_may_launch_driver(pconfig->conf, &driver))
  458. {
  459. STARPU_PTHREAD_CREATE_ON(
  460. workerarg->name,
  461. &workerarg->worker_thread,
  462. NULL,
  463. _starpu_cuda_worker,
  464. workerarg,
  465. worker+1);
  466. #ifdef STARPU_USE_FXT
  467. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  468. while (!workerarg->worker_is_running)
  469. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  470. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  471. #endif
  472. }
  473. else
  474. {
  475. workerarg->run_by_starpu = 0;
  476. }
  477. cuda++;
  478. break;
  479. #endif
  480. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  481. case STARPU_OPENCL_WORKER:
  482. #ifndef STARPU_SIMGRID
  483. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  484. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  485. {
  486. workerarg->run_by_starpu = 0;
  487. break;
  488. }
  489. #endif
  490. STARPU_PTHREAD_CREATE_ON(
  491. workerarg->name,
  492. &workerarg->worker_thread,
  493. NULL,
  494. _starpu_opencl_worker,
  495. workerarg,
  496. worker+1);
  497. #ifdef STARPU_USE_FXT
  498. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  499. while (!workerarg->worker_is_running)
  500. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  501. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  502. #endif
  503. break;
  504. #endif
  505. #ifdef STARPU_USE_MIC
  506. case STARPU_MIC_WORKER:
  507. /* We use the Gordon approach for the MIC,
  508. * which consists in spawning only one thread
  509. * per MIC device, which will control all MIC
  510. * workers of this device. (by using a worker set). */
  511. if (mic_worker_set[mp_nodeid].started)
  512. goto worker_set_initialized;
  513. mic_worker_set[mp_nodeid].nworkers = pconfig->topology.nmiccores[mp_nodeid];
  514. /* We assume all MIC workers of a given MIC
  515. * device are contiguous so that we can
  516. * address them with the first one only. */
  517. mic_worker_set[mp_nodeid].workers = workerarg;
  518. mic_worker_set[mp_nodeid].set_is_initialized = 0;
  519. STARPU_PTHREAD_CREATE_ON(
  520. workerarg->name,
  521. &mic_worker_set[mp_nodeid].worker_thread,
  522. NULL,
  523. _starpu_mic_src_worker,
  524. &mic_worker_set[mp_nodeid],
  525. worker+1);
  526. #ifdef STARPU_USE_FXT
  527. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  528. while (!workerarg->worker_is_running)
  529. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  530. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  531. #endif
  532. STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[mp_nodeid].mutex);
  533. while (!mic_worker_set[mp_nodeid].set_is_initialized)
  534. STARPU_PTHREAD_COND_WAIT(&mic_worker_set[mp_nodeid].ready_cond,
  535. &mic_worker_set[mp_nodeid].mutex);
  536. STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[mp_nodeid].mutex);
  537. worker_set_initialized:
  538. workerarg->set = &mic_worker_set[mp_nodeid];
  539. mic_worker_set[mp_nodeid].started = 1;
  540. #ifdef STARPU_USE_FXT
  541. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  542. while (!workerarg->worker_is_running)
  543. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  544. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  545. #endif
  546. break;
  547. #endif /* STARPU_USE_MIC */
  548. #ifdef STARPU_USE_SCC
  549. case STARPU_SCC_WORKER:
  550. workerarg->worker_is_initialized = 0;
  551. STARPU_PTHREAD_CREATE_ON(
  552. workerarg->name,
  553. &workerarg->worker_thread,
  554. NULL,
  555. _starpu_scc_src_worker,
  556. workerarg,
  557. worker+1);
  558. #ifdef STARPU_USE_FXT
  559. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  560. while (!workerarg->worker_is_running)
  561. STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
  562. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  563. #endif
  564. break;
  565. #endif
  566. default:
  567. STARPU_ABORT();
  568. }
  569. }
  570. cpu = 0;
  571. cuda = 0;
  572. for (worker = 0; worker < nworkers; worker++)
  573. {
  574. struct _starpu_worker *workerarg = &pconfig->workers[worker];
  575. struct starpu_driver driver;
  576. driver.type = workerarg->arch;
  577. switch (workerarg->arch)
  578. {
  579. case STARPU_CPU_WORKER:
  580. driver.id.cpu_id = cpu;
  581. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  582. {
  583. cpu++;
  584. break;
  585. }
  586. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  587. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  588. while (!workerarg->worker_is_initialized)
  589. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  590. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  591. cpu++;
  592. break;
  593. case STARPU_CUDA_WORKER:
  594. driver.id.cuda_id = cuda;
  595. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  596. {
  597. cuda++;
  598. break;
  599. }
  600. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  601. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  602. while (!workerarg->worker_is_initialized)
  603. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  604. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  605. cuda++;
  606. break;
  607. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  608. case STARPU_OPENCL_WORKER:
  609. #ifndef STARPU_SIMGRID
  610. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  611. if (!_starpu_may_launch_driver(pconfig->conf, &driver))
  612. break;
  613. #endif
  614. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  615. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  616. while (!workerarg->worker_is_initialized)
  617. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  618. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  619. break;
  620. #endif
  621. case STARPU_MIC_WORKER:
  622. /* Already waited above */
  623. break;
  624. case STARPU_SCC_WORKER:
  625. /* TODO: implement may_launch? */
  626. _STARPU_DEBUG("waiting for worker %u initialization\n", worker);
  627. STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  628. while (!workerarg->worker_is_initialized)
  629. STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  630. STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  631. break;
  632. default:
  633. STARPU_ABORT();
  634. }
  635. }
  636. _STARPU_DEBUG("finished launching drivers\n");
  637. }
  638. void _starpu_set_local_worker_key(struct _starpu_worker *worker)
  639. {
  640. STARPU_PTHREAD_SETSPECIFIC(worker_key, worker);
  641. }
  642. struct _starpu_worker *_starpu_get_local_worker_key(void)
  643. {
  644. return (struct _starpu_worker *) STARPU_PTHREAD_GETSPECIFIC(worker_key);
  645. }
  646. /* Initialize the starpu_conf with default values */
  647. int starpu_conf_init(struct starpu_conf *conf)
  648. {
  649. if (!conf)
  650. return -EINVAL;
  651. memset(conf, 0, sizeof(*conf));
  652. conf->magic = 42;
  653. conf->sched_policy_name = getenv("STARPU_SCHED");
  654. conf->sched_policy = NULL;
  655. /* Note that starpu_get_env_number returns -1 in case the variable is
  656. * not defined */
  657. /* Backward compatibility: check the value of STARPU_NCPUS if
  658. * STARPU_NCPU is not set. */
  659. conf->ncpus = starpu_get_env_number("STARPU_NCPU");
  660. if (conf->ncpus == -1)
  661. conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
  662. conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
  663. conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
  664. conf->nmic = starpu_get_env_number("STARPU_NMIC");
  665. conf->nscc = starpu_get_env_number("STARPU_NSCC");
  666. conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
  667. conf->bus_calibrate = starpu_get_env_number("STARPU_BUS_CALIBRATE");
  668. conf->mic_sink_program_path = getenv("STARPU_MIC_PROGRAM_PATH");
  669. if (conf->calibrate == -1)
  670. conf->calibrate = 0;
  671. if (conf->bus_calibrate == -1)
  672. conf->bus_calibrate = 0;
  673. conf->use_explicit_workers_bindid = 0; /* TODO */
  674. conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
  675. conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
  676. conf->use_explicit_workers_mic_deviceid = 0; /* TODO */
  677. conf->use_explicit_workers_scc_deviceid = 0; /* TODO */
  678. conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
  679. if (conf->single_combined_worker == -1)
  680. conf->single_combined_worker = 0;
  681. #if defined(STARPU_DISABLE_ASYNCHRONOUS_COPY)
  682. conf->disable_asynchronous_copy = 1;
  683. #else
  684. conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
  685. if (conf->disable_asynchronous_copy == -1)
  686. conf->disable_asynchronous_copy = 0;
  687. #endif
  688. #if defined(STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY)
  689. conf->disable_asynchronous_cuda_copy = 1;
  690. #else
  691. conf->disable_asynchronous_cuda_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY");
  692. if (conf->disable_asynchronous_cuda_copy == -1)
  693. conf->disable_asynchronous_cuda_copy = 0;
  694. #endif
  695. #if defined(STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY)
  696. conf->disable_asynchronous_opencl_copy = 1;
  697. #else
  698. conf->disable_asynchronous_opencl_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY");
  699. if (conf->disable_asynchronous_opencl_copy == -1)
  700. conf->disable_asynchronous_opencl_copy = 0;
  701. #endif
  702. #if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
  703. conf->disable_asynchronous_mic_copy = 1;
  704. #else
  705. conf->disable_asynchronous_mic_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY");
  706. if (conf->disable_asynchronous_mic_copy == -1)
  707. conf->disable_asynchronous_mic_copy = 0;
  708. #endif
  709. /* 64MiB by default */
  710. conf->trace_buffer_size = 64<<20;
  711. return 0;
  712. }
  713. static void _starpu_conf_set_value_against_environment(char *name, int *value)
  714. {
  715. int number;
  716. number = starpu_get_env_number(name);
  717. if (number != -1)
  718. {
  719. *value = number;
  720. }
  721. }
  722. void _starpu_conf_check_environment(struct starpu_conf *conf)
  723. {
  724. char *sched = getenv("STARPU_SCHED");
  725. if (sched)
  726. {
  727. conf->sched_policy_name = sched;
  728. }
  729. _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus);
  730. _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus);
  731. _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda);
  732. _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl);
  733. _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate);
  734. _starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate);
  735. _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
  736. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
  737. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy);
  738. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy);
  739. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy);
  740. }
  741. int starpu_init(struct starpu_conf *user_conf)
  742. {
  743. return starpu_initialize(user_conf, NULL, NULL);
  744. }
  745. int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
  746. {
  747. int is_a_sink = 0; /* Always defined. If the MP infrastructure is not
  748. * used, we cannot be a sink. */
  749. #ifdef STARPU_USE_MP
  750. _starpu_set_argc_argv(argc, argv);
  751. # ifdef STARPU_USE_SCC
  752. /* In SCC case we look at the rank to know if we are a sink */
  753. if (_starpu_scc_common_mp_init() && !_starpu_scc_common_is_src_node())
  754. setenv("STARPU_SINK", "STARPU_SCC", 1);
  755. # endif
  756. /* If StarPU was configured to use MP sinks, we have to control the
  757. * kind on node we are running on : host or sink ? */
  758. if (getenv("STARPU_SINK"))
  759. is_a_sink = 1;
  760. #else
  761. (void)argc;
  762. (void)argv;
  763. #endif /* STARPU_USE_MP */
  764. int ret;
  765. #ifndef STARPU_SIMGRID
  766. #ifdef __GNUC__
  767. #ifndef __OPTIMIZE__
  768. _STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
  769. #endif
  770. #endif
  771. #if 0
  772. #ifndef STARPU_NO_ASSERT
  773. _STARPU_DISP("Warning: StarPU was configured without --enable-fast\n");
  774. #endif
  775. #endif
  776. #ifdef STARPU_MEMORY_STATS
  777. _STARPU_DISP("Warning: StarPU was configured with --enable-memory-stats, which slows down a bit\n");
  778. #endif
  779. #ifdef STARPU_VERBOSE
  780. _STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
  781. #endif
  782. #ifdef STARPU_USE_FXT
  783. _STARPU_DISP("Warning: StarPU was configured with --with-fxt, which slows down a bit\n");
  784. #endif
  785. #ifdef STARPU_PERF_DEBUG
  786. _STARPU_DISP("Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
  787. #endif
  788. #ifdef STARPU_MODEL_DEBUG
  789. _STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
  790. #endif
  791. #ifdef STARPU_ENABLE_STATS
  792. _STARPU_DISP("Warning: StarPU was configured with --enable-stats, which slows down a bit\n");
  793. #endif
  794. #endif
  795. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  796. while (initialized == CHANGING)
  797. /* Wait for the other one changing it */
  798. STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  799. init_count++;
  800. if (initialized == INITIALIZED)
  801. {
  802. /* He initialized it, don't do it again, and let the others get the mutex */
  803. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  804. return 0;
  805. }
  806. /* initialized == UNINITIALIZED */
  807. initialized = CHANGING;
  808. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  809. #ifdef __MINGW32__
  810. WSADATA wsadata;
  811. WSAStartup(MAKEWORD(1,0), &wsadata);
  812. #endif
  813. srand(2008);
  814. #ifdef HAVE_AYUDAME_H
  815. #ifndef AYU_RT_STARPU
  816. /* Dumb value for now */
  817. #define AYU_RT_STARPU 32
  818. #endif
  819. if (AYU_event)
  820. {
  821. enum ayu_runtime_t ayu_rt = AYU_RT_STARPU;
  822. AYU_event(AYU_PREINIT, 0, (void*) &ayu_rt);
  823. }
  824. #endif
  825. /* store the pointer to the user explicit configuration during the
  826. * initialization */
  827. if (user_conf == NULL)
  828. {
  829. struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
  830. starpu_conf_init(conf);
  831. config.conf = conf;
  832. config.default_conf = 1;
  833. }
  834. else
  835. {
  836. if (user_conf->magic != 42)
  837. {
  838. _STARPU_DISP("starpu_conf structure needs to be initialized with starpu_conf_init\n");
  839. return -EINVAL;
  840. }
  841. config.conf = user_conf;
  842. config.default_conf = 0;
  843. }
  844. _starpu_conf_check_environment(config.conf);
  845. _starpu_init_all_sched_ctxs(&config);
  846. _starpu_init_progression_hooks();
  847. _starpu_init_tags();
  848. #ifdef STARPU_USE_FXT
  849. _starpu_init_fxt_profiling(config.conf->trace_buffer_size);
  850. #endif
  851. _starpu_open_debug_logfile();
  852. _starpu_data_interface_init();
  853. _starpu_timing_init();
  854. _starpu_profiling_init();
  855. _starpu_load_bus_performance_files();
  856. /* Depending on whether we are a MP sink or not, we must build the
  857. * topology with MP nodes or not. */
  858. ret = _starpu_build_topology(&config, is_a_sink ? 1 : 0);
  859. if (ret)
  860. {
  861. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  862. init_count--;
  863. #ifdef STARPU_USE_SCC
  864. if (_starpu_scc_common_is_mp_initialized())
  865. _starpu_scc_src_mp_deinit();
  866. #endif
  867. initialized = UNINITIALIZED;
  868. /* Let somebody else try to do it */
  869. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  870. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  871. return ret;
  872. }
  873. /* We need to store the current task handled by the different
  874. * threads */
  875. _starpu_initialize_current_task_key();
  876. if (!is_a_sink)
  877. {
  878. struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&config, config.conf->sched_policy_name);
  879. _starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init");
  880. }
  881. _starpu_initialize_registered_performance_models();
  882. /* Launch "basic" workers (ie. non-combined workers) */
  883. if (!is_a_sink)
  884. _starpu_launch_drivers(&config);
  885. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  886. initialized = INITIALIZED;
  887. /* Tell everybody that we initialized */
  888. STARPU_PTHREAD_COND_BROADCAST(&init_cond);
  889. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  890. _STARPU_DEBUG("Initialisation finished\n");
  891. #ifdef STARPU_USE_MP
  892. /* Finally, if we are a MP sink, we never leave this function. Else,
  893. * we enter an infinite event loop which listen for MP commands from
  894. * the source. */
  895. if (is_a_sink) {
  896. _starpu_sink_common_worker();
  897. /* We should normally never leave the loop as we don't want to
  898. * really initialize STARPU */
  899. STARPU_ASSERT(0);
  900. }
  901. #endif
  902. return 0;
  903. }
  904. /*
  905. * Handle runtime termination
  906. */
  907. static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
  908. {
  909. int status = 0;
  910. unsigned workerid;
  911. for (workerid = 0; workerid < pconfig->topology.nworkers; workerid++)
  912. {
  913. starpu_wake_all_blocked_workers();
  914. _STARPU_DEBUG("wait for worker %u\n", workerid);
  915. struct _starpu_worker_set *set = pconfig->workers[workerid].set;
  916. struct _starpu_worker *worker = &pconfig->workers[workerid];
  917. /* in case StarPU termination code is called from a callback,
  918. * we have to check if pthread_self() is the worker itself */
  919. if (set)
  920. {
  921. if (set->started)
  922. {
  923. #ifdef STARPU_SIMGRID
  924. status = starpu_pthread_join(set->worker_thread, NULL);
  925. #else
  926. if (!pthread_equal(pthread_self(), set->worker_thread))
  927. status = starpu_pthread_join(set->worker_thread, NULL);
  928. #endif
  929. #ifdef STARPU_VERBOSE
  930. if (status)
  931. {
  932. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  933. }
  934. #endif
  935. set->started = 0;
  936. }
  937. }
  938. else
  939. {
  940. if (!worker->run_by_starpu)
  941. goto out;
  942. #ifdef STARPU_SIMGRID
  943. status = starpu_pthread_join(worker->worker_thread, NULL);
  944. #else
  945. if (!pthread_equal(pthread_self(), worker->worker_thread))
  946. status = starpu_pthread_join(worker->worker_thread, NULL);
  947. #endif
  948. #ifdef STARPU_VERBOSE
  949. if (status)
  950. {
  951. _STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
  952. }
  953. #endif
  954. }
  955. out:
  956. STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
  957. _starpu_sched_ctx_list_delete(&worker->sched_ctx_list);
  958. _starpu_job_list_delete(worker->terminated_jobs);
  959. }
  960. }
  961. unsigned _starpu_machine_is_running(void)
  962. {
  963. unsigned ret;
  964. /* running is just protected by a memory barrier */
  965. STARPU_RMB();
  966. ANNOTATE_HAPPENS_AFTER(&config.running);
  967. ret = config.running;
  968. ANNOTATE_HAPPENS_BEFORE(&config.running);
  969. return ret;
  970. }
  971. unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED)
  972. {
  973. #ifdef STARPU_NON_BLOCKING_DRIVERS
  974. return 0;
  975. #else
  976. unsigned can_block = 1;
  977. #ifndef STARPU_SIMGRID
  978. if (!_starpu_check_that_no_data_request_exists(memnode))
  979. can_block = 0;
  980. #endif
  981. if (!_starpu_machine_is_running())
  982. can_block = 0;
  983. if (!_starpu_execute_registered_progression_hooks())
  984. can_block = 0;
  985. return can_block;
  986. #endif
  987. }
  988. static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig)
  989. {
  990. /* set the flag which will tell workers to stop */
  991. ANNOTATE_HAPPENS_AFTER(&config.running);
  992. pconfig->running = 0;
  993. /* running is just protected by a memory barrier */
  994. ANNOTATE_HAPPENS_BEFORE(&config.running);
  995. STARPU_WMB();
  996. starpu_wake_all_blocked_workers();
  997. }
  998. void starpu_display_stats()
  999. {
  1000. starpu_profiling_bus_helper_display_summary();
  1001. starpu_profiling_worker_helper_display_summary();
  1002. }
  1003. void starpu_shutdown(void)
  1004. {
  1005. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1006. init_count--;
  1007. if (init_count)
  1008. {
  1009. _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
  1010. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1011. return;
  1012. }
  1013. /* We're last */
  1014. initialized = CHANGING;
  1015. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1016. starpu_task_wait_for_no_ready();
  1017. /* tell all workers to shutdown */
  1018. _starpu_kill_all_workers(&config);
  1019. {
  1020. int stats = starpu_get_env_number("STARPU_STATS");
  1021. if (stats != 0)
  1022. {
  1023. _starpu_display_msi_stats();
  1024. _starpu_display_alloc_cache_stats();
  1025. _starpu_display_comm_amounts();
  1026. }
  1027. }
  1028. starpu_profiling_bus_helper_display_summary();
  1029. starpu_profiling_worker_helper_display_summary();
  1030. _starpu_deinitialize_registered_performance_models();
  1031. /* wait for their termination */
  1032. _starpu_terminate_workers(&config);
  1033. {
  1034. int stats = starpu_get_env_number("STARPU_MEMORY_STATS");
  1035. if (stats != 0)
  1036. {
  1037. // Display statistics on data which have not been unregistered
  1038. starpu_data_display_memory_stats();
  1039. }
  1040. }
  1041. _starpu_delete_all_sched_ctxs();
  1042. _starpu_disk_unregister();
  1043. _starpu_destroy_topology(&config);
  1044. #ifdef STARPU_USE_FXT
  1045. _starpu_stop_fxt_profiling();
  1046. #endif
  1047. _starpu_data_interface_shutdown();
  1048. /* Drop all remaining tags */
  1049. _starpu_tag_clear();
  1050. _starpu_close_debug_logfile();
  1051. STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  1052. initialized = UNINITIALIZED;
  1053. /* Let someone else that wants to initialize it again do it */
  1054. STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  1055. STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  1056. /* Clear memory if it was allocated by StarPU */
  1057. if (config.default_conf)
  1058. free(config.conf);
  1059. #ifdef HAVE_AYUDAME_H
  1060. if (AYU_event) AYU_event(AYU_FINISH, 0, NULL);
  1061. #endif
  1062. #ifdef STARPU_USE_SCC
  1063. if (_starpu_scc_common_is_mp_initialized())
  1064. _starpu_scc_src_mp_deinit();
  1065. #endif
  1066. _STARPU_DEBUG("Shutdown finished\n");
  1067. }
  1068. unsigned starpu_worker_get_count(void)
  1069. {
  1070. return config.topology.nworkers;
  1071. }
  1072. int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
  1073. {
  1074. switch (type)
  1075. {
  1076. case STARPU_CPU_WORKER:
  1077. return config.topology.ncpus;
  1078. case STARPU_CUDA_WORKER:
  1079. return config.topology.ncudagpus;
  1080. case STARPU_OPENCL_WORKER:
  1081. return config.topology.nopenclgpus;
  1082. case STARPU_MIC_WORKER:
  1083. return config.topology.nmicdevices;
  1084. case STARPU_SCC_WORKER:
  1085. return config.topology.nsccdevices;
  1086. default:
  1087. return -EINVAL;
  1088. }
  1089. }
  1090. unsigned starpu_combined_worker_get_count(void)
  1091. {
  1092. return config.topology.ncombinedworkers;
  1093. }
  1094. unsigned starpu_cpu_worker_get_count(void)
  1095. {
  1096. return config.topology.ncpus;
  1097. }
  1098. unsigned starpu_cuda_worker_get_count(void)
  1099. {
  1100. return config.topology.ncudagpus;
  1101. }
  1102. unsigned starpu_opencl_worker_get_count(void)
  1103. {
  1104. return config.topology.nopenclgpus;
  1105. }
  1106. int starpu_asynchronous_copy_disabled(void)
  1107. {
  1108. return config.conf->disable_asynchronous_copy;
  1109. }
  1110. int starpu_asynchronous_cuda_copy_disabled(void)
  1111. {
  1112. return config.conf->disable_asynchronous_cuda_copy;
  1113. }
  1114. int starpu_asynchronous_opencl_copy_disabled(void)
  1115. {
  1116. return config.conf->disable_asynchronous_opencl_copy;
  1117. }
  1118. int starpu_asynchronous_mic_copy_disabled(void)
  1119. {
  1120. return config.conf->disable_asynchronous_mic_copy;
  1121. }
  1122. unsigned starpu_mic_worker_get_count(void)
  1123. {
  1124. int i = 0, count = 0;
  1125. for (i = 0; i < STARPU_MAXMICDEVS; i++)
  1126. count += config.topology.nmiccores[i];
  1127. return count;
  1128. }
  1129. unsigned starpu_scc_worker_get_count(void)
  1130. {
  1131. return config.topology.nsccdevices;
  1132. }
  1133. /* When analyzing performance, it is useful to see what is the processing unit
  1134. * that actually performed the task. This function returns the id of the
  1135. * processing unit actually executing it, therefore it makes no sense to use it
  1136. * within the callbacks of SPU functions for instance. If called by some thread
  1137. * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
  1138. int starpu_worker_get_id(void)
  1139. {
  1140. struct _starpu_worker * worker;
  1141. worker = _starpu_get_local_worker_key();
  1142. if (worker)
  1143. {
  1144. return worker->workerid;
  1145. }
  1146. else
  1147. {
  1148. /* there is no worker associated to that thread, perhaps it is
  1149. * a thread from the application or this is some SPU worker */
  1150. return -1;
  1151. }
  1152. }
  1153. int starpu_combined_worker_get_id(void)
  1154. {
  1155. struct _starpu_worker *worker;
  1156. worker = _starpu_get_local_worker_key();
  1157. if (worker)
  1158. {
  1159. return worker->combined_workerid;
  1160. }
  1161. else
  1162. {
  1163. /* there is no worker associated to that thread, perhaps it is
  1164. * a thread from the application or this is some SPU worker */
  1165. return -1;
  1166. }
  1167. }
  1168. int starpu_combined_worker_get_size(void)
  1169. {
  1170. struct _starpu_worker *worker;
  1171. worker = _starpu_get_local_worker_key();
  1172. if (worker)
  1173. {
  1174. return worker->worker_size;
  1175. }
  1176. else
  1177. {
  1178. /* there is no worker associated to that thread, perhaps it is
  1179. * a thread from the application or this is some SPU worker */
  1180. return -1;
  1181. }
  1182. }
  1183. int starpu_combined_worker_get_rank(void)
  1184. {
  1185. struct _starpu_worker *worker;
  1186. worker = _starpu_get_local_worker_key();
  1187. if (worker)
  1188. {
  1189. return worker->current_rank;
  1190. }
  1191. else
  1192. {
  1193. /* there is no worker associated to that thread, perhaps it is
  1194. * a thread from the application or this is some SPU worker */
  1195. return -1;
  1196. }
  1197. }
  1198. int starpu_worker_get_mp_nodeid(int id)
  1199. {
  1200. return config.workers[id].mp_nodeid;
  1201. }
  1202. int starpu_worker_get_devid(int id)
  1203. {
  1204. return config.workers[id].devid;
  1205. }
  1206. struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
  1207. {
  1208. return &config.workers[id];
  1209. }
  1210. unsigned starpu_worker_is_combined_worker(int id)
  1211. {
  1212. return id >= (int)config.topology.nworkers;
  1213. }
  1214. struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
  1215. {
  1216. if(id == STARPU_NMAX_SCHED_CTXS) return NULL;
  1217. return &config.sched_ctxs[id];
  1218. }
  1219. struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
  1220. {
  1221. unsigned basic_worker_count = starpu_worker_get_count();
  1222. //_STARPU_DEBUG("basic_worker_count:%d\n",basic_worker_count);
  1223. STARPU_ASSERT(id >= basic_worker_count);
  1224. return &config.combined_workers[id - basic_worker_count];
  1225. }
  1226. enum starpu_worker_archtype starpu_worker_get_type(int id)
  1227. {
  1228. return config.workers[id].arch;
  1229. }
  1230. int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  1231. {
  1232. unsigned nworkers = starpu_worker_get_count();
  1233. int cnt = 0;
  1234. unsigned id;
  1235. for (id = 0; id < nworkers; id++)
  1236. {
  1237. if (starpu_worker_get_type(id) == type)
  1238. {
  1239. /* Perhaps the array is too small ? */
  1240. if (cnt >= maxsize)
  1241. return -ERANGE;
  1242. workerids[cnt++] = id;
  1243. }
  1244. }
  1245. return cnt;
  1246. }
  1247. int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num)
  1248. {
  1249. unsigned nworkers = starpu_worker_get_count();
  1250. int cnt = 0;
  1251. unsigned id;
  1252. for (id = 0; id < nworkers; id++)
  1253. {
  1254. if (starpu_worker_get_type(id) == type)
  1255. {
  1256. if (num == cnt)
  1257. return id;
  1258. cnt++;
  1259. }
  1260. }
  1261. /* Not found */
  1262. return -1;
  1263. }
  1264. int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid)
  1265. {
  1266. unsigned nworkers = starpu_worker_get_count();
  1267. unsigned id;
  1268. for (id = 0; id < nworkers; id++)
  1269. if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid)
  1270. return id;
  1271. /* Not found */
  1272. return -1;
  1273. }
  1274. void starpu_worker_get_name(int id, char *dst, size_t maxlen)
  1275. {
  1276. char *name = config.workers[id].name;
  1277. snprintf(dst, maxlen, "%s", name);
  1278. }
  1279. /* Retrieve the status which indicates what the worker is currently doing. */
  1280. enum _starpu_worker_status _starpu_worker_get_status(int workerid)
  1281. {
  1282. return config.workers[workerid].status;
  1283. }
  1284. /* Change the status of the worker which indicates what the worker is currently
  1285. * doing (eg. executing a callback). */
  1286. void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
  1287. {
  1288. config.workers[workerid].status = status;
  1289. }
  1290. void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)
  1291. {
  1292. *sched_cond = &config.workers[workerid].sched_cond;
  1293. *sched_mutex = &config.workers[workerid].sched_mutex;
  1294. }
  1295. int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
  1296. {
  1297. int success = 0;
  1298. STARPU_PTHREAD_MUTEX_LOCK(mutex);
  1299. if (config.workers[workerid].status == STATUS_SLEEPING)
  1300. {
  1301. config.workers[workerid].status = STATUS_WAKING_UP;
  1302. STARPU_PTHREAD_COND_SIGNAL(cond);
  1303. success = 1;
  1304. }
  1305. STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
  1306. return success;
  1307. }
  1308. int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  1309. {
  1310. unsigned nworkers = starpu_worker_get_count();
  1311. int cnt = 0;
  1312. unsigned id;
  1313. for (id = 0; id < nworkers; id++)
  1314. {
  1315. if (starpu_worker_get_type(id) == type)
  1316. {
  1317. /* Perhaps the array is too small ? */
  1318. if (cnt >= maxsize)
  1319. return cnt;
  1320. workerids[cnt++] = id;
  1321. }
  1322. }
  1323. return cnt;
  1324. }
  1325. int starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
  1326. {
  1327. unsigned nworkers = starpu_worker_get_count();
  1328. int cnt = 0;
  1329. unsigned id, worker;
  1330. unsigned found = 0;
  1331. for (id = 0; id < nworkers; id++)
  1332. {
  1333. found = 0;
  1334. if (starpu_worker_get_type(id) == type)
  1335. {
  1336. /* Perhaps the array is too small ? */
  1337. if (cnt >= maxsize)
  1338. return cnt;
  1339. int s;
  1340. for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++)
  1341. {
  1342. if(config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
  1343. {
  1344. struct starpu_worker_collection *workers = config.sched_ctxs[s].workers;
  1345. struct starpu_sched_ctx_iterator it;
  1346. if(workers->init_iterator)
  1347. workers->init_iterator(workers, &it);
  1348. while(workers->has_next(workers, &it))
  1349. {
  1350. worker = workers->get_next(workers, &it);
  1351. if(worker == id)
  1352. {
  1353. found = 1;
  1354. break;
  1355. }
  1356. }
  1357. if(found) break;
  1358. }
  1359. }
  1360. if(!found)
  1361. workerids[cnt++] = id;
  1362. }
  1363. }
  1364. return cnt;
  1365. }
  1366. struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
  1367. {
  1368. return &config.sched_ctxs[STARPU_GLOBAL_SCHED_CTX];
  1369. }
  1370. int
  1371. starpu_driver_run(struct starpu_driver *d)
  1372. {
  1373. if (!d)
  1374. {
  1375. _STARPU_DEBUG("Invalid argument\n");
  1376. return -EINVAL;
  1377. }
  1378. switch (d->type)
  1379. {
  1380. #ifdef STARPU_USE_CPU
  1381. case STARPU_CPU_WORKER:
  1382. return _starpu_run_cpu(d);
  1383. #endif
  1384. #ifdef STARPU_USE_CUDA
  1385. case STARPU_CUDA_WORKER:
  1386. return _starpu_run_cuda(d);
  1387. #endif
  1388. #ifdef STARPU_USE_OPENCL
  1389. case STARPU_OPENCL_WORKER:
  1390. return _starpu_run_opencl(d);
  1391. #endif
  1392. default:
  1393. {
  1394. _STARPU_DEBUG("Invalid device type\n");
  1395. return -EINVAL;
  1396. }
  1397. }
  1398. }
  1399. int
  1400. starpu_driver_init(struct starpu_driver *d)
  1401. {
  1402. STARPU_ASSERT(d);
  1403. switch (d->type)
  1404. {
  1405. #ifdef STARPU_USE_CPU
  1406. case STARPU_CPU_WORKER:
  1407. return _starpu_cpu_driver_init(d);
  1408. #endif
  1409. #ifdef STARPU_USE_CUDA
  1410. case STARPU_CUDA_WORKER:
  1411. return _starpu_cuda_driver_init(d);
  1412. #endif
  1413. #ifdef STARPU_USE_OPENCL
  1414. case STARPU_OPENCL_WORKER:
  1415. return _starpu_opencl_driver_init(d);
  1416. #endif
  1417. default:
  1418. return -EINVAL;
  1419. }
  1420. }
  1421. int
  1422. starpu_driver_run_once(struct starpu_driver *d)
  1423. {
  1424. STARPU_ASSERT(d);
  1425. switch (d->type)
  1426. {
  1427. #ifdef STARPU_USE_CPU
  1428. case STARPU_CPU_WORKER:
  1429. return _starpu_cpu_driver_run_once(d);
  1430. #endif
  1431. #ifdef STARPU_USE_CUDA
  1432. case STARPU_CUDA_WORKER:
  1433. return _starpu_cuda_driver_run_once(d);
  1434. #endif
  1435. #ifdef STARPU_USE_OPENCL
  1436. case STARPU_OPENCL_WORKER:
  1437. return _starpu_opencl_driver_run_once(d);
  1438. #endif
  1439. default:
  1440. return -EINVAL;
  1441. }
  1442. }
  1443. int
  1444. starpu_driver_deinit(struct starpu_driver *d)
  1445. {
  1446. STARPU_ASSERT(d);
  1447. switch (d->type)
  1448. {
  1449. #ifdef STARPU_USE_CPU
  1450. case STARPU_CPU_WORKER:
  1451. return _starpu_cpu_driver_deinit(d);
  1452. #endif
  1453. #ifdef STARPU_USE_CUDA
  1454. case STARPU_CUDA_WORKER:
  1455. return _starpu_cuda_driver_deinit(d);
  1456. #endif
  1457. #ifdef STARPU_USE_OPENCL
  1458. case STARPU_OPENCL_WORKER:
  1459. return _starpu_opencl_driver_deinit(d);
  1460. #endif
  1461. default:
  1462. return -EINVAL;
  1463. }
  1464. }
  1465. void starpu_get_version(int *major, int *minor, int *release)
  1466. {
  1467. *major = STARPU_MAJOR_VERSION;
  1468. *minor = STARPU_MINOR_VERSION;
  1469. *release = STARPU_RELEASE_VERSION;
  1470. }