/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011-2019  Inria
 * Copyright (C) 2009-2019  Université de Bordeaux
 * Copyright (C) 2017       Erwan Leria
 * Copyright (C) 2010-2019  CNRS
 * Copyright (C) 2013       Thibaut Lambert
 * Copyright (C) 2011       Télécom-SudParis
 * Copyright (C) 2016       Uppsala University
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <starpu.h>
#include <starpu_profiling.h>
#include <core/workers.h>
#include <core/sched_ctx.h>
#include <core/jobs.h>
#include <core/task.h>
#include <core/task_bundle.h>
#include <core/dependencies/data_concurrency.h>
#include <common/config.h>
#include <common/utils.h>
#include <common/fxt.h>
#include <common/knobs.h>
#include <profiling/profiling.h>
#include <profiling/bound.h>
#include <math.h>
#include <string.h>
#include <core/debug.h>
#include <core/sched_ctx.h>
#include <time.h>
#include <signal.h>
#include <core/simgrid.h>
#ifdef STARPU_HAVE_WINDOWS
#include <windows.h>
#endif
/* global counters */
static int __g_total_submitted;
static int __g_peak_submitted;
static int __g_peak_ready;

/* global counter variables */
int64_t _starpu_task__g_total_submitted__value;
int64_t _starpu_task__g_peak_submitted__value;
int64_t _starpu_task__g_current_submitted__value;
int64_t _starpu_task__g_peak_ready__value;
int64_t _starpu_task__g_current_ready__value;

/* per-worker counters */
static int __w_total_executed;
static int __w_cumul_execution_time;

/* per-codelet counters */
static int __c_total_submitted;
static int __c_peak_submitted;
static int __c_peak_ready;
static int __c_total_executed;
static int __c_cumul_execution_time;

/* - */

/* per-scheduler knobs */
static int __s_max_priority_cap_knob;
static int __s_min_priority_cap_knob;

/* knob variables */
static int __s_max_priority_cap__value;
static int __s_min_priority_cap__value;

static struct starpu_perf_knob_group * __kg_starpu_task__per_scheduler;

/* - */

static void global_sample_updater(struct starpu_perf_counter_sample *sample, void *context)
{
	STARPU_ASSERT(context == NULL); /* no context for the global updater */
	(void)context;

	_starpu_perf_counter_sample_set_int64_value(sample, __g_total_submitted, _starpu_task__g_total_submitted__value);
	_starpu_perf_counter_sample_set_int64_value(sample, __g_peak_submitted, _starpu_task__g_peak_submitted__value);
	_starpu_perf_counter_sample_set_int64_value(sample, __g_peak_ready, _starpu_task__g_peak_ready__value);
}

static void per_worker_sample_updater(struct starpu_perf_counter_sample *sample, void *context)
{
	STARPU_ASSERT(context != NULL);
	struct _starpu_worker *worker = context;

	_starpu_perf_counter_sample_set_int64_value(sample, __w_total_executed, worker->__w_total_executed__value);
	_starpu_perf_counter_sample_set_double_value(sample, __w_cumul_execution_time, worker->__w_cumul_execution_time__value);
}

static void per_codelet_sample_updater(struct starpu_perf_counter_sample *sample, void *context)
{
	STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL);
	struct starpu_perf_counter_set *set = sample->listener->set;
	STARPU_ASSERT(set->scope == starpu_perf_counter_scope_per_codelet);
	STARPU_ASSERT(context != NULL);
	struct starpu_codelet *cl = context;

	_starpu_perf_counter_sample_set_int64_value(sample, __c_total_submitted, cl->perf_counter_values->task.total_submitted);
	_starpu_perf_counter_sample_set_int64_value(sample, __c_peak_submitted, cl->perf_counter_values->task.peak_submitted);
	_starpu_perf_counter_sample_set_int64_value(sample, __c_peak_ready, cl->perf_counter_values->task.peak_ready);
	_starpu_perf_counter_sample_set_int64_value(sample, __c_total_executed, cl->perf_counter_values->task.total_executed);
	_starpu_perf_counter_sample_set_double_value(sample, __c_cumul_execution_time, cl->perf_counter_values->task.cumul_execution_time);
}

void _starpu__task_c__register_counters(void)
{
	{
		const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_global;
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, g_total_submitted, int64, "number of tasks submitted globally (since StarPU initialization)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, g_peak_submitted, int64, "maximum simultaneous number of tasks submitted and not yet ready, globally (since StarPU initialization)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, g_peak_ready, int64, "maximum simultaneous number of tasks ready and not yet executing, globally (since StarPU initialization)");

		_starpu_perf_counter_register_updater(scope, global_sample_updater);
	}

	{
		const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_per_worker;
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, w_total_executed, int64, "number of tasks executed on this worker (since StarPU initialization)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, w_cumul_execution_time, double, "cumulated execution time of tasks executed on this worker (microseconds, since StarPU initialization)");

		_starpu_perf_counter_register_updater(scope, per_worker_sample_updater);
	}

	{
		const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_per_codelet;
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, c_total_submitted, int64, "number of codelet's task instances submitted using this codelet (since enabled)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, c_peak_submitted, int64, "maximum simultaneous number of codelet's task instances submitted and not yet ready (since enabled)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, c_peak_ready, int64, "maximum simultaneous number of codelet's task instances ready and not yet executing (since enabled)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, c_total_executed, int64, "number of codelet's task instances executed using this codelet (since enabled)");
		__STARPU_PERF_COUNTER_REG("starpu.task", scope, c_cumul_execution_time, double, "cumulated execution time of codelet's task instances (since enabled)");

		_starpu_perf_counter_register_updater(scope, per_codelet_sample_updater);
	}
}
/* - */

static void sched_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value)
{
	const char * const sched_policy_name = *(const char **)context;
	(void) sched_policy_name;

	if (knob->id == __s_max_priority_cap_knob)
	{
		STARPU_ASSERT(value->val_int32_t <= STARPU_MAX_PRIO);
		STARPU_ASSERT(value->val_int32_t >= STARPU_MIN_PRIO);
		STARPU_ASSERT(value->val_int32_t >= __s_min_priority_cap__value);
		__s_max_priority_cap__value = value->val_int32_t;
	}
	else if (knob->id == __s_min_priority_cap_knob)
	{
		STARPU_ASSERT(value->val_int32_t <= STARPU_MAX_PRIO);
		STARPU_ASSERT(value->val_int32_t >= STARPU_MIN_PRIO);
		STARPU_ASSERT(value->val_int32_t <= __s_max_priority_cap__value);
		__s_min_priority_cap__value = value->val_int32_t;
	}
	else
	{
		STARPU_ASSERT(0);
		abort();
	}
}

static void sched_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)
{
	const char * const sched_policy_name = *(const char **)context;
	(void) sched_policy_name;

	if (knob->id == __s_max_priority_cap_knob)
	{
		value->val_int32_t = __s_max_priority_cap__value;
	}
	else if (knob->id == __s_min_priority_cap_knob)
	{
		value->val_int32_t = __s_min_priority_cap__value;
	}
	else
	{
		STARPU_ASSERT(0);
		abort();
	}
}

void _starpu__task_c__register_knobs(void)
{
#if 0
	{
		const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_global;
		__kg_starpu_global = _starpu_perf_knob_group_register(scope, global_knobs__set, global_knobs__get);
	}
#endif

#if 0
	{
		const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_worker;
		__kg_starpu_worker__per_worker = _starpu_perf_knob_group_register(scope, worker_knobs__set, worker_knobs__get);
	}
#endif

	{
		const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler;
		__kg_starpu_task__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get);

		/* TODO: priority capping knobs actually work globally for now, the sched policy name is ignored */
		__STARPU_PERF_KNOB_REG("starpu.task", __kg_starpu_task__per_scheduler, s_max_priority_cap_knob, int32, "force task priority to this value or below (priority value)");
		__s_max_priority_cap__value = STARPU_MAX_PRIO;
		__STARPU_PERF_KNOB_REG("starpu.task", __kg_starpu_task__per_scheduler, s_min_priority_cap_knob, int32, "force task priority to this value or above (priority value)");
		__s_min_priority_cap__value = STARPU_MIN_PRIO;
	}
}
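
/* Note: the two priority capping knobs registered above take effect in
 * _starpu_task_submit() below, where the priority of every submitted task is
 * clamped into the interval
 * [__s_min_priority_cap__value, __s_max_priority_cap__value]. */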
/* - */

/* XXX this should be reinitialized when StarPU is shut down (or we should make
 * sure that no task remains!) */
/* TODO we could make this hierarchical to avoid contention? */
//static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER;

/* This key stores the task currently handled by the thread; note that we
 * cannot use the worker structure to store that information because it is
 * possible that we have a task with a NULL codelet, which means its callback
 * could be executed by a user thread as well. */
static starpu_pthread_key_t current_task_key;

static int limit_min_submitted_tasks;
static int limit_max_submitted_tasks;
static int watchdog_crash;
static int watchdog_delay;

#define _STARPU_TASK_MAGIC 42

/* Called once at starpu_init */
void _starpu_task_init(void)
{
	STARPU_PTHREAD_KEY_CREATE(&current_task_key, NULL);
	limit_min_submitted_tasks = starpu_get_env_number("STARPU_LIMIT_MIN_SUBMITTED_TASKS");
	limit_max_submitted_tasks = starpu_get_env_number("STARPU_LIMIT_MAX_SUBMITTED_TASKS");
	watchdog_crash = starpu_get_env_number("STARPU_WATCHDOG_CRASH");
	watchdog_delay = starpu_get_env_number_default("STARPU_WATCHDOG_DELAY", 0);
}

void _starpu_task_deinit(void)
{
	STARPU_PTHREAD_KEY_DELETE(current_task_key);
}

void starpu_task_init(struct starpu_task *task)
{
	/* TODO: memcpy from a template instead? benchmark it */
	STARPU_ASSERT(task);

	/* As most of the fields must be initialised at NULL, let's put 0
	 * everywhere */
	memset(task, 0, sizeof(struct starpu_task));

	task->sequential_consistency = 1;
	task->where = -1;

	/* Now we can initialise fields which require a custom value */
	/* Note: remember to update STARPU_TASK_INITIALIZER as well */
#if STARPU_DEFAULT_PRIO != 0
	task->priority = STARPU_DEFAULT_PRIO;
#endif

	task->detach = 1;

#if STARPU_TASK_INVALID != 0
	task->status = STARPU_TASK_INVALID;
#endif

	task->predicted = NAN;
	task->predicted_transfer = NAN;
	task->predicted_start = NAN;

	task->magic = _STARPU_TASK_MAGIC;
	task->sched_ctx = STARPU_NMAX_SCHED_CTXS;

	task->flops = 0.0;
}
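
/* Usage sketch (illustrative only, not part of this file): initializing a
 * statically allocated task instead of going through starpu_task_create().
 * The codelet `my_cl` and data handle `my_handle` are assumed to be defined
 * by the application. Since starpu_task_init() sets detach = 1, the task must
 * be explicitly made non-detached to be waited on.
 *
 *	struct starpu_task task;
 *	starpu_task_init(&task);
 *	task.cl = &my_cl;
 *	task.handles[0] = my_handle;
 *	task.detach = 0;            // required for starpu_task_wait()
 *	starpu_task_submit(&task);
 *	starpu_task_wait(&task);
 *	starpu_task_clean(&task);   // frees resources, not the struct itself
 */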
/* Free all the resources allocated for a task, without deallocating the task
 * structure itself (this is required for statically allocated tasks).
 * All values previously set by the user, like codelet and handles, remain
 * unchanged */
void starpu_task_clean(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	task->magic = 0;

	/* If a buffer was allocated to store the profiling info, we free it. */
	if (task->profiling_info)
	{
		free(task->profiling_info);
		task->profiling_info = NULL;
	}

	/* In case the task is (still) part of a bundle */
	starpu_task_bundle_t bundle = task->bundle;
	if (bundle)
		starpu_task_bundle_remove(bundle, task);

	if (task->dyn_handles)
	{
		free(task->dyn_handles);
		task->dyn_handles = NULL;
		free(task->dyn_interfaces);
		task->dyn_interfaces = NULL;
	}

	if (task->dyn_modes)
	{
		free(task->dyn_modes);
		task->dyn_modes = NULL;
	}

	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
	if (j)
	{
		_starpu_job_destroy(j);
		task->starpu_private = NULL;
	}
}

struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create(void)
{
	struct starpu_task *task;

	_STARPU_MALLOC(task, sizeof(struct starpu_task));
	starpu_task_init(task);

	/* Dynamically allocated tasks are destroyed by default */
	task->destroy = 1;

	return task;
}
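
/* Usage sketch (illustrative only, not part of this file): the common
 * dynamic-allocation path. With the defaults destroy = 1 and detach = 1,
 * StarPU frees the task automatically once it has completed, so the
 * application only submits and waits globally later on. `my_cl` and
 * `my_handle` are assumed to be defined by the application.
 *
 *	struct starpu_task *task = starpu_task_create();
 *	task->cl = &my_cl;
 *	task->handles[0] = my_handle;
 *	int ret = starpu_task_submit(task);
 *	if (ret == -ENODEV)
 *		fprintf(stderr, "no worker can execute this codelet\n");
 */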
/* Free the resources allocated during starpu_task_create. This function can be
 * called automatically after the execution of a task by setting the "destroy"
 * flag of the starpu_task structure (default behaviour). Calling this function
 * on a statically allocated task results in undefined behaviour. */
void _starpu_task_destroy(struct starpu_task *task)
{
	/* If starpu_task_destroy is called in a callback, we just set the destroy
	   flag. The task will be destroyed after the callback returns */
	if (task == starpu_task_get_current()
	    && _starpu_get_local_worker_status() == STATUS_CALLBACK)
	{
		task->destroy = 1;
	}
	else
	{
		starpu_task_clean(task);
		/* TODO handle the case of task with detach = 1 and destroy = 1 */
		/* TODO handle the case of non terminated tasks -> assertion failure, it's too dangerous to be doing something like this */

		/* Does the user want StarPU to release cl_arg? */
		if (task->cl_arg_free)
			free(task->cl_arg);

		/* Does the user want StarPU to release callback_arg? */
		if (task->callback_arg_free)
			free(task->callback_arg);

		/* Does the user want StarPU to release prologue_callback_arg? */
		if (task->prologue_callback_arg_free)
			free(task->prologue_callback_arg);

		/* Does the user want StarPU to release prologue_pop_arg? */
		if (task->prologue_callback_pop_arg_free)
			free(task->prologue_callback_pop_arg);

		free(task);
	}
}

void starpu_task_destroy(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->destroy || !task->detach, "starpu_task_destroy must not be called for task with destroy = 1 and detach = 1");
	_starpu_task_destroy(task);
}
int starpu_task_finished(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "starpu_task_finished can only be called on tasks with detach = 0");
	return _starpu_job_finished(_starpu_get_job_associated_to_task(task));
}

int starpu_task_wait(struct starpu_task *task)
{
	_STARPU_LOG_IN();
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0");

	if (task->detach || task->synchronous)
	{
		_STARPU_DEBUG("Task is detached or synchronous. Waiting returns immediately\n");
		_STARPU_LOG_OUT_TAG("einval");
		return -EINVAL;
	}

	STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait must not be called from a task or callback");

	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

	_STARPU_TRACE_TASK_WAIT_START(j);

	starpu_do_schedule();
	_starpu_wait_job(j);

	/* as this is a synchronous task, freeing the job
	   structure was deferred */
	if (task->destroy)
		_starpu_task_destroy(task);

	_starpu_perf_counter_update_global_sample();

	_STARPU_TRACE_TASK_WAIT_END();
	_STARPU_LOG_OUT();
	return 0;
}
int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks)
{
	unsigned i;

	for (i = 0; i < nb_tasks; i++)
	{
		int ret = starpu_task_wait(tasks[i]);
		if (ret)
			return ret;
	}
	return 0;
}

#ifdef STARPU_OPENMP
int _starpu_task_test_termination(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0");

	if (task->detach || task->synchronous)
	{
		_STARPU_DEBUG("Task is detached or synchronous\n");
		_STARPU_LOG_OUT_TAG("einval");
		return -EINVAL;
	}

	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;

	int ret = _starpu_test_job_termination(j);

	if (ret)
	{
		if (task->destroy)
			_starpu_task_destroy(task);
	}

	return ret;
}
#endif

/* NB in case we have a regenerable task, it is possible that the job was
 * already counted. */
int _starpu_submit_job(struct _starpu_job *j, int nodeps)
{
	struct starpu_task *task = j->task;
	int ret;
#ifdef STARPU_OPENMP
	const unsigned continuation = j->continuation;
#else
	const unsigned continuation = 0;
#endif

	_STARPU_LOG_IN();
	/* notify bound computation of a new task */
	_starpu_bound_record(j);

	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
	_starpu_sched_task_submit(task);

#ifdef STARPU_USE_SC_HYPERVISOR
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx);
	if(sched_ctx != NULL && j->task->sched_ctx != _starpu_get_initial_sched_ctx()->id && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS
	   && sched_ctx->perf_counters != NULL)
	{
		struct starpu_perfmodel_arch arch;
		_STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device));
		arch.ndevices = 1;
		arch.devices[0].type = STARPU_CPU_WORKER;
		arch.devices[0].devid = 0;
		arch.devices[0].ncores = 1;
		_starpu_compute_buffers_footprint(j->task->cl->model, &arch, 0, j);
		free(arch.devices);
		size_t data_size = 0;
		if (j->task->cl)
		{
			unsigned i, nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
			for(i = 0; i < nbuffers; i++)
			{
				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
				if (handle != NULL)
					data_size += _starpu_data_get_size(handle);
			}
		}
		_STARPU_TRACE_HYPERVISOR_BEGIN();
		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);
		_STARPU_TRACE_HYPERVISOR_END();
	}
#endif //STARPU_USE_SC_HYPERVISOR

	/* We retain handle reference count */
	if (task->cl && !continuation)
	{
		unsigned i;
		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
		for (i=0; i<nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			_starpu_spin_lock(&handle->header_lock);
			handle->busy_count++;
			_starpu_spin_unlock(&handle->header_lock);
		}
	}

	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);

	_starpu_handle_job_submission(j);

#ifdef STARPU_OPENMP
	if (continuation)
	{
		j->discontinuous = 1;
		j->continuation = 0;
	}
#endif

	if (nodeps)
	{
		ret = _starpu_take_deps_and_schedule(j);
	}
	else
	{
#ifdef STARPU_OPENMP
		if (continuation)
		{
			ret = _starpu_reenforce_task_deps_and_schedule(j);
		}
		else
#endif
		{
			ret = _starpu_enforce_deps_and_schedule(j);
		}
	}

	_STARPU_LOG_OUT();
	return ret;
}

/* Note: this is racy, so valgrind would complain. But since we'll always put
 * the same values, this is not a problem. */
void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
{
	if (!cl)
		return;

	uint32_t where = cl->where;
	int is_where_unset = where == 0;
	unsigned i, some_impl;

	/* Check deprecated and unset fields (where, <device>_func,
	 * <device>_funcs) */

	/* CPU */
	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS && cl->cpu_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both cpu_func and cpu_funcs are set. Ignoring cpu_func.\n");
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
	{
		cl->cpu_funcs[0] = cl->cpu_func;
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->cpu_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && cl->cpu_func == 0)
	{
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (some_impl && is_where_unset)
	{
		where |= STARPU_CPU;
	}

	/* CUDA */
	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both cuda_func and cuda_funcs are set. Ignoring cuda_func.\n");
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS)
	{
		cl->cuda_funcs[0] = cl->cuda_func;
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->cuda_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && cl->cuda_func == 0)
	{
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (some_impl && is_where_unset)
	{
		where |= STARPU_CUDA;
	}

	/* OpenCL */
	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS && cl->opencl_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both opencl_func and opencl_funcs are set. Ignoring opencl_func.\n");
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS)
	{
		cl->opencl_funcs[0] = cl->opencl_func;
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->opencl_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && cl->opencl_func == 0)
	{
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (some_impl && is_where_unset)
	{
		where |= STARPU_OPENCL;
	}

	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->mic_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && is_where_unset)
	{
		where |= STARPU_MIC;
	}

	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->mpi_ms_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && is_where_unset)
	{
		where |= STARPU_MPI_MS;
	}

	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->cpu_funcs_name[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && is_where_unset)
	{
		where |= STARPU_MIC|STARPU_MPI_MS;
	}

	cl->where = where;
}
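
/* Illustrative sketch (not part of this file): the non-deprecated way of
 * filling a codelet is the cpu_funcs/cuda_funcs/opencl_funcs arrays, which
 * the normalization above maps the legacy <device>_func fields onto. The
 * kernel `scal_cpu_func` is a placeholder supplied by the application.
 *
 *	void scal_cpu_func(void *buffers[], void *cl_arg);
 *
 *	static struct starpu_codelet scal_cl =
 *	{
 *		.cpu_funcs = { scal_cpu_func },
 *		.nbuffers = 1,
 *		.modes = { STARPU_RW },
 *	};
 *
 * Leaving .where unset lets the code above infer it (here STARPU_CPU) from
 * the implementation arrays. */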
void _starpu_task_check_deprecated_fields(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED)
{
	/* None any more */
}

static int _starpu_task_submit_head(struct starpu_task *task)
{
	unsigned is_sync = task->synchronous;
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

	if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED)
		task->status = STARPU_TASK_INVALID;
	else
		STARPU_ASSERT(task->status == STARPU_TASK_INVALID);

	if (j->internal)
	{
		// Internal tasks are submitted to initial context
		task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we set the current context
		task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}

	if (is_sync)
	{
		/* It is not possible to submit a synchronous (blocking) task
		 * from a task or a callback */
		STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "submitting a synchronous task must not be done from a task or a callback");
		task->detach = 0;
	}

	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);
	if (task->where == -1 && task->cl)
		task->where = task->cl->where;

	if (task->cl)
	{
		unsigned i;
		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
		_STARPU_TRACE_UPDATE_TASK_CNT(0);

		/* Check buffers */
		if (task->dyn_handles == NULL)
			STARPU_ASSERT_MSG(STARPU_TASK_GET_NBUFFERS(task) <= STARPU_NMAXBUFS,
					  "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.",
					  task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS);

		if (task->dyn_handles)
		{
			_STARPU_MALLOC(task->dyn_interfaces, nbuffers * sizeof(void *));
		}

		for (i = 0; i < nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, i);
			int node = task->cl->specific_nodes ? STARPU_CODELET_GET_NODE(task->cl, i) : -1;
			/* Make sure handles are valid */
			STARPU_ASSERT_MSG(handle->magic == _STARPU_TASK_MAGIC, "data %p is invalid (was it already unregistered?)", handle);
			/* Make sure handles are not partitioned */
			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task");
			/* Make sure the specified node exists */
			STARPU_ASSERT_MSG(node == STARPU_SPECIFIC_NODE_LOCAL || node == STARPU_SPECIFIC_NODE_CPU || node == STARPU_SPECIFIC_NODE_SLOW || (node >= 0 && node < (int) starpu_memory_nodes_get_count()), "The codelet-specified memory node does not exist");
			/* Provide the home interface for now if any,
			 * for can_execute hooks */
			if (handle->home_node != -1)
				_STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i);
			if (!(task->cl->flags & STARPU_CODELET_NOPLANS) &&
			    ((handle->nplans && !handle->nchildren) || handle->siblings)
			    && handle->partition_automatic_disabled == 0
			   )
				/* This handle is involved with asynchronous
				 * partitioning as a parent or a child, make
				 * sure the right plan is active, submit
				 * appropriate partitioning / unpartitioning if
				 * not */
				_starpu_data_partition_access_submit(handle, (mode & STARPU_W) != 0);
		}

		/* Check that the type of worker(s) required by the task exists */
		if (!_starpu_worker_exists(task))
		{
			_STARPU_LOG_OUT_TAG("ENODEV");
			return -ENODEV;
		}

		/* In case we require that a task should be explicitly
		 * executed on a specific worker, we make sure that the worker
		 * is able to execute this task. */
		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
		{
			_STARPU_LOG_OUT_TAG("ENODEV");
			return -ENODEV;
		}

		if (task->cl->model)
			_starpu_init_and_load_perfmodel(task->cl->model);

		if (task->cl->energy_model)
			_starpu_init_and_load_perfmodel(task->cl->energy_model);
	}

	return 0;
}
/* application should submit new tasks to StarPU through this function */
int _starpu_task_submit(struct starpu_task *task, int nodeps)
{
	_STARPU_LOG_IN();
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(task->magic == _STARPU_TASK_MAGIC, "Tasks must be created with starpu_task_create, or initialized with starpu_task_init.");
	int ret;

	{
		/* task knobs */
		if (task->priority > __s_max_priority_cap__value)
			task->priority = __s_max_priority_cap__value;
		if (task->priority < __s_min_priority_cap__value)
			task->priority = __s_min_priority_cap__value;
	}

	unsigned is_sync = task->synchronous;
	starpu_task_bundle_t bundle = task->bundle;
	STARPU_ASSERT_MSG(!(nodeps && bundle), "not supported\n");

	/* internally, StarPU manipulates a struct _starpu_job * which is a wrapper around a
	 * task structure; it is possible that this job structure was already
	 * allocated. */
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	const unsigned continuation =
#ifdef STARPU_OPENMP
		j->continuation
#else
		0
#endif
		;

	if (!j->internal && !continuation)
	{
		(void) STARPU_ATOMIC_ADDL(&_starpu_task__g_total_submitted__value, 1);
		int64_t value = STARPU_ATOMIC_ADDL(&_starpu_task__g_current_submitted__value, 1);
		_starpu_perf_counter_update_max_int64(&_starpu_task__g_peak_submitted__value, value);
		_starpu_perf_counter_update_global_sample();

		if (task->cl && task->cl->perf_counter_values)
		{
			struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values;

			(void) STARPU_ATOMIC_ADD(&pcv->task.total_submitted, 1);
			int64_t value = STARPU_ATOMIC_ADDL(&pcv->task.current_submitted, 1);
			_starpu_perf_counter_update_max_int64(&pcv->task.peak_submitted, value);
			_starpu_perf_counter_update_per_codelet_sample(task->cl);
		}
	}

	STARPU_ASSERT_MSG(!(nodeps && continuation), "not supported\n");

	if (!j->internal)
	{
		int nsubmitted_tasks = starpu_task_nsubmitted();
		if (limit_max_submitted_tasks >= 0 && limit_max_submitted_tasks < nsubmitted_tasks
		    && limit_min_submitted_tasks >= 0 && limit_min_submitted_tasks < nsubmitted_tasks)
		{
			starpu_do_schedule();
			_STARPU_TRACE_TASK_THROTTLE_START();
			starpu_task_wait_for_n_submitted(limit_min_submitted_tasks);
			_STARPU_TRACE_TASK_THROTTLE_END();
		}
	}

	_STARPU_TRACE_TASK_SUBMIT_START();

	ret = _starpu_task_submit_head(task);
	if (ret)
	{
		_STARPU_TRACE_TASK_SUBMIT_END();
		return ret;
	}

	if (!continuation)
	{
		STARPU_ASSERT_MSG(!j->submitted || j->terminated >= 1, "Tasks can not be submitted a second time before being terminated. Please use different task structures, or use the regenerate flag to let the task resubmit itself automatically.");
		_STARPU_TRACE_TASK_SUBMIT(j,
					  _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[0],
					  _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[1]);
	}

	/* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */
	if (task->cl && !continuation)
	{
		_starpu_job_set_ordered_buffers(j);

		if (!nodeps)
			_starpu_detect_implicit_data_deps(task);
	}

	if (bundle)
	{
		/* We need to make sure that models for other tasks of the
		 * bundle are also loaded, so the scheduler can estimate the
		 * duration of the whole bundle */
		STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);

		struct _starpu_task_bundle_entry *entry;
		entry = bundle->list;

		while (entry)
		{
			if (entry->task->cl->model)
				_starpu_init_and_load_perfmodel(entry->task->cl->model);

			if (entry->task->cl->energy_model)
				_starpu_init_and_load_perfmodel(entry->task->cl->energy_model);

			entry = entry->next;
		}

		STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
	}

	/* If profiling is activated, we allocate a structure to store the
	 * appropriate info. */
	struct starpu_profiling_task_info *info;
	int profiling = starpu_profiling_status_get();
	info = _starpu_allocate_profiling_info_if_needed(task);
	task->profiling_info = info;

	/* The task is considered blocked until we are sure that no
	 * dependency remains. */
	task->status = STARPU_TASK_BLOCKED;

	if (profiling)
		_starpu_clock_gettime(&info->submit_time);

	ret = _starpu_submit_job(j, nodeps);
#ifdef STARPU_SIMGRID
	if (_starpu_simgrid_task_submit_cost())
		MSG_process_sleep(0.000001);
#endif

	if (is_sync)
	{
		_starpu_sched_do_schedule(task->sched_ctx);
		_starpu_wait_job(j);
		if (task->destroy)
			_starpu_task_destroy(task);
	}

	_STARPU_TRACE_TASK_SUBMIT_END();
	_STARPU_LOG_OUT();
	return ret;
}

int starpu_task_submit(struct starpu_task *task)
{
	return _starpu_task_submit(task, 0);
}
int _starpu_task_submit_internally(struct starpu_task *task)
{
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	j->internal = 1;
	return starpu_task_submit(task);
}

/* application should submit new tasks to StarPU through this function */
int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id)
{
	task->sched_ctx = sched_ctx_id;
	return starpu_task_submit(task);
}

/* The StarPU core can submit tasks directly to the scheduler or a worker,
 * skipping dependencies completely (when it knows what it is doing). */
int starpu_task_submit_nodeps(struct starpu_task *task)
{
	return _starpu_task_submit(task, 1);
}

/*
 * worker->sched_mutex must be locked when calling this function.
 */
int _starpu_task_submit_conversion_task(struct starpu_task *task,
					unsigned int workerid)
{
	int ret;
	STARPU_ASSERT(task->cl);
	STARPU_ASSERT(task->execute_on_a_specific_worker);

	ret = _starpu_task_submit_head(task);
	STARPU_ASSERT(ret == 0);

	/* We retain handle reference count that would have been acquired by data dependencies. */
	unsigned i;
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	for (i=0; i<nbuffers; i++)
	{
		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
		_starpu_spin_lock(&handle->header_lock);
		handle->busy_count++;
		_starpu_spin_unlock(&handle->header_lock);
	}

	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
	_starpu_sched_task_submit(task);

	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
	_starpu_handle_job_submission(j);
	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
	_starpu_job_set_ordered_buffers(j);

	STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
	task->status = STARPU_TASK_READY;
	_starpu_profiling_set_task_push_start_time(task);

	unsigned node = starpu_worker_get_memory_node(workerid);
	if (starpu_get_prefetch_flag())
		starpu_prefetch_task_input_on_node(task, node);

	struct _starpu_worker *worker;
	worker = _starpu_get_worker_struct(workerid);
	starpu_task_list_push_back(&worker->local_tasks, task);
	starpu_wake_worker_locked(worker->workerid);

	_starpu_profiling_set_task_push_end_time(task);

	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
	return 0;
}

void starpu_codelet_init(struct starpu_codelet *cl)
{
	memset(cl, 0, sizeof(struct starpu_codelet));
}

#define _STARPU_CODELET_WORKER_NAME_LEN 32

void starpu_codelet_display_stats(struct starpu_codelet *cl)
{
	unsigned worker;
	unsigned nworkers = starpu_worker_get_count();

	if (cl->name)
		fprintf(stderr, "Statistics for codelet %s\n", cl->name);
	else if (cl->model && cl->model->symbol)
		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);

	unsigned long total = 0;

	for (worker = 0; worker < nworkers; worker++)
		total += cl->per_worker_stats[worker];

	for (worker = 0; worker < nworkers; worker++)
	{
		char name[_STARPU_CODELET_WORKER_NAME_LEN];
		starpu_worker_get_name(worker, name, _STARPU_CODELET_WORKER_NAME_LEN);

		fprintf(stderr, "\t%s -> %lu / %lu (%2.2f %%)\n", name, cl->per_worker_stats[worker], total, (100.0f*cl->per_worker_stats[worker])/total);
	}
}
/*
 * We wait for all the tasks that have already been submitted. Note that a
 * regenerable task is not considered finished until it has been explicitly
 * made non-regenerable again (e.g. from a callback).
 */
int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void)
{
	unsigned nsched_ctxs = _starpu_get_nsched_ctxs();
	unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context();

	/* if there is no indication about which context to wait,
	   we wait for all tasks submitted to starpu */
	if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
	{
		_STARPU_DEBUG("Waiting for all tasks\n");
		STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_all must not be called from a task or callback");
		STARPU_AYU_BARRIER();
		struct _starpu_machine_config *config = _starpu_get_machine_config();
		if(config->topology.nsched_ctxs == 1)
		{
			_starpu_sched_do_schedule(0);
			return _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(0);
		}
		else
		{
			int s;
			for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
			{
				if(config->sched_ctxs[s].do_schedule == 1)
				{
					_starpu_sched_do_schedule(config->sched_ctxs[s].id);
				}
			}
			for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
			{
				if(config->sched_ctxs[s].do_schedule == 1)
				{
					starpu_task_wait_for_all_in_ctx(config->sched_ctxs[s].id);
				}
			}
			return 0;
		}
	}
	else
	{
		_starpu_sched_do_schedule(sched_ctx_id);
		_STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id);
		return _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx_id);
	}
}

int starpu_task_wait_for_all(void)
{
	_starpu_task_wait_for_all_and_return_nb_waited_tasks();
	_starpu_perf_counter_update_global_sample();
	return 0;
}
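
/* Usage sketch (illustrative only, not part of this file): the usual
 * fire-and-forget pattern that this barrier supports. With default
 * detach = 1 tasks the application cannot wait on individual tasks, so it
 * waits globally. `make_task` stands for application code building a task.
 *
 *	for (i = 0; i < ntasks; i++)
 *		starpu_task_submit(make_task(i));
 *	starpu_task_wait_for_all();
 */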
int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_ctx)
{
	_STARPU_TRACE_TASK_WAIT_FOR_ALL_START();
	int ret = _starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx);
	_STARPU_TRACE_TASK_WAIT_FOR_ALL_END();
	/* TODO: improve Temanejo into knowing about contexts ... */
	STARPU_AYU_BARRIER();
	return ret;
}

int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx)
{
	_starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx);
	_starpu_perf_counter_update_global_sample();
	return 0;
}

/*
 * We wait until only a certain number of the already-submitted tasks are
 * left. Note that a regenerable task is not considered finished until it has
 * been explicitly made non-regenerable again (e.g. from a callback).
 */
int starpu_task_wait_for_n_submitted(unsigned n)
{
	unsigned nsched_ctxs = _starpu_get_nsched_ctxs();
	unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context();

	/* if there is no indication about which context to wait,
	   we wait for all tasks submitted to starpu */
	if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
	{
		_STARPU_DEBUG("Waiting for all tasks\n");
		STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_n_submitted must not be called from a task or callback");

		struct _starpu_machine_config *config = _starpu_get_machine_config();
		if(config->topology.nsched_ctxs == 1)
			_starpu_wait_for_n_submitted_tasks_of_sched_ctx(0, n);
		else
		{
			int s;
			for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
			{
				if(config->sched_ctxs[s].do_schedule == 1)
				{
					_starpu_wait_for_n_submitted_tasks_of_sched_ctx(config->sched_ctxs[s].id, n);
				}
			}
		}
	}
	else
	{
		_STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id);
		_starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx_id, n);
	}
	_starpu_perf_counter_update_global_sample();
	return 0;
}
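
/* Usage sketch (illustrative only, not part of this file): manual submission
 * throttling, mirroring what _starpu_task_submit() does automatically when
 * the STARPU_LIMIT_MAX_SUBMITTED_TASKS / STARPU_LIMIT_MIN_SUBMITTED_TASKS
 * environment variables are set. The thresholds are arbitrary example values.
 *
 *	for (i = 0; i < ntasks; i++)
 *	{
 *		starpu_task_submit(make_task(i));
 *		if (starpu_task_nsubmitted() > 10000)
 *			starpu_task_wait_for_n_submitted(9000);
 *	}
 */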
int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx, unsigned n)
{
	_starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx, n);
	_starpu_perf_counter_update_global_sample();
	return 0;
}

/*
 * We wait until there is no ready task any more (i.e. StarPU will not be able
 * to progress any more).
 */
int starpu_task_wait_for_no_ready(void)
{
	STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_no_ready must not be called from a task or callback");

	struct _starpu_machine_config *config = _starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
	{
		_starpu_sched_do_schedule(0);
		_starpu_wait_for_no_ready_of_sched_ctx(0);
	}
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				_starpu_sched_do_schedule(config->sched_ctxs[s].id);
			}
		}
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				_starpu_wait_for_no_ready_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	_starpu_perf_counter_update_global_sample();
	return 0;
}

void starpu_iteration_push(unsigned long iteration)
{
	struct _starpu_sched_ctx *ctx = _starpu_get_sched_ctx_struct(_starpu_sched_ctx_get_current_context());
	unsigned level = ctx->iteration_level++;
	if (level < sizeof(ctx->iterations)/sizeof(ctx->iterations[0]))
		ctx->iterations[level] = iteration;
}

void starpu_iteration_pop(void)
{
	struct _starpu_sched_ctx *ctx = _starpu_get_sched_ctx_struct(_starpu_sched_ctx_get_current_context());
	STARPU_ASSERT_MSG(ctx->iteration_level > 0, "calls to starpu_iteration_pop must match starpu_iteration_push calls");
	/* pre-decrement so that we clear the slot the matching push filled */
	unsigned level = --ctx->iteration_level;
	if (level < sizeof(ctx->iterations)/sizeof(ctx->iterations[0]))
		ctx->iterations[level] = -1;
}
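
/* Usage sketch (illustrative only, not part of this file): bracketing each
 * outer iteration of an application loop, so that tools such as trace
 * visualization can attribute submitted tasks to iterations (see the two
 * iteration values passed to _STARPU_TRACE_TASK_SUBMIT above). Pushes and
 * pops must be balanced.
 *
 *	for (iter = 0; iter < niter; iter++)
 *	{
 *		starpu_iteration_push(iter);
 *		for (i = 0; i < ntasks; i++)
 *			starpu_task_submit(make_task(i));
 *		starpu_iteration_pop();
 *	}
 */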
void starpu_do_schedule(void)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		_starpu_sched_do_schedule(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				_starpu_sched_do_schedule(config->sched_ctxs[s].id);
			}
		}
	}
}

void
starpu_drivers_request_termination(void)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();

	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	int nsubmitted = starpu_task_nsubmitted();
	config->submitting = 0;
	if (nsubmitted == 0)
	{
		ANNOTATE_HAPPENS_AFTER(&config->running);
		config->running = 0;
		ANNOTATE_HAPPENS_BEFORE(&config->running);
		STARPU_WMB();
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				_starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}

	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
}

int starpu_task_nsubmitted(void)
{
	int nsubmitted = 0;
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		nsubmitted = _starpu_get_nsubmitted_tasks_of_sched_ctx(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				nsubmitted += _starpu_get_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	return nsubmitted;
}

int starpu_task_nready(void)
{
	int nready = 0;
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		nready = starpu_sched_ctx_get_nready_tasks(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].do_schedule == 1)
			{
				nready += starpu_sched_ctx_get_nready_tasks(config->sched_ctxs[s].id);
			}
		}
	}
	return nready;
}

/* Return the task currently executed by the worker, or NULL if this is called
 * either from a thread that is not executing a task or simply because there
 * is no task being executed at the moment. */
struct starpu_task *starpu_task_get_current(void)
{
	return (struct starpu_task *) STARPU_PTHREAD_GETSPECIFIC(current_task_key);
}
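
/* Usage sketch (illustrative only, not part of this file): a CPU kernel may
 * query the task it is running on behalf of, e.g. to inspect public fields of
 * struct starpu_task. `my_cpu_func` is a placeholder kernel.
 *
 *	void my_cpu_func(void *buffers[], void *cl_arg)
 *	{
 *		struct starpu_task *self = starpu_task_get_current();
 *		if (self && self->cl && self->cl->name)
 *			fprintf(stderr, "running codelet %s\n", self->cl->name);
 *	}
 */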
  1158. void _starpu_set_current_task(struct starpu_task *task)
  1159. {
  1160. STARPU_PTHREAD_SETSPECIFIC(current_task_key, task);
  1161. }
  1162. int starpu_task_get_current_data_node(unsigned i)
  1163. {
  1164. struct starpu_task *task = starpu_task_get_current();
  1165. if (!task)
  1166. return -1;
  1167. struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
  1168. struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
  1169. unsigned orderedindex = descrs[i].orderedindex;
  1170. return descrs[orderedindex].node;
  1171. }
  1172. #ifdef STARPU_OPENMP
  1173. /* Prepare the fields of the currentl task for accepting a new set of
  1174. * dependencies in anticipation of becoming a continuation.
  1175. *
  1176. * When the task becomes 'continued', it will only be queued again when the new
  1177. * set of dependencies is fulfilled. */
  1178. void _starpu_task_prepare_for_continuation(void)
  1179. {
  1180. _starpu_job_prepare_for_continuation(_starpu_get_job_associated_to_task(starpu_task_get_current()));
  1181. }
  1182. void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit,
  1183. void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg)
  1184. {
  1185. _starpu_job_prepare_for_continuation_ext(_starpu_get_job_associated_to_task(starpu_task_get_current()),
  1186. continuation_resubmit, continuation_callback_on_sleep, continuation_callback_on_sleep_arg);
  1187. }
  1188. void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg)
  1189. {
  1190. _starpu_job_set_omp_cleanup_callback(_starpu_get_job_associated_to_task(task),
  1191. omp_cleanup_callback, omp_cleanup_callback_arg);
  1192. }
  1193. #endif
  1194. /*
  1195. * Returns 0 if tasks does not use any multiformat handle, 1 otherwise.
  1196. */
  1197. int
  1198. _starpu_task_uses_multiformat_handles(struct starpu_task *task)
  1199. {
  1200. unsigned i;
  1201. unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
  1202. for (i = 0; i < nbuffers; i++)
  1203. {
  1204. if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i)))
  1205. return 1;
  1206. }
  1207. return 0;
  1208. }

/*
 * Check whether the given handle needs to be converted in order to be used on
 * the node given as the second argument.
 */
int
_starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
				     unsigned int node)
{
	return _starpu_handle_needs_conversion_task_for_arch(handle, starpu_node_get_kind(node));
}

int
_starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
					      enum starpu_node_kind node_kind)
{
	/*
	 * Here, we assume that CUDA devices and OpenCL devices use the
	 * same data structure. A conversion is only needed when moving
	 * data from a CPU to a GPU, or the other way around.
	 */
	switch (node_kind)
	{
		case STARPU_CPU_RAM:
			switch(starpu_node_get_kind(handle->mf_node))
			{
				case STARPU_CPU_RAM:
					return 0;
				case STARPU_CUDA_RAM:	/* Fall through */
				case STARPU_OPENCL_RAM:
				case STARPU_MIC_RAM:
				case STARPU_MPI_MS_RAM:
					return 1;
				default:
					STARPU_ABORT();
			}
			break;
		case STARPU_CUDA_RAM:	/* Fall through */
		case STARPU_OPENCL_RAM:
		case STARPU_MIC_RAM:
		case STARPU_MPI_MS_RAM:
			switch(starpu_node_get_kind(handle->mf_node))
			{
				case STARPU_CPU_RAM:
					return 1;
				case STARPU_CUDA_RAM:
				case STARPU_OPENCL_RAM:
				case STARPU_MIC_RAM:
				case STARPU_MPI_MS_RAM:
					return 0;
				default:
					STARPU_ABORT();
			}
			break;
		default:
			STARPU_ABORT();
	}
	/* that instruction should never be reached */
	return -EINVAL;
}
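
/* Summary of the rules above (mf_node being the node whose format the
 * multiformat handle currently holds):
 *
 *	target kind \ mf_node kind | CPU_RAM | CUDA/OpenCL/MIC/MPI_MS
 *	CPU_RAM                    |    0    |           1
 *	CUDA/OpenCL/MIC/MPI_MS     |    1    |           0
 */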

void starpu_task_set_implementation(struct starpu_task *task, unsigned impl)
{
	_starpu_get_job_associated_to_task(task)->nimpl = impl;
}

unsigned starpu_task_get_implementation(struct starpu_task *task)
{
	return _starpu_get_job_associated_to_task(task)->nimpl;
}

unsigned long starpu_task_get_job_id(struct starpu_task *task)
{
	return _starpu_get_job_associated_to_task(task)->job_id;
}
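
/* Usage sketch (illustrative): a multi-implementation codelet can check at
 * runtime which of its variants the scheduler picked for the running task:
 *
 *	unsigned impl = starpu_task_get_implementation(starpu_task_get_current());
 */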

static starpu_pthread_t watchdog_thread;

static int sleep_some(float timeout)
{
	/* If we did a single sleep(timeout), we might wait too long at the end of the computation.
	 * To avoid that, we do several 1-second sleeps (and check after each one whether StarPU is still running). */
	float t;
	for (t = timeout ; t > 1.; t--)
	{
		starpu_sleep(1.);
		if (!_starpu_machine_is_running())
			/* Application finished, don't bother finishing the sleep */
			return 0;
	}
	/* and one final sleep (of less than 1s) with the rest (if needed) */
	if (t > 0.)
		starpu_sleep(t);
	return 1;
}

/* Periodically check that StarPU does finish some tasks */
static void *watchdog_func(void *arg)
{
	char *timeout_env = arg;
	float timeout, delay;

#ifdef _MSC_VER
	timeout = ((float) _atoi64(timeout_env)) / 1000000;
#else
	timeout = ((float) atoll(timeout_env)) / 1000000;
#endif
	delay = ((float) watchdog_delay) / 1000000;
	struct _starpu_machine_config *config = _starpu_get_machine_config();

	starpu_pthread_setname("watchdog");

	if (!sleep_some(delay))
		return NULL;

	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	while (_starpu_machine_is_running())
	{
		int last_nsubmitted = starpu_task_nsubmitted();
		config->watchdog_ok = 0;
		STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);

		if (!sleep_some(timeout))
			return NULL;

		STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
		if (!config->watchdog_ok && last_nsubmitted
				&& last_nsubmitted == starpu_task_nsubmitted())
		{
			_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n",
				    timeout);
			if (watchdog_crash)
			{
				_STARPU_MSG("Crashing the process\n");
				raise(SIGABRT);
			}
			else
				_STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
		}
		/* Only shout again after another period */
		config->watchdog_ok = 1;
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
	return NULL;
}

void _starpu_watchdog_init(void)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT");

	STARPU_PTHREAD_MUTEX_INIT(&config->submitted_mutex, NULL);

	if (!timeout_env)
		return;

	STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, timeout_env);
}

void _starpu_watchdog_shutdown(void)
{
	char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT");
	if (!timeout_env)
		return;

	STARPU_PTHREAD_JOIN(watchdog_thread, NULL);
}
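
/* Usage sketch: the watchdog is driven entirely by environment variables
 * (both are mentioned in the messages above); e.g., to abort the process
 * when no task completes for 10 seconds ('./my_app' is a placeholder):
 *
 *	$ STARPU_WATCHDOG_TIMEOUT=10000000 STARPU_WATCHDOG_CRASH=1 ./my_app
 *
 * The timeout value is expressed in microseconds. */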

static void _starpu_ft_check_support(const struct starpu_task *task)
{
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	unsigned i;
	for (i = 0; i < nbuffers; i++)
	{
		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, i);
		STARPU_ASSERT_MSG(mode == STARPU_R || mode == STARPU_W,
				  "starpu_task_ft_failed is only supported for tasks with access modes STARPU_R and STARPU_W");
	}
}

struct starpu_task *starpu_task_ft_create_retry
(const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void *))
{
	/* Create a new task that will actually perform the computation */
	struct starpu_task *new_task = starpu_task_create();
	*new_task = *template_task;
	new_task->prologue_callback_func = NULL;
	/* XXX: cl_arg needs to be duplicated */
	STARPU_ASSERT_MSG(!meta_task->cl_arg_free || !meta_task->cl_arg, "not supported yet");
	STARPU_ASSERT_MSG(!meta_task->callback_func, "not supported");
	new_task->callback_func = check_ft;
	new_task->callback_arg = (void*) meta_task;
	new_task->callback_arg_free = 0;
	new_task->prologue_callback_arg_free = 0;
	STARPU_ASSERT_MSG(!new_task->prologue_callback_pop_arg_free, "not supported");
	new_task->use_tag = 0;
	new_task->synchronous = 0;
	new_task->destroy = 1;
	new_task->regenerate = 0;
	new_task->no_submitorder = 1;
	new_task->failed = 0;
	new_task->status = STARPU_TASK_INVALID;
	new_task->profiling_info = NULL;
	new_task->prev = NULL;
	new_task->next = NULL;
	new_task->starpu_private = NULL;
	new_task->omp_task = NULL;
	return new_task;
}
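
/* Default check_ft callback: called when an attempt completes; on success it
 * releases the meta task's end dependency, otherwise it clones the failed
 * attempt and submits a new retry. */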
static void _starpu_default_check_ft(void *arg)
{
	struct starpu_task *meta_task = arg;
	struct starpu_task *current_task = starpu_task_get_current();
	struct starpu_task *new_task;
	int ret;

	if (!current_task->failed)
	{
		starpu_task_ft_success(meta_task);
		return;
	}

	new_task = starpu_task_ft_create_retry
		(meta_task, current_task, _starpu_default_check_ft);

	ret = starpu_task_submit_nodeps(new_task);
	STARPU_ASSERT(!ret);
}
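
/* Prologue to be set on a "meta" task: it submits a first real attempt of the
 * work and demotes the meta task to a STARPU_NOWHERE placeholder that only
 * completes once check_ft releases the end dependency added here. */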
void starpu_task_ft_prologue(void *arg)
{
	struct starpu_task *meta_task = starpu_task_get_current();
	struct starpu_task *new_task;
	void (*check_ft)(void*) = arg;
	int ret;

	if (!check_ft)
		check_ft = _starpu_default_check_ft;

	/* Create a task which will do the actual computation */
	new_task = starpu_task_ft_create_retry
		(meta_task, meta_task, check_ft);

	ret = starpu_task_submit_nodeps(new_task);
	STARPU_ASSERT(!ret);

	/* Make the meta task wait until the computation eventually succeeds */
	starpu_task_end_dep_add(meta_task, 1);
	meta_task->where = STARPU_NOWHERE;
}

void starpu_task_ft_failed(struct starpu_task *task)
{
	_starpu_ft_check_support(task);
	task->failed = 1;
}

void starpu_task_ft_success(struct starpu_task *meta_task)
{
	starpu_task_end_dep_release(meta_task);
}
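
/* Usage sketch (illustrative only: 'cl', 'handle' and compute() are
 * hypothetical): make a task fault-tolerant by setting the prologue above on
 * a "meta" task; the kernel then flags failed attempts so that check_ft
 * resubmits them:
 *
 *	void cpu_func(void *buffers[], void *cl_arg)
 *	{
 *		if (compute(buffers) != 0)
 *			starpu_task_ft_failed(starpu_task_get_current());
 *	}
 *
 *	struct starpu_task *meta = starpu_task_create();
 *	meta->cl = &cl;
 *	meta->handles[0] = handle;
 *	meta->prologue_callback_func = starpu_task_ft_prologue;
 *	meta->prologue_callback_arg = NULL;	// NULL selects _starpu_default_check_ft
 *	starpu_task_submit(meta);
 */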