task.c
/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2009-2015  Université de Bordeaux
 * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
 * Copyright (C) 2011  Télécom-SudParis
 * Copyright (C) 2011, 2014  INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <starpu.h>
#include <starpu_profiling.h>
#include <core/workers.h>
#include <core/sched_ctx.h>
#include <core/jobs.h>
#include <core/task.h>
#include <core/task_bundle.h>
#include <common/config.h>
#include <common/utils.h>
#include <common/fxt.h>
#include <profiling/profiling.h>
#include <profiling/bound.h>
#include <math.h>
#include <string.h>
#include <core/debug.h>
#include <time.h>
#include <signal.h>
#ifdef STARPU_HAVE_WINDOWS
#include <windows.h>
#endif

/* XXX this should be reinitialized when StarPU is shutdown (or we should make
 * sure that no task remains !) */

/* TODO we could make this hierarchical to avoid contention ? */
//static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER;

/* This key stores the task currently handled by the thread; note that we
 * cannot use the worker structure to store that information because it is
 * possible that we have a task with a NULL codelet, which means its callback
 * could be executed by a user thread as well. */
static starpu_pthread_key_t current_task_key;

void starpu_task_init(struct starpu_task *task)
{
	/* TODO: memcpy from a template instead? benchmark it */

	STARPU_ASSERT(task);

	/* As most of the fields must be initialised at NULL, let's put 0
	 * everywhere */
	memset(task, 0, sizeof(struct starpu_task));

	task->sequential_consistency = 1;

	/* Now we can initialise fields which require a custom value */
#if STARPU_DEFAULT_PRIO != 0
	task->priority = STARPU_DEFAULT_PRIO;
#endif

	task->detach = 1;

#if STARPU_TASK_INVALID != 0
	task->status = STARPU_TASK_INVALID;
#endif

	task->predicted = NAN;
	task->predicted_transfer = NAN;

	task->magic = 42;
	task->sched_ctx = STARPU_NMAX_SCHED_CTXS;

	task->flops = 0.0;

	task->scheduled = 0;
	task->prefetched = 0;

	task->dyn_handles = NULL;
	task->dyn_interfaces = NULL;
	task->name = NULL;
}

/* Free all the resources allocated for a task, without deallocating the task
 * structure itself (this is required for statically allocated tasks).
 * All values previously set by the user, like codelet and handles, remain
 * unchanged. */
void starpu_task_clean(struct starpu_task *task)
{
	STARPU_ASSERT(task);

	/* If a buffer was allocated to store the profiling info, we free it. */
	if (task->profiling_info)
	{
		free(task->profiling_info);
		task->profiling_info = NULL;
	}

	/* In case the task is (still) part of a bundle */
	starpu_task_bundle_t bundle = task->bundle;
	if (bundle)
		starpu_task_bundle_remove(bundle, task);

	if (task->dyn_handles)
	{
		free(task->dyn_handles);
		task->dyn_handles = NULL;
		free(task->dyn_interfaces);
		task->dyn_interfaces = NULL;
	}

	if (task->dyn_modes)
	{
		free(task->dyn_modes);
		task->dyn_modes = NULL;
	}

	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
	if (j)
	{
		_starpu_job_destroy(j);
		task->starpu_private = NULL;
	}
}
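
/* A minimal lifecycle sketch (not part of the original file, and assuming a
 * hypothetical codelet "my_codelet" with no data buffers): a statically
 * allocated task is initialised with starpu_task_init() and its resources are
 * released with starpu_task_clean() once it has terminated.
 *
 *	struct starpu_task task;
 *	starpu_task_init(&task);
 *	task.cl = &my_codelet;
 *	task.synchronous = 1;
 *	starpu_task_submit(&task);	// returns once the task has completed
 *	starpu_task_clean(&task);	// the task structure itself is not freed
 */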

struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create(void)
{
	struct starpu_task *task;

	task = (struct starpu_task *) malloc(sizeof(struct starpu_task));
	STARPU_ASSERT(task);

	starpu_task_init(task);

	/* Dynamically allocated tasks are destroyed by default */
	task->destroy = 1;

	return task;
}
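
/* A usage sketch (an assumption, with "my_codelet" hypothetical): a task from
 * starpu_task_create() keeps destroy = 1, so StarPU frees it automatically
 * once it terminates and no explicit cleanup is needed.
 *
 *	struct starpu_task *t = starpu_task_create();
 *	t->cl = &my_codelet;
 *	starpu_task_submit(t);	// asynchronous; freed by StarPU when done
 */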

/* Free the resources allocated during starpu_task_create. This function can be
 * called automatically after the execution of a task by setting the "destroy"
 * flag of the starpu_task structure (default behaviour). Calling this function
 * on a statically allocated task results in undefined behaviour. */
void _starpu_task_destroy(struct starpu_task *task)
{
	/* If starpu_task_destroy is called in a callback, we just set the destroy
	   flag. The task will be destroyed after the callback returns. */
	if (task == starpu_task_get_current()
	    && _starpu_get_local_worker_status() == STATUS_CALLBACK)
	{
		task->destroy = 1;
	}
	else
	{
		starpu_task_clean(task);

		/* TODO handle the case of task with detach = 1 and destroy = 1 */
		/* TODO handle the case of non terminated tasks -> return -EINVAL */

		/* Does the user want StarPU to release cl_arg? */
		if (task->cl_arg_free)
			free(task->cl_arg);

		/* Does the user want StarPU to release callback_arg? */
		if (task->callback_arg_free)
			free(task->callback_arg);

		/* Does the user want StarPU to release prologue_callback_arg? */
		if (task->prologue_callback_arg_free)
			free(task->prologue_callback_arg);

		/* Does the user want StarPU to release prologue_pop_arg? */
		if (task->prologue_callback_pop_arg_free)
			free(task->prologue_callback_pop_arg);

		free(task);
	}
}

void starpu_task_destroy(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->destroy || !task->detach, "starpu_task_destroy must not be called for a task with destroy = 1 and detach = 1");
	_starpu_task_destroy(task);
}

int starpu_task_finished(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "starpu_task_finished can only be called on tasks with detach = 0");
	return _starpu_job_finished(_starpu_get_job_associated_to_task(task));
}
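
/* A polling sketch (an assumption; "do_something_else" is a hypothetical
 * application function): starpu_task_finished() requires detach = 0, so the
 * task must be made non-detached before submission.
 *
 *	task->detach = 0;
 *	starpu_task_submit(task);
 *	while (!starpu_task_finished(task))
 *		do_something_else();
 */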

int starpu_task_wait(struct starpu_task *task)
{
	_STARPU_LOG_IN();
	STARPU_ASSERT(task);

	STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0");

	if (task->detach || task->synchronous)
	{
		_STARPU_DEBUG("Task is detached or synchronous. Waiting returns immediately\n");
		_STARPU_LOG_OUT_TAG("einval");
		return -EINVAL;
	}

	STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait must not be called from a task or callback");

	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;

	_starpu_wait_job(j);

	/* as this is a synchronous task, the release of the job
	   structure was deferred */
	if (task->destroy)
		_starpu_task_destroy(task);

	_STARPU_LOG_OUT();
	return 0;
}
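
/* A blocking-wait sketch (an assumption): as asserted above, this only works
 * for non-detached, non-synchronous tasks, and not from a task or callback.
 *
 *	task->detach = 0;
 *	starpu_task_submit(task);
 *	int ret = starpu_task_wait(task);	// 0 on success, -EINVAL as above
 */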

#ifdef STARPU_OPENMP
int _starpu_task_test_termination(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "_starpu_task_test_termination can only be called on tasks with detach = 0");

	if (task->detach || task->synchronous)
	{
		_STARPU_DEBUG("Task is detached or synchronous\n");
		_STARPU_LOG_OUT_TAG("einval");
		return -EINVAL;
	}

	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;

	int ret = _starpu_test_job_termination(j);

	if (ret)
	{
		if (task->destroy)
			_starpu_task_destroy(task);
	}

	return ret;
}
#endif

struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
{
	STARPU_ASSERT(task);

	if (!task->starpu_private)
	{
		struct _starpu_job *j = _starpu_job_create(task);
		task->starpu_private = j;
	}

	return (struct _starpu_job *)task->starpu_private;
}

/* NB in case we have a regenerable task, it is possible that the job was
 * already counted. */
int _starpu_submit_job(struct _starpu_job *j)
{
	struct starpu_task *task = j->task;
	int ret;
#ifdef STARPU_OPENMP
	const unsigned continuation = j->continuation;
#else
	const unsigned continuation = 0;
#endif

	_STARPU_LOG_IN();
	/* notify bound computation of a new task */
	_starpu_bound_record(j);

	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);

#ifdef STARPU_USE_SC_HYPERVISOR
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx);
	if(sched_ctx != NULL && j->task->sched_ctx != _starpu_get_initial_sched_ctx()->id && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS
	   && sched_ctx->perf_counters != NULL)
	{
		struct starpu_perfmodel_arch arch;
		arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
		arch.ndevices = 1;
		arch.devices[0].type = STARPU_CPU_WORKER;
		arch.devices[0].devid = 0;
		arch.devices[0].ncores = 1;
		_starpu_compute_buffers_footprint(j->task->cl->model, &arch, 0, j);
		free(arch.devices);
		int i;
		size_t data_size = 0;
		if (j->task->cl)
		{
			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
			for(i = 0; i < nbuffers; i++)
			{
				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
				if (handle != NULL)
					data_size += _starpu_data_get_size(handle);
			}
		}

		_STARPU_TRACE_HYPERVISOR_BEGIN();
		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);
		_STARPU_TRACE_HYPERVISOR_END();
	}
#endif /* STARPU_USE_SC_HYPERVISOR */

	/* We retain handle reference count */
	if (task->cl && !continuation)
	{
		unsigned i;
		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
		for (i = 0; i < nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			_starpu_spin_lock(&handle->header_lock);
			handle->busy_count++;
			_starpu_spin_unlock(&handle->header_lock);
		}
	}

	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);

	/* Need to atomically set submitted to 1 and check dependencies, since
	 * this is concurrent with _starpu_notify_cg */
	j->terminated = 0;

#ifdef STARPU_OPENMP
	if (continuation)
	{
		j->discontinuous = 1;
		j->continuation = 0;
	}
#endif

	if (!j->submitted)
		j->submitted = 1;
	else
		j->submitted = 2;

#ifdef STARPU_OPENMP
	if (continuation)
	{
		ret = _starpu_reenforce_task_deps_and_schedule(j);
	}
	else
#endif
	{
		ret = _starpu_enforce_deps_and_schedule(j);
	}

	_STARPU_LOG_OUT();
	return ret;
}

/* Note: this is racy, so valgrind would complain. But since we'll always put
 * the same values, this is not a problem. */
void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
{
	if (!cl)
		return;

	int is_where_unset = cl->where == 0;
	unsigned i, some_impl;

	/* Check deprecated and unset fields (where, <device>_func,
	 * <device>_funcs) */

	/* CPU */
	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS && cl->cpu_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both cpu_func and cpu_funcs are set. Ignoring cpu_func.\n");
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
	{
		cl->cpu_funcs[0] = cl->cpu_func;
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->cpu_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && cl->cpu_func == 0)
	{
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (some_impl && is_where_unset)
	{
		cl->where |= STARPU_CPU;
	}

	/* CUDA */
	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both cuda_func and cuda_funcs are set. Ignoring cuda_func.\n");
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS)
	{
		cl->cuda_funcs[0] = cl->cuda_func;
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->cuda_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && cl->cuda_func == 0)
	{
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (some_impl && is_where_unset)
	{
		cl->where |= STARPU_CUDA;
	}

	/* OpenCL */
	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS && cl->opencl_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both opencl_func and opencl_funcs are set. Ignoring opencl_func.\n");
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS)
	{
		cl->opencl_funcs[0] = cl->opencl_func;
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->opencl_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && cl->opencl_func == 0)
	{
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (some_impl && is_where_unset)
	{
		cl->where |= STARPU_OPENCL;
	}

	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->mic_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && is_where_unset)
	{
		cl->where |= STARPU_MIC;
	}

	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->scc_funcs[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && is_where_unset)
	{
		cl->where |= STARPU_SCC;
	}

	some_impl = 0;
	for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
		if (cl->cpu_funcs_name[i])
		{
			some_impl = 1;
			break;
		}
	if (some_impl && is_where_unset)
	{
		cl->where |= STARPU_MIC|STARPU_SCC;
	}
}

void _starpu_task_check_deprecated_fields(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED)
{
	/* None any more */
}

/* application should submit new tasks to StarPU through this function */
int starpu_task_submit(struct starpu_task *task)
{
	_STARPU_LOG_IN();
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(task->magic == 42, "Tasks must be created with starpu_task_create, or initialized with starpu_task_init.");

	int ret;
	unsigned is_sync = task->synchronous;
	starpu_task_bundle_t bundle = task->bundle;

	/* Internally, StarPU manipulates a struct _starpu_job *, which is a
	 * wrapper around the task structure; it is possible that this job
	 * structure was already allocated. */
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

	const unsigned continuation =
#ifdef STARPU_OPENMP
		j->continuation
#else
		0
#endif
		;

	if (j->internal)
	{
		// Internal tasks are submitted to the initial context
		task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we set the current context
		task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}

	if (is_sync)
	{
		/* It may not be possible to submit a synchronous
		 * (blocking) task from here */
		STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "submitting a synchronous task must not be done from a task or a callback");
		task->detach = 0;
	}

	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);

	if (task->cl)
	{
		unsigned i;
		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
		_STARPU_TRACE_UPDATE_TASK_CNT(0);

		/* Check buffers */
		if (task->dyn_handles == NULL)
			STARPU_ASSERT_MSG(STARPU_TASK_GET_NBUFFERS(task) <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d). Either use the --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS);

		if (task->dyn_handles)
		{
			task->dyn_interfaces = malloc(nbuffers * sizeof(void *));
		}

		for (i = 0; i < nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);

			/* Make sure handles are not partitioned */
			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task");

			/* Provide the home interface for now if any,
			 * for can_execute hooks */
			if (handle->home_node != -1)
				_STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i);
		}

		/* Check that the types of worker required by the task exist */
		if (!_starpu_worker_exists(task))
		{
			_STARPU_LOG_OUT_TAG("ENODEV");
			return -ENODEV;
		}

		/* In case we require that a task should be explicitly
		 * executed on a specific worker, we make sure that the worker
		 * is able to execute this task. */
		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
		{
			_STARPU_LOG_OUT_TAG("ENODEV");
			return -ENODEV;
		}

		if (task->execute_on_a_specific_worker)
			/* We already know where it will go, so already push
			 * idle requests to get the data there */
			starpu_idle_prefetch_task_input_on_node(task, starpu_worker_get_memory_node(task->workerid));

		/* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */
		if (!continuation)
		{
			_starpu_detect_implicit_data_deps(task);
		}

		if (task->cl->model)
			_starpu_init_and_load_perfmodel(task->cl->model);

		if (task->cl->power_model)
			_starpu_init_and_load_perfmodel(task->cl->power_model);
	}

	if (bundle)
	{
		/* We need to make sure that models for other tasks of the
		 * bundle are also loaded, so the scheduler can estimate the
		 * duration of the whole bundle */
		STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);

		struct _starpu_task_bundle_entry *entry;
		entry = bundle->list;

		while (entry)
		{
			if (entry->task->cl->model)
				_starpu_init_and_load_perfmodel(entry->task->cl->model);

			if (entry->task->cl->power_model)
				_starpu_init_and_load_perfmodel(entry->task->cl->power_model);

			entry = entry->next;
		}

		STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
	}

	/* If profiling is activated, we allocate a structure to store the
	 * appropriate info. */
	struct starpu_profiling_task_info *info;
	int profiling = starpu_profiling_status_get();
	info = _starpu_allocate_profiling_info_if_needed(task);
	task->profiling_info = info;

	/* The task is considered blocked until we are sure that no
	 * dependency remains. */
	task->status = STARPU_TASK_BLOCKED;

	if (profiling)
		_starpu_clock_gettime(&info->submit_time);

	ret = _starpu_submit_job(j);

	if (is_sync)
	{
		_starpu_wait_job(j);
		if (task->destroy)
			_starpu_task_destroy(task);
	}

	_STARPU_LOG_OUT();
	return ret;
}
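
/* A submission sketch with error handling (an assumption): -ENODEV is
 * returned above when no worker can run the codelet, so a caller may want to
 * check for it before treating other errors as fatal.
 *
 *	int ret = starpu_task_submit(task);
 *	if (ret == -ENODEV)
 *		fprintf(stderr, "no worker can execute this codelet\n");
 *	else
 *		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 */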

int _starpu_task_submit_internally(struct starpu_task *task)
{
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	j->internal = 1;
	return starpu_task_submit(task);
}

/* application should submit new tasks to StarPU through this function */
int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id)
{
	task->sched_ctx = sched_ctx_id;
	return starpu_task_submit(task);
}

/* The StarPU core can submit tasks directly to the scheduler or a worker,
 * skipping dependencies completely (when it knows what it is doing). */
int _starpu_task_submit_nodeps(struct starpu_task *task)
{
	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);

	if (task->cl)
	{
		if (task->cl->model)
			_starpu_init_and_load_perfmodel(task->cl->model);

		if (task->cl->power_model)
			_starpu_init_and_load_perfmodel(task->cl->power_model);
	}

	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

	if (j->internal)
	{
		// Internal tasks are submitted to the initial context
		j->task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we set the current context
		j->task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}

	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);

	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
	j->submitted = 1;

	if (task->cl)
	{
		/* This would be done by data dependencies checking */
		unsigned i;
		unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
		for (i = 0; i < nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
			_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);

			enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, i);
			_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);

			int node = -1;
			if (j->task->cl->specific_nodes)
				node = STARPU_CODELET_GET_NODE(j->task->cl, i);
			_STARPU_JOB_SET_ORDERED_BUFFER_NODE(j, node, i);
		}
	}

	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);

	return _starpu_push_task(j);
}

/*
 * worker->sched_mutex must be locked when calling this function.
 */
int _starpu_task_submit_conversion_task(struct starpu_task *task,
					unsigned int workerid)
{
	STARPU_ASSERT(task->cl);
	STARPU_ASSERT(task->execute_on_a_specific_worker);

	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);

	/* We should factorize that */
	if (task->cl->model)
		_starpu_init_and_load_perfmodel(task->cl->model);

	if (task->cl->power_model)
		_starpu_init_and_load_perfmodel(task->cl->power_model);

	/* We retain handle reference count */
	unsigned i;
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	for (i = 0; i < nbuffers; i++)
	{
		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
		_starpu_spin_lock(&handle->header_lock);
		handle->busy_count++;
		_starpu_spin_unlock(&handle->header_lock);
	}

	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

	if (j->internal)
	{
		// Internal tasks are submitted to the initial context
		j->task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we set the current context
		j->task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}

	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);

	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
	j->submitted = 1;
	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);

	for (i = 0; i < nbuffers; i++)
	{
		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);

		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, i);
		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);

		int node = -1;
		if (j->task->cl->specific_nodes)
			node = STARPU_CODELET_GET_NODE(j->task->cl, i);
		_STARPU_JOB_SET_ORDERED_BUFFER_NODE(j, node, i);
	}

	_STARPU_LOG_IN();

	task->status = STARPU_TASK_READY;
	_starpu_profiling_set_task_push_start_time(task);

	unsigned node = starpu_worker_get_memory_node(workerid);
	if (starpu_get_prefetch_flag())
		starpu_prefetch_task_input_on_node(task, node);

	struct _starpu_worker *worker;
	worker = _starpu_get_worker_struct(workerid);
	starpu_task_list_push_back(&worker->local_tasks, task);

	_starpu_profiling_set_task_push_end_time(task);

	_STARPU_LOG_OUT();
	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);

	return 0;
}

void starpu_codelet_init(struct starpu_codelet *cl)
{
	memset(cl, 0, sizeof(struct starpu_codelet));
}
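
/* A codelet setup sketch (an assumption; "my_cpu_func" is a hypothetical CPU
 * implementation): starpu_codelet_init() zeroes the structure, after which
 * only the fields actually used need to be filled in.
 *
 *	struct starpu_codelet cl;
 *	starpu_codelet_init(&cl);
 *	cl.cpu_funcs[0] = my_cpu_func;
 *	cl.nbuffers = 1;
 *	cl.modes[0] = STARPU_RW;
 */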

void starpu_codelet_display_stats(struct starpu_codelet *cl)
{
	unsigned worker;
	unsigned nworkers = starpu_worker_get_count();

	if (cl->name)
		fprintf(stderr, "Statistics for codelet %s\n", cl->name);
	else if (cl->model && cl->model->symbol)
		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);

	unsigned long total = 0;

	for (worker = 0; worker < nworkers; worker++)
		total += cl->per_worker_stats[worker];

	for (worker = 0; worker < nworkers; worker++)
	{
		char name[32];
		starpu_worker_get_name(worker, name, 32);

		fprintf(stderr, "\t%s -> %lu / %lu (%2.2f %%)\n", name, cl->per_worker_stats[worker], total, (100.0f*cl->per_worker_stats[worker])/total);
	}
}

/*
 * We wait for all the tasks that have already been submitted. Note that a
 * regenerable task is not considered finished until it has been explicitly
 * made non-regenerable (e.g. from a callback).
 */
int starpu_task_wait_for_all(void)
{
	unsigned nsched_ctxs = _starpu_get_nsched_ctxs();
	unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context();

	/* if there is no indication about which context to wait for,
	   we wait for all tasks submitted to starpu */
	if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
	{
		_STARPU_DEBUG("Waiting for all tasks\n");
		STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_all must not be called from a task or callback");
#ifdef HAVE_AYUDAME_H
		if (AYU_event) AYU_event(AYU_BARRIER, 0, NULL);
#endif
		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
		if(config->topology.nsched_ctxs == 1)
			starpu_task_wait_for_all_in_ctx(0);
		else
		{
			int s;
			for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
			{
				if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
				{
					starpu_task_wait_for_all_in_ctx(config->sched_ctxs[s].id);
				}
			}
		}

		return 0;
	}
	else
	{
		_STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id);
		return starpu_task_wait_for_all_in_ctx(sched_ctx_id);
	}
}

int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx)
{
	_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx);
#ifdef HAVE_AYUDAME_H
	/* TODO: improve Temanejo into knowing about contexts ... */
	if (AYU_event) AYU_event(AYU_BARRIER, 0, NULL);
#endif
	return 0;
}
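
/* A barrier sketch (an assumption; N and tasks[] are hypothetical): from the
 * main application thread, after submitting a batch of tasks, wait for all of
 * them before reading back the results.
 *
 *	for (i = 0; i < N; i++)
 *		starpu_task_submit(tasks[i]);
 *	starpu_task_wait_for_all();
 */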

/*
 * We wait until there is no ready task any more (i.e. StarPU will not be able
 * to progress any more).
 */
int starpu_task_wait_for_no_ready(void)
{
	STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_no_ready must not be called from a task or callback");

	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		_starpu_wait_for_no_ready_of_sched_ctx(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				_starpu_wait_for_no_ready_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}

	return 0;
}

void
starpu_drivers_request_termination(void)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();

	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	int nsubmitted = starpu_task_nsubmitted();
	config->submitting = 0;
	if (nsubmitted == 0)
	{
		ANNOTATE_HAPPENS_AFTER(&config->running);
		config->running = 0;
		ANNOTATE_HAPPENS_BEFORE(&config->running);
		STARPU_WMB();
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				_starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
}

int starpu_task_nsubmitted(void)
{
	int nsubmitted = 0;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		nsubmitted = _starpu_get_nsubmitted_tasks_of_sched_ctx(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				nsubmitted += _starpu_get_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	return nsubmitted;
}

int starpu_task_nready(void)
{
	int nready = 0;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		nready = starpu_sched_ctx_get_nready_tasks(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				nready += starpu_sched_ctx_get_nready_tasks(config->sched_ctxs[s].id);
			}
		}
	}
	return nready;
}
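
/* A monitoring sketch (an assumption): these counters can be polled from the
 * application, e.g. to display progress while tasks execute.
 *
 *	fprintf(stderr, "%d task(s) submitted, %d ready\n",
 *		starpu_task_nsubmitted(), starpu_task_nready());
 */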

void _starpu_initialize_current_task_key(void)
{
	STARPU_PTHREAD_KEY_CREATE(&current_task_key, NULL);
}

void _starpu_deinitialize_current_task_key(void)
{
	STARPU_PTHREAD_KEY_DELETE(current_task_key);
}

/* Return the task currently executed by the worker, or NULL if it is called
 * either from a thread that is not executing a task, or simply because there
 * is no task being executed at the moment. */
struct starpu_task *starpu_task_get_current(void)
{
	return (struct starpu_task *) STARPU_PTHREAD_GETSPECIFIC(current_task_key);
}
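
/* A callback-side sketch (an assumption; "my_callback" is a hypothetical task
 * callback): the current-task key is set around task execution, so a callback
 * can query which task it is running for, as _starpu_task_destroy above does.
 *
 *	void my_callback(void *arg)
 *	{
 *		struct starpu_task *t = starpu_task_get_current();
 *		if (t && t->name)
 *			fprintf(stderr, "callback of task %s\n", t->name);
 *	}
 */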

void _starpu_set_current_task(struct starpu_task *task)
{
	STARPU_PTHREAD_SETSPECIFIC(current_task_key, task);
}

#ifdef STARPU_OPENMP
/* Prepare the fields of the current task for accepting a new set of
 * dependencies in anticipation of becoming a continuation.
 *
 * When the task becomes 'continued', it will only be queued again when the new
 * set of dependencies is fulfilled. */
void _starpu_task_prepare_for_continuation(void)
{
	_starpu_job_prepare_for_continuation(_starpu_get_job_associated_to_task(starpu_task_get_current()));
}

void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit,
		void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg)
{
	_starpu_job_prepare_for_continuation_ext(_starpu_get_job_associated_to_task(starpu_task_get_current()),
		continuation_resubmit, continuation_callback_on_sleep, continuation_callback_on_sleep_arg);
}

void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg)
{
	_starpu_job_set_omp_cleanup_callback(_starpu_get_job_associated_to_task(task),
		omp_cleanup_callback, omp_cleanup_callback_arg);
}
#endif

/*
 * Returns 0 if the task does not use any multiformat handle, 1 otherwise.
 */
int
_starpu_task_uses_multiformat_handles(struct starpu_task *task)
{
	unsigned i;
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	for (i = 0; i < nbuffers; i++)
	{
		if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i)))
			return 1;
	}

	return 0;
}

/*
 * Checks whether the given handle needs to be converted in order to be used on
 * the node given as the second argument.
 */
int
_starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
				     unsigned int node)
{
	return _starpu_handle_needs_conversion_task_for_arch(handle, starpu_node_get_kind(node));
}

int
_starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
					      enum starpu_node_kind node_kind)
{
	/*
	 * Here, we assume that CUDA devices and OpenCL devices use the
	 * same data structure. A conversion is only needed when moving
	 * data from a CPU to a GPU, or the other way around.
	 */
	switch (node_kind)
	{
		case STARPU_CPU_RAM:
			switch(starpu_node_get_kind(handle->mf_node))
			{
				case STARPU_CPU_RAM:
					return 0;
				case STARPU_CUDA_RAM:	/* Fall through */
				case STARPU_OPENCL_RAM:
				case STARPU_MIC_RAM:
				case STARPU_SCC_RAM:
					return 1;
				default:
					STARPU_ABORT();
			}
			break;
		case STARPU_CUDA_RAM:	/* Fall through */
		case STARPU_OPENCL_RAM:
		case STARPU_MIC_RAM:
		case STARPU_SCC_RAM:
			switch(starpu_node_get_kind(handle->mf_node))
			{
				case STARPU_CPU_RAM:
					return 1;
				case STARPU_CUDA_RAM:
				case STARPU_OPENCL_RAM:
				case STARPU_MIC_RAM:
				case STARPU_SCC_RAM:
					return 0;
				default:
					STARPU_ABORT();
			}
			break;
		default:
			STARPU_ABORT();
	}

	/* this statement should never be reached */
	return -EINVAL;
}

void starpu_task_set_implementation(struct starpu_task *task, unsigned impl)
{
	_starpu_get_job_associated_to_task(task)->nimpl = impl;
}

unsigned starpu_task_get_implementation(struct starpu_task *task)
{
	return _starpu_get_job_associated_to_task(task)->nimpl;
}

static starpu_pthread_t watchdog_thread;

/* Check from time to time that StarPU does finish some tasks */
static void *watchdog_func(void *arg)
{
	struct timespec ts;
	char *timeout_env = arg;
	unsigned long long timeout;

#ifdef _MSC_VER
	timeout = (unsigned long long) _atoi64(timeout_env);
#else
	timeout = atoll(timeout_env);
#endif
	ts.tv_sec = timeout / 1000000;
	ts.tv_nsec = (timeout % 1000000) * 1000;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();

	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	while (_starpu_machine_is_running())
	{
		int last_nsubmitted = starpu_task_nsubmitted();
		config->watchdog_ok = 0;
		STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);

		_starpu_sleep(ts);

		STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
		if (!config->watchdog_ok && last_nsubmitted
				&& last_nsubmitted == starpu_task_nsubmitted())
		{
			fprintf(stderr,"The StarPU watchdog detected that no task finished for %u.%06us (can be configured through STARPU_WATCHDOG_TIMEOUT)\n", (unsigned)ts.tv_sec, (unsigned)ts.tv_nsec/1000);
			if (getenv("STARPU_WATCHDOG_CRASH"))
			{
				fprintf(stderr,"Crashing the process\n");
				raise(SIGABRT);
			}
			else
				fprintf(stderr,"Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
		}
		/* Only shout again after another period */
		config->watchdog_ok = 1;
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
	return NULL;
}

void _starpu_watchdog_init(void)
{
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	char *timeout_env = getenv("STARPU_WATCHDOG_TIMEOUT");

	STARPU_PTHREAD_MUTEX_INIT(&config->submitted_mutex, NULL);

	if (!timeout_env)
		return;

	STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, timeout_env);
}

void _starpu_watchdog_shutdown(void)
{
	char *timeout_env = getenv("STARPU_WATCHDOG_TIMEOUT");

	if (!timeout_env)
		return;

	starpu_pthread_join(watchdog_thread, NULL);
}
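
/* A configuration sketch (shell; the values are hypothetical): as parsed in
 * watchdog_func above, STARPU_WATCHDOG_TIMEOUT is a period in microseconds,
 * and setting STARPU_WATCHDOG_CRASH makes the watchdog abort the process
 * instead of only printing a warning.
 *
 *	$ STARPU_WATCHDOG_TIMEOUT=10000000 STARPU_WATCHDOG_CRASH=1 ./my_app
 */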