/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2009-2013 Université de Bordeaux 1
 * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
 * Copyright (C) 2011 Télécom-SudParis
 * Copyright (C) 2011 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <starpu.h>
#include <starpu_profiling.h>
#include <core/workers.h>
#include <core/sched_ctx.h>
#include <core/jobs.h>
#include <core/task.h>
#include <core/task_bundle.h>
#include <common/config.h>
#include <common/utils.h>
#include <common/fxt.h>
#include <profiling/profiling.h>
#include <profiling/bound.h>
#include <math.h>
#include <string.h>
#include <core/debug.h>
#include <time.h>
#ifdef STARPU_HAVE_WINDOWS
#include <windows.h>
#endif

/* XXX this should be reinitialized when StarPU is shut down (or we should
 * make sure that no task remains!) */
/* TODO we could make this hierarchical to avoid contention? */
//static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER;

/* This key stores the task currently handled by the thread. Note that we
 * cannot use the worker structure to store that information, because it is
 * possible that we have a task with a NULL codelet, which means its callback
 * could be executed by a user thread as well. */
static starpu_pthread_key_t current_task_key;

void starpu_task_init(struct starpu_task *task)
{
	/* TODO: memcpy from a template instead? benchmark it */
	STARPU_ASSERT(task);
	/* As most of the fields must be initialised to NULL or zero, let's
	 * put 0 everywhere */
	memset(task, 0, sizeof(struct starpu_task));
	task->sequential_consistency = 1;
	/* Now we can initialise the fields which require a custom value */
#if STARPU_DEFAULT_PRIO != 0
	task->priority = STARPU_DEFAULT_PRIO;
#endif
	task->detach = 1;
#if STARPU_TASK_INVALID != 0
	task->status = STARPU_TASK_INVALID;
#endif
	task->predicted = NAN;
	task->predicted_transfer = NAN;
	task->magic = 42;
	task->sched_ctx = STARPU_NMAX_SCHED_CTXS;
	task->flops = 0.0;
	task->scheduled = 0;
	task->prefetched = 0;
	task->dyn_handles = NULL;
	task->dyn_interfaces = NULL;
	task->name = NULL;
}
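
/* Usage sketch (illustrative, not part of this file): initialising a
 * statically allocated task. "my_codelet" is a hypothetical user codelet.
 *
 *	struct starpu_task task;
 *	starpu_task_init(&task);
 *	task.cl = &my_codelet;
 *	task.synchronous = 1;
 *	starpu_task_submit(&task);
 *	starpu_task_clean(&task);	(defined below; frees the task's
 *					 resources but not the task itself)
 */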

/* Free all the resources allocated for a task, without deallocating the task
 * structure itself (this is required for statically allocated tasks).
 * All values previously set by the user, like codelet and handles, remain
 * unchanged. */
void starpu_task_clean(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	/* If a buffer was allocated to store the profiling info, we free it. */
	if (task->profiling_info)
	{
		free(task->profiling_info);
		task->profiling_info = NULL;
	}
	/* In case the task is (still) part of a bundle */
	starpu_task_bundle_t bundle = task->bundle;
	if (bundle)
		starpu_task_bundle_remove(bundle, task);
	if (task->dyn_handles)
	{
		free(task->dyn_handles);
		task->dyn_handles = NULL;
		free(task->dyn_interfaces);
		task->dyn_interfaces = NULL;
	}
	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
	if (j)
	{
		_starpu_job_destroy(j);
		task->starpu_private = NULL;
	}
}

struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create(void)
{
	struct starpu_task *task;
	task = (struct starpu_task *) malloc(sizeof(struct starpu_task));
	STARPU_ASSERT(task);
	starpu_task_init(task);
	/* Dynamically allocated tasks are destroyed by default */
	task->destroy = 1;
	return task;
}
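
/* Usage sketch (illustrative): a dynamically allocated task. Since destroy
 * defaults to 1 here, the task is freed automatically once it has
 * terminated, so no explicit starpu_task_destroy call is needed.
 * "my_codelet" is a hypothetical user codelet.
 *
 *	struct starpu_task *task = starpu_task_create();
 *	task->cl = &my_codelet;
 *	int ret = starpu_task_submit(task);
 *	if (ret == -ENODEV)
 *		(no worker can execute this codelet)
 */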

/* Free the resources allocated during starpu_task_create. This function can
 * be called automatically after the execution of a task by setting the
 * "destroy" flag of the starpu_task structure (default behaviour). Calling
 * this function on a statically allocated task results in undefined
 * behaviour. */
void _starpu_task_destroy(struct starpu_task *task)
{
	/* If starpu_task_destroy is called in a callback, we just set the
	 * destroy flag. The task will be destroyed after the callback
	 * returns. */
	if (task == starpu_task_get_current()
	    && _starpu_get_local_worker_status() == STATUS_CALLBACK)
	{
		task->destroy = 1;
	}
	else
	{
		starpu_task_clean(task);
		/* TODO handle the case of task with detach = 1 and destroy = 1 */
		/* TODO handle the case of non terminated tasks -> return -EINVAL */
		/* Does the user want StarPU to release cl_arg? */
		if (task->cl_arg_free)
			free(task->cl_arg);
		/* Does the user want StarPU to release callback_arg? */
		if (task->callback_arg_free)
			free(task->callback_arg);
		/* Does the user want StarPU to release prologue_callback_arg? */
		if (task->prologue_callback_arg_free)
			free(task->prologue_callback_arg);
		free(task);
	}
}

void starpu_task_destroy(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->destroy || !task->detach, "starpu_task_destroy must not be called for a task with destroy = 1 and detach = 1");
	_starpu_task_destroy(task);
}

int starpu_task_finished(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "starpu_task_finished can only be called on tasks with detach = 0");
	return _starpu_job_finished(_starpu_get_job_associated_to_task(task));
}

int starpu_task_wait(struct starpu_task *task)
{
	_STARPU_LOG_IN();
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0");
	if (task->detach || task->synchronous)
	{
		_STARPU_DEBUG("Task is detached or synchronous. Waiting returns immediately\n");
		_STARPU_LOG_OUT_TAG("einval");
		return -EINVAL;
	}
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
	{
		_STARPU_LOG_OUT_TAG("edeadlk");
		return -EDEADLK;
	}
	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
	_starpu_wait_job(j);
	/* as this is a synchronous task, freeing the job structure was
	 * deferred */
	if (task->destroy)
		_starpu_task_destroy(task);
	_STARPU_LOG_OUT();
	return 0;
}
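
/* Usage sketch (illustrative): waiting for a non-detached task; detach must
 * be cleared before submission, otherwise starpu_task_wait returns -EINVAL.
 * "my_codelet" is a hypothetical user codelet.
 *
 *	struct starpu_task *task = starpu_task_create();
 *	task->cl = &my_codelet;
 *	task->detach = 0;
 *	starpu_task_submit(task);
 *	starpu_task_wait(task);		(blocks until the task terminates)
 */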

struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
{
	STARPU_ASSERT(task);
	if (!task->starpu_private)
	{
		struct _starpu_job *j = _starpu_job_create(task);
		task->starpu_private = j;
	}
	return (struct _starpu_job *)task->starpu_private;
}

/* NB in case we have a regenerable task, it is possible that the job was
 * already counted. */
int _starpu_submit_job(struct _starpu_job *j)
{
	struct starpu_task *task = j->task;
	_STARPU_LOG_IN();
	/* notify bound computation of a new task */
	_starpu_bound_record(j);
	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
#ifdef STARPU_USE_SC_HYPERVISOR
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx);
	if(sched_ctx != NULL && j->task->sched_ctx != _starpu_get_initial_sched_ctx()->id && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS
	   && sched_ctx->perf_counters != NULL)
	{
		struct starpu_perfmodel_arch arch;
		arch.type = STARPU_CPU_WORKER;
		arch.devid = 0;
		arch.ncore = 0;
		_starpu_compute_buffers_footprint(j->task->cl->model, &arch, 0, j);
		int i;
		size_t data_size = 0;
		for(i = 0; i < STARPU_NMAXBUFS; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			if (handle != NULL)
				data_size += _starpu_data_get_size(handle);
		}
		_STARPU_TRACE_HYPERVISOR_BEGIN();
		sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size);
		_STARPU_TRACE_HYPERVISOR_END();
	}
#endif /* STARPU_USE_SC_HYPERVISOR */
	/* We retain the handle reference count */
	if (task->cl)
	{
		unsigned i;
		for (i=0; i<task->cl->nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			_starpu_spin_lock(&handle->header_lock);
			handle->busy_count++;
			_starpu_spin_unlock(&handle->header_lock);
		}
	}
	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
	/* We need to atomically set submitted to 1 and check dependencies,
	 * since this is concurrent with _starpu_notify_cg */
	j->terminated = 0;
	if (!j->submitted)
		j->submitted = 1;
	else
		j->submitted = 2;
	int ret = _starpu_enforce_deps_and_schedule(j);
	_STARPU_LOG_OUT();
	return ret;
}

/* Note: this is racy, so valgrind would complain. But since we'll always put
 * the same values, this is not a problem. */
void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
{
	if (!cl)
		return;
	int is_where_unset = cl->where == 0;
	/* Check deprecated and unset fields (where, <device>_func,
	 * <device>_funcs) */
	/* CPU */
	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS && cl->cpu_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both cpu_func and cpu_funcs are set. Ignoring cpu_func.\n");
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
	{
		cl->cpu_funcs[0] = cl->cpu_func;
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (cl->cpu_funcs[0] && cl->cpu_func == 0)
	{
		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
	}
	if (cl->cpu_funcs[0] && is_where_unset)
	{
		cl->where |= STARPU_CPU;
	}
	/* CUDA */
	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both cuda_func and cuda_funcs are set. Ignoring cuda_func.\n");
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS)
	{
		cl->cuda_funcs[0] = cl->cuda_func;
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (cl->cuda_funcs[0] && cl->cuda_func == 0)
	{
		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
	}
	if (cl->cuda_funcs[0] && is_where_unset)
	{
		cl->where |= STARPU_CUDA;
	}
	/* OpenCL */
	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS && cl->opencl_funcs[0])
	{
		_STARPU_DISP("[warning] [struct starpu_codelet] both opencl_func and opencl_funcs are set. Ignoring opencl_func.\n");
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS)
	{
		cl->opencl_funcs[0] = cl->opencl_func;
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (cl->opencl_funcs[0] && cl->opencl_func == 0)
	{
		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
	}
	if (cl->opencl_funcs[0] && is_where_unset)
	{
		cl->where |= STARPU_OPENCL;
	}
	if (cl->mic_funcs[0] && is_where_unset)
	{
		cl->where |= STARPU_MIC;
	}
	if (cl->scc_funcs[0] && is_where_unset)
	{
		cl->where |= STARPU_SCC;
	}
	if (cl->cpu_funcs_name[0] && is_where_unset)
	{
		cl->where |= STARPU_MIC|STARPU_SCC;
	}
}
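
/* Illustration of what this normalisation does: a codelet written with the
 * deprecated single-implementation field,
 *
 *	struct starpu_codelet cl =
 *	{
 *		.cpu_func = my_cpu_impl,	(deprecated field)
 *		.nbuffers = 1,
 *	};
 *
 * is rewritten as if the current multi-implementation fields had been used,
 * with "where" inferred from the implementations that are present:
 *
 *	cl.cpu_funcs[0] = my_cpu_impl;
 *	cl.cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
 *	cl.where |= STARPU_CPU;
 *
 * ("my_cpu_impl" is a hypothetical user function.) */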

void _starpu_task_check_deprecated_fields(struct starpu_task *task)
{
	if (task->cl)
	{
		unsigned i;
		for(i=0; i<STARPU_MIN(task->cl->nbuffers, STARPU_NMAXBUFS) ; i++)
		{
			if (task->buffers[i].handle && task->handles[i])
			{
				_STARPU_DISP("[error] [struct starpu_task] task->buffers[%u] and task->handles[%u] are both set. They are mutually exclusive.\n", i, i);
				STARPU_ASSERT(task->buffers[i].mode == task->cl->modes[i]);
				STARPU_ABORT();
			}
			if (task->buffers[i].handle)
			{
				task->handles[i] = task->buffers[i].handle;
				task->cl->modes[i] = task->buffers[i].mode;
			}
		}
	}
}

/* application should submit new tasks to StarPU through this function */
int starpu_task_submit(struct starpu_task *task)
{
	_STARPU_LOG_IN();
	STARPU_ASSERT(task);
	STARPU_ASSERT_MSG(task->magic == 42, "Tasks must be created with starpu_task_create, or initialized with starpu_task_init.");
	int ret;
	unsigned is_sync = task->synchronous;
	starpu_task_bundle_t bundle = task->bundle;
	/* Internally, StarPU manipulates a struct _starpu_job * which is a
	 * wrapper around the task structure; it is possible that this job
	 * structure was already allocated. */
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	if (j->internal)
	{
		// Internal tasks are submitted to the initial context
		task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we use the current context
		task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}
	if (is_sync)
	{
		/* It may not be possible to submit a synchronous
		 * (blocking) task from the current thread */
		if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		{
			_STARPU_LOG_OUT_TAG("EDEADLK");
			return -EDEADLK;
		}
		task->detach = 0;
	}
	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);
	if (task->cl)
	{
		unsigned i;
		/* Check buffers */
		if (task->dyn_handles == NULL)
			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d). Either use the --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
		if (task->dyn_handles)
		{
			task->dyn_interfaces = malloc(task->cl->nbuffers * sizeof(void *));
		}
		for (i = 0; i < task->cl->nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			/* Make sure handles are not partitioned */
			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task");
			/* Provide the home interface for now if any, for the
			 * can_execute hooks */
			if (handle->home_node != -1)
				_STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i);
		}
		/* Check that the types of worker required by the task exist */
		if (!_starpu_worker_exists(task))
		{
			_STARPU_LOG_OUT_TAG("ENODEV");
			return -ENODEV;
		}
		/* In case we require that the task be explicitly executed on
		 * a specific worker, we make sure that this worker is
		 * actually able to execute the task. */
		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
		{
			_STARPU_LOG_OUT_TAG("ENODEV");
			return -ENODEV;
		}
		_starpu_detect_implicit_data_deps(task);
		if (task->cl->model && task->cl->model->symbol)
			_starpu_load_perfmodel(task->cl->model);
		if (task->cl->power_model && task->cl->power_model->symbol)
			_starpu_load_perfmodel(task->cl->power_model);
	}
	if (bundle)
	{
		/* We need to make sure that models for other tasks of the
		 * bundle are also loaded, so the scheduler can estimate the
		 * duration of the whole bundle */
		STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
		struct _starpu_task_bundle_entry *entry;
		entry = bundle->list;
		while (entry)
		{
			if (entry->task->cl->model && entry->task->cl->model->symbol)
				_starpu_load_perfmodel(entry->task->cl->model);
			if (entry->task->cl->power_model && entry->task->cl->power_model->symbol)
				_starpu_load_perfmodel(entry->task->cl->power_model);
			entry = entry->next;
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
	}
	/* If profiling is activated, we allocate a structure to store the
	 * appropriate info. */
	struct starpu_profiling_task_info *info;
	int profiling = starpu_profiling_status_get();
	info = _starpu_allocate_profiling_info_if_needed(task);
	task->profiling_info = info;
	/* The task is considered blocked until we are sure that no
	 * dependency remains. */
	task->status = STARPU_TASK_BLOCKED;
	if (profiling)
		_starpu_clock_gettime(&info->submit_time);
	ret = _starpu_submit_job(j);
	if (is_sync)
	{
		_starpu_wait_job(j);
		if (task->destroy)
			_starpu_task_destroy(task);
	}
	_STARPU_LOG_OUT();
	return ret;
}
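
/* Usage sketch (illustrative): the common submission path. The kernel, the
 * codelet and the data handle below are hypothetical user code, and
 * STARPU_RW is assumed to be the desired access-mode constant.
 *
 *	static void scal_cpu(void *buffers[], void *arg) { ... }
 *
 *	static struct starpu_codelet cl =
 *	{
 *		.cpu_funcs = { scal_cpu },
 *		.nbuffers = 1,
 *		.modes = { STARPU_RW },
 *	};
 *
 *	struct starpu_task *task = starpu_task_create();
 *	task->cl = &cl;
 *	task->handles[0] = handle;	(a previously registered data handle)
 *	int ret = starpu_task_submit(task);
 *	if (ret == -ENODEV)
 *		(no worker can execute this codelet)
 */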

int _starpu_task_submit_internally(struct starpu_task *task)
{
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	j->internal = 1;
	return starpu_task_submit(task);
}

/* application can submit new tasks to a specific context through this
 * function */
int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id)
{
	task->sched_ctx = sched_ctx_id;
	return starpu_task_submit(task);
}

/* The StarPU core can submit tasks directly to the scheduler or a worker,
 * skipping dependencies completely (when it knows what it is doing). */
int _starpu_task_submit_nodeps(struct starpu_task *task)
{
	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);
	if (task->cl)
	{
		if (task->cl->model)
			_starpu_load_perfmodel(task->cl->model);
		if (task->cl->power_model)
			_starpu_load_perfmodel(task->cl->power_model);
	}
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	if (j->internal)
	{
		// Internal tasks are submitted to the initial context
		j->task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we use the current context
		j->task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}
	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
	j->submitted = 1;
	if (task->cl)
	{
		/* This would be done by data dependencies checking */
		unsigned i;
		for (i=0 ; i<task->cl->nbuffers ; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
			_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
			enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
			_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
		}
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
	return _starpu_push_task(j);
}

/*
 * worker->sched_mutex must be locked when calling this function.
 */
int _starpu_task_submit_conversion_task(struct starpu_task *task,
					unsigned int workerid)
{
	STARPU_ASSERT(task->cl);
	STARPU_ASSERT(task->execute_on_a_specific_worker);
	_starpu_task_check_deprecated_fields(task);
	_starpu_codelet_check_deprecated_fields(task->cl);
	/* We should factorize this */
	if (task->cl->model)
		_starpu_load_perfmodel(task->cl->model);
	if (task->cl->power_model)
		_starpu_load_perfmodel(task->cl->power_model);
	/* We retain the handle reference count */
	unsigned i;
	for (i=0; i<task->cl->nbuffers; i++)
	{
		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
		_starpu_spin_lock(&handle->header_lock);
		handle->busy_count++;
		_starpu_spin_unlock(&handle->header_lock);
	}
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	if (j->internal)
	{
		// Internal tasks are submitted to the initial context
		j->task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
	}
	else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS)
	{
		// If the task has not specified a context, we use the current context
		j->task->sched_ctx = _starpu_sched_ctx_get_current_context();
	}
	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
	j->submitted = 1;
	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops);
	for (i=0 ; i<task->cl->nbuffers ; i++)
	{
		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
		enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
	}
	_STARPU_LOG_IN();
	task->status = STARPU_TASK_READY;
	_starpu_profiling_set_task_push_start_time(task);
	unsigned node = starpu_worker_get_memory_node(workerid);
	if (starpu_get_prefetch_flag())
		starpu_prefetch_task_input_on_node(task, node);
	struct _starpu_worker *worker;
	worker = _starpu_get_worker_struct(workerid);
	starpu_task_list_push_back(&worker->local_tasks, task);
	_starpu_profiling_set_task_push_end_time(task);
	_STARPU_LOG_OUT();
	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
	return 0;
}

void starpu_codelet_init(struct starpu_codelet *cl)
{
	memset(cl, 0, sizeof(struct starpu_codelet));
}
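
/* Usage sketch (illustrative): zero-initialising a codelet before filling it
 * in, e.g. for codelets built at run time. "my_cpu_impl" is a hypothetical
 * user function, and STARPU_RW is assumed to be the desired access mode.
 *
 *	struct starpu_codelet cl;
 *	starpu_codelet_init(&cl);
 *	cl.cpu_funcs[0] = my_cpu_impl;
 *	cl.nbuffers = 1;
 *	cl.modes[0] = STARPU_RW;
 *	cl.name = "my_codelet";
 */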

void starpu_codelet_display_stats(struct starpu_codelet *cl)
{
	unsigned worker;
	unsigned nworkers = starpu_worker_get_count();
	if (cl->name)
		fprintf(stderr, "Statistics for codelet %s\n", cl->name);
	else if (cl->model && cl->model->symbol)
		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);
	unsigned long total = 0;
	for (worker = 0; worker < nworkers; worker++)
		total += cl->per_worker_stats[worker];
	for (worker = 0; worker < nworkers; worker++)
	{
		char name[32];
		starpu_worker_get_name(worker, name, 32);
		fprintf(stderr, "\t%s -> %lu / %lu (%2.2f %%)\n", name, cl->per_worker_stats[worker], total, (100.0f*cl->per_worker_stats[worker])/total);
	}
}

/*
 * We wait for all the tasks that have already been submitted. Note that a
 * regenerable task is not considered finished until it has been explicitly
 * set as non-regenerable again (e.g. from a callback).
 */
int starpu_task_wait_for_all(void)
{
	unsigned nsched_ctxs = _starpu_get_nsched_ctxs();
	unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context();
	/* if there is no indication about which context to wait for,
	 * we wait for all tasks submitted to StarPU */
	if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
	{
		_STARPU_DEBUG("Waiting for all tasks\n");
		if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			return -EDEADLK;
#ifdef HAVE_AYUDAME_H
		if (AYU_event) AYU_event(AYU_BARRIER, 0, NULL);
#endif
		struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
		if(config->topology.nsched_ctxs == 1)
			starpu_task_wait_for_all_in_ctx(0);
		else
		{
			int s;
			for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
			{
				if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
				{
					starpu_task_wait_for_all_in_ctx(config->sched_ctxs[s].id);
				}
			}
		}
		return 0;
	}
	else
	{
		_STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id);
		return starpu_task_wait_for_all_in_ctx(sched_ctx_id);
	}
}
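
/* Usage sketch (illustrative): a typical teardown sequence, waiting for all
 * asynchronously submitted tasks before shutting StarPU down.
 * starpu_shutdown is assumed to be the usual StarPU termination call, and
 * "tasks"/"ntasks" are a hypothetical task array and its size.
 *
 *	unsigned i;
 *	for (i = 0; i < ntasks; i++)
 *		starpu_task_submit(tasks[i]);
 *	starpu_task_wait_for_all();
 *	starpu_shutdown();
 */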

int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx)
{
	_starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx);
#ifdef HAVE_AYUDAME_H
	/* TODO: improve Temanejo into knowing about contexts ... */
	if (AYU_event) AYU_event(AYU_BARRIER, 0, NULL);
#endif
	return 0;
}

/*
 * We wait until there is no ready task any more (i.e. StarPU will not be
 * able to progress any more).
 */
int starpu_task_wait_for_no_ready(void)
{
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		return -EDEADLK;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		_starpu_wait_for_no_ready_of_sched_ctx(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				_starpu_wait_for_no_ready_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	return 0;
}

void
starpu_drivers_request_termination(void)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	int nsubmitted = starpu_task_nsubmitted();
	config->submitting = 0;
	if (nsubmitted == 0)
	{
		ANNOTATE_HAPPENS_AFTER(&config->running);
		config->running = 0;
		ANNOTATE_HAPPENS_BEFORE(&config->running);
		STARPU_WMB();
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				_starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
}

int starpu_task_nsubmitted(void)
{
	int nsubmitted = 0;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		nsubmitted = _starpu_get_nsubmitted_tasks_of_sched_ctx(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				nsubmitted += _starpu_get_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	return nsubmitted;
}

int starpu_task_nready(void)
{
	int nready = 0;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	if(config->topology.nsched_ctxs == 1)
		nready = starpu_get_nready_tasks_of_sched_ctx(0);
	else
	{
		int s;
		for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++)
		{
			if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
			{
				nready += starpu_get_nready_tasks_of_sched_ctx(config->sched_ctxs[s].id);
			}
		}
	}
	return nready;
}

void _starpu_initialize_current_task_key(void)
{
	STARPU_PTHREAD_KEY_CREATE(&current_task_key, NULL);
}

/* Return the task currently executed by the worker, or NULL if this is
 * called either from a thread that is not a task or simply because there is
 * no task being executed at the moment. */
struct starpu_task *starpu_task_get_current(void)
{
	return (struct starpu_task *) STARPU_PTHREAD_GETSPECIFIC(current_task_key);
}
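
/* Usage sketch (illustrative): querying the current task from inside a
 * kernel implementation, e.g. to inspect the scheduling context it was
 * submitted to. "my_cpu_impl" is a hypothetical user function.
 *
 *	static void my_cpu_impl(void *buffers[], void *arg)
 *	{
 *		struct starpu_task *task = starpu_task_get_current();
 *		unsigned ctx = task->sched_ctx;
 *		...
 *	}
 */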

void _starpu_set_current_task(struct starpu_task *task)
{
	STARPU_PTHREAD_SETSPECIFIC(current_task_key, task);
}

/*
 * Returns 0 if the task does not use any multiformat handle, 1 otherwise.
 */
int
_starpu_task_uses_multiformat_handles(struct starpu_task *task)
{
	unsigned i;
	for (i = 0; i < task->cl->nbuffers; i++)
	{
		if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i)))
			return 1;
	}
	return 0;
}

/*
 * Checks whether the given handle needs to be converted in order to be used
 * on the node given as the second argument.
 */
int
_starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
				     unsigned int node)
{
	return _starpu_handle_needs_conversion_task_for_arch(handle, starpu_node_get_kind(node));
}

int
_starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
					      enum starpu_node_kind node_kind)
{
	/*
	 * Here, we assume that CUDA devices and OpenCL devices use the
	 * same data structure. A conversion is only needed when moving
	 * data from a CPU to a GPU, or the other way around.
	 */
	switch (node_kind)
	{
	case STARPU_CPU_RAM:
		switch(starpu_node_get_kind(handle->mf_node))
		{
		case STARPU_CPU_RAM:
			return 0;
		case STARPU_CUDA_RAM: /* Fall through */
		case STARPU_OPENCL_RAM:
		case STARPU_MIC_RAM:
		case STARPU_SCC_RAM:
			return 1;
		default:
			STARPU_ABORT();
		}
		break;
	case STARPU_CUDA_RAM: /* Fall through */
	case STARPU_OPENCL_RAM:
	case STARPU_MIC_RAM:
	case STARPU_SCC_RAM:
		switch(starpu_node_get_kind(handle->mf_node))
		{
		case STARPU_CPU_RAM:
			return 1;
		case STARPU_CUDA_RAM:
		case STARPU_OPENCL_RAM:
		case STARPU_MIC_RAM:
		case STARPU_SCC_RAM:
			return 0;
		default:
			STARPU_ABORT();
		}
		break;
	default:
		STARPU_ABORT();
	}
	/* this statement should never be reached */
	return -EINVAL;
}

starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
{
	return cl->cpu_funcs[nimpl];
}

starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
{
	return cl->cuda_funcs[nimpl];
}

starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
{
	return cl->opencl_funcs[nimpl];
}

void starpu_task_set_implementation(struct starpu_task *task, unsigned impl)
{
	_starpu_get_job_associated_to_task(task)->nimpl = impl;
}

unsigned starpu_task_get_implementation(struct starpu_task *task)
{
	return _starpu_get_job_associated_to_task(task)->nimpl;
}

starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
{
	return cl->mic_funcs[nimpl];
}

starpu_scc_func_t _starpu_task_get_scc_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
{
	return cl->scc_funcs[nimpl];
}

char *_starpu_task_get_cpu_name_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
{
	return cl->cpu_funcs_name[nimpl];
}

static starpu_pthread_t watchdog_thread;

/* Check from time to time that StarPU does finish some tasks */
static void *watchdog_func(void *foo STARPU_ATTRIBUTE_UNUSED)
{
	struct timespec ts;
	char *timeout_env;
	unsigned long long timeout;
	if (! (timeout_env = getenv("STARPU_WATCHDOG_TIMEOUT")))
		return NULL;
	timeout = atoll(timeout_env);
	ts.tv_sec = timeout / 1000000;
	ts.tv_nsec = (timeout % 1000000) * 1000;
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
	while (_starpu_machine_is_running())
	{
		int last_nsubmitted = starpu_task_nsubmitted();
		config->watchdog_ok = 0;
		STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
		_starpu_sleep(ts);
		STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex);
		if (!config->watchdog_ok && last_nsubmitted
		    && last_nsubmitted == starpu_task_nsubmitted())
		{
			fprintf(stderr,"The StarPU watchdog detected that no task finished for %u.%06us (can be configured through STARPU_WATCHDOG_TIMEOUT)\n", (unsigned)ts.tv_sec, (unsigned)ts.tv_nsec/1000);
			if (getenv("STARPU_WATCHDOG_CRASH"))
			{
				fprintf(stderr,"Crashing the process\n");
				assert(0);
			}
			else
				fprintf(stderr,"Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
		}
	}
	STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex);
	return NULL;
}
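
/* Usage sketch (illustrative): the watchdog is driven entirely by
 * environment variables; as the conversion above shows, the timeout is
 * expressed in microseconds. "./my_app" is a hypothetical application
 * linked against StarPU.
 *
 *	$ STARPU_WATCHDOG_TIMEOUT=1000000 ./my_app
 *		(warn whenever no task completes for one second)
 *	$ STARPU_WATCHDOG_TIMEOUT=1000000 STARPU_WATCHDOG_CRASH=1 ./my_app
 *		(abort the process instead of only warning)
 */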

void _starpu_watchdog_init(void)
{
	struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config();
	STARPU_PTHREAD_MUTEX_INIT(&config->submitted_mutex, NULL);
	STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, NULL);
}

void _starpu_watchdog_shutdown(void)
{
	starpu_pthread_join(watchdog_thread, NULL);
}