component_worker.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2017 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2014, 2015, 2016, 2017 CNRS
  5. * Copyright (C) 2011 Télécom-SudParis
  6. * Copyright (C) 2011-2013, 2017 INRIA
  7. * Copyright (C) 2013 Simon Archipoff
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <starpu_sched_component.h>
  21. #include <sched_policies/sched_component.h>
  22. #include <core/workers.h>
  23. #include <float.h>
  24. /* data structure for worker's queue look like this :
  25. * W = worker
  26. * T = simple task
  27. * P = parallel task
  28. *
  29. *
  30. * P--P T
  31. * | | \|
  32. * P--P T T P T
  33. * | | | | | |
  34. * T T P--P--P T
  35. * | | | | | |
  36. * W W W W W W
  37. *
  38. *
  39. *
   * It is possible for a _starpu_task_grid to hold no task, because the task
   * has already been popped by a worker.
  42. *
  43. * N = no task
  44. *
  45. * T T T
  46. * | | |
  47. * P--N--N
  48. * | | |
  49. * W W W
  50. *
  51. *
   * This API is a little asymmetric: struct _starpu_task_grid objects are allocated by the caller and freed by the data structure.
  53. *
  54. */
  55. /******************************************************************************
  56. * Worker Components' Data Structures *
  57. *****************************************************************************/
  /* One cell of the task grid pictured above. */
  struct _starpu_task_grid
  {
      /* Task carried by this cell. It may be NULL when a worker has already
       * popped it: cells of a parallel task are kept in place so that the
       * pointers held by their neighbours never dangle.
       */
      struct starpu_task *task;

      /* Links inside one worker's queue. */
      struct _starpu_task_grid *up;
      struct _starpu_task_grid *down;
      /* Links towards the cells of the same parallel task. */
      struct _starpu_task_grid *left;
      struct _starpu_task_grid *right;

      /* Number of tasks that workers still have to pop.
       * The leftmost cell (the one whose .left is NULL) owns the ntasks
       * counter; every other cell points to that counter through pntasks.
       *
       * When the counter reaches 0, the left and right members of all the
       * cells are set to NULL, which means the cells can be freed.
       */
      union
      {
          int ntasks;
          int *pntasks;
      };
  };
  78. /* list->exp_start, list->exp_len, list-exp_end and list->ntasks
  79. * are updated by starpu_sched_component_worker_push_task(component, task) and pre_exec_hook
  80. */
  81. struct _starpu_worker_task_list
  82. {
  83. double exp_start, exp_len, exp_end, pipeline_len;
  84. struct _starpu_task_grid *first, *last;
  85. unsigned ntasks;
  86. starpu_pthread_mutex_t mutex;
  87. };
  88. /* This is called when a transfer request is actually pushed to the worker */
  89. static void _starpu_worker_task_list_transfer_started(struct _starpu_worker_task_list *l, struct starpu_task *task)
  90. {
  91. double transfer_model = task->predicted_transfer;
  92. if (isnan(transfer_model))
  93. return;
  94. /* We now start the transfer, move it from predicted to pipelined */
  95. l->exp_len -= transfer_model;
  96. l->pipeline_len += transfer_model;
  97. l->exp_start = starpu_timing_now() + l->pipeline_len;
  98. l->exp_end = l->exp_start + l->exp_len;
  99. }
  100. #ifdef STARPU_DEVEL
  101. #warning FIXME: merge with deque_modeling_policy_data_aware
  102. #endif
  103. /* This is called when a task is actually pushed to the worker (i.e. the transfer finished */
  104. static void _starpu_worker_task_list_started(struct _starpu_worker_task_list *l, struct starpu_task *task)
  105. {
  106. double model = task->predicted;
  107. double transfer_model = task->predicted_transfer;
  108. if(!isnan(transfer_model))
  109. /* The transfer is over, remove it from pipelined */
  110. l->pipeline_len -= transfer_model;
  111. if(!isnan(model))
  112. {
  113. /* We now start the computation, move it from predicted to pipelined */
  114. l->exp_len -= model;
  115. l->pipeline_len += model;
  116. l->exp_start = starpu_timing_now() + l->pipeline_len;
  117. l->exp_end= l->exp_start + l->exp_len;
  118. }
  119. }
  120. /* This is called when a task is actually finished */
  121. static void _starpu_worker_task_list_finished(struct _starpu_worker_task_list *l, struct starpu_task *task)
  122. {
  123. if(!isnan(task->predicted))
  124. /* The execution is over, remove it from pipelined */
  125. l->pipeline_len -= task->predicted;
  126. l->exp_start = STARPU_MAX(starpu_timing_now() + l->pipeline_len, l->exp_start);
  127. l->exp_end = l->exp_start + l->exp_len;
  128. }
  /* Private data attached to a worker component. */
  struct _starpu_worker_component_data
  {
      /* Exactly one of these is meaningful, depending on the kind of
       * component. Both names designate the same storage, so testing one
       * for NULL says nothing about the other. */
      union
      {
          struct _starpu_worker *worker;
          struct _starpu_combined_worker *combined_worker;
      };

      /* Task queue; created for simple workers, left NULL by the combined
       * worker constructor. */
      struct _starpu_worker_task_list *list;
  };
  /* This array stores the worker components, indexed by scheduling context id and then by worker id. */
  139. static struct starpu_sched_component * _worker_components[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS];
  140. /******************************************************************************
  141. * Worker Components' Task List and Grid Functions *
  142. *****************************************************************************/
  143. static struct _starpu_worker_task_list * _starpu_worker_task_list_create(void)
  144. {
  145. struct _starpu_worker_task_list *l;
  146. _STARPU_MALLOC(l, sizeof(*l));
  147. memset(l, 0, sizeof(*l));
  148. l->exp_len = l->pipeline_len = 0.0;
  149. l->exp_start = l->exp_end = starpu_timing_now();
  150. /* These are only for statistics */
  151. STARPU_HG_DISABLE_CHECKING(l->exp_end);
  152. STARPU_HG_DISABLE_CHECKING(l->exp_start);
  153. STARPU_PTHREAD_MUTEX_INIT(&l->mutex,NULL);
  154. return l;
  155. }
  156. static struct _starpu_task_grid * _starpu_task_grid_create(void)
  157. {
  158. struct _starpu_task_grid *t;
  159. _STARPU_MALLOC(t, sizeof(*t));
  160. memset(t, 0, sizeof(*t));
  161. return t;
  162. }
  163. static struct _starpu_worker_task_list * _worker_get_list(unsigned sched_ctx_id)
  164. {
  165. unsigned workerid = starpu_worker_get_id_check();
  166. STARPU_ASSERT(workerid < starpu_worker_get_count());
  167. struct _starpu_worker_component_data * d = starpu_sched_component_worker_get(sched_ctx_id, workerid)->data;
  168. return d->list;
  169. }
  /* Release a grid cell obtained from _starpu_task_grid_create().
   * The task it may reference is not touched.
   */
  static void _starpu_task_grid_destroy(struct _starpu_task_grid * t)
  {
      free(t);
  }
  174. static void _starpu_worker_task_list_destroy(struct _starpu_worker_task_list * l)
  175. {
  176. if(l)
  177. {
  178. /* There can be empty task grids, when we picked the last task after the front task grid */
  179. struct _starpu_task_grid *t = l->first, *nextt;
  180. while(t)
  181. {
  182. STARPU_ASSERT(!t->task);
  183. nextt = t->up;
  184. _starpu_task_grid_destroy(t);
  185. t = nextt;
  186. }
  187. STARPU_PTHREAD_MUTEX_DESTROY(&l->mutex);
  188. free(l);
  189. }
  190. }
  191. static inline void _starpu_worker_task_list_add(struct _starpu_worker_task_list * l, struct starpu_task *task)
  192. {
  193. double predicted = task->predicted;
  194. double predicted_transfer = task->predicted_transfer;
  195. double end = l->exp_end;
  196. const double now = starpu_timing_now();
  197. /* Sometimes workers didn't take the tasks as early as we expected */
  198. l->exp_start = STARPU_MAX(l->exp_start, now);
  199. if (now + predicted_transfer < end)
  200. {
  201. /* We may hope that the transfer will be finished by
  202. * the start of the task. */
  203. predicted_transfer = 0.0;
  204. }
  205. else
  206. {
  207. /* The transfer will not be finished by then, take the
  208. * remainder into account */
  209. predicted_transfer = (now + predicted_transfer) - end;
  210. }
  211. if(!isnan(predicted_transfer))
  212. l->exp_len += predicted_transfer;
  213. if(!isnan(predicted))
  214. l->exp_len += predicted;
  215. l->exp_end = l->exp_start + l->exp_len;
  216. task->predicted = predicted;
  217. task->predicted_transfer = predicted_transfer;
  218. }
  219. static inline void _starpu_worker_task_list_push(struct _starpu_worker_task_list * l, struct _starpu_task_grid * t)
  220. {
  221. /* the task, ntasks, pntasks, left and right members of t are set by the caller */
  222. STARPU_ASSERT(t->task);
  223. if(l->first == NULL)
  224. l->first = l->last = t;
  225. t->down = l->last;
  226. l->last->up = t;
  227. t->up = NULL;
  228. l->last = t;
  229. l->ntasks++;
  230. _starpu_worker_task_list_add(l, t->task);
  231. }
  232. /* recursively set left and right pointers to NULL */
  233. static inline void _starpu_task_grid_unset_left_right_member(struct _starpu_task_grid * t)
  234. {
  235. STARPU_ASSERT(t->task == NULL);
  236. struct _starpu_task_grid * t_left = t->left;
  237. struct _starpu_task_grid * t_right = t->right;
  238. t->left = t->right = NULL;
  239. while(t_left)
  240. {
  241. STARPU_ASSERT(t_left->task == NULL);
  242. t = t_left;
  243. t_left = t_left->left;
  244. t->left = NULL;
  245. t->right = NULL;
  246. }
  247. while(t_right)
  248. {
  249. STARPU_ASSERT(t_right->task == NULL);
  250. t = t_right;
  251. t_right = t_right->right;
  252. t->left = NULL;
  253. t->right = NULL;
  254. }
  255. }
  256. static inline struct starpu_task * _starpu_worker_task_list_pop(struct _starpu_worker_task_list * l)
  257. {
  258. if(!l->first)
  259. {
  260. l->exp_len = l->pipeline_len = 0.0;
  261. l->exp_start = l->exp_end = starpu_timing_now();
  262. return NULL;
  263. }
  264. struct _starpu_task_grid * t = l->first;
  265. /* if there is no task there is no tasks linked to this, then we can free it */
  266. if(t->task == NULL && t->right == NULL && t->left == NULL)
  267. {
  268. l->first = t->up;
  269. if(l->first)
  270. l->first->down = NULL;
  271. if(l->last == t)
  272. l->last = NULL;
  273. _starpu_task_grid_destroy(t);
  274. return _starpu_worker_task_list_pop(l);
  275. }
  276. while(t)
  277. {
  278. if(t->task)
  279. {
  280. struct starpu_task * task = t->task;
  281. t->task = NULL;
  282. /* the leftist thing hold the number of tasks, other have a pointer to it */
  283. int * p = t->left ? t->pntasks : &t->ntasks;
  284. /* the worker who pop the last task allow the rope to be freed */
  285. if(STARPU_ATOMIC_ADD(p, -1) == 0)
  286. _starpu_task_grid_unset_left_right_member(t);
  287. l->ntasks--;
  288. return task;
  289. }
  290. t = t->up;
  291. }
  292. return NULL;
  293. }
  294. /******************************************************************************
  295. * Worker Components' Public Helper Functions (Part 1) *
  296. *****************************************************************************/
  297. struct _starpu_worker * _starpu_sched_component_worker_get_worker(struct starpu_sched_component * worker_component)
  298. {
  299. STARPU_ASSERT(starpu_sched_component_is_simple_worker(worker_component));
  300. struct _starpu_worker_component_data * data = worker_component->data;
  301. return data->worker;
  302. }
  303. struct _starpu_combined_worker * _starpu_sched_component_combined_worker_get_combined_worker(struct starpu_sched_component * worker_component)
  304. {
  305. STARPU_ASSERT(starpu_sched_component_is_combined_worker(worker_component));
  306. struct _starpu_worker_component_data * data = worker_component->data;
  307. return data->combined_worker;
  308. }
  309. /******************************************************************************
  310. * Worker Components' Private Helper Functions *
  311. *****************************************************************************/
  312. #ifndef STARPU_NO_ASSERT
  313. static int _worker_consistant(struct starpu_sched_component * component)
  314. {
  315. int is_a_worker = 0;
  316. int i;
  317. for(i = 0; i<STARPU_NMAXWORKERS; i++)
  318. if(_worker_components[component->tree->sched_ctx_id][i] == component)
  319. is_a_worker = 1;
  320. if(!is_a_worker)
  321. return 0;
  322. struct _starpu_worker_component_data * data = component->data;
  323. if(data->worker)
  324. {
  325. int id = data->worker->workerid;
  326. return (_worker_components[component->tree->sched_ctx_id][id] == component)
  327. && component->nchildren == 0;
  328. }
  329. return 1;
  330. }
  331. #endif
  332. /******************************************************************************
  333. * Simple Worker Components' Interface Functions *
  334. *****************************************************************************/
  335. static void simple_worker_can_pull(struct starpu_sched_component * worker_component)
  336. {
  337. struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(worker_component);
  338. int workerid = worker->workerid;
  339. _starpu_wake_worker_relax(workerid);
  340. }
  341. static int simple_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task)
  342. {
  343. STARPU_ASSERT(starpu_sched_component_is_worker(component));
  344. /*this function take the worker's mutex */
  345. struct _starpu_worker_component_data * data = component->data;
  346. struct _starpu_task_grid * t = _starpu_task_grid_create();
  347. t->task = task;
  348. t->ntasks = 1;
  349. task->workerid = starpu_bitmap_first(component->workers);
  350. #if 1 /* dead lock problem? */
  351. if (starpu_get_prefetch_flag() && !task->prefetched)
  352. {
  353. unsigned memory_node = starpu_worker_get_memory_node(task->workerid);
  354. starpu_prefetch_task_input_on_node(task, memory_node);
  355. }
  356. #endif
  357. struct _starpu_worker_task_list * list = data->list;
  358. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  359. _starpu_worker_task_list_push(list, t);
  360. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  361. simple_worker_can_pull(component);
  362. return 0;
  363. }
  364. static struct starpu_task * simple_worker_pull_task(struct starpu_sched_component *component)
  365. {
  366. unsigned workerid = starpu_worker_get_id_check();
  367. struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
  368. struct _starpu_worker_component_data * data = component->data;
  369. struct _starpu_worker_task_list * list = data->list;
  370. struct starpu_task * task;
  371. int i;
  372. int n_tries = 0;
  373. do
  374. {
  375. const double now = starpu_timing_now();
  376. /* do not reset state_keep_awake here has it may hide tasks in worker->local_tasks */
  377. n_tries++;
  378. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  379. /* Take the opportunity to update start time */
  380. data->list->exp_start = STARPU_MAX(now, data->list->exp_start);
  381. data->list->exp_end = data->list->exp_start + data->list->exp_len;
  382. task = _starpu_worker_task_list_pop(list);
  383. if(task)
  384. {
  385. _starpu_worker_task_list_transfer_started(list, task);
  386. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  387. starpu_push_task_end(task);
  388. goto ret;
  389. }
  390. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  391. for(i=0; i < component->nparents; i++)
  392. {
  393. if(component->parents[i] == NULL)
  394. continue;
  395. else
  396. {
  397. task = starpu_sched_component_pull_task(component->parents[i],component);
  398. if(task)
  399. break;
  400. }
  401. }
  402. }
  403. while((!task) && worker->state_keep_awake && n_tries < 2);
  404. if(!task)
  405. goto ret;
  406. if(task->cl->type == STARPU_SPMD)
  407. {
  408. if(!starpu_worker_is_combined_worker(workerid))
  409. {
  410. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  411. _starpu_worker_task_list_add(list, task);
  412. _starpu_worker_task_list_transfer_started(list, task);
  413. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  414. starpu_push_task_end(task);
  415. goto ret;
  416. }
  417. struct starpu_sched_component * combined_worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, workerid);
  418. starpu_sched_component_push_task(component, combined_worker_component, task);
  419. /* we have pushed a task in queue, so can make a recursive call */
  420. task = simple_worker_pull_task(component);
  421. goto ret;
  422. }
  423. if(task)
  424. {
  425. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  426. _starpu_worker_task_list_add(list, task);
  427. _starpu_worker_task_list_transfer_started(list, task);
  428. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  429. starpu_push_task_end(task);
  430. }
  431. ret:
  432. return task;
  433. }
  434. static double simple_worker_estimated_end(struct starpu_sched_component * component)
  435. {
  436. struct _starpu_worker_component_data * data = component->data;
  437. double now = starpu_timing_now();
  438. if (now + data->list->pipeline_len > data->list->exp_start )
  439. {
  440. data->list->exp_start = now + data->list->pipeline_len;
  441. data->list->exp_end = data->list->exp_start + data->list->exp_len;
  442. }
  443. return data->list->exp_end;
  444. }
  445. static double simple_worker_estimated_load(struct starpu_sched_component * component)
  446. {
  447. struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
  448. int nb_task = 0;
  449. STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex);
  450. struct starpu_task_list list = worker->local_tasks;
  451. struct starpu_task * task;
  452. for(task = starpu_task_list_front(&list);
  453. task != starpu_task_list_end(&list);
  454. task = starpu_task_list_next(task))
  455. nb_task++;
  456. STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex);
  457. struct _starpu_worker_component_data * d = component->data;
  458. struct _starpu_worker_task_list * l = d->list;
  459. int ntasks_in_fifo = l ? l->ntasks : 0;
  460. return (double) (nb_task + ntasks_in_fifo)
  461. / starpu_worker_get_relative_speedup(
  462. starpu_worker_get_perf_archtype(starpu_bitmap_first(component->workers), component->tree->sched_ctx_id));
  463. }
  464. static void _worker_component_deinit_data(struct starpu_sched_component * component)
  465. {
  466. struct _starpu_worker_component_data * d = component->data;
  467. _starpu_worker_task_list_destroy(d->list);
  468. int i, j;
  469. for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
  470. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  471. if(_worker_components[j][i] == component)
  472. {
  473. _worker_components[j][i] = NULL;
  474. break;
  475. }
  476. free(d);
  477. }
  478. static struct starpu_sched_component * starpu_sched_component_worker_create(struct starpu_sched_tree *tree, int workerid)
  479. {
  480. STARPU_ASSERT(workerid >= 0 && workerid < (int) starpu_worker_get_count());
  481. if(_worker_components[tree->sched_ctx_id][workerid])
  482. return _worker_components[tree->sched_ctx_id][workerid];
  483. struct _starpu_worker * worker = _starpu_get_worker_struct(workerid);
  484. if(worker == NULL)
  485. return NULL;
  486. char name[32];
  487. snprintf(name, sizeof(name), "worker %d", workerid);
  488. struct starpu_sched_component * component = starpu_sched_component_create(tree, name);
  489. struct _starpu_worker_component_data *data;
  490. _STARPU_MALLOC(data, sizeof(*data));
  491. memset(data, 0, sizeof(*data));
  492. data->worker = worker;
  493. data->list = _starpu_worker_task_list_create();
  494. component->data = data;
  495. component->push_task = simple_worker_push_task;
  496. component->pull_task = simple_worker_pull_task;
  497. component->can_pull = simple_worker_can_pull;
  498. component->estimated_end = simple_worker_estimated_end;
  499. component->estimated_load = simple_worker_estimated_load;
  500. component->deinit_data = _worker_component_deinit_data;
  501. starpu_bitmap_set(component->workers, workerid);
  502. starpu_bitmap_or(component->workers_in_ctx, component->workers);
  503. _worker_components[tree->sched_ctx_id][workerid] = component;
  504. /*
  505. #ifdef STARPU_HAVE_HWLOC
  506. struct _starpu_machine_config *config = _starpu_get_machine_config();
  507. struct _starpu_machine_topology *topology = &config->topology;
  508. hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->cpu_depth, worker->bindid);
  509. STARPU_ASSERT(obj);
  510. component->obj = obj;
  511. #endif
  512. */
  513. return component;
  514. }
  515. /******************************************************************************
  516. * Combined Worker Components' Interface Functions *
  517. *****************************************************************************/
  518. static void combined_worker_can_pull(struct starpu_sched_component * component)
  519. {
  520. (void) component;
  521. STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
  522. struct _starpu_worker_component_data * data = component->data;
  523. unsigned workerid = starpu_worker_get_id_check();
  524. int i;
  525. for(i = 0; i < data->combined_worker->worker_size; i++)
  526. {
  527. if((unsigned) i == workerid)
  528. continue;
  529. _starpu_wake_worker_relax(workerid);
  530. }
  531. }
  532. static int combined_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task)
  533. {
  534. STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
  535. struct _starpu_worker_component_data * data = component->data;
  536. STARPU_ASSERT(data->combined_worker && !data->worker);
  537. struct _starpu_combined_worker * combined_worker = data->combined_worker;
  538. STARPU_ASSERT(combined_worker->worker_size >= 1);
  539. struct _starpu_task_grid * task_alias[combined_worker->worker_size];
  540. starpu_parallel_task_barrier_init(task, starpu_bitmap_first(component->workers));
  541. task_alias[0] = _starpu_task_grid_create();
  542. task_alias[0]->task = starpu_task_dup(task);
  543. task_alias[0]->task->workerid = combined_worker->combined_workerid[0];
  544. task_alias[0]->task->destroy = 1;
  545. task_alias[0]->left = NULL;
  546. task_alias[0]->ntasks = combined_worker->worker_size;
  547. int i;
  548. for(i = 1; i < combined_worker->worker_size; i++)
  549. {
  550. task_alias[i] = _starpu_task_grid_create();
  551. task_alias[i]->task = starpu_task_dup(task);
  552. task_alias[i]->task->destroy = 1;
  553. task_alias[i]->task->workerid = combined_worker->combined_workerid[i];
  554. task_alias[i]->left = task_alias[i-1];
  555. task_alias[i - 1]->right = task_alias[i];
  556. task_alias[i]->pntasks = &(task_alias[0]->ntasks);
  557. }
  558. starpu_pthread_mutex_t * mutex_to_unlock = NULL;
  559. i = 0;
  560. do
  561. {
  562. struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, combined_worker->combined_workerid[i]);
  563. struct _starpu_worker_component_data * worker_data = worker_component->data;
  564. struct _starpu_worker_task_list * list = worker_data->list;
  565. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  566. if(mutex_to_unlock)
  567. STARPU_COMPONENT_MUTEX_UNLOCK(mutex_to_unlock);
  568. mutex_to_unlock = &list->mutex;
  569. _starpu_worker_task_list_push(list, task_alias[i]);
  570. i++;
  571. }
  572. while(i < combined_worker->worker_size);
  573. STARPU_COMPONENT_MUTEX_UNLOCK(mutex_to_unlock);
  574. int workerid = starpu_worker_get_id();
  575. if(-1 == workerid)
  576. {
  577. combined_worker_can_pull(component);
  578. }
  579. else
  580. {
  581. /* wake up all other workers of combined worker */
  582. for(i = 0; i < combined_worker->worker_size; i++)
  583. {
  584. struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, combined_worker->combined_workerid[i]);
  585. simple_worker_can_pull(worker_component);
  586. }
  587. combined_worker_can_pull(component);
  588. }
  589. return 0;
  590. }
  591. static double combined_worker_estimated_end(struct starpu_sched_component * component)
  592. {
  593. STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
  594. struct _starpu_worker_component_data * data = component->data;
  595. struct _starpu_combined_worker * combined_worker = data->combined_worker;
  596. double max = 0.0;
  597. int i;
  598. for(i = 0; i < combined_worker->worker_size; i++)
  599. {
  600. data = _worker_components[component->tree->sched_ctx_id][combined_worker->combined_workerid[i]]->data;
  601. double tmp = data->list->exp_end;
  602. max = tmp > max ? tmp : max;
  603. }
  604. return max;
  605. }
  606. static double combined_worker_estimated_load(struct starpu_sched_component * component)
  607. {
  608. struct _starpu_worker_component_data * d = component->data;
  609. struct _starpu_combined_worker * c = d->combined_worker;
  610. double load = 0;
  611. int i;
  612. for(i = 0; i < c->worker_size; i++)
  613. {
  614. struct starpu_sched_component * n = starpu_sched_component_worker_get(component->tree->sched_ctx_id, c->combined_workerid[i]);
  615. load += n->estimated_load(n);
  616. }
  617. return load;
  618. }
  619. static struct starpu_sched_component * starpu_sched_component_combined_worker_create(struct starpu_sched_tree *tree, int workerid)
  620. {
  621. STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
  622. if(_worker_components[tree->sched_ctx_id][workerid])
  623. return _worker_components[tree->sched_ctx_id][workerid];
  624. struct _starpu_combined_worker * combined_worker = _starpu_get_combined_worker_struct(workerid);
  625. if(combined_worker == NULL)
  626. return NULL;
  627. struct starpu_sched_component * component = starpu_sched_component_create(tree, "combined_worker");
  628. struct _starpu_worker_component_data *data;
  629. _STARPU_MALLOC(data, sizeof(*data));
  630. memset(data, 0, sizeof(*data));
  631. data->combined_worker = combined_worker;
  632. component->data = data;
  633. component->push_task = combined_worker_push_task;
  634. component->pull_task = NULL;
  635. component->estimated_end = combined_worker_estimated_end;
  636. component->estimated_load = combined_worker_estimated_load;
  637. component->can_pull = combined_worker_can_pull;
  638. component->deinit_data = _worker_component_deinit_data;
  639. starpu_bitmap_set(component->workers, workerid);
  640. starpu_bitmap_or(component->workers_in_ctx, component->workers);
  641. _worker_components[tree->sched_ctx_id][workerid] = component;
  642. #ifdef STARPU_HAVE_HWLOC
  643. struct _starpu_machine_config *config = _starpu_get_machine_config();
  644. struct _starpu_machine_topology *topology = &config->topology;
  645. hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->cpu_depth, combined_worker->combined_workerid[0]);
  646. STARPU_ASSERT(obj);
  647. component->obj = obj;
  648. #endif
  649. return component;
  650. }
  651. /******************************************************************************
  652. * Worker Components' Public Helper Functions (Part 2) *
  653. *****************************************************************************/
  /* Lock every worker, in increasing worker id order. */
  void _starpu_sched_component_lock_all_workers(void)
  {
      unsigned workerid = 0;

      while (workerid < starpu_worker_get_count())
      {
          _starpu_worker_lock(workerid);
          workerid++;
      }
  }
  /* Unlock every worker, in increasing worker id order. */
  void _starpu_sched_component_unlock_all_workers(void)
  {
      unsigned workerid = 0;

      while (workerid < starpu_worker_get_count())
      {
          _starpu_worker_unlock(workerid);
          workerid++;
      }
  }
  666. void _starpu_sched_component_workers_destroy(void)
  667. {
  668. int i, j;
  669. for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++)
  670. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  671. if (_worker_components[j][i])
  672. starpu_sched_component_destroy(_worker_components[j][i]);
  673. }
  674. int starpu_sched_component_worker_get_workerid(struct starpu_sched_component * worker_component)
  675. {
  676. #ifndef STARPU_NO_ASSERT
  677. STARPU_ASSERT(_worker_consistant(worker_component));
  678. #endif
  679. STARPU_ASSERT(1 == starpu_bitmap_cardinal(worker_component->workers));
  680. return starpu_bitmap_first(worker_component->workers);
  681. }
  682. void starpu_sched_component_worker_pre_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
  683. {
  684. struct _starpu_worker_task_list * list = _worker_get_list(sched_ctx_id);
  685. const double now = starpu_timing_now();
  686. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  687. _starpu_worker_task_list_started(list, task);
  688. /* Take the opportunity to update start time */
  689. list->exp_start = STARPU_MAX(now + list->pipeline_len, list->exp_start);
  690. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  691. }
  692. void starpu_sched_component_worker_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
  693. {
  694. if(task->execute_on_a_specific_worker)
  695. return;
  696. struct _starpu_worker_task_list * list = _worker_get_list(sched_ctx_id);
  697. STARPU_COMPONENT_MUTEX_LOCK(&list->mutex);
  698. _starpu_worker_task_list_finished(list, task);
  699. STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex);
  700. }
  701. int starpu_sched_component_is_simple_worker(struct starpu_sched_component * component)
  702. {
  703. return component->push_task == simple_worker_push_task;
  704. }
  705. int starpu_sched_component_is_combined_worker(struct starpu_sched_component * component)
  706. {
  707. return component->push_task == combined_worker_push_task;
  708. }
  /* Return 1 when component is a simple or a combined worker component,
   * 0 otherwise.
   */
  int starpu_sched_component_is_worker(struct starpu_sched_component * component)
  {
      if (starpu_sched_component_is_simple_worker(component))
          return 1;
      return starpu_sched_component_is_combined_worker(component) ? 1 : 0;
  }
  714. /* As Worker Components' creating functions are protected, this function allows
  715. * the user to get a Worker Component from a worker id */
  716. struct starpu_sched_component * starpu_sched_component_worker_get(unsigned sched_ctx, int workerid)
  717. {
  718. STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
  719. /* we may need to take a mutex here */
  720. STARPU_ASSERT(_worker_components[sched_ctx][workerid]);
  721. return _worker_components[sched_ctx][workerid];
  722. }
  723. struct starpu_sched_component * starpu_sched_component_worker_new(unsigned sched_ctx, int workerid)
  724. {
  725. STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
  726. /* we may need to take a mutex here */
  727. STARPU_ASSERT(!_worker_components[sched_ctx][workerid]);
  728. struct starpu_sched_component * component;
  729. if(workerid < (int) starpu_worker_get_count())
  730. component = starpu_sched_component_worker_create(starpu_sched_tree_get(sched_ctx), workerid);
  731. else
  732. component = starpu_sched_component_combined_worker_create(starpu_sched_tree_get(sched_ctx), workerid);
  733. _worker_components[sched_ctx][workerid] = component;
  734. return component;
  735. }