component_worker.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 Télécom-SudParis
  6. * Copyright (C) 2011-2013 INRIA
  7. * Copyright (C) 2013 Simon Archipoff
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <starpu_sched_component.h>
  21. #include <core/workers.h>
  22. #include <float.h>
  23. /* data structure for worker's queue look like this :
  24. * W = worker
  25. * T = simple task
  26. * P = parallel task
  27. *
  28. *
  29. * P--P T
  30. * | | \|
  31. * P--P T T P T
  32. * | | | | | |
  33. * T T P--P--P T
  34. * | | | | | |
  35. * W W W W W W
  36. *
  37. *
  38. *
 * It is possible that a _starpu_task_grid no longer holds a task, because it
 * has already been popped by a worker.
  41. *
  42. * N = no task
  43. *
  44. * T T T
  45. * | | |
  46. * P--N--N
  47. * | | |
  48. * W W W
  49. *
  50. *
 * This API is a little asymmetric: struct _starpu_task_grid objects are allocated by the caller and freed by the data structure.
  52. *
  53. */
  54. struct _starpu_task_grid
  55. {
  56. /* this member may be NULL if a worker have poped it but its a
  57. * parallel task and we dont want mad pointers
  58. */
  59. struct starpu_task * task;
  60. struct _starpu_task_grid *up, *down, *left, *right;
  61. /* this is used to count the number of task to be poped by a worker
  62. * the leftist _starpu_task_grid maintain the ntasks counter (ie .left == NULL),
  63. * all the others use the pntasks that point to it
  64. *
  65. * when the counter reach 0, all the left and right member are set to NULL,
  66. * that mean that we will free that components.
  67. */
  68. union
  69. {
  70. int ntasks;
  71. int * pntasks;
  72. };
  73. };
  74. /* list->exp_start, list->exp_len, list-exp_end and list->ntasks
  75. * are updated by starpu_sched_component_worker_push_task(component, task) and pre_exec_hook
  76. */
  77. struct _starpu_worker_task_list
  78. {
  79. double exp_start, exp_len, exp_end;
  80. struct _starpu_task_grid *first, *last;
  81. unsigned ntasks;
  82. starpu_pthread_mutex_t mutex;
  83. };
  84. enum _starpu_worker_component_status
  85. {
  86. COMPONENT_STATUS_SLEEPING,
  87. COMPONENT_STATUS_RESET,
  88. COMPONENT_STATUS_CHANGED
  89. };
  90. struct _starpu_worker_component_data
  91. {
  92. union
  93. {
  94. struct
  95. {
  96. struct _starpu_worker * worker;
  97. starpu_pthread_mutex_t lock;
  98. };
  99. struct _starpu_combined_worker * combined_worker;
  100. };
  101. struct _starpu_worker_task_list * list;
  102. enum _starpu_worker_component_status status;
  103. };
  104. /* this array store worker components */
  105. static struct starpu_sched_component * _worker_components[STARPU_NMAXWORKERS];
  106. static struct _starpu_worker_task_list * _starpu_worker_task_list_create(void)
  107. {
  108. struct _starpu_worker_task_list * l = malloc(sizeof(*l));
  109. memset(l, 0, sizeof(*l));
  110. l->exp_len = 0.0;
  111. l->exp_start = l->exp_end = starpu_timing_now();
  112. STARPU_PTHREAD_MUTEX_INIT(&l->mutex,NULL);
  113. return l;
  114. }
  115. static struct _starpu_task_grid * _starpu_task_grid_create(void)
  116. {
  117. struct _starpu_task_grid * t = malloc(sizeof(*t));
  118. memset(t, 0, sizeof(*t));
  119. return t;
  120. }
  121. static void _starpu_task_grid_destroy(struct _starpu_task_grid * t)
  122. {
  123. free(t);
  124. }
  125. static void _starpu_worker_task_list_destroy(struct _starpu_worker_task_list * l)
  126. {
  127. if(l)
  128. {
  129. STARPU_PTHREAD_MUTEX_DESTROY(&l->mutex);
  130. free(l);
  131. }
  132. }
  133. static inline void _starpu_worker_task_list_push(struct _starpu_worker_task_list * l, struct _starpu_task_grid * t)
  134. {
  135. /* the task, ntasks, pntasks, left and right members of t are set by the caller */
  136. STARPU_ASSERT(t->task);
  137. if(l->first == NULL)
  138. l->first = l->last = t;
  139. t->down = l->last;
  140. l->last->up = t;
  141. t->up = NULL;
  142. l->last = t;
  143. l->ntasks++;
  144. double predicted = t->task->predicted;
  145. double predicted_transfer = t->task->predicted_transfer;
  146. /* Sometimes workers didn't take the tasks as early as we expected */
  147. l->exp_start = STARPU_MAX(l->exp_start, starpu_timing_now());
  148. l->exp_end = l->exp_start + l->exp_len;
  149. if (starpu_timing_now() + predicted_transfer < l->exp_end)
  150. {
  151. /* We may hope that the transfer will be finished by
  152. * the start of the task. */
  153. predicted_transfer = 0.0;
  154. }
  155. else
  156. {
  157. /* The transfer will not be finished by then, take the
  158. * remainder into account */
  159. predicted_transfer = (starpu_timing_now() + predicted_transfer) - l->exp_end;
  160. }
  161. if(!isnan(predicted_transfer))
  162. {
  163. l->exp_end += predicted_transfer;
  164. l->exp_len += predicted_transfer;
  165. }
  166. if(!isnan(predicted))
  167. {
  168. l->exp_end += predicted;
  169. l->exp_len += predicted;
  170. }
  171. t->task->predicted = predicted;
  172. t->task->predicted_transfer = predicted_transfer;
  173. }
  174. /* recursively set left and right pointers to NULL */
  175. static inline void _starpu_task_grid_unset_left_right_member(struct _starpu_task_grid * t)
  176. {
  177. STARPU_ASSERT(t->task == NULL);
  178. struct _starpu_task_grid * t_left = t->left;
  179. struct _starpu_task_grid * t_right = t->right;
  180. t->left = t->right = NULL;
  181. while(t_left)
  182. {
  183. STARPU_ASSERT(t_left->task == NULL);
  184. t = t_left;
  185. t_left = t_left->left;
  186. t->left = NULL;
  187. t->right = NULL;
  188. }
  189. while(t_right)
  190. {
  191. STARPU_ASSERT(t_right->task == NULL);
  192. t = t_right;
  193. t_right = t_right->right;
  194. t->left = NULL;
  195. t->right = NULL;
  196. }
  197. }
  198. static inline struct starpu_task * _starpu_worker_task_list_pop(struct _starpu_worker_task_list * l)
  199. {
  200. if(!l->first)
  201. {
  202. l->exp_start = l->exp_end = starpu_timing_now();
  203. l->exp_len = 0;
  204. return NULL;
  205. }
  206. struct _starpu_task_grid * t = l->first;
  207. /* if there is no task there is no tasks linked to this, then we can free it */
  208. if(t->task == NULL && t->right == NULL && t->left == NULL)
  209. {
  210. l->first = t->up;
  211. if(l->first)
  212. l->first->down = NULL;
  213. if(l->last == t)
  214. l->last = NULL;
  215. _starpu_task_grid_destroy(t);
  216. return _starpu_worker_task_list_pop(l);
  217. }
  218. while(t)
  219. {
  220. if(t->task)
  221. {
  222. struct starpu_task * task = t->task;
  223. t->task = NULL;
  224. /* the leftist thing hold the number of tasks, other have a pointer to it */
  225. int * p = t->left ? t->pntasks : &t->ntasks;
  226. /* the worker who pop the last task allow the rope to be freed */
  227. if(STARPU_ATOMIC_ADD(p, -1) == 0)
  228. _starpu_task_grid_unset_left_right_member(t);
  229. l->ntasks--;
  230. if(!isnan(task->predicted))
  231. {
  232. l->exp_len -= task->predicted_transfer;
  233. l->exp_end = l->exp_start + l->exp_len;
  234. }
  235. return task;
  236. }
  237. t = t->up;
  238. }
  239. return NULL;
  240. }
  241. static struct starpu_sched_component * starpu_sched_component_worker_create(int workerid);
  242. static struct starpu_sched_component * starpu_sched_component_combined_worker_create(int workerid);
  243. struct starpu_sched_component * starpu_sched_component_worker_get(int workerid)
  244. {
  245. STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
  246. /* we may need to take a mutex here */
  247. if(_worker_components[workerid])
  248. return _worker_components[workerid];
  249. else
  250. {
  251. struct starpu_sched_component * component;
  252. if(workerid < (int) starpu_worker_get_count())
  253. component = starpu_sched_component_worker_create(workerid);
  254. else
  255. component = starpu_sched_component_combined_worker_create(workerid);
  256. _worker_components[workerid] = component;
  257. return component;
  258. }
  259. }
  260. struct _starpu_worker * _starpu_sched_component_worker_get_worker(struct starpu_sched_component * worker_component)
  261. {
  262. STARPU_ASSERT(starpu_sched_component_is_simple_worker(worker_component));
  263. struct _starpu_worker_component_data * data = worker_component->data;
  264. return data->worker;
  265. }
  266. struct _starpu_combined_worker * _starpu_sched_component_combined_worker_get_combined_worker(struct starpu_sched_component * worker_component)
  267. {
  268. STARPU_ASSERT(starpu_sched_component_is_combined_worker(worker_component));
  269. struct _starpu_worker_component_data * data = worker_component->data;
  270. return data->combined_worker;
  271. }
  272. /*
  273. enum starpu_perfmodel_archtype starpu_sched_component_worker_get_perf_arch(struct starpu_sched_component * worker_component)
  274. {
  275. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  276. if(starpu_sched_component_is_simple_worker(worker_component))
  277. return _starpu_sched_component_worker_get_worker(worker_component)->perf_arch;
  278. else
  279. return _starpu_sched_component_combined_worker_get_combined_worker(worker_component)->perf_arch;
  280. }
  281. */
  282. static void _starpu_sched_component_worker_set_sleep_status(struct starpu_sched_component * worker_component)
  283. {
  284. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  285. struct _starpu_worker_component_data * data = worker_component->data;
  286. data->status = COMPONENT_STATUS_SLEEPING;
  287. }
  288. static void _starpu_sched_component_worker_set_changed_status(struct starpu_sched_component * worker_component)
  289. {
  290. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  291. struct _starpu_worker_component_data * data = worker_component->data;
  292. data->status = COMPONENT_STATUS_CHANGED;
  293. }
  294. static void _starpu_sched_component_worker_reset_status(struct starpu_sched_component * worker_component)
  295. {
  296. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  297. struct _starpu_worker_component_data * data = worker_component->data;
  298. data->status = COMPONENT_STATUS_RESET;
  299. }
  300. static int _starpu_sched_component_worker_is_reset_status(struct starpu_sched_component * worker_component)
  301. {
  302. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  303. struct _starpu_worker_component_data * data = worker_component->data;
  304. return (data->status == COMPONENT_STATUS_RESET);
  305. }
  306. static int _starpu_sched_component_worker_is_changed_status(struct starpu_sched_component * worker_component)
  307. {
  308. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  309. struct _starpu_worker_component_data * data = worker_component->data;
  310. return (data->status == COMPONENT_STATUS_CHANGED);
  311. }
  312. static int _starpu_sched_component_worker_is_sleeping_status(struct starpu_sched_component * worker_component)
  313. {
  314. STARPU_ASSERT(starpu_sched_component_is_worker(worker_component));
  315. struct _starpu_worker_component_data * data = worker_component->data;
  316. return (data->status == COMPONENT_STATUS_SLEEPING);
  317. }
  318. void _starpu_sched_component_lock_worker(int workerid)
  319. {
  320. STARPU_ASSERT(0 <= workerid && workerid < (int) starpu_worker_get_count());
  321. struct _starpu_worker_component_data * data = starpu_sched_component_worker_create(workerid)->data;
  322. STARPU_PTHREAD_MUTEX_LOCK(&data->lock);
  323. }
  324. void _starpu_sched_component_unlock_worker(int workerid)
  325. {
  326. STARPU_ASSERT(0 <= workerid && workerid < (int)starpu_worker_get_count());
  327. struct _starpu_worker_component_data * data = starpu_sched_component_worker_create(workerid)->data;
  328. STARPU_PTHREAD_MUTEX_UNLOCK(&data->lock);
  329. }
  330. static void simple_worker_can_pull(struct starpu_sched_component * worker_component)
  331. {
  332. (void) worker_component;
  333. struct _starpu_worker * w = _starpu_sched_component_worker_get_worker(worker_component);
  334. _starpu_sched_component_lock_worker(w->workerid);
  335. if(_starpu_sched_component_worker_is_reset_status(worker_component))
  336. _starpu_sched_component_worker_set_changed_status(worker_component);
  337. if(w->workerid == starpu_worker_get_id())
  338. {
  339. _starpu_sched_component_unlock_worker(w->workerid);
  340. return;
  341. }
  342. if(_starpu_sched_component_worker_is_sleeping_status(worker_component))
  343. {
  344. starpu_pthread_mutex_t *sched_mutex;
  345. starpu_pthread_cond_t *sched_cond;
  346. starpu_worker_get_sched_condition(w->workerid, &sched_mutex, &sched_cond);
  347. _starpu_sched_component_unlock_worker(w->workerid);
  348. starpu_wakeup_worker(w->workerid, sched_cond, sched_mutex);
  349. }
  350. else
  351. _starpu_sched_component_unlock_worker(w->workerid);
  352. }
  353. static void combined_worker_can_pull(struct starpu_sched_component * component)
  354. {
  355. (void) component;
  356. STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
  357. struct _starpu_worker_component_data * data = component->data;
  358. int workerid = starpu_worker_get_id();
  359. int i;
  360. for(i = 0; i < data->combined_worker->worker_size; i++)
  361. {
  362. if(i == workerid)
  363. continue;
  364. int worker = data->combined_worker->combined_workerid[i];
  365. _starpu_sched_component_lock_worker(worker);
  366. if(_starpu_sched_component_worker_is_sleeping_status(component))
  367. {
  368. starpu_pthread_mutex_t *sched_mutex;
  369. starpu_pthread_cond_t *sched_cond;
  370. starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
  371. starpu_wakeup_worker(worker, sched_cond, sched_mutex);
  372. }
  373. if(_starpu_sched_component_worker_is_reset_status(component))
  374. _starpu_sched_component_worker_set_changed_status(component);
  375. _starpu_sched_component_unlock_worker(worker);
  376. }
  377. }
  378. static int simple_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task)
  379. {
  380. STARPU_ASSERT(starpu_sched_component_is_worker(component));
  381. /*this function take the worker's mutex */
  382. struct _starpu_worker_component_data * data = component->data;
  383. struct _starpu_task_grid * t = _starpu_task_grid_create();
  384. t->task = task;
  385. t->ntasks = 1;
  386. task->workerid = starpu_bitmap_first(component->workers);
  387. #if 1 /* dead lock problem? */
  388. if (starpu_get_prefetch_flag())
  389. {
  390. unsigned memory_node = starpu_worker_get_memory_node(task->workerid);
  391. starpu_prefetch_task_input_on_node(task, memory_node);
  392. }
  393. #endif
  394. STARPU_PTHREAD_MUTEX_LOCK(&data->list->mutex);
  395. _starpu_worker_task_list_push(data->list, t);
  396. STARPU_PTHREAD_MUTEX_UNLOCK(&data->list->mutex);
  397. simple_worker_can_pull(component);
  398. return 0;
  399. }
  400. struct starpu_task * simple_worker_pop_task(struct starpu_sched_component *component)
  401. {
  402. struct _starpu_worker_component_data * data = component->data;
  403. struct _starpu_worker_task_list * list = data->list;
  404. STARPU_PTHREAD_MUTEX_LOCK(&list->mutex);
  405. struct starpu_task * task = _starpu_worker_task_list_pop(list);
  406. STARPU_PTHREAD_MUTEX_UNLOCK(&list->mutex);
  407. if(task)
  408. {
  409. starpu_push_task_end(task);
  410. return task;
  411. }
  412. STARPU_PTHREAD_MUTEX_LOCK(&data->lock);
  413. int i;
  414. do {
  415. _starpu_sched_component_worker_reset_status(component);
  416. for(i=0; i < component->nfathers; i++)
  417. {
  418. if(component->fathers[i] == NULL)
  419. continue;
  420. else
  421. {
  422. task = component->fathers[i]->pop_task(component->fathers[i]);
  423. if(task)
  424. break;
  425. }
  426. }
  427. } while((!task) && _starpu_sched_component_worker_is_changed_status(component));
  428. _starpu_sched_component_worker_set_sleep_status(component);
  429. STARPU_PTHREAD_MUTEX_UNLOCK(&data->lock);
  430. if(!task)
  431. return NULL;
  432. if(task->cl->type == STARPU_SPMD)
  433. {
  434. int workerid = starpu_worker_get_id();
  435. if(!starpu_worker_is_combined_worker(workerid))
  436. {
  437. starpu_push_task_end(task);
  438. return task;
  439. }
  440. struct starpu_sched_component * combined_worker_component = starpu_sched_component_worker_get(workerid);
  441. (void)combined_worker_component->push_task(combined_worker_component, task);
  442. /* we have pushed a task in queue, so can make a recursive call */
  443. return simple_worker_pop_task(component);
  444. }
  445. if(task)
  446. starpu_push_task_end(task);
  447. return task;
  448. }
  449. void starpu_sched_component_worker_destroy(struct starpu_sched_component *component)
  450. {
  451. struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
  452. unsigned id = worker->workerid;
  453. assert(_worker_components[id] == component);
  454. int i;
  455. for(i = 0; i < STARPU_NMAX_SCHED_CTXS ; i++)
  456. if(component->fathers[i] != NULL)
  457. return;//this component is shared between several contexts
  458. starpu_sched_component_destroy(component);
  459. _worker_components[id] = NULL;
  460. }
  461. void _starpu_sched_component_lock_all_workers(void)
  462. {
  463. unsigned i;
  464. for(i = 0; i < starpu_worker_get_count(); i++)
  465. _starpu_sched_component_lock_worker(i);
  466. }
  467. void _starpu_sched_component_unlock_all_workers(void)
  468. {
  469. unsigned i;
  470. for(i = 0; i < starpu_worker_get_count(); i++)
  471. _starpu_sched_component_unlock_worker(i);
  472. }
  473. /*
  474. dont know if this may be usefull…
  475. static double worker_estimated_finish_time(struct _starpu_worker * worker)
  476. {
  477. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  478. double sum = 0.0;
  479. struct starpu_task_list list = worker->local_tasks;
  480. struct starpu_task * task;
  481. for(task = starpu_task_list_front(&list);
  482. task != starpu_task_list_end(&list);
  483. task = starpu_task_list_next(task))
  484. if(!isnan(task->predicted))
  485. sum += task->predicted;
  486. if(worker->current_task)
  487. {
  488. struct starpu_task * t = worker->current_task;
  489. if(t && !isnan(t->predicted))
  490. sum += t->predicted/2;
  491. }
  492. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  493. return sum + starpu_timing_now();
  494. }
  495. */
  496. static double combined_worker_estimated_end(struct starpu_sched_component * component)
  497. {
  498. STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
  499. struct _starpu_worker_component_data * data = component->data;
  500. struct _starpu_combined_worker * combined_worker = data->combined_worker;
  501. double max = 0.0;
  502. int i;
  503. for(i = 0; i < combined_worker->worker_size; i++)
  504. {
  505. data = _worker_components[combined_worker->combined_workerid[i]]->data;
  506. STARPU_PTHREAD_MUTEX_LOCK(&data->list->mutex);
  507. double tmp = data->list->exp_end;
  508. STARPU_PTHREAD_MUTEX_UNLOCK(&data->list->mutex);
  509. max = tmp > max ? tmp : max;
  510. }
  511. return max;
  512. }
  513. static double simple_worker_estimated_end(struct starpu_sched_component * component)
  514. {
  515. struct _starpu_worker_component_data * data = component->data;
  516. STARPU_PTHREAD_MUTEX_LOCK(&data->list->mutex);
  517. data->list->exp_start = STARPU_MAX(starpu_timing_now(), data->list->exp_start);
  518. double tmp = data->list->exp_end = data->list->exp_start + data->list->exp_len;
  519. STARPU_PTHREAD_MUTEX_UNLOCK(&data->list->mutex);
  520. return tmp;
  521. }
  522. static double simple_worker_estimated_load(struct starpu_sched_component * component)
  523. {
  524. struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component);
  525. int nb_task = 0;
  526. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  527. struct starpu_task_list list = worker->local_tasks;
  528. struct starpu_task * task;
  529. for(task = starpu_task_list_front(&list);
  530. task != starpu_task_list_end(&list);
  531. task = starpu_task_list_next(task))
  532. nb_task++;
  533. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  534. struct _starpu_worker_component_data * d = component->data;
  535. struct _starpu_worker_task_list * l = d->list;
  536. int ntasks_in_fifo = l ? l->ntasks : 0;
  537. return (double) (nb_task + ntasks_in_fifo)
  538. / starpu_worker_get_relative_speedup(
  539. starpu_worker_get_perf_archtype(starpu_bitmap_first(component->workers)));
  540. }
  541. static double combined_worker_estimated_load(struct starpu_sched_component * component)
  542. {
  543. struct _starpu_worker_component_data * d = component->data;
  544. struct _starpu_combined_worker * c = d->combined_worker;
  545. double load = 0;
  546. int i;
  547. for(i = 0; i < c->worker_size; i++)
  548. {
  549. struct starpu_sched_component * n = starpu_sched_component_worker_get(c->combined_workerid[i]);
  550. load += n->estimated_load(n);
  551. }
  552. return load;
  553. }
  554. static int combined_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task)
  555. {
  556. STARPU_ASSERT(starpu_sched_component_is_combined_worker(component));
  557. struct _starpu_worker_component_data * data = component->data;
  558. STARPU_ASSERT(data->combined_worker && !data->worker);
  559. struct _starpu_combined_worker * combined_worker = data->combined_worker;
  560. STARPU_ASSERT(combined_worker->worker_size >= 1);
  561. struct _starpu_task_grid * task_alias[combined_worker->worker_size];
  562. starpu_parallel_task_barrier_init(task, starpu_bitmap_first(component->workers));
  563. task_alias[0] = _starpu_task_grid_create();
  564. task_alias[0]->task = starpu_task_dup(task);
  565. task_alias[0]->task->workerid = combined_worker->combined_workerid[0];
  566. task_alias[0]->left = NULL;
  567. task_alias[0]->ntasks = combined_worker->worker_size;
  568. int i;
  569. for(i = 1; i < combined_worker->worker_size; i++)
  570. {
  571. task_alias[i] = _starpu_task_grid_create();
  572. task_alias[i]->task = starpu_task_dup(task);
  573. task_alias[i]->task->workerid = combined_worker->combined_workerid[i];
  574. task_alias[i]->left = task_alias[i-1];
  575. task_alias[i - 1]->right = task_alias[i];
  576. task_alias[i]->pntasks = &task_alias[0]->ntasks;
  577. }
  578. starpu_pthread_mutex_t * mutex_to_unlock = NULL;
  579. i = 0;
  580. do
  581. {
  582. struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(combined_worker->combined_workerid[i]);
  583. struct _starpu_worker_component_data * worker_data = worker_component->data;
  584. struct _starpu_worker_task_list * list = worker_data->list;
  585. STARPU_PTHREAD_MUTEX_LOCK(&list->mutex);
  586. if(mutex_to_unlock)
  587. STARPU_PTHREAD_MUTEX_UNLOCK(mutex_to_unlock);
  588. mutex_to_unlock = &list->mutex;
  589. _starpu_worker_task_list_push(list, task_alias[i]);
  590. i++;
  591. }
  592. while(i < combined_worker->worker_size);
  593. STARPU_PTHREAD_MUTEX_UNLOCK(mutex_to_unlock);
  594. int workerid = starpu_worker_get_id();
  595. if(-1 == workerid)
  596. {
  597. combined_worker_can_pull(component);
  598. }
  599. else
  600. {
  601. starpu_pthread_mutex_t *worker_sched_mutex;
  602. starpu_pthread_cond_t *worker_sched_cond;
  603. starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);
  604. STARPU_PTHREAD_MUTEX_UNLOCK(worker_sched_mutex);
  605. /* wake up all other workers of combined worker */
  606. for(i = 0; i < combined_worker->worker_size; i++)
  607. {
  608. struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(combined_worker->combined_workerid[i]);
  609. simple_worker_can_pull(worker_component);
  610. }
  611. combined_worker_can_pull(component);
  612. STARPU_PTHREAD_MUTEX_LOCK(worker_sched_mutex);
  613. }
  614. return 0;
  615. }
  616. void _worker_component_deinit_data(struct starpu_sched_component * component)
  617. {
  618. struct _starpu_worker_component_data * d = component->data;
  619. _starpu_worker_task_list_destroy(d->list);
  620. if(starpu_sched_component_is_simple_worker(component))
  621. STARPU_PTHREAD_MUTEX_DESTROY(&d->lock);
  622. int i;
  623. for(i = 0; i < STARPU_NMAXWORKERS; i++)
  624. if(_worker_components[i] == component)
  625. {
  626. _worker_components[i] = NULL;
  627. return;
  628. }
  629. free(d);
  630. }
  631. static struct starpu_sched_component * starpu_sched_component_worker_create(int workerid)
  632. {
  633. STARPU_ASSERT(0 <= workerid && workerid < (int) starpu_worker_get_count());
  634. if(_worker_components[workerid])
  635. return _worker_components[workerid];
  636. struct _starpu_worker * worker = _starpu_get_worker_struct(workerid);
  637. if(worker == NULL)
  638. return NULL;
  639. struct starpu_sched_component * component = starpu_sched_component_create();
  640. struct _starpu_worker_component_data * data = malloc(sizeof(*data));
  641. memset(data, 0, sizeof(*data));
  642. data->worker = worker;
  643. STARPU_PTHREAD_MUTEX_INIT(&data->lock,NULL);
  644. data->status = COMPONENT_STATUS_SLEEPING;
  645. data->list = _starpu_worker_task_list_create();
  646. component->data = data;
  647. component->push_task = simple_worker_push_task;
  648. component->pop_task = simple_worker_pop_task;
  649. component->can_pull = simple_worker_can_pull;
  650. component->estimated_end = simple_worker_estimated_end;
  651. component->estimated_load = simple_worker_estimated_load;
  652. component->deinit_data = _worker_component_deinit_data;
  653. starpu_bitmap_set(component->workers, workerid);
  654. starpu_bitmap_or(component->workers_in_ctx, component->workers);
  655. _worker_components[workerid] = component;
  656. #ifdef STARPU_HAVE_HWLOC
  657. struct _starpu_machine_config *config = _starpu_get_machine_config();
  658. struct _starpu_machine_topology *topology = &config->topology;
  659. hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->cpu_depth, worker->bindid);
  660. STARPU_ASSERT(obj);
  661. component->obj = obj;
  662. #endif
  663. return component;
  664. }
  665. static struct starpu_sched_component * starpu_sched_component_combined_worker_create(int workerid)
  666. {
  667. STARPU_ASSERT(0 <= workerid && workerid < STARPU_NMAXWORKERS);
  668. if(_worker_components[workerid])
  669. return _worker_components[workerid];
  670. struct _starpu_combined_worker * combined_worker = _starpu_get_combined_worker_struct(workerid);
  671. if(combined_worker == NULL)
  672. return NULL;
  673. struct starpu_sched_component * component = starpu_sched_component_create();
  674. struct _starpu_worker_component_data * data = malloc(sizeof(*data));
  675. memset(data, 0, sizeof(*data));
  676. data->combined_worker = combined_worker;
  677. data->status = COMPONENT_STATUS_SLEEPING;
  678. component->data = data;
  679. component->push_task = combined_worker_push_task;
  680. component->pop_task = NULL;
  681. component->estimated_end = combined_worker_estimated_end;
  682. component->estimated_load = combined_worker_estimated_load;
  683. component->can_pull = combined_worker_can_pull;
  684. component->deinit_data = _worker_component_deinit_data;
  685. starpu_bitmap_set(component->workers, workerid);
  686. starpu_bitmap_or(component->workers_in_ctx, component->workers);
  687. _worker_components[workerid] = component;
  688. #ifdef STARPU_HAVE_HWLOC
  689. struct _starpu_machine_config *config = _starpu_get_machine_config();
  690. struct _starpu_machine_topology *topology = &config->topology;
  691. hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->cpu_depth, combined_worker->combined_workerid[0]);
  692. STARPU_ASSERT(obj);
  693. component->obj = obj;
  694. #endif
  695. return component;
  696. }
  697. int starpu_sched_component_is_simple_worker(struct starpu_sched_component * component)
  698. {
  699. return component->push_task == simple_worker_push_task;
  700. }
  701. int starpu_sched_component_is_combined_worker(struct starpu_sched_component * component)
  702. {
  703. return component->push_task == combined_worker_push_task;
  704. }
  705. int starpu_sched_component_is_worker(struct starpu_sched_component * component)
  706. {
  707. return starpu_sched_component_is_simple_worker(component)
  708. || starpu_sched_component_is_combined_worker(component);
  709. }
  710. #ifndef STARPU_NO_ASSERT
  711. static int _worker_consistant(struct starpu_sched_component * component)
  712. {
  713. int is_a_worker = 0;
  714. int i;
  715. for(i = 0; i<STARPU_NMAXWORKERS; i++)
  716. if(_worker_components[i] == component)
  717. is_a_worker = 1;
  718. if(!is_a_worker)
  719. return 0;
  720. struct _starpu_worker_component_data * data = component->data;
  721. if(data->worker)
  722. {
  723. int id = data->worker->workerid;
  724. return (_worker_components[id] == component)
  725. && component->nchildren == 0;
  726. }
  727. return 1;
  728. }
  729. #endif
  730. int starpu_sched_component_worker_get_workerid(struct starpu_sched_component * worker_component)
  731. {
  732. #ifndef STARPU_NO_ASSERT
  733. STARPU_ASSERT(_worker_consistant(worker_component));
  734. #endif
  735. STARPU_ASSERT(1 == starpu_bitmap_cardinal(worker_component->workers));
  736. return starpu_bitmap_first(worker_component->workers);
  737. }
  738. static struct _starpu_worker_task_list * _worker_get_list(void)
  739. {
  740. int workerid = starpu_worker_get_id();
  741. STARPU_ASSERT(0 <= workerid && workerid < (int) starpu_worker_get_count());
  742. struct _starpu_worker_component_data * d = starpu_sched_component_worker_get(workerid)->data;
  743. return d->list;
  744. }
  745. void starpu_sched_component_worker_pre_exec_hook(struct starpu_task * task)
  746. {
  747. if(!isnan(task->predicted))
  748. {
  749. struct _starpu_worker_task_list * list = _worker_get_list();
  750. STARPU_PTHREAD_MUTEX_LOCK(&list->mutex);
  751. list->exp_start = starpu_timing_now() + task->predicted;
  752. if(list->ntasks == 0)
  753. {
  754. list->exp_end = list->exp_start;
  755. list->exp_len = 0.0;
  756. }
  757. else
  758. list->exp_end = list->exp_start + list->exp_len;
  759. STARPU_PTHREAD_MUTEX_UNLOCK(&list->mutex);
  760. }
  761. }
  762. void starpu_sched_component_worker_post_exec_hook(struct starpu_task * task)
  763. {
  764. if(task->execute_on_a_specific_worker)
  765. return;
  766. struct _starpu_worker_task_list * list = _worker_get_list();
  767. STARPU_PTHREAD_MUTEX_LOCK(&list->mutex);
  768. list->exp_start = starpu_timing_now();
  769. list->exp_end = list->exp_start + list->exp_len;
  770. STARPU_PTHREAD_MUTEX_UNLOCK(&list->mutex);
  771. }
  #if 0
  /* Disabled code (compiled out by the surrounding #if 0).
   *
   * Notification that a task was pushed to worker `workerid`: accounts for
   * the task's predicted transfer time and predicted length in the expected
   * end and expected length of that worker's task list, and increments the
   * list's task count. Combined workers are skipped.
   */
  static void starpu_sched_component_worker_push_task_notify(struct starpu_task * task, int workerid, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)
  {
      struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(workerid);

      /* Parallel tasks are not supported here. */
      if (starpu_sched_component_is_combined_worker(worker_component))
          return;

      struct _starpu_worker_component_data * data = worker_component->data;
      struct _starpu_worker_task_list * queue = data->list;

      /* Compute the expected penalty */
      enum starpu_perfmodel_archtype perf_arch = starpu_worker_get_perf_archtype(workerid);
      unsigned memory_node = starpu_worker_get_memory_node(workerid);
      double predicted = starpu_task_expected_length(task, perf_arch,
                                                     starpu_task_get_implementation(task));
      double predicted_transfer = starpu_task_expected_data_transfer_time(memory_node, task);

      /* Update the predictions */
      STARPU_PTHREAD_MUTEX_LOCK(&queue->mutex);

      /* Workers sometimes pick tasks up later than we expected */
      queue->exp_start = STARPU_MAX(queue->exp_start, starpu_timing_now());
      queue->exp_end = queue->exp_start + queue->exp_len;

      /* Without a transfer prediction, the transfer is considered free */
      if (!isnan(predicted_transfer))
      {
          /* If the transfer can be finished by the time the task starts,
           * it costs nothing; otherwise only the remainder beyond the
           * current expected end is taken into account. */
          predicted_transfer = (starpu_timing_now() + predicted_transfer < queue->exp_end)
              ? 0
              : (starpu_timing_now() + predicted_transfer) - queue->exp_end;

          task->predicted_transfer = predicted_transfer;
          queue->exp_end += predicted_transfer;
          queue->exp_len += predicted_transfer;
      }

      /* Without a length prediction, the task is considered to have a null length */
      if (!isnan(predicted))
      {
          task->predicted = predicted;
          queue->exp_end += predicted;
          queue->exp_len += predicted;
      }

      queue->ntasks++;
      STARPU_PTHREAD_MUTEX_UNLOCK(&queue->mutex);
  }
  #endif