work_stealing_policy.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2016 Université de Bordeaux
 * Copyright (C) 2010, 2011, 2012, 2013 CNRS
 * Copyright (C) 2011, 2012 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
/* Work stealing policy */

#include <float.h>

#include <core/workers.h>
#include <sched_policies/fifo_queues.h>
#include <core/debug.h>
#include <starpu_scheduler.h>

#ifdef HAVE_AYUDAME_H
#include <Ayudame.h>
#endif

/* Experimental (dead) code which needs to be tested, fixed... */
/* #define USE_OVERLOAD */
struct _starpu_work_stealing_data
{
	unsigned (*select_victim)(unsigned, int);
	struct _starpu_fifo_taskq **queue_array;
	int **proxlist;
	/* keep track of the work performed from the beginning of the
	 * algorithm to make better decisions about which queue to select
	 * when stealing or deferring work */
	unsigned last_pop_worker;
	unsigned last_push_worker;
};
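
/* Note: queue_array is indexed by global worker id (it is allocated with
 * starpu_worker_get_count() slots in initialize_ws_policy() below), not by
 * position within the scheduling context; proxlist, used by the lws variant,
 * follows the same indexing. */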

#ifdef USE_OVERLOAD

/**
 * Minimum number of tasks we wait to see processed before we start
 * estimating which worker a computation would run faster on.
 */
static int calibration_value = 0;

#endif /* USE_OVERLOAD */

/**
 * Return a worker from which a task can be stolen.
 * Selecting a worker is done in a round-robin fashion, unless
 * the worker previously selected doesn't own any task,
 * in which case we return the first non-empty worker.
 */
static unsigned select_victim_round_robin(unsigned sched_ctx_id)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
	unsigned worker = ws->last_pop_worker;
	unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
	int *workerids = NULL;
	starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids);

	/* If the worker's queue is empty, let's try the next ones */
	while (1)
	{
		unsigned ntasks;

		/* Here helgrind would shout that this is unprotected, but we
		 * are fine with getting outdated values: this is just an
		 * estimation */
		ntasks = ws->queue_array[workerids[worker]]->ntasks;
		if (ntasks)
			break;

		worker = (worker + 1) % nworkers;
		if (worker == ws->last_pop_worker)
		{
			/* We got back to the first worker,
			 * don't go into an infinite loop */
			break;
		}
	}

	ws->last_pop_worker = (worker + 1) % nworkers;

	worker = workerids[worker];
	free(workerids);

	return worker;
}
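
/* Example of the probing order above: with 4 workers in the context and
 * last_pop_worker == 2, positions 2, 3, 0, 1 are examined in turn and the
 * first non-empty queue wins; last_pop_worker is then set one past the
 * chosen position, so successive steals spread across the workers. */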

/**
 * Return a worker to which a task can be assigned.
 * Selecting a worker is done in a round-robin fashion.
 */
static unsigned select_worker_round_robin(unsigned sched_ctx_id)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
	unsigned worker = ws->last_push_worker;
	unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
	int *workerids = NULL;
	starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids);

	ws->last_push_worker = (ws->last_push_worker + 1) % nworkers;

	worker = workerids[worker];
	free(workerids);

	return worker;
}

#ifdef USE_OVERLOAD

/**
 * Return a ratio helpful to determine whether a worker is suitable to steal
 * tasks from or to put some tasks in its queue.
 *
 * \return a ratio with a positive or negative value, describing the current state of the worker:
 * a smaller value implies a faster worker with a relatively emptier queue, more suitable to put tasks into;
 * a bigger value implies a slower worker with a relatively fuller queue, more suitable to steal tasks from.
 */
static float overload_metric(unsigned sched_ctx_id, unsigned id)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
	float execution_ratio = 0.0f;
	float current_ratio = 0.0f;

	int nprocessed = _starpu_get_deque_nprocessed(ws->queue_array[id]);
	unsigned njobs = _starpu_get_deque_njobs(ws->queue_array[id]);

	/* Did we get enough information? */
	if (performed_total > 0 && nprocessed > 0)
	{
		/* How fast or slow is the worker compared to the other workers */
		execution_ratio = (float) nprocessed / performed_total;
		/* How full is its queue */
		current_ratio = (float) njobs / nprocessed;
	}
	else
	{
		return 0.0f;
	}

	return (current_ratio - execution_ratio);
}
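
/* Worked example, assuming performed_total holds the total number of tasks
 * processed so far (this dead code path expects such a counter to exist):
 * a worker that processed 10 of 100 tasks (execution_ratio = 0.1) and still
 * has 5 queued jobs (current_ratio = 5/10 = 0.5) scores 0.5 - 0.1 = 0.4;
 * higher scores flag fuller, slower workers, i.e. better steal victims. */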

/**
 * Return the most suitable worker from which a task can be stolen.
 * The number of previously processed tasks, total and local,
 * and the number of tasks currently awaiting processing on each
 * worker are taken into account to select the most suitable
 * worker to steal tasks from.
 */
static unsigned select_victim_overload(unsigned sched_ctx_id)
{
	unsigned worker;
	float worker_ratio;
	unsigned best_worker = 0;
	/* start below any reachable metric value, since the metric can be
	 * negative */
	float best_ratio = -FLT_MAX;

	/* Don't try to play smart until we get
	 * enough information. */
	if (performed_total < calibration_value)
		return select_victim_round_robin(sched_ctx_id);

	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	struct starpu_sched_ctx_iterator it;

	workers->init_iterator(workers, &it);
	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		worker_ratio = overload_metric(sched_ctx_id, worker);

		if (worker_ratio > best_ratio)
		{
			best_worker = worker;
			best_ratio = worker_ratio;
		}
	}

	return best_worker;
}

/**
 * Return the most suitable worker to which a task can be assigned.
 * The number of previously processed tasks, total and local,
 * and the number of tasks currently awaiting processing on each
 * worker are taken into account to select the most suitable
 * worker to add a task to.
 */
static unsigned select_worker_overload(unsigned sched_ctx_id)
{
	unsigned worker;
	float worker_ratio;
	unsigned best_worker = 0;
	float best_ratio = FLT_MAX;

	/* Don't try to play smart until we get
	 * enough information. */
	if (performed_total < calibration_value)
		return select_worker_round_robin(sched_ctx_id);

	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	struct starpu_sched_ctx_iterator it;

	workers->init_iterator(workers, &it);
	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		worker_ratio = overload_metric(sched_ctx_id, worker);

		if (worker_ratio < best_ratio)
		{
			best_worker = worker;
			best_ratio = worker_ratio;
		}
	}

	return best_worker;
}

#endif /* USE_OVERLOAD */

/**
 * Return a worker from which a task can be stolen.
 * This is a phony function used to call the right
 * function depending on the value of USE_OVERLOAD.
 */
static inline unsigned select_victim(unsigned sched_ctx_id,
				     int workerid STARPU_ATTRIBUTE_UNUSED)
{
#ifdef USE_OVERLOAD
	return select_victim_overload(sched_ctx_id);
#else
	return select_victim_round_robin(sched_ctx_id);
#endif /* USE_OVERLOAD */
}

/**
 * Return a worker to which a task can be assigned.
 * This is a phony function used to call the right
 * function depending on the value of USE_OVERLOAD.
 */
static inline unsigned select_worker(unsigned sched_ctx_id)
{
#ifdef USE_OVERLOAD
	return select_worker_overload(sched_ctx_id);
#else
	return select_worker_round_robin(sched_ctx_id);
#endif /* USE_OVERLOAD */
}
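
/* Note: the overload-based selectors are compiled in only when USE_OVERLOAD
 * is defined (see the commented-out #define at the top of this file); that
 * path is experimental and currently dead, so by default both wrappers fall
 * through to the round-robin variants. */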

/* Note: this is not scalable work stealing, use lws instead */
static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
	struct starpu_task *task;

	int workerid = starpu_worker_get_id();
	STARPU_ASSERT(workerid != -1);

	task = _starpu_fifo_pop_task(ws->queue_array[workerid], workerid);
	if (task)
	{
		/* there was a local task */
		return task;
	}

	starpu_pthread_mutex_t *worker_sched_mutex;
	starpu_pthread_cond_t *worker_sched_cond;
	starpu_worker_get_sched_condition(workerid, &worker_sched_mutex, &worker_sched_cond);

	/* we need to steal someone's job */
	unsigned victim = ws->select_victim(sched_ctx_id, workerid);

	/* Note: release this mutex before taking the victim's mutex, to avoid
	 * a deadlock */
	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(worker_sched_mutex);

	starpu_pthread_mutex_t *victim_sched_mutex;
	starpu_pthread_cond_t *victim_sched_cond;
	starpu_worker_get_sched_condition(victim, &victim_sched_mutex, &victim_sched_cond);
	STARPU_PTHREAD_MUTEX_LOCK_SCHED(victim_sched_mutex);

	if (ws->queue_array[victim] != NULL && ws->queue_array[victim]->ntasks > 0)
		task = _starpu_fifo_pop_task(ws->queue_array[victim], workerid);

	if (task)
	{
		_STARPU_TRACE_WORK_STEALING(workerid, victim);
	}

	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(victim_sched_mutex);
	STARPU_PTHREAD_MUTEX_LOCK_SCHED(worker_sched_mutex);

	if(!task)
	{
		/* last chance: a task may have been pushed to our own queue
		 * while we were trying to steal */
		if (ws->queue_array[workerid] != NULL && ws->queue_array[workerid]->ntasks > 0)
			task = _starpu_fifo_pop_task(ws->queue_array[workerid], workerid);
		if (task)
		{
			/* there was a local task */
			return task;
		}
	}

	return task;
}
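
/* Locking contract: ws_pop_task() unlocks worker_sched_mutex without having
 * locked it here, so the caller is expected to hold the calling worker's
 * sched mutex on entry and gets it back before return; dropping it before
 * taking the victim's mutex is what avoids two workers deadlocking on each
 * other's locks while stealing from one another. */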

static
int ws_push_task(struct starpu_task *task)
{
	unsigned sched_ctx_id = task->sched_ctx;
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	int workerid = starpu_worker_get_id();

	/* If the current thread is not a worker but
	 * the main thread (-1), we pick a worker
	 * to put the task on its queue */
	if (workerid == -1)
		workerid = select_worker(sched_ctx_id);

	starpu_pthread_mutex_t *sched_mutex;
	starpu_pthread_cond_t *sched_cond;
	starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
	STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex);

#ifdef HAVE_AYUDAME_H
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	if (AYU_event)
	{
		intptr_t id = workerid;
		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
	}
#endif

	_starpu_fifo_push_task(ws->queue_array[workerid], task);

	starpu_push_task_end(task);

	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex);

#if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID)
	/* TODO: implement fine-grain signaling, similar to what eager does */
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	struct starpu_sched_ctx_iterator it;

	workers->init_iterator(workers, &it);
	while(workers->has_next(workers, &it))
		starpu_wake_worker(workers->get_next(workers, &it));
#endif

	return 0;
}

static void ws_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	unsigned i;
	int workerid;

	for (i = 0; i < nworkers; i++)
	{
		workerid = workerids[i];
		starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id);
		ws->queue_array[workerid] = _starpu_create_fifo();

		/* Tell helgrind that we are fine with getting outdated values:
		 * this is just an estimation */
		STARPU_HG_DISABLE_CHECKING(ws->queue_array[workerid]->ntasks);
	}
}

static void ws_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	unsigned i;
	int workerid;

	for (i = 0; i < nworkers; i++)
	{
		workerid = workerids[i];
		if (ws->queue_array[workerid] != NULL)
		{
			_starpu_destroy_fifo(ws->queue_array[workerid]);
			ws->queue_array[workerid] = NULL;
		}
		if (ws->proxlist != NULL)
			free(ws->proxlist[workerid]);
	}
}

static void initialize_ws_policy(unsigned sched_ctx_id)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)malloc(sizeof(struct _starpu_work_stealing_data));
	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)ws);

	ws->last_pop_worker = 0;
	ws->last_push_worker = 0;
	ws->proxlist = NULL;
	ws->select_victim = select_victim;

	unsigned nw = starpu_worker_get_count();
	ws->queue_array = (struct _starpu_fifo_taskq**)malloc(nw*sizeof(struct _starpu_fifo_taskq*));
}

static void deinit_ws_policy(unsigned sched_ctx_id)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	free(ws->queue_array);
	free(ws->proxlist);
	free(ws);
}

struct starpu_sched_policy _starpu_sched_ws_policy =
{
	.init_sched = initialize_ws_policy,
	.deinit_sched = deinit_ws_policy,
	.add_workers = ws_add_workers,
	.remove_workers = ws_remove_workers,
	.push_task = ws_push_task,
	.pop_task = ws_pop_task,
	.pre_exec_hook = NULL,
	.post_exec_hook = NULL,
	.pop_every_task = NULL,
	.policy_name = "ws",
	.policy_description = "work stealing",
	.worker_type = STARPU_WORKER_LIST,
};
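
/* Usage sketch: applications normally select a policy by name rather than
 * referencing this structure directly. Assuming the standard public entry
 * points starpu_conf_init()/starpu_init(), something like:
 *
 *	struct starpu_conf conf;
 *	starpu_conf_init(&conf);
 *	conf.sched_policy_name = "ws";   (or "lws", defined below)
 *	starpu_init(&conf);
 *
 * Setting STARPU_SCHED=ws in the environment achieves the same. */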

/* local work stealing policy */

/* Return a worker to steal a task from. The worker is selected according to
 * the proximity list built using the information on the architecture provided
 * by hwloc */
static unsigned lws_select_victim(unsigned sched_ctx_id, int workerid)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data *)starpu_sched_ctx_get_policy_data(sched_ctx_id);
	int nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
	int neighbor;
	int i;

	for (i = 0; i < nworkers; i++)
	{
		neighbor = ws->proxlist[workerid][i];
		int ntasks = ws->queue_array[neighbor]->ntasks;
		if (ntasks)
			return neighbor;
	}

	return workerid;
}
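
/* With hwloc, proxlist[workerid] enumerates the context's workers roughly
 * from topologically closest to farthest (see lws_add_workers() below), so
 * the first non-empty queue found here belongs to a near neighbour and the
 * stolen task's data is more likely to still be warm in a shared cache. */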

static void lws_add_workers(unsigned sched_ctx_id, int *workerids,
			    unsigned nworkers)
{
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	unsigned i;
	int workerid;

	ws_add_workers(sched_ctx_id, workerids, nworkers);

#ifdef STARPU_HAVE_HWLOC
	/* Build a proximity list for every worker. It is cheaper to
	 * build this once and then use it for popping tasks rather
	 * than traversing the hwloc tree every time a task must be
	 * stolen */
	ws->proxlist = (int**)malloc(starpu_worker_get_count()*sizeof(int*));
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	struct starpu_tree *tree = (struct starpu_tree*)workers->workerids;
	for (i = 0; i < nworkers; i++)
	{
		workerid = workerids[i];
		ws->proxlist[workerid] = (int*)malloc(nworkers*sizeof(int));
		int bindid;

		struct starpu_tree *neighbour = NULL;
		struct starpu_sched_ctx_iterator it;

		workers->init_iterator(workers, &it);

		bindid = starpu_worker_get_bindid(workerid);
		it.value = starpu_tree_get(tree, bindid);
		int cnt = 0;
		for(;;)
		{
			neighbour = (struct starpu_tree*)it.value;
			int neigh_workerids[STARPU_NMAXWORKERS];
			int neigh_nworkers = starpu_worker_get_workerids(neighbour->id, neigh_workerids);
			int w;
			for(w = 0; w < neigh_nworkers; w++)
			{
				if(!it.visited[neigh_workerids[w]] && workers->present[neigh_workerids[w]])
				{
					ws->proxlist[workerid][cnt++] = neigh_workerids[w];
					it.visited[neigh_workerids[w]] = 1;
				}
			}
			if(!workers->has_next(workers, &it))
				break;
			it.value = it.possible_value;
			it.possible_value = NULL;
		}
	}
#endif
}
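
/* Memory note: the per-worker proxlist entries allocated here are freed in
 * ws_remove_workers(), and the proxlist array itself in deinit_ws_policy(),
 * both of which are shared with the plain ws policy above. */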

static void initialize_lws_policy(unsigned sched_ctx_id)
{
	/* lws is loosely based on ws, except that it might use hwloc. */
	initialize_ws_policy(sched_ctx_id);

#ifdef STARPU_HAVE_HWLOC
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data *)starpu_sched_ctx_get_policy_data(sched_ctx_id);
	ws->select_victim = lws_select_victim;
#endif
}

struct starpu_sched_policy _starpu_sched_lws_policy =
{
	.init_sched = initialize_lws_policy,
	.deinit_sched = deinit_ws_policy,
	.add_workers = lws_add_workers,
	.remove_workers = ws_remove_workers,
	.push_task = ws_push_task,
	.pop_task = ws_pop_task,
	.pre_exec_hook = NULL,
	.post_exec_hook = NULL,
	.pop_every_task = NULL,
	.policy_name = "lws",
	.policy_description = "locality work stealing",
#ifdef STARPU_HAVE_HWLOC
	.worker_type = STARPU_WORKER_TREE,
#else
	.worker_type = STARPU_WORKER_LIST,
#endif
};