deque_modeling_policy_data_aware.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010, 2011  Université de Bordeaux 1
 * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
 * Copyright (C) 2011  Télécom-SudParis
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
/* Distributed queues using performance modeling to assign tasks */
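
/* Overview: this file implements the "dm" family of push-time schedulers.
 * Each worker owns a FIFO; a task is pushed to the worker whose queue is
 * predicted to finish it earliest.  The "dmda" variant additionally weighs
 * expected data-transfer time and power consumption through the alpha, beta
 * and gamma knobs declared below; "dmdas" keeps each queue sorted by task
 * priority, and "dmdar" pops the task whose data is already resident first. */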
#include <limits.h>
#include <stdlib.h> /* getenv(), atof() */

#include <core/workers.h>
#include <sched_policies/fifo_queues.h>
#include <core/perfmodel/perfmodel.h>
#include <starpu_parameters.h>
static unsigned nworkers;

static struct starpu_fifo_taskq_s *queue_array[STARPU_NMAXWORKERS];

static pthread_cond_t sched_cond[STARPU_NMAXWORKERS];
static pthread_mutex_t sched_mutex[STARPU_NMAXWORKERS];

static double alpha = STARPU_DEFAULT_ALPHA;
static double beta = STARPU_DEFAULT_BETA;
static double _gamma = STARPU_DEFAULT_GAMMA;
static double idle_power = 0.0;

#ifdef STARPU_VERBOSE
static long int total_task_cnt = 0;
static long int ready_task_cnt = 0;
#endif
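
/* Count how many of the task's data buffers do not yet have a valid copy on
 * the given memory node, i.e. how many transfers a worker on that node would
 * still have to wait for before the task could start. */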
static int count_non_ready_buffers(struct starpu_task *task, uint32_t node)
{
	int cnt = 0;

	starpu_buffer_descr *descrs = task->buffers;
	unsigned nbuffers = task->cl->nbuffers;

	unsigned index;
	for (index = 0; index < nbuffers; index++)
	{
		starpu_buffer_descr *descr;
		starpu_data_handle handle;

		descr = &descrs[index];
		handle = descr->handle;

		int is_valid;
		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);

		if (!is_valid)
			cnt++;
	}

	return cnt;
}
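
/* Scan the queue starting from the back and pop the candidate with the
 * fewest non-ready buffers on this memory node, so that the worker does not
 * stall on pending transfers; stop as soon as a fully-ready task is found. */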
static struct starpu_task *_starpu_fifo_pop_first_ready_task(struct starpu_fifo_taskq_s *fifo_queue, unsigned node)
{
	struct starpu_task *task = NULL, *current;

	if (fifo_queue->ntasks == 0)
		return NULL;

	fifo_queue->ntasks--;

	task = starpu_task_list_back(&fifo_queue->taskq);

	int first_task_priority = task->priority;

	current = task;

	int non_ready_best = INT_MAX;

	while (current)
	{
		int priority = current->priority;

		if (priority <= first_task_priority)
		{
			int non_ready = count_non_ready_buffers(current, node);
			if (non_ready < non_ready_best)
			{
				non_ready_best = non_ready;
				task = current;

				if (non_ready == 0)
					break;
			}
		}

		current = current->prev;
	}

	starpu_task_list_erase(&fifo_queue->taskq, task);

	STARPU_TRACE_JOB_POP(task, 0);

	return task;
}
static struct starpu_task *dmda_pop_ready_task(void)
{
	struct starpu_task *task;

	int workerid = starpu_worker_get_id();
	struct starpu_fifo_taskq_s *fifo = queue_array[workerid];

	unsigned node = starpu_worker_get_memory_node(workerid);

	task = _starpu_fifo_pop_first_ready_task(fifo, node);
	if (task)
	{
		double model = task->predicted;

		fifo->exp_len -= model;
		fifo->exp_start = starpu_timing_now() + model;
		fifo->exp_end = fifo->exp_start + fifo->exp_len;

#ifdef STARPU_VERBOSE
		if (task->cl)
		{
			int non_ready = count_non_ready_buffers(task, node);
			if (non_ready == 0)
				ready_task_cnt++;
		}

		total_task_cnt++;
#endif
	}

	return task;
}
static struct starpu_task *dmda_pop_task(void)
{
	struct starpu_task *task;

	int workerid = starpu_worker_get_id();
	struct starpu_fifo_taskq_s *fifo = queue_array[workerid];

	task = _starpu_fifo_pop_task(fifo, -1);
	if (task)
	{
		double model = task->predicted;

		fifo->exp_len -= model;
		fifo->exp_start = starpu_timing_now() + model;
		fifo->exp_end = fifo->exp_start + fifo->exp_len;

#ifdef STARPU_VERBOSE
		if (task->cl)
		{
			int non_ready = count_non_ready_buffers(task, starpu_worker_get_memory_node(workerid));
			if (non_ready == 0)
				ready_task_cnt++;
		}

		total_task_cnt++;
#endif
	}

	return task;
}
static struct starpu_task *dmda_pop_every_task(void)
{
	struct starpu_task *new_list, *task;

	int workerid = starpu_worker_get_id();
	struct starpu_fifo_taskq_s *fifo = queue_array[workerid];

	new_list = _starpu_fifo_pop_every_task(fifo, &sched_mutex[workerid], workerid);

	/* Update the per-worker estimates for every popped task, but iterate
	 * with a separate cursor: advancing new_list itself would make this
	 * function always return NULL and lose the popped tasks. */
	for (task = new_list; task; task = task->next)
	{
		double model = task->predicted;

		fifo->exp_len -= model;
		fifo->exp_start = starpu_timing_now() + model;
		fifo->exp_end = fifo->exp_start + fifo->exp_len;
	}

	return new_list;
}
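
/* Insert a task while keeping the queue sorted by increasing priority, so
 * that the pop side, which takes tasks from the back of the list, always
 * sees the highest-priority work first.  For example, pushing priorities
 * 5, 1, 3 yields the list [1, 3, 5] with 5 at the tail. */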
static int _starpu_fifo_push_sorted_task(struct starpu_fifo_taskq_s *fifo_queue, pthread_mutex_t *sched_mutex, pthread_cond_t *sched_cond, struct starpu_task *task)
{
	struct starpu_task_list *list = &fifo_queue->taskq;

	PTHREAD_MUTEX_LOCK(sched_mutex);

	STARPU_TRACE_JOB_PUSH(task, 0);

	if (list->head == NULL)
	{
		list->head = task;
		list->tail = task;
		task->prev = NULL;
		task->next = NULL;
	}
	else
	{
		struct starpu_task *current = list->head;
		struct starpu_task *prev = NULL;

		while (current)
		{
			if (current->priority >= task->priority)
				break;

			prev = current;
			current = current->next;
		}

		if (prev == NULL)
		{
			/* Insert at the front of the list */
			list->head->prev = task;
			task->prev = NULL;
			task->next = list->head;
			list->head = task;
		}
		else
		{
			if (current)
			{
				/* Insert between prev and current */
				task->prev = prev;
				prev->next = task;
				task->next = current;
				current->prev = task;
			}
			else
			{
				/* Insert at the tail of the list */
				list->tail->next = task;
				task->next = NULL;
				task->prev = list->tail;
				list->tail = task;
			}
		}
	}

	fifo_queue->ntasks++;
	fifo_queue->nprocessed++;

	PTHREAD_COND_SIGNAL(sched_cond);
	PTHREAD_MUTEX_UNLOCK(sched_mutex);

	return 0;
}
static int push_task_on_best_worker(struct starpu_task *task, int best_workerid, double predicted, int prio)
{
	/* make sure someone could execute that task! */
	STARPU_ASSERT(best_workerid != -1);

	struct starpu_fifo_taskq_s *fifo;
	fifo = queue_array[best_workerid];

	fifo->exp_end += predicted;
	fifo->exp_len += predicted;

	task->predicted = predicted;
	/* TODO predicted_transfer */

	unsigned memory_node = starpu_worker_get_memory_node(best_workerid);

	if (starpu_get_prefetch_flag())
		starpu_prefetch_task_input_on_node(task, memory_node);

	if (prio)
		return _starpu_fifo_push_sorted_task(queue_array[best_workerid],
			&sched_mutex[best_workerid], &sched_cond[best_workerid], task);
	else
		return _starpu_fifo_push_task(queue_array[best_workerid],
			&sched_mutex[best_workerid], &sched_cond[best_workerid], task);
}
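
/* "dm" push: for every (worker, implementation) pair that can run the task,
 * predict when that worker would complete it (current expected queue length
 * plus the model's expected duration) and push to the earliest finisher.
 * While the performance model is still being calibrated, fall back to the
 * worker with the fewest queued tasks relative to its speed, so that
 * measurements keep coming in on every architecture. */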
static int _dm_push_task(struct starpu_task *task, unsigned prio)
{
	/* find the queue */
	struct starpu_fifo_taskq_s *fifo;
	unsigned worker;
	int best = -1;

	double best_exp_end = 0.0;
	double model_best = 0.0;

	int ntasks_best = -1;
	double ntasks_best_end = 0.0;
	int calibrating = 0;

	/* A priori, we know all estimations */
	int unknown = 0;

	unsigned best_impl = 0;
	unsigned nimpl;

	for (worker = 0; worker < nworkers; worker++)
	{
		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
		{
			double exp_end;

			fifo = queue_array[worker];

			/* Sometimes workers didn't take the tasks as early as we expected */
			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
			fifo->exp_end = fifo->exp_start + fifo->exp_len;

			if (!starpu_worker_may_execute_task(worker, task, nimpl))
			{
				/* no one on that queue may execute this task */
				continue;
			}

			enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
			double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);

			//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u)\n", local_length, worker, nimpl);

			if (ntasks_best == -1
			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			    || (!calibrating && local_length == -1.0) /* Not calibrating but this worker is being calibrated */
			    || (calibrating && local_length == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			   )
			{
				ntasks_best_end = ntasks_end;
				ntasks_best = worker;
			}

			if (local_length == -1.0)
				/* we are calibrating, we want to speed-up calibration time
				 * so we privilege non-calibrated tasks (but still
				 * greedily distribute them to avoid dumb schedules) */
				calibrating = 1;

			if (local_length <= 0.0)
				/* there is no prediction available for that task
				 * with that arch yet, so switch to a greedy strategy */
				unknown = 1;

			if (unknown)
				continue;

			exp_end = fifo->exp_start + fifo->exp_len + local_length;

			if (best == -1 || exp_end < best_exp_end)
			{
				/* a better solution was found */
				best_exp_end = exp_end;
				best = worker;
				model_best = local_length;
				best_impl = nimpl;
			}
		}
	}

	if (unknown)
	{
		best = ntasks_best;
		model_best = 0.0;
	}

	//_STARPU_DEBUG("Scheduler dm: kernel (%u)\n", best_impl);
	_starpu_get_job_associated_to_task(task)->nimpl = best_impl;

	/* we should now have the best worker in variable "best" */
	return push_task_on_best_worker(task, best, model_best, prio);
}
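
/* "dmda" push: same structure as _dm_push_task, but the final choice
 * minimizes a weighted fitness rather than raw completion time:
 *
 *   fitness[w] = alpha * (exp_end[w] - best_exp_end)  // lateness vs. best
 *              + beta  * local_data_penalty[w]        // expected transfer time
 *              + gamma * local_power[w]               // expected energy
 *
 * plus a gamma * idle_power term when placing the task on w would lengthen
 * the overall makespan and leave the other workers idling. */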
static int _dmda_push_task(struct starpu_task *task, unsigned prio)
{
	/* find the queue */
	struct starpu_fifo_taskq_s *fifo;
	unsigned worker;
	int best = -1;

	/* this flag is set if the corresponding worker is selected because
	   there is no performance prediction available yet */
	int forced_best = -1;

	double local_task_length[nworkers];
	double local_data_penalty[nworkers];
	double local_power[nworkers];
	double exp_end[nworkers];
	double max_exp_end = 0.0;

	double fitness[nworkers];

	double best_exp_end = 10e240;
	double model_best = 0.0;
	//double penality_best = 0.0;

	int ntasks_best = -1;
	double ntasks_best_end = 0.0;
	int calibrating = 0;

	/* A priori, we know all estimations */
	int unknown = 0;

	unsigned best_impl = 0;
	unsigned nimpl = 0;

	for (worker = 0; worker < nworkers; worker++)
	{
		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
		{
			fifo = queue_array[worker];

			/* Sometimes workers didn't take the tasks as early as we expected */
			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
			fifo->exp_end = fifo->exp_start + fifo->exp_len;
			if (fifo->exp_end > max_exp_end)
				max_exp_end = fifo->exp_end;

			if (!starpu_worker_may_execute_task(worker, task, nimpl))
			{
				/* no one on that queue may execute this task */
				continue;
			}

			enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
			local_task_length[worker] = starpu_task_expected_length(task, perf_arch, nimpl);

			//_STARPU_DEBUG("Scheduler dmda: task length (%lf) worker (%u) kernel (%u)\n", local_task_length[worker], worker, nimpl);

			unsigned memory_node = starpu_worker_get_memory_node(worker);
			local_data_penalty[worker] = starpu_task_expected_data_transfer_time(memory_node, task);

			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);

			if (ntasks_best == -1
			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			    || (!calibrating && local_task_length[worker] == -1.0) /* Not calibrating but this worker is being calibrated */
			    || (calibrating && local_task_length[worker] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			   )
			{
				ntasks_best_end = ntasks_end;
				ntasks_best = worker;
			}

			if (local_task_length[worker] == -1.0)
				/* we are calibrating, we want to speed-up calibration time
				 * so we privilege non-calibrated tasks (but still
				 * greedily distribute them to avoid dumb schedules) */
				calibrating = 1;

			if (local_task_length[worker] <= 0.0)
				/* there is no prediction available for that task
				 * with that arch yet, so switch to a greedy strategy */
				unknown = 1;

			if (unknown)
				continue;

			exp_end[worker] = fifo->exp_start + fifo->exp_len + local_task_length[worker];

			if (exp_end[worker] < best_exp_end)
			{
				/* a better solution was found */
				best_exp_end = exp_end[worker];
				best_impl = nimpl;
			}

			local_power[worker] = starpu_task_expected_power(task, perf_arch, nimpl);
			if (local_power[worker] == -1.0)
				local_power[worker] = 0.;
		}
	}

	if (unknown)
		forced_best = ntasks_best;

	double best_fitness = -1;

	if (forced_best == -1)
	{
		for (worker = 0; worker < nworkers; worker++)
		{
			fifo = queue_array[worker];

			if (!starpu_worker_may_execute_task(worker, task, 0))
			{
				/* no one on that queue may execute this task */
				continue;
			}

			fitness[worker] = alpha*(exp_end[worker] - best_exp_end)
					+ beta*(local_data_penalty[worker])
					+ _gamma*(local_power[worker]);

			if (exp_end[worker] > max_exp_end)
				/* This placement will make the computation
				 * longer, take into account the idle
				 * consumption of other cpus */
				fitness[worker] += _gamma * idle_power * (exp_end[worker] - max_exp_end) / 1000000.0;

			if (best == -1 || fitness[worker] < best_fitness)
			{
				/* we found a better solution */
				best_fitness = fitness[worker];
				best = worker;

				//_STARPU_DEBUG("best fitness (worker %d) %e = alpha*(%e) + beta(%e) + gamma(%e)\n", worker, best_fitness, exp_end[worker] - best_exp_end, local_data_penalty[worker], local_power[worker]);
			}
		}
	}

	STARPU_ASSERT(forced_best != -1 || best != -1);

	if (forced_best != -1)
	{
		/* there is no prediction available for that task
		 * with that arch we want to speed-up calibration time
		 * so we force this measurement */
		best = forced_best;
		model_best = 0.0;
		//penality_best = 0.0;
	}
	else
	{
		model_best = local_task_length[best];
		//penality_best = local_data_penalty[best];
	}

	//_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", best_impl);
	_starpu_get_job_associated_to_task(task)->nimpl = best_impl;

	/* we should now have the best worker in variable "best" */
	return push_task_on_best_worker(task, best, model_best, prio);
}
static int dmda_push_sorted_task(struct starpu_task *task)
{
	return _dmda_push_task(task, 1);
}

static int dm_push_task(struct starpu_task *task)
{
	return _dm_push_task(task, 0);
}

static int dmda_push_task(struct starpu_task *task)
{
	return _dmda_push_task(task, 0);
}
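
/* The scheduling weights can be tuned without recompiling, through the
 * environment variables read below.  For instance (values and the
 * application name are illustrative only):
 *
 *   STARPU_SCHED_ALPHA=1.0 STARPU_SCHED_BETA=2.0 \
 *   STARPU_SCHED_GAMMA=1000.0 STARPU_IDLE_POWER=100.0 ./my_app
 */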
static void initialize_dmda_policy(struct starpu_machine_topology_s *topology,
				   __attribute__ ((unused)) struct starpu_sched_policy_s *_policy)
{
	nworkers = topology->nworkers;

	const char *strval_alpha = getenv("STARPU_SCHED_ALPHA");
	if (strval_alpha)
		alpha = atof(strval_alpha);

	const char *strval_beta = getenv("STARPU_SCHED_BETA");
	if (strval_beta)
		beta = atof(strval_beta);

	const char *strval_gamma = getenv("STARPU_SCHED_GAMMA");
	if (strval_gamma)
		_gamma = atof(strval_gamma);

	const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
	if (strval_idle_power)
		idle_power = atof(strval_idle_power);

	unsigned workerid;
	for (workerid = 0; workerid < nworkers; workerid++)
	{
		queue_array[workerid] = _starpu_create_fifo();

		PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
		PTHREAD_COND_INIT(&sched_cond[workerid], NULL);

		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
	}
}
static void initialize_dmda_sorted_policy(struct starpu_machine_topology_s *topology,
					  struct starpu_sched_policy_s *_policy)
{
	initialize_dmda_policy(topology, _policy);

	/* The application may use any integer */
	starpu_sched_set_min_priority(INT_MIN);
	starpu_sched_set_max_priority(INT_MAX);
}
static void deinitialize_dmda_policy(struct starpu_machine_topology_s *topology,
				     __attribute__ ((unused)) struct starpu_sched_policy_s *_policy)
{
	unsigned workerid;
	for (workerid = 0; workerid < topology->nworkers; workerid++)
		_starpu_destroy_fifo(queue_array[workerid]);

#ifdef STARPU_VERBOSE
	/* total_task_cnt and ready_task_cnt are only declared in verbose builds */
	_STARPU_DEBUG("total_task_cnt %ld ready_task_cnt %ld -> %f\n", total_task_cnt, ready_task_cnt, (100.0f*ready_task_cnt)/total_task_cnt);
#endif
}
/* TODO: use post_exec_hook to fix the expected start */
struct starpu_sched_policy_s _starpu_sched_dm_policy = {
	.init_sched = initialize_dmda_policy,
	.deinit_sched = deinitialize_dmda_policy,
	.push_task = dm_push_task,
	.pop_task = dmda_pop_task,
	.post_exec_hook = NULL,
	.pop_every_task = dmda_pop_every_task,
	.policy_name = "dm",
	.policy_description = "performance model"
};

struct starpu_sched_policy_s _starpu_sched_dmda_policy = {
	.init_sched = initialize_dmda_policy,
	.deinit_sched = deinitialize_dmda_policy,
	.push_task = dmda_push_task,
	.pop_task = dmda_pop_task,
	.post_exec_hook = NULL,
	.pop_every_task = dmda_pop_every_task,
	.policy_name = "dmda",
	.policy_description = "data-aware performance model"
};

struct starpu_sched_policy_s _starpu_sched_dmda_sorted_policy = {
	.init_sched = initialize_dmda_sorted_policy,
	.deinit_sched = deinitialize_dmda_policy,
	.push_task = dmda_push_sorted_task,
	.pop_task = dmda_pop_ready_task,
	.post_exec_hook = NULL,
	.pop_every_task = dmda_pop_every_task,
	.policy_name = "dmdas",
	.policy_description = "data-aware performance model (sorted)"
};

struct starpu_sched_policy_s _starpu_sched_dmda_ready_policy = {
	.init_sched = initialize_dmda_policy,
	.deinit_sched = deinitialize_dmda_policy,
	.push_task = dmda_push_task,
	.pop_task = dmda_pop_ready_task,
	.post_exec_hook = NULL,
	.pop_every_task = dmda_pop_every_task,
	.policy_name = "dmdar",
	.policy_description = "data-aware performance model (ready)"
};
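
/* Usage sketch (not part of this file): an application would select one of
 * these policies by name before starting StarPU, assuming the public
 * starpu_conf API of this StarPU version:
 *
 *   struct starpu_conf conf;
 *   starpu_conf_init(&conf);
 *   conf.sched_policy_name = "dmda";   // or "dm", "dmdas", "dmdar"
 *   starpu_init(&conf);
 *
 * or equivalently by setting the STARPU_SCHED environment variable to one
 * of the policy names above. */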