bound.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  4. * Copyright (C) 2010, 2011 Université de Bordeaux 1
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /*
  19. * Record which kinds of tasks have been executed, to later on compute an upper
  20. * bound of the performance that could have theoretically been achieved
  21. */
  22. #include <starpu.h>
  23. #include <starpu_config.h>
  24. #include <profiling/bound.h>
  25. #include <core/jobs.h>
  26. #ifdef HAVE_GLPK_H
  27. #include <glpk.h>
  28. #endif /* HAVE_GLPK_H */
  29. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
  30. /*
  31. * Record without dependencies: just count each kind of task
  32. *
  33. * The linear programming problem will just have as variables:
  34. * - the number of tasks of kind `t' executed by worker `w'
  35. * - the total duration
  36. *
  37. * and the constraints will be:
  38. * - the time taken by each worker to complete its assigned tasks is lower than
  39. * the total duration.
  40. * - the total numer of tasks of a given kind is equal to the number run by the
  41. * application.
  42. */
  43. struct bound_task_pool
  44. {
  45. /* Which codelet has been executed */
  46. struct starpu_codelet *cl;
  47. /* Task footprint key */
  48. uint32_t footprint;
  49. /* Number of tasks of this kind */
  50. unsigned long n;
  51. /* Other task kinds */
  52. struct bound_task_pool *next;
  53. };
  54. /*
  55. * Record with dependencies: each task is recorded separately
  56. *
  57. * The linear programming problem will have as variables:
  58. * - The start time of each task
  59. * - The completion time of each tag
  60. * - The total duration
  61. * - For each task and for each worker, whether the task is executing on that worker.
  62. * - For each pair of task, which task is scheduled first.
  63. *
  64. * and the constraints will be:
  65. * - All task start time plus duration are less than total duration
  66. * - Each task is executed on exactly one worker.
  67. * - Each task starts after all its task dependencies finish.
  68. * - Each task starts after all its tag dependencies finish.
  69. * - For each task pair and each worker, if both tasks are executed by that worker,
  70. * one is started after the other's completion.
  71. */
  72. /* Note: only task-task, implicit data dependencies or task-tag dependencies
  73. * are taken into account. Tags released in a callback or something like this
  74. * is not taken into account, only tags associated with a task are. */
  75. struct bound_task
  76. {
  77. /* Unique ID */
  78. unsigned long id;
  79. /* Tag ID, if any */
  80. starpu_tag_t tag_id;
  81. int use_tag;
  82. /* Which codelet has been executed */
  83. struct starpu_codelet *cl;
  84. /* Task footprint key */
  85. uint32_t footprint;
  86. /* Task priority */
  87. int priority;
  88. /* Tasks this one depends on */
  89. struct bound_task **deps;
  90. int depsn;
  91. /* Estimated duration */
  92. double duration[STARPU_NARCH_VARIATIONS];
  93. /* Other tasks */
  94. struct bound_task *next;
  95. };
  96. struct bound_tag_dep
  97. {
  98. starpu_tag_t tag;
  99. starpu_tag_t dep_tag;
  100. struct bound_tag_dep *next;
  101. };
  102. static struct bound_task_pool *task_pools, *last;
  103. static struct bound_task *tasks;
  104. static struct bound_tag_dep *tag_deps;
  105. int _starpu_bound_recording;
  106. static int recorddeps;
  107. static int recordprio;
  108. static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  109. void starpu_bound_start(int deps, int prio)
  110. {
  111. struct bound_task_pool *tp;
  112. struct bound_task *t;
  113. struct bound_tag_dep *td;
  114. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  115. tp = task_pools;
  116. task_pools = NULL;
  117. last = NULL;
  118. t = tasks;
  119. tasks = NULL;
  120. td = tag_deps;
  121. tag_deps = NULL;
  122. _starpu_bound_recording = 1;
  123. recorddeps = deps;
  124. recordprio = prio;
  125. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  126. for ( ; tp; tp = tp->next)
  127. free(tp);
  128. for ( ; t; t = t->next)
  129. free(t);
  130. for ( ; td; td = td->next)
  131. free(td);
  132. }
  133. static int good_job(struct _starpu_job *j)
  134. {
  135. /* No codelet, nothing to measure */
  136. if (j->exclude_from_dag)
  137. return 0;
  138. if (!j->task->cl)
  139. return 0;
  140. /* No performance model, no time duration estimation */
  141. if (!j->task->cl->model)
  142. return 0;
  143. /* Only support history based */
  144. if (j->task->cl->model->type != STARPU_HISTORY_BASED)
  145. return 0;
  146. return 1;
  147. }
  148. static void new_task(struct _starpu_job *j)
  149. {
  150. struct bound_task *t;
  151. if (j->bound_task)
  152. return;
  153. t = (struct bound_task *) malloc(sizeof(*t));
  154. memset(t, 0, sizeof(*t));
  155. t->id = j->job_id;
  156. t->tag_id = j->task->tag_id;
  157. t->use_tag = j->task->use_tag;
  158. t->cl = j->task->cl;
  159. t->footprint = _starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
  160. t->priority = j->task->priority;
  161. t->deps = NULL;
  162. t->depsn = 0;
  163. t->next = tasks;
  164. j->bound_task = t;
  165. tasks = t;
  166. }
  167. void _starpu_bound_record(struct _starpu_job *j)
  168. {
  169. if (!_starpu_bound_recording)
  170. return;
  171. if (!good_job(j))
  172. return;
  173. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  174. /* Re-check, this time with mutex held */
  175. if (!_starpu_bound_recording)
  176. {
  177. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  178. return;
  179. }
  180. if (recorddeps)
  181. {
  182. new_task(j);
  183. }
  184. else
  185. {
  186. struct bound_task_pool *tp;
  187. _starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
  188. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  189. tp = last;
  190. else
  191. for (tp = task_pools; tp; tp = tp->next)
  192. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  193. break;
  194. if (!tp)
  195. {
  196. tp = (struct bound_task_pool *) malloc(sizeof(*tp));
  197. tp->cl = j->task->cl;
  198. tp->footprint = j->footprint;
  199. tp->n = 0;
  200. tp->next = task_pools;
  201. task_pools = tp;
  202. }
  203. /* One more task of this kind */
  204. tp->n++;
  205. }
  206. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  207. }
  208. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  209. {
  210. struct bound_tag_dep *td;
  211. if (!_starpu_bound_recording || !recorddeps)
  212. return;
  213. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  214. /* Re-check, this time with mutex held */
  215. if (!_starpu_bound_recording || !recorddeps)
  216. {
  217. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  218. return;
  219. }
  220. td = (struct bound_tag_dep *) malloc(sizeof(*td));
  221. td->tag = id;
  222. td->dep_tag = dep_id;
  223. td->next = tag_deps;
  224. tag_deps = td;
  225. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  226. }
  227. void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
  228. {
  229. struct bound_task *t;
  230. if (!_starpu_bound_recording || !recorddeps)
  231. return;
  232. if (!good_job(j) || !good_job(dep_j))
  233. return;
  234. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  235. /* Re-check, this time with mutex held */
  236. if (!_starpu_bound_recording || !recorddeps)
  237. {
  238. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  239. return;
  240. }
  241. new_task(j);
  242. new_task(dep_j);
  243. t = j->bound_task;
  244. t->deps = (struct bound_task **) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  245. t->deps[t->depsn-1] = dep_j->bound_task;
  246. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  247. }
  248. static struct bound_task *find_job(unsigned long id)
  249. {
  250. struct bound_task *t;
  251. for (t = tasks; t; t = t->next)
  252. if (t->id == id)
  253. return t;
  254. return NULL;
  255. }
  256. void _starpu_bound_job_id_dep(struct _starpu_job *j, unsigned long id)
  257. {
  258. struct bound_task *t, *dep_t;
  259. if (!_starpu_bound_recording || !recorddeps)
  260. return;
  261. if (!good_job(j))
  262. return;
  263. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  264. /* Re-check, this time with mutex held */
  265. if (!_starpu_bound_recording || !recorddeps)
  266. {
  267. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  268. return;
  269. }
  270. new_task(j);
  271. dep_t = find_job(id);
  272. if (!dep_t)
  273. {
  274. fprintf(stderr,"dependency %lu not found !\n", id);
  275. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  276. return;
  277. }
  278. t = j->bound_task;
  279. t->deps = (struct bound_task **) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  280. t->deps[t->depsn-1] = dep_t;
  281. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  282. }
  283. void starpu_bound_stop(void)
  284. {
  285. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  286. _starpu_bound_recording = 0;
  287. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  288. }
  289. static void _starpu_get_tasks_times(int nw, int nt, double *times)
  290. {
  291. struct bound_task_pool *tp;
  292. int w, t;
  293. for (w = 0; w < nw; w++)
  294. {
  295. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  296. {
  297. struct _starpu_job j =
  298. {
  299. .footprint = tp->footprint,
  300. .footprint_is_computed = 1,
  301. };
  302. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  303. double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl);
  304. if (isnan(length))
  305. times[w*nt+t] = NAN;
  306. else
  307. times[w*nt+t] = length / 1000.;
  308. }
  309. }
  310. }
  311. static int ancestor(struct bound_task *child, struct bound_task *parent)
  312. {
  313. int i;
  314. for (i = 0; i < child->depsn; i++)
  315. {
  316. if (parent == child->deps[i])
  317. return 1;
  318. if (ancestor(child->deps[i], parent))
  319. return -1;
  320. }
  321. return 0;
  322. }
  323. void starpu_bound_print_dot(FILE *output)
  324. {
  325. struct bound_task *t;
  326. struct bound_tag_dep *td;
  327. int i;
  328. if (!recorddeps)
  329. {
  330. fprintf(output, "Not supported\n");
  331. return;
  332. }
  333. fprintf(output, "strict digraph bounddeps {\n");
  334. for (t = tasks; t; t = t->next)
  335. {
  336. fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, t->cl->name);
  337. for (i = 0; i < t->depsn; i++)
  338. fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i]->id, t->id);
  339. }
  340. for (td = tag_deps; td; td = td->next)
  341. fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
  342. fprintf(output, "}\n");
  343. }
  344. /*
  345. * lp_solve format
  346. *
  347. * When dependencies are enabled, you can check the set of tasks and deps that
  348. * were recorded by using tools/lp2paje and vite.
  349. */
  350. void starpu_bound_print_lp(FILE *output)
  351. {
  352. int nt; /* Number of different kinds of tasks */
  353. int nw; /* Number of different workers */
  354. int t, w;
  355. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  356. nw = starpu_worker_get_count();
  357. if (recorddeps)
  358. {
  359. struct bound_task *t1, *t2;
  360. struct bound_tag_dep *td;
  361. int i;
  362. nt = 0;
  363. for (t1 = tasks; t1; t1 = t1->next)
  364. {
  365. struct _starpu_job j =
  366. {
  367. .footprint = t1->footprint,
  368. .footprint_is_computed = 1,
  369. };
  370. for (w = 0; w < nw; w++)
  371. {
  372. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  373. if (_STARPU_IS_ZERO(t1->duration[arch]))
  374. {
  375. double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
  376. if (isnan(length))
  377. /* Avoid problems with binary coding of doubles */
  378. t1->duration[arch] = NAN;
  379. else
  380. t1->duration[arch] = length / 1000.;
  381. }
  382. }
  383. nt++;
  384. }
  385. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  386. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  387. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  388. fprintf(output, "min: tmax;\n\n");
  389. fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
  390. for (t1 = tasks; t1; t1 = t1->next)
  391. fprintf(output, "c%lu <= tmax;\n", t1->id);
  392. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  393. for (t1 = tasks; t1; t1 = t1->next)
  394. {
  395. for (w = 0; w < nw; w++)
  396. {
  397. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  398. if (!isnan(t1->duration[arch]))
  399. fprintf(output, " +t%luw%d", t1->id, w);
  400. }
  401. fprintf(output, " = 1;\n");
  402. }
  403. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  404. fprintf(output, "/* According to where the task is indeed executed */\n");
  405. for (t1 = tasks; t1; t1 = t1->next)
  406. {
  407. fprintf(output, "/* %s %x */\tc%lu = s%lu", t1->cl->name, (unsigned) t1->footprint, t1->id, t1->id);
  408. for (w = 0; w < nw; w++)
  409. {
  410. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  411. if (!isnan(t1->duration[arch]))
  412. fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
  413. }
  414. fprintf(output, ";\n");
  415. }
  416. fprintf(output, "\n/* Each task starts after all its task dependencies finish. */\n");
  417. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  418. for (t1 = tasks; t1; t1 = t1->next)
  419. for (i = 0; i < t1->depsn; i++)
  420. fprintf(output, "s%lu >= c%lu;\n", t1->id, t1->deps[i]->id);
  421. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
  422. for (t1 = tasks; t1; t1 = t1->next)
  423. if (t1->use_tag)
  424. {
  425. for (w = 0; w < nw; w++)
  426. fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
  427. }
  428. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  429. for (td = tag_deps; td; td = td->next)
  430. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  431. /* TODO: factorize ancestor calls */
  432. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  433. fprintf(output, " one is started after the other's completion */\n");
  434. for (t1 = tasks; t1; t1 = t1->next)
  435. {
  436. for (t2 = t1->next; t2; t2 = t2->next)
  437. {
  438. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  439. {
  440. for (w = 0; w < nw; w++)
  441. {
  442. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  443. if (!isnan(t1->duration[arch]))
  444. {
  445. fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
  446. t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
  447. fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
  448. t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
  449. }
  450. }
  451. }
  452. }
  453. }
  454. #if 0
  455. /* Doesn't help at all to actually express what "after" means */
  456. for (t1 = tasks; t1; t1 = t1->next)
  457. for (t2 = t1->next; t2; t2 = t2->next)
  458. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  459. {
  460. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  461. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  462. }
  463. #endif
  464. if (recordprio)
  465. {
  466. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  467. fprintf(output, " * more prioritized first */\n");
  468. for (t1 = tasks; t1; t1 = t1->next)
  469. {
  470. for (t2 = t1->next; t2; t2 = t2->next)
  471. {
  472. if (!ancestor(t1, t2) && !ancestor(t2, t1)
  473. && t1->priority != t2->priority)
  474. {
  475. if (t1->priority > t2->priority)
  476. {
  477. /* Either t2 is scheduled before t1, but then it
  478. needs to be scheduled before some t dep finishes */
  479. /* One of the t1 deps to give the maximum start time for t2 */
  480. if (t1->depsn > 1)
  481. {
  482. for (i = 0; i < t1->depsn; i++)
  483. fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
  484. fprintf(output, " = 1;\n");
  485. }
  486. for (i = 0; i < t1->depsn; i++)
  487. {
  488. fprintf(output, "c%lu - s%lu >= ", t1->deps[i]->id, t2->id);
  489. if (t1->depsn > 1)
  490. /* Only checks this when it's this dependency that is chosen */
  491. fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
  492. else
  493. fprintf(output, "-1e5");
  494. /* Only check this if t1 is after t2 */
  495. fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
  496. fprintf(output, ";\n");
  497. }
  498. /* Or t2 is scheduled after t1 is. */
  499. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  500. }
  501. else
  502. {
  503. /* Either t1 is scheduled before t2, but then it
  504. needs to be scheduled before some t2 dep finishes */
  505. /* One of the t2 deps to give the maximum start time for t1 */
  506. if (t2->depsn > 1)
  507. {
  508. for (i = 0; i < t2->depsn; i++)
  509. fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
  510. fprintf(output, " = 1;\n");
  511. }
  512. for (i = 0; i < t2->depsn; i++)
  513. {
  514. fprintf(output, "c%lu - s%lu >= ", t2->deps[i]->id, t1->id);
  515. if (t2->depsn > 1)
  516. /* Only checks this when it's this dependency that is chosen */
  517. fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
  518. /* Only check this if t2 is after t1 */
  519. fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
  520. }
  521. /* Or t1 is scheduled after t2 is. */
  522. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  523. }
  524. }
  525. }
  526. }
  527. }
  528. for (t1 = tasks; t1; t1 = t1->next)
  529. for (t2 = t1->next; t2; t2 = t2->next)
  530. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  531. {
  532. fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
  533. if (recordprio && t1->priority != t2->priority)
  534. {
  535. if (t1->priority > t2->priority)
  536. {
  537. if (t1->depsn > 1)
  538. for (i = 0; i < t1->depsn; i++)
  539. fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
  540. }
  541. else
  542. {
  543. if (t2->depsn > 1)
  544. for (i = 0; i < t2->depsn; i++)
  545. fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
  546. }
  547. }
  548. }
  549. for (t1 = tasks; t1; t1 = t1->next)
  550. for (w = 0; w < nw; w++)
  551. fprintf(output, "bin t%luw%d;\n", t1->id, w);
  552. }
  553. else
  554. {
  555. struct bound_task_pool *tp;
  556. nt = 0;
  557. for (tp = task_pools; tp; tp = tp->next)
  558. nt++;
  559. {
  560. double times[nw*nt];
  561. _starpu_get_tasks_times(nw, nt, times);
  562. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  563. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  564. fprintf(output, "min: tmax;\n\n");
  565. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  566. for (w = 0; w < nw; w++)
  567. {
  568. char name[32];
  569. starpu_worker_get_name(w, name, sizeof(name));
  570. fprintf(output, "/* worker %s */\n0", name);
  571. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  572. {
  573. if (!isnan(times[w*nt+t]))
  574. fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
  575. }
  576. fprintf(output, " <= tmax;\n");
  577. }
  578. fprintf(output, "\n");
  579. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  580. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  581. {
  582. fprintf(output, "/* task %s key %x */\n0", tp->cl->name, (unsigned) tp->footprint);
  583. for (w = 0; w < nw; w++)
  584. if (!isnan(times[w*nt+t]))
  585. fprintf(output, "\t+w%dt%dn", w, t);
  586. fprintf(output, " = %lu;\n", tp->n);
  587. /* Show actual values */
  588. fprintf(output, "/*");
  589. for (w = 0; w < nw; w++)
  590. fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
  591. fprintf(output, "\t*/\n\n");
  592. }
  593. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  594. fprintf(output, "/* int ");
  595. int first = 1;
  596. for (w = 0; w < nw; w++)
  597. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  598. {
  599. if (!first)
  600. fprintf(output, ",");
  601. else
  602. first = 0;
  603. fprintf(output, "w%dt%dn", w, t);
  604. }
  605. fprintf(output, "; */\n");
  606. }
  607. }
  608. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  609. }
  610. /*
  611. * MPS output format
  612. */
  613. void starpu_bound_print_mps(FILE *output)
  614. {
  615. struct bound_task_pool * tp;
  616. int nt; /* Number of different kinds of tasks */
  617. int nw; /* Number of different workers */
  618. int t, w;
  619. if (recorddeps)
  620. {
  621. fprintf(output, "Not supported\n");
  622. return;
  623. }
  624. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  625. nw = starpu_worker_get_count();
  626. nt = 0;
  627. for (tp = task_pools; tp; tp = tp->next)
  628. nt++;
  629. {
  630. double times[nw*nt];
  631. _starpu_get_tasks_times(nw, nt, times);
  632. fprintf(output, "NAME StarPU theoretical bound\n");
  633. fprintf(output, "\nROWS\n");
  634. fprintf(output, "* We want to minimize total execution time (ms)\n");
  635. fprintf(output, " N TMAX\n");
  636. fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
  637. for (w = 0; w < nw; w++)
  638. {
  639. char name[32];
  640. starpu_worker_get_name(w, name, sizeof(name));
  641. fprintf(output, "* worker %s\n", name);
  642. fprintf(output, " L W%d\n", w);
  643. }
  644. fprintf(output, "\n* And we have to have computed exactly all tasks\n");
  645. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  646. {
  647. fprintf(output, "* task %s key %x\n", tp->cl->name, (unsigned) tp->footprint);
  648. fprintf(output, " E T%d\n", t);
  649. }
  650. fprintf(output, "\nCOLUMNS\n");
  651. fprintf(output, "\n* Execution times and completion of all tasks\n");
  652. for (w = 0; w < nw; w++)
  653. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  654. if (!isnan(times[w*nt+t]))
  655. {
  656. char name[9];
  657. snprintf(name, sizeof(name), "W%dT%d", w, t);
  658. fprintf(stderr," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
  659. fprintf(stderr," %-8s T%-7d %12d\n", name, t, 1);
  660. }
  661. fprintf(output, "\n* Total execution time\n");
  662. for (w = 0; w < nw; w++)
  663. fprintf(stderr," TMAX W%-2d %12d\n", w, -1);
  664. fprintf(stderr," TMAX TMAX %12d\n", 1);
  665. fprintf(output, "\nRHS\n");
  666. fprintf(output, "\n* Total number of tasks\n");
  667. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  668. fprintf(stderr," NT%-2d T%-7d %12lu\n", t, t, tp->n);
  669. fprintf(output, "ENDATA\n");
  670. }
  671. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  672. }
  673. /*
  674. * GNU Linear Programming Kit backend
  675. */
  676. #ifdef HAVE_GLPK_H
  677. static glp_prob *_starpu_bound_glp_resolve(int integer)
  678. {
  679. struct bound_task_pool * tp;
  680. int nt; /* Number of different kinds of tasks */
  681. int nw; /* Number of different workers */
  682. int t, w;
  683. glp_prob *lp;
  684. int ret;
  685. nw = starpu_worker_get_count();
  686. nt = 0;
  687. for (tp = task_pools; tp; tp = tp->next)
  688. nt++;
  689. lp = glp_create_prob();
  690. glp_set_prob_name(lp, "StarPU theoretical bound");
  691. glp_set_obj_dir(lp, GLP_MIN);
  692. glp_set_obj_name(lp, "total execution time");
  693. {
  694. double times[nw*nt];
  695. int ne =
  696. nw * (nt+1) /* worker execution time */
  697. + nt * nw
  698. + 1; /* glp dumbness */
  699. int n = 1;
  700. int ia[ne], ja[ne];
  701. double ar[ne];
  702. _starpu_get_tasks_times(nw, nt, times);
  703. /* Variables: number of tasks i assigned to worker j, and tmax */
  704. glp_add_cols(lp, nw*nt+1);
  705. #define colnum(w, t) ((t)*nw+(w)+1)
  706. glp_set_obj_coef(lp, nw*nt+1, 1.);
  707. for (w = 0; w < nw; w++)
  708. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  709. {
  710. char name[32];
  711. snprintf(name, sizeof(name), "w%dt%dn", w, t);
  712. glp_set_col_name(lp, colnum(w, t), name);
  713. if (integer)
  714. glp_set_col_kind(lp, colnum(w, t), GLP_IV);
  715. glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
  716. }
  717. glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
  718. /* Total worker execution time */
  719. glp_add_rows(lp, nw);
  720. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  721. {
  722. int someone = 0;
  723. for (w = 0; w < nw; w++)
  724. if (!isnan(times[w*nt+t]))
  725. someone = 1;
  726. if (!someone)
  727. {
  728. /* This task does not have any performance model at all, abort */
  729. glp_delete_prob(lp);
  730. return NULL;
  731. }
  732. }
  733. for (w = 0; w < nw; w++)
  734. {
  735. char name[32], title[64];
  736. starpu_worker_get_name(w, name, sizeof(name));
  737. snprintf(title, sizeof(title), "worker %s", name);
  738. glp_set_row_name(lp, w+1, title);
  739. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  740. {
  741. ia[n] = w+1;
  742. ja[n] = colnum(w, t);
  743. if (isnan(times[w*nt+t]))
  744. ar[n] = 1000000000.;
  745. else
  746. ar[n] = times[w*nt+t];
  747. n++;
  748. }
  749. /* tmax */
  750. ia[n] = w+1;
  751. ja[n] = nw*nt+1;
  752. ar[n] = -1;
  753. n++;
  754. glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
  755. }
  756. /* Total task completion */
  757. glp_add_rows(lp, nt);
  758. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  759. {
  760. char name[32], title[64];
  761. starpu_worker_get_name(w, name, sizeof(name));
  762. snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
  763. glp_set_row_name(lp, nw+t+1, title);
  764. for (w = 0; w < nw; w++)
  765. {
  766. ia[n] = nw+t+1;
  767. ja[n] = colnum(w, t);
  768. ar[n] = 1;
  769. n++;
  770. }
  771. glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
  772. }
  773. STARPU_ASSERT(n == ne);
  774. glp_load_matrix(lp, ne-1, ia, ja, ar);
  775. }
  776. glp_smcp parm;
  777. glp_init_smcp(&parm);
  778. parm.msg_lev = GLP_MSG_OFF;
  779. ret = glp_simplex(lp, &parm);
  780. if (ret)
  781. {
  782. glp_delete_prob(lp);
  783. lp = NULL;
  784. return NULL;
  785. }
  786. if (integer)
  787. {
  788. glp_iocp iocp;
  789. glp_init_iocp(&iocp);
  790. iocp.msg_lev = GLP_MSG_OFF;
  791. glp_intopt(lp, &iocp);
  792. }
  793. return lp;
  794. }
  795. #endif /* HAVE_GLPK_H */
  796. void starpu_bound_print(FILE *output, int integer __attribute__ ((unused)))
  797. {
  798. #ifdef HAVE_GLPK_H
  799. if (recorddeps)
  800. {
  801. fprintf(output, "Not supported\n");
  802. return;
  803. }
  804. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  805. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  806. if (lp)
  807. {
  808. struct bound_task_pool * tp;
  809. int t, w;
  810. int nw; /* Number of different workers */
  811. double tmax;
  812. nw = starpu_worker_get_count();
  813. if (integer)
  814. tmax = glp_mip_obj_val(lp);
  815. else
  816. tmax = glp_get_obj_val(lp);
  817. fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
  818. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  819. {
  820. fprintf(output, "%s key %x\n", tp->cl->name, (unsigned) tp->footprint);
  821. for (w = 0; w < nw; w++)
  822. if (integer)
  823. fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
  824. else
  825. fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
  826. fprintf(output, "\n");
  827. }
  828. glp_delete_prob(lp);
  829. }
  830. else
  831. {
  832. fprintf(stderr, "Simplex failed\n");
  833. }
  834. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  835. #else /* HAVE_GLPK_H */
  836. fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  837. #endif /* HAVE_GLPK_H */
  838. }
  839. void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused)))
  840. {
  841. #ifdef HAVE_GLPK_H
  842. double ret;
  843. if (recorddeps)
  844. {
  845. *res = 0.;
  846. return;
  847. }
  848. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  849. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  850. if (lp)
  851. {
  852. ret = glp_get_obj_val(lp);
  853. if (integer)
  854. *integer_res = glp_mip_obj_val(lp);
  855. glp_delete_prob(lp);
  856. }
  857. else
  858. ret = 0.;
  859. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  860. *res = ret;
  861. #else /* HAVE_GLPK_H */
  862. *res = 0.;
  863. #endif /* HAVE_GLPK_H */
  864. }