bound.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
  4. * Copyright (C) 2010, 2011 Université de Bordeaux 1
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /*
  19. * Record which kinds of tasks have been executed, to later on compute an upper
  20. * bound of the performance that could have theoretically been achieved
  21. */
  22. #include <starpu.h>
  23. #include <starpu_config.h>
  24. #include <profiling/bound.h>
  25. #include <core/jobs.h>
  26. #ifdef HAVE_GLPK_H
  27. #include <glpk.h>
  28. #endif /* HAVE_GLPK_H */
  29. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
  30. /*
  31. * Record without dependencies: just count each kind of task
  32. *
  33. * The linear programming problem will just have as variables:
  34. * - the number of tasks of kind `t' executed by worker `w'
  35. * - the total duration
  36. *
  37. * and the constraints will be:
  38. * - the time taken by each worker to complete its assigned tasks is lower than
  39. * the total duration.
  40. * - the total number of tasks of a given kind is equal to the number run by the
  41. * application.
  42. */
  43. struct bound_task_pool {
  44. /* Which codelet has been executed */
  45. struct starpu_codelet_t *cl;
  46. /* Task footprint key */
  47. uint32_t footprint;
  48. /* Number of tasks of this kind */
  49. unsigned long n;
  50. /* Other task kinds */
  51. struct bound_task_pool *next;
  52. };
  53. /*
  54. * Record with dependencies: each task is recorded separately
  55. *
  56. * The linear programming problem will have as variables:
  57. * - The start time of each task
  58. * - The completion time of each tag
  59. * - The total duration
  60. * - For each task and for each worker, whether the task is executing on that worker.
  61. * - For each pair of task, which task is scheduled first.
  62. *
  63. * and the constraints will be:
  64. * - All task start time plus duration are less than total duration
  65. * - Each task is executed on exactly one worker.
  66. * - Each task starts after all its task dependencies finish.
  67. * - Each task starts after all its tag dependencies finish.
  68. * - For each task pair and each worker, if both tasks are executed by that worker,
  69. * one is started after the other's completion.
  70. */
  71. /* Note: only task-task, implicit data dependencies or task-tag dependencies
  72. * are taken into account. Tags released from a callback or by similar means
  73. * are not taken into account; only tags associated with a task are. */
  74. struct bound_task {
  75. /* Unique ID */
  76. unsigned long id;
  77. /* Tag ID, if any */
  78. starpu_tag_t tag_id;
  79. int use_tag;
  80. /* Which codelet has been executed */
  81. struct starpu_codelet_t *cl;
  82. /* Task footprint key */
  83. uint32_t footprint;
  84. /* Task priority */
  85. int priority;
  86. /* Tasks this one depends on */
  87. struct bound_task **deps;
  88. int depsn;
  89. /* Estimated duration */
  90. double duration[STARPU_NARCH_VARIATIONS];
  91. /* Other tasks */
  92. struct bound_task *next;
  93. };
  94. struct bound_tag_dep {
  95. starpu_tag_t tag;
  96. starpu_tag_t dep_tag;
  97. struct bound_tag_dep *next;
  98. };
  99. static struct bound_task_pool *task_pools, *last;
  100. static struct bound_task *tasks;
  101. static struct bound_tag_dep *tag_deps;
  102. int _starpu_bound_recording;
  103. static int recorddeps;
  104. static int recordprio;
  105. static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  106. void starpu_bound_start(int deps, int prio)
  107. {
  108. struct bound_task_pool *tp;
  109. struct bound_task *t;
  110. struct bound_tag_dep *td;
  111. PTHREAD_MUTEX_LOCK(&mutex);
  112. tp = task_pools;
  113. task_pools = NULL;
  114. last = NULL;
  115. t = tasks;
  116. tasks = NULL;
  117. td = tag_deps;
  118. tag_deps = NULL;
  119. _starpu_bound_recording = 1;
  120. recorddeps = deps;
  121. recordprio = prio;
  122. PTHREAD_MUTEX_UNLOCK(&mutex);
  123. for ( ; tp; tp = tp->next)
  124. free(tp);
  125. for ( ; t; t = t->next)
  126. free(t);
  127. for ( ; td; td = td->next)
  128. free(td);
  129. }
  130. static int good_job(starpu_job_t j)
  131. {
  132. /* No codelet, nothing to measure */
  133. if (j->exclude_from_dag)
  134. return 0;
  135. if (!j->task->cl)
  136. return 0;
  137. /* No performance model, no time duration estimation */
  138. if (!j->task->cl->model)
  139. return 0;
  140. /* Only support history based */
  141. if (j->task->cl->model->type != STARPU_HISTORY_BASED)
  142. return 0;
  143. return 1;
  144. }
  145. static void new_task(starpu_job_t j)
  146. {
  147. struct bound_task *t;
  148. if (j->bound_task)
  149. return;
  150. t = (struct bound_task *) malloc(sizeof(*t));
  151. memset(t, 0, sizeof(*t));
  152. t->id = j->job_id;
  153. t->tag_id = j->task->tag_id;
  154. t->use_tag = j->task->use_tag;
  155. t->cl = j->task->cl;
  156. t->footprint = _starpu_compute_buffers_footprint(j);
  157. t->priority = j->task->priority;
  158. t->deps = NULL;
  159. t->depsn = 0;
  160. t->next = tasks;
  161. j->bound_task = t;
  162. tasks = t;
  163. }
  164. void _starpu_bound_record(starpu_job_t j)
  165. {
  166. if (!_starpu_bound_recording)
  167. return;
  168. if (!good_job(j))
  169. return;
  170. PTHREAD_MUTEX_LOCK(&mutex);
  171. /* Re-check, this time with mutex held */
  172. if (!_starpu_bound_recording) {
  173. PTHREAD_MUTEX_UNLOCK(&mutex);
  174. return;
  175. }
  176. if (recorddeps) {
  177. new_task(j);
  178. } else {
  179. struct bound_task_pool *tp;
  180. _starpu_compute_buffers_footprint(j);
  181. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  182. tp = last;
  183. else
  184. for (tp = task_pools; tp; tp = tp->next)
  185. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  186. break;
  187. if (!tp) {
  188. tp = (struct bound_task_pool *) malloc(sizeof(*tp));
  189. tp->cl = j->task->cl;
  190. tp->footprint = j->footprint;
  191. tp->n = 0;
  192. tp->next = task_pools;
  193. task_pools = tp;
  194. }
  195. /* One more task of this kind */
  196. tp->n++;
  197. }
  198. PTHREAD_MUTEX_UNLOCK(&mutex);
  199. }
  200. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  201. {
  202. struct bound_tag_dep *td;
  203. if (!_starpu_bound_recording || !recorddeps)
  204. return;
  205. PTHREAD_MUTEX_LOCK(&mutex);
  206. /* Re-check, this time with mutex held */
  207. if (!_starpu_bound_recording || !recorddeps) {
  208. PTHREAD_MUTEX_UNLOCK(&mutex);
  209. return;
  210. }
  211. td = (struct bound_tag_dep *) malloc(sizeof(*td));
  212. td->tag = id;
  213. td->dep_tag = dep_id;
  214. td->next = tag_deps;
  215. tag_deps = td;
  216. PTHREAD_MUTEX_UNLOCK(&mutex);
  217. }
  218. void _starpu_bound_task_dep(starpu_job_t j, starpu_job_t dep_j)
  219. {
  220. struct bound_task *t;
  221. if (!_starpu_bound_recording || !recorddeps)
  222. return;
  223. if (!good_job(j) || !good_job(dep_j))
  224. return;
  225. PTHREAD_MUTEX_LOCK(&mutex);
  226. /* Re-check, this time with mutex held */
  227. if (!_starpu_bound_recording || !recorddeps) {
  228. PTHREAD_MUTEX_UNLOCK(&mutex);
  229. return;
  230. }
  231. new_task(j);
  232. new_task(dep_j);
  233. t = j->bound_task;
  234. t->deps = (struct bound_task **) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  235. t->deps[t->depsn-1] = dep_j->bound_task;
  236. PTHREAD_MUTEX_UNLOCK(&mutex);
  237. }
  238. static struct bound_task *find_job(unsigned long id)
  239. {
  240. struct bound_task *t;
  241. for (t = tasks; t; t = t->next)
  242. if (t->id == id)
  243. return t;
  244. return NULL;
  245. }
  246. void _starpu_bound_job_id_dep(starpu_job_t j, unsigned long id)
  247. {
  248. struct bound_task *t, *dep_t;
  249. if (!_starpu_bound_recording || !recorddeps)
  250. return;
  251. if (!good_job(j))
  252. return;
  253. PTHREAD_MUTEX_LOCK(&mutex);
  254. /* Re-check, this time with mutex held */
  255. if (!_starpu_bound_recording || !recorddeps) {
  256. PTHREAD_MUTEX_UNLOCK(&mutex);
  257. return;
  258. }
  259. new_task(j);
  260. dep_t = find_job(id);
  261. if (!dep_t) {
  262. fprintf(stderr,"dependency %lu not found !\n", id);
  263. PTHREAD_MUTEX_UNLOCK(&mutex);
  264. return;
  265. }
  266. t = j->bound_task;
  267. t->deps = (struct bound_task **) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  268. t->deps[t->depsn-1] = dep_t;
  269. PTHREAD_MUTEX_UNLOCK(&mutex);
  270. }
  271. void starpu_bound_stop(void)
  272. {
  273. PTHREAD_MUTEX_LOCK(&mutex);
  274. _starpu_bound_recording = 0;
  275. PTHREAD_MUTEX_UNLOCK(&mutex);
  276. }
  277. static void _starpu_get_tasks_times(int nw, int nt, double *times) {
  278. struct bound_task_pool *tp;
  279. int w, t;
  280. for (w = 0; w < nw; w++) {
  281. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  282. struct starpu_job_s j = {
  283. .footprint = tp->footprint,
  284. .footprint_is_computed = 1,
  285. };
  286. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  287. double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl);
  288. if (length == -1.0)
  289. times[w*nt+t] = -1.0;
  290. else
  291. times[w*nt+t] = length / 1000.;
  292. }
  293. }
  294. }
  295. static int ancestor(struct bound_task *child, struct bound_task *parent) {
  296. int i;
  297. for (i = 0; i < child->depsn; i++) {
  298. if (parent == child->deps[i])
  299. return 1;
  300. if (ancestor(child->deps[i], parent))
  301. return -1;
  302. }
  303. return 0;
  304. }
  305. void starpu_bound_print_dot(FILE *output) {
  306. struct bound_task *t;
  307. struct bound_tag_dep *td;
  308. int i;
  309. if (!recorddeps) {
  310. fprintf(output, "Not supported\n");
  311. return;
  312. }
  313. fprintf(output, "strict digraph bounddeps {\n");
  314. for (t = tasks; t; t = t->next) {
  315. fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, t->cl->model->symbol);
  316. for (i = 0; i < t->depsn; i++)
  317. fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i]->id, t->id);
  318. }
  319. for (td = tag_deps; td; td = td->next)
  320. fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
  321. fprintf(output, "}\n");
  322. }
  323. /*
  324. * lp_solve format
  325. */
  326. void starpu_bound_print_lp(FILE *output)
  327. {
  328. int nt; /* Number of different kinds of tasks */
  329. int nw; /* Number of different workers */
  330. int t, w;
  331. PTHREAD_MUTEX_LOCK(&mutex);
  332. nw = starpu_worker_get_count();
  333. if (recorddeps) {
  334. struct bound_task *t1, *t2;
  335. struct bound_tag_dep *td;
  336. int i;
  337. nt = 0;
  338. for (t1 = tasks; t1; t1 = t1->next) {
  339. struct starpu_job_s j = {
  340. .footprint = t1->footprint,
  341. .footprint_is_computed = 1,
  342. };
  343. for (w = 0; w < nw; w++) {
  344. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  345. if (t1->duration[arch] == 0.) {
  346. double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
  347. if (length == -1.0)
  348. /* Avoid problems with binary coding of doubles */
  349. t1->duration[arch] = -1.0;
  350. else
  351. t1->duration[arch] = length / 1000.;
  352. }
  353. }
  354. nt++;
  355. }
  356. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  357. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  358. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  359. fprintf(output, "min: tmax;\n\n");
  360. fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
  361. for (t1 = tasks; t1; t1 = t1->next)
  362. fprintf(output, "c%lu <= tmax;\n", t1->id);
  363. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  364. for (t1 = tasks; t1; t1 = t1->next) {
  365. for (w = 0; w < nw; w++) {
  366. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  367. if (t1->duration[arch] != -1.0)
  368. fprintf(output, " +t%luw%d", t1->id, w);
  369. }
  370. fprintf(output, " = 1;\n");
  371. }
  372. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  373. fprintf(output, "/* According to where the task is indeed executed */\n");
  374. for (t1 = tasks; t1; t1 = t1->next) {
  375. fprintf(output, "/* %s %x */\tc%lu = s%lu", t1->cl->model->symbol, (unsigned) t1->footprint, t1->id, t1->id);
  376. for (w = 0; w < nw; w++) {
  377. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  378. if (t1->duration[arch] != -1.0)
  379. fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
  380. }
  381. fprintf(output, ";\n");
  382. }
  383. fprintf(output, "\n/* Each task starts after all its task dependencies finish. */\n");
  384. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  385. for (t1 = tasks; t1; t1 = t1->next)
  386. for (i = 0; i < t1->depsn; i++)
  387. fprintf(output, "s%lu >= c%lu;\n", t1->id, t1->deps[i]->id);
  388. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
  389. for (t1 = tasks; t1; t1 = t1->next)
  390. if (t1->use_tag) {
  391. for (w = 0; w < nw; w++)
  392. fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
  393. }
  394. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  395. for (td = tag_deps; td; td = td->next)
  396. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  397. /* TODO: factorize ancestor calls */
  398. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  399. fprintf(output, " one is started after the other's completion */\n");
  400. for (t1 = tasks; t1; t1 = t1->next) {
  401. for (t2 = t1->next; t2; t2 = t2->next)
  402. {
  403. if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
  404. for (w = 0; w < nw; w++) {
  405. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  406. if (t1->duration[arch] != -1.0) {
  407. fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
  408. t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
  409. fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
  410. t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
  411. }
  412. }
  413. }
  414. }
  415. }
  416. #if 0
  417. /* Doesn't help at all to actually express what "after" means */
  418. for (t1 = tasks; t1; t1 = t1->next)
  419. for (t2 = t1->next; t2; t2 = t2->next)
  420. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  421. {
  422. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  423. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  424. }
  425. #endif
  426. if (recordprio) {
  427. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  428. fprintf(output, " * more prioritized first */\n");
  429. for (t1 = tasks; t1; t1 = t1->next) {
  430. for (t2 = t1->next; t2; t2 = t2->next)
  431. {
  432. if (!ancestor(t1, t2) && !ancestor(t2, t1)
  433. && t1->priority != t2->priority) {
  434. if (t1->priority > t2->priority) {
  435. /* Either t2 is scheduled before t1, but then it
  436. needs to be scheduled before some t dep finishes */
  437. /* One of the t1 deps to give the maximum start time for t2 */
  438. if (t1->depsn > 1) {
  439. for (i = 0; i < t1->depsn; i++)
  440. fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
  441. fprintf(output, " = 1;\n");
  442. }
  443. for (i = 0; i < t1->depsn; i++) {
  444. fprintf(output, "c%lu - s%lu >= ", t1->deps[i]->id, t2->id);
  445. if (t1->depsn > 1)
  446. /* Only checks this when it's this dependency that is chosen */
  447. fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
  448. else
  449. fprintf(output, "-1e5");
  450. /* Only check this if t1 is after t2 */
  451. fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
  452. fprintf(output, ";\n");
  453. }
  454. /* Or t2 is scheduled after t1 is. */
  455. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  456. } else {
  457. /* Either t1 is scheduled before t2, but then it
  458. needs to be scheduled before some t2 dep finishes */
  459. /* One of the t2 deps to give the maximum start time for t1 */
  460. if (t2->depsn > 1) {
  461. for (i = 0; i < t2->depsn; i++)
  462. fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
  463. fprintf(output, " = 1;\n");
  464. }
  465. for (i = 0; i < t2->depsn; i++) {
  466. fprintf(output, "c%lu - s%lu >= ", t2->deps[i]->id, t1->id);
  467. if (t2->depsn > 1)
  468. /* Only checks this when it's this dependency that is chosen */
  469. fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
  470. /* Only check this if t2 is after t1 */
  471. fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
  472. }
  473. /* Or t1 is scheduled after t2 is. */
  474. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  475. }
  476. }
  477. }
  478. }
  479. }
  480. for (t1 = tasks; t1; t1 = t1->next)
  481. for (t2 = t1->next; t2; t2 = t2->next)
  482. if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
  483. fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
  484. if (recordprio && t1->priority != t2->priority) {
  485. if (t1->priority > t2->priority) {
  486. if (t1->depsn > 1)
  487. for (i = 0; i < t1->depsn; i++)
  488. fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
  489. } else {
  490. if (t2->depsn > 1)
  491. for (i = 0; i < t2->depsn; i++)
  492. fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
  493. }
  494. }
  495. }
  496. for (t1 = tasks; t1; t1 = t1->next)
  497. for (w = 0; w < nw; w++)
  498. fprintf(output, "bin t%luw%d;\n", t1->id, w);
  499. } else {
  500. struct bound_task_pool *tp;
  501. nt = 0;
  502. for (tp = task_pools; tp; tp = tp->next)
  503. nt++;
  504. {
  505. double times[nw*nt];
  506. _starpu_get_tasks_times(nw, nt, times);
  507. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  508. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  509. fprintf(output, "min: tmax;\n\n");
  510. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  511. for (w = 0; w < nw; w++) {
  512. char name[32];
  513. starpu_worker_get_name(w, name, sizeof(name));
  514. fprintf(output, "/* worker %s */\n", name);
  515. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  516. if (times[w*nt+t] != -1.0)
  517. fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
  518. }
  519. fprintf(output, " <= tmax;\n");
  520. }
  521. fprintf(output, "\n");
  522. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  523. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  524. fprintf(output, "/* task %s key %x */\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  525. for (w = 0; w < nw; w++)
  526. if (times[w*nt+t] != -1.0)
  527. fprintf(output, "\t+w%dt%dn", w, t);
  528. fprintf(output, " = %lu;\n", tp->n);
  529. /* Show actual values */
  530. fprintf(output, "/*");
  531. for (w = 0; w < nw; w++)
  532. fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
  533. fprintf(output, "\t*/\n\n");
  534. }
  535. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  536. fprintf(output, "/* int ");
  537. int first = 1;
  538. for (w = 0; w < nw; w++)
  539. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  540. if (!first)
  541. fprintf(output, ",");
  542. else
  543. first = 0;
  544. fprintf(output, "w%dt%dn", w, t);
  545. }
  546. fprintf(output, "; */\n");
  547. }
  548. }
  549. PTHREAD_MUTEX_UNLOCK(&mutex);
  550. }
  551. /*
  552. * MPS output format
  553. */
  554. void starpu_bound_print_mps(FILE *output)
  555. {
  556. struct bound_task_pool * tp;
  557. int nt; /* Number of different kinds of tasks */
  558. int nw; /* Number of different workers */
  559. int t, w;
  560. if (recorddeps) {
  561. fprintf(output, "Not supported\n");
  562. return;
  563. }
  564. PTHREAD_MUTEX_LOCK(&mutex);
  565. nw = starpu_worker_get_count();
  566. nt = 0;
  567. for (tp = task_pools; tp; tp = tp->next)
  568. nt++;
  569. {
  570. double times[nw*nt];
  571. _starpu_get_tasks_times(nw, nt, times);
  572. fprintf(output, "NAME StarPU theoretical bound\n");
  573. fprintf(output, "\nROWS\n");
  574. fprintf(output, "* We want to minimize total execution time (ms)\n");
  575. fprintf(output, " N TMAX\n");
  576. fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
  577. for (w = 0; w < nw; w++) {
  578. char name[32];
  579. starpu_worker_get_name(w, name, sizeof(name));
  580. fprintf(output, "* worker %s\n", name);
  581. fprintf(output, " L W%d\n", w);
  582. }
  583. fprintf(output, "\n* And we have to have computed exactly all tasks\n");
  584. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  585. fprintf(output, "* task %s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  586. fprintf(output, " E T%d\n", t);
  587. }
  588. fprintf(output, "\nCOLUMNS\n");
  589. fprintf(output, "\n* Execution times and completion of all tasks\n");
  590. for (w = 0; w < nw; w++)
  591. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  592. if (times[w*nt+t] != -1.0) {
  593. char name[9];
  594. snprintf(name, sizeof(name), "W%dT%d", w, t);
  595. fprintf(stderr," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
  596. fprintf(stderr," %-8s T%-7d %12d\n", name, t, 1);
  597. }
  598. fprintf(output, "\n* Total execution time\n");
  599. for (w = 0; w < nw; w++)
  600. fprintf(stderr," TMAX W%-2d %12d\n", w, -1);
  601. fprintf(stderr," TMAX TMAX %12d\n", 1);
  602. fprintf(output, "\nRHS\n");
  603. fprintf(output, "\n* Total number of tasks\n");
  604. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  605. fprintf(stderr," NT%-2d T%-7d %12lu\n", t, t, tp->n);
  606. fprintf(output, "ENDATA\n");
  607. }
  608. PTHREAD_MUTEX_UNLOCK(&mutex);
  609. }
  610. /*
  611. * GNU Linear Programming Kit backend
  612. */
  613. #ifdef HAVE_GLPK_H
  614. static glp_prob *_starpu_bound_glp_resolve(int integer)
  615. {
  616. struct bound_task_pool * tp;
  617. int nt; /* Number of different kinds of tasks */
  618. int nw; /* Number of different workers */
  619. int t, w;
  620. glp_prob *lp;
  621. int ret;
  622. nw = starpu_worker_get_count();
  623. nt = 0;
  624. for (tp = task_pools; tp; tp = tp->next)
  625. nt++;
  626. lp = glp_create_prob();
  627. glp_set_prob_name(lp, "StarPU theoretical bound");
  628. glp_set_obj_dir(lp, GLP_MIN);
  629. glp_set_obj_name(lp, "total execution time");
  630. {
  631. double times[nw*nt];
  632. int ne =
  633. nw * (nt+1) /* worker execution time */
  634. + nt * nw
  635. + 1; /* glp dumbness */
  636. int n = 1;
  637. int ia[ne], ja[ne];
  638. double ar[ne];
  639. _starpu_get_tasks_times(nw, nt, times);
  640. /* Variables: number of tasks i assigned to worker j, and tmax */
  641. glp_add_cols(lp, nw*nt+1);
  642. #define colnum(w, t) ((t)*nw+(w)+1)
  643. glp_set_obj_coef(lp, nw*nt+1, 1.);
  644. for (w = 0; w < nw; w++)
  645. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  646. char name[32];
  647. snprintf(name, sizeof(name), "w%dt%dn", w, t);
  648. glp_set_col_name(lp, colnum(w, t), name);
  649. if (integer)
  650. glp_set_col_kind(lp, colnum(w, t), GLP_IV);
  651. glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
  652. }
  653. glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
  654. /* Total worker execution time */
  655. glp_add_rows(lp, nw);
  656. for (w = 0; w < nw; w++) {
  657. char name[32], title[64];
  658. starpu_worker_get_name(w, name, sizeof(name));
  659. snprintf(title, sizeof(title), "worker %s", name);
  660. glp_set_row_name(lp, w+1, title);
  661. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  662. ia[n] = w+1;
  663. ja[n] = colnum(w, t);
  664. if (times[w*nt+t] == -1.)
  665. ar[n] = 1000000000.;
  666. else
  667. ar[n] = times[w*nt+t];
  668. n++;
  669. }
  670. /* tmax */
  671. ia[n] = w+1;
  672. ja[n] = nw*nt+1;
  673. ar[n] = -1;
  674. n++;
  675. glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
  676. }
  677. /* Total task completion */
  678. glp_add_rows(lp, nt);
  679. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  680. char name[32], title[64];
  681. starpu_worker_get_name(w, name, sizeof(name));
  682. snprintf(title, sizeof(title), "task %s key %x", tp->cl->model->symbol, (unsigned) tp->footprint);
  683. glp_set_row_name(lp, nw+t+1, title);
  684. for (w = 0; w < nw; w++) {
  685. ia[n] = nw+t+1;
  686. ja[n] = colnum(w, t);
  687. ar[n] = 1;
  688. n++;
  689. }
  690. glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
  691. }
  692. STARPU_ASSERT(n == ne);
  693. glp_load_matrix(lp, ne-1, ia, ja, ar);
  694. }
  695. glp_smcp parm;
  696. glp_init_smcp(&parm);
  697. parm.msg_lev = GLP_MSG_OFF;
  698. ret = glp_simplex(lp, &parm);
  699. if (ret) {
  700. glp_delete_prob(lp);
  701. lp = NULL;
  702. return NULL;
  703. }
  704. if (integer) {
  705. glp_iocp iocp;
  706. glp_init_iocp(&iocp);
  707. iocp.msg_lev = GLP_MSG_OFF;
  708. glp_intopt(lp, &iocp);
  709. }
  710. return lp;
  711. }
  712. #endif /* HAVE_GLPK_H */
  713. void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
  714. #ifdef HAVE_GLPK_H
  715. if (recorddeps) {
  716. fprintf(output, "Not supported\n");
  717. return;
  718. }
  719. PTHREAD_MUTEX_LOCK(&mutex);
  720. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  721. if (lp) {
  722. struct bound_task_pool * tp;
  723. int t, w;
  724. int nw; /* Number of different workers */
  725. double tmax;
  726. nw = starpu_worker_get_count();
  727. if (integer)
  728. tmax = glp_mip_obj_val(lp);
  729. else
  730. tmax = glp_get_obj_val(lp);
  731. fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
  732. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  733. fprintf(output, "%s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  734. for (w = 0; w < nw; w++)
  735. if (integer)
  736. fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
  737. else
  738. fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
  739. fprintf(output, "\n");
  740. }
  741. glp_delete_prob(lp);
  742. } else {
  743. fprintf(stderr, "Simplex failed\n");
  744. }
  745. PTHREAD_MUTEX_UNLOCK(&mutex);
  746. #else /* HAVE_GLPK_H */
  747. fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  748. #endif /* HAVE_GLPK_H */
  749. }
  750. void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused))) {
  751. #ifdef HAVE_GLPK_H
  752. double ret;
  753. if (recorddeps) {
  754. *res = 0.;
  755. return;
  756. }
  757. PTHREAD_MUTEX_LOCK(&mutex);
  758. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  759. if (lp) {
  760. ret = glp_get_obj_val(lp);
  761. if (integer)
  762. *integer_res = glp_mip_obj_val(lp);
  763. glp_delete_prob(lp);
  764. } else
  765. ret = 0.;
  766. PTHREAD_MUTEX_UNLOCK(&mutex);
  767. *res = ret;
  768. #else /* HAVE_GLPK_H */
  769. *res = 0.;
  770. #endif /* HAVE_GLPK_H */
  771. }