bound.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828
  1. /*
  2. * StarPU
  3. * Copyright (C) Université Bordeaux 1, CNRS 2010 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. /*
  17. * Record which kinds of tasks have been executed, to later on compute an upper
  18. * bound of the performance that could have theoretically been achieved
  19. */
  20. #include <starpu.h>
  21. #include <starpu_config.h>
  22. #include <profiling/bound.h>
  23. #include <core/jobs.h>
  24. #ifdef HAVE_GLPK_H
  25. #include <glpk.h>
  26. #endif /* HAVE_GLPK_H */
  27. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
  28. /*
  29. * Record without dependencies: just count each kind of task
  30. *
  31. * The linear programming problem will just have as variables:
  32. * - the number of tasks of kind `t' executed by worker `w'
  33. * - the total duration
  34. *
  35. * and the constraints will be:
  36. * - the time taken by each worker to complete its assigned tasks is lower than
  37. * the total duration.
  38. * - the total number of tasks of a given kind is equal to the number run by the
  39. * application.
  40. */
  /*
   * One node per distinct (codelet, footprint) pair seen while recording
   * without dependencies.  Nodes form a singly-linked list.
   */
  struct bound_task_pool {
      struct starpu_codelet_t *cl;  /* codelet that was executed */
      uint32_t footprint;           /* footprint key of the task */
      unsigned long n;              /* how many tasks of this kind were seen */
      struct bound_task_pool *next; /* next kind of task, NULL at the end */
  };
  51. /*
  52. * Record with dependencies: each task is recorded separately
  53. *
  54. * The linear programming problem will have as variables:
  55. * - The start time of each task
  56. * - The completion time of each tag
  57. * - The total duration
  58. * - For each task and for each worker, whether the task is executing on that worker.
  59. * - For each pair of task, which task is scheduled first.
  60. *
  61. * and the constraints will be:
  62. * - All task start time plus duration are less than total duration
  63. * - Each task is executed on exactly one worker.
  64. * - Each task starts after all its task dependencies finish.
  65. * - Each task starts after all its tag dependencies finish.
  66. * - For each task pair and each worker, if both tasks are executed by that worker,
  67. * one is started after the other's completion.
  68. */
  69. /* Note: only task-task, implicit data dependencies or task-tag dependencies
  70. * are taken into account. Tags released in a callback or by similar means
  71. * are not taken into account; only tags associated with a task are. */
  72. struct bound_task {
  73. /* Unique ID */
  74. int id;
  75. /* Tag ID, if any */
  76. starpu_tag_t tag_id;
  77. int use_tag;
  78. /* Which codelet has been executed */
  79. struct starpu_codelet_t *cl;
  80. /* Task footprint key */
  81. uint32_t footprint;
  82. /* Task priority */
  83. int priority;
  84. /* Tasks this one depends on */
  85. struct bound_task **deps;
  86. int depsn;
  87. /* Estimated duration */
  88. double duration[STARPU_NARCH_VARIATIONS];
  89. /* Other tasks */
  90. struct bound_task *next;
  91. };
  92. struct bound_tag_dep {
  93. starpu_tag_t tag;
  94. starpu_tag_t dep_tag;
  95. struct bound_tag_dep *next;
  96. };
  /* Recording state.  Writers in this file update it with `mutex' held. */

  /* Task kinds seen when recording without dependencies */
  static struct bound_task_pool *task_pools;
  /* Shortcut consulted by _starpu_bound_record before walking task_pools */
  static struct bound_task_pool *last;
  /* Individual tasks seen when recording with dependencies */
  static struct bound_task *tasks;
  /* Tag dependencies seen when recording with dependencies */
  static struct bound_tag_dep *tag_deps;
  /* Non-zero while recording is enabled */
  static int recording;
  /* Non-zero when dependencies are recorded */
  static int recorddeps;
  /* Non-zero when priorities are taken into account in the output */
  static int recordprio;
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  104. void starpu_bound_start(int deps, int prio)
  105. {
  106. struct bound_task_pool *tp;
  107. struct bound_task *t;
  108. struct bound_tag_dep *td;
  109. PTHREAD_MUTEX_LOCK(&mutex);
  110. tp = task_pools;
  111. task_pools = NULL;
  112. last = NULL;
  113. t = tasks;
  114. tasks = NULL;
  115. td = tag_deps;
  116. tag_deps = NULL;
  117. recording = 1;
  118. recorddeps = deps;
  119. recordprio = prio;
  120. PTHREAD_MUTEX_UNLOCK(&mutex);
  121. for ( ; tp; tp = tp->next)
  122. free(tp);
  123. for ( ; t; t = t->next)
  124. free(t);
  125. for ( ; td; td = td->next)
  126. free(td);
  127. }
  128. static int good_job(starpu_job_t j)
  129. {
  130. /* No codelet, nothing to measure */
  131. if (j->exclude_from_dag)
  132. return 0;
  133. if (!j->task->cl)
  134. return 0;
  135. /* No performance model, no time duration estimation */
  136. if (!j->task->cl->model)
  137. return 0;
  138. /* Only support history based */
  139. if (j->task->cl->model->type != STARPU_HISTORY_BASED)
  140. return 0;
  141. return 1;
  142. }
  143. static void new_task(starpu_job_t j)
  144. {
  145. struct bound_task *t;
  146. static int task_ids;
  147. if (j->bound_task)
  148. return;
  149. if (STARPU_UNLIKELY(!j->footprint_is_computed))
  150. _starpu_compute_buffers_footprint(j);
  151. t = malloc(sizeof(*t));
  152. memset(t, 0, sizeof(*t));
  153. t->id = task_ids++;
  154. t->tag_id = j->task->tag_id;
  155. t->use_tag = j->task->use_tag;
  156. t->cl = j->task->cl;
  157. t->footprint = j->footprint;
  158. t->priority = j->task->priority;
  159. t->deps = NULL;
  160. t->depsn = 0;
  161. t->next = tasks;
  162. j->bound_task = t;
  163. tasks = t;
  164. }
  165. void _starpu_bound_record(starpu_job_t j)
  166. {
  167. if (!recording)
  168. return;
  169. if (!good_job(j))
  170. return;
  171. PTHREAD_MUTEX_LOCK(&mutex);
  172. /* Re-check, this time with mutex held */
  173. if (!recording) {
  174. PTHREAD_MUTEX_UNLOCK(&mutex);
  175. return;
  176. }
  177. if (recorddeps) {
  178. new_task(j);
  179. } else {
  180. struct bound_task_pool *tp;
  181. if (STARPU_UNLIKELY(!j->footprint_is_computed))
  182. _starpu_compute_buffers_footprint(j);
  183. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  184. tp = last;
  185. else
  186. for (tp = task_pools; tp; tp = tp->next)
  187. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  188. break;
  189. if (!tp) {
  190. tp = malloc(sizeof(*tp));
  191. tp->cl = j->task->cl;
  192. tp->footprint = j->footprint;
  193. tp->n = 0;
  194. tp->next = task_pools;
  195. task_pools = tp;
  196. }
  197. /* One more task of this kind */
  198. tp->n++;
  199. }
  200. PTHREAD_MUTEX_UNLOCK(&mutex);
  201. }
  202. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  203. {
  204. struct bound_tag_dep *td;
  205. if (!recording || !recorddeps)
  206. return;
  207. PTHREAD_MUTEX_LOCK(&mutex);
  208. /* Re-check, this time with mutex held */
  209. if (!recording || !recorddeps) {
  210. PTHREAD_MUTEX_UNLOCK(&mutex);
  211. return;
  212. }
  213. td = malloc(sizeof(*td));
  214. td->tag = id;
  215. td->dep_tag = dep_id;
  216. td->next = tag_deps;
  217. tag_deps = td;
  218. PTHREAD_MUTEX_UNLOCK(&mutex);
  219. }
  220. void _starpu_bound_task_dep(starpu_job_t j, starpu_job_t dep_j)
  221. {
  222. struct bound_task *t;
  223. if (!recording || !recorddeps)
  224. return;
  225. if (!good_job(j) || !good_job(dep_j))
  226. return;
  227. PTHREAD_MUTEX_LOCK(&mutex);
  228. /* Re-check, this time with mutex held */
  229. if (!recording || !recorddeps) {
  230. PTHREAD_MUTEX_UNLOCK(&mutex);
  231. return;
  232. }
  233. new_task(j);
  234. new_task(dep_j);
  235. t = j->bound_task;
  236. t->deps = realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  237. t->deps[t->depsn-1] = dep_j->bound_task;
  238. PTHREAD_MUTEX_UNLOCK(&mutex);
  239. }
  240. void starpu_bound_stop(void)
  241. {
  242. PTHREAD_MUTEX_LOCK(&mutex);
  243. recording = 0;
  244. PTHREAD_MUTEX_UNLOCK(&mutex);
  245. }
  246. static void _starpu_get_tasks_times(int nw, int nt, double times[nw][nt]) {
  247. struct bound_task_pool *tp;
  248. int w, t;
  249. for (w = 0; w < nw; w++) {
  250. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  251. struct starpu_job_s j = {
  252. .footprint = tp->footprint,
  253. .footprint_is_computed = 1,
  254. };
  255. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  256. double length = _starpu_history_based_job_expected_length(tp->cl->model, arch, &j);
  257. if (length == -1.0)
  258. times[w][t] = -1.0;
  259. else
  260. times[w][t] = length / 1000.;
  261. }
  262. }
  263. }
  264. static int ancestor(struct bound_task *child, struct bound_task *parent) {
  265. int i;
  266. for (i = 0; i < child->depsn; i++) {
  267. if (parent == child->deps[i])
  268. return 1;
  269. if (ancestor(child->deps[i], parent))
  270. return -1;
  271. }
  272. return 0;
  273. }
  274. /*
  275. * lp_solve format
  276. */
  277. void starpu_bound_print_lp(FILE *output)
  278. {
  279. int nt; /* Number of different kinds of tasks */
  280. int nw; /* Number of different workers */
  281. int t, w;
  282. PTHREAD_MUTEX_LOCK(&mutex);
  283. nw = starpu_worker_get_count();
  284. if (recorddeps) {
  285. struct bound_task *t, *t2;
  286. struct bound_tag_dep *td;
  287. int i;
  288. nt = 0;
  289. for (t = tasks; t; t = t->next) {
  290. struct starpu_job_s j = {
  291. .footprint = t->footprint,
  292. .footprint_is_computed = 1,
  293. };
  294. for (w = 0; w < nw; w++) {
  295. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  296. if (t->duration[arch] == 0.) {
  297. double length = _starpu_history_based_job_expected_length(t->cl->model, arch, &j);
  298. if (length == -1.0)
  299. /* Avoid problems with binary coding of doubles */
  300. t->duration[arch] = -1.0;
  301. else
  302. t->duration[arch] = length / 1000.;
  303. }
  304. }
  305. nt++;
  306. }
  307. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  308. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  309. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  310. fprintf(output, "min: tmax;\n\n");
  311. fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
  312. for (t = tasks; t; t = t->next)
  313. fprintf(output, "c%u <= tmax;\n", t->id);
  314. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  315. for (t = tasks; t; t = t->next) {
  316. for (w = 0; w < nw; w++) {
  317. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  318. if (t->duration[arch] != -1.0)
  319. fprintf(output, " +t%uw%u", t->id, w);
  320. }
  321. fprintf(output, " = 1;\n");
  322. }
  323. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  324. fprintf(output, "/* According to where the task is indeed executed */\n");
  325. for (t = tasks; t; t = t->next) {
  326. fprintf(output, "/* %s %x */\tc%u = s%u", t->cl->model->symbol, (unsigned) t->footprint, t->id, t->id);
  327. for (w = 0; w < nw; w++) {
  328. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  329. if (t->duration[arch] != -1.0)
  330. fprintf(output, " + %f t%uw%u", t->duration[arch], t->id, w);
  331. }
  332. fprintf(output, ";\n");
  333. }
  334. fprintf(output, "\n/* Each task starts after all its task dependencies finish. */\n");
  335. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  336. for (t = tasks; t; t = t->next)
  337. for (i = 0; i < t->depsn; i++)
  338. fprintf(output, "s%u >= c%u;\n", t->id, t->deps[i]->id);
  339. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
  340. for (t = tasks; t; t = t->next)
  341. if (t->use_tag) {
  342. for (w = 0; w < nw; w++)
  343. fprintf(output, "c%u = tag%lu;\n", t->id, (unsigned long) t->tag_id);
  344. }
  345. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  346. for (td = tag_deps; td; td = td->next)
  347. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  348. /* TODO: factorize ancestor calls */
  349. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  350. fprintf(output, " one is started after the other's completion */\n");
  351. for (t = tasks; t; t = t->next) {
  352. for (t2 = t->next; t2; t2 = t2->next)
  353. {
  354. if (!ancestor(t, t2) && !ancestor(t2, t)) {
  355. for (w = 0; w < nw; w++) {
  356. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  357. if (t->duration[arch] != -1.0) {
  358. fprintf(output, "s%u - c%u >= -3e5 + 1e5 t%uw%u + 1e5 t%uw%u + 1e5 t%uafter%u;\n",
  359. t->id, t2->id, t->id, w, t2->id, w, t->id, t2->id);
  360. fprintf(output, "s%u - c%u >= -2e5 + 1e5 t%uw%u + 1e5 t%uw%u - 1e5 t%uafter%u;\n",
  361. t2->id, t->id, t->id, w, t2->id, w, t->id, t2->id);
  362. }
  363. }
  364. }
  365. }
  366. }
  367. #if 0
  368. /* Doesn't help at all to actually express what "after" means */
  369. for (t = tasks; t; t = t->next)
  370. for (t2 = t->next; t2; t2 = t2->next)
  371. if (!ancestor(t, t2) && !ancestor(t2, t))
  372. {
  373. fprintf(output, "s%u - s%u >= -1e5 + 1e5 t%uafter%u;\n", t->id, t2->id, t->id, t2->id);
  374. fprintf(output, "s%u - s%u >= -1e5 t%uafter%u;\n", t2->id, t->id, t->id, t2->id);
  375. }
  376. #endif
  377. if (recordprio) {
  378. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  379. fprintf(output, " * more prioritized first */\n");
  380. for (t = tasks; t; t = t->next) {
  381. for (t2 = t->next; t2; t2 = t2->next)
  382. {
  383. if (!ancestor(t, t2) && !ancestor(t2, t)
  384. && t->priority != t2->priority) {
  385. if (t->priority > t2->priority) {
  386. /* Either t2 is scheduled before t, but then it
  387. needs to be scheduled before some t dep finishes */
  388. /* One of the t deps to give the maximum start time for t2 */
  389. if (t->depsn > 1) {
  390. for (i = 0; i < t->depsn; i++)
  391. fprintf(output, " + t%ut%ud%u", t2->id, t->id, i);
  392. fprintf(output, " = 1;\n");
  393. }
  394. for (i = 0; i < t->depsn; i++) {
  395. fprintf(output, "c%u - s%u >= ", t->deps[i]->id, t2->id);
  396. if (t->depsn > 1)
  397. /* Only checks this when it's this dependency that is chosen */
  398. fprintf(output, "-2e5 + 1e5 t%ut%ud%u", t2->id, t->id, i);
  399. else
  400. fprintf(output, "-1e5");
  401. /* Only check this if t is after t2 */
  402. fprintf(output, " + 1e5 t%uafter%u", t->id, t2->id);
  403. fprintf(output, ";\n");
  404. }
  405. /* Or t2 is scheduled after t is. */
  406. fprintf(output, "s%u - s%u >= -1e5 t%uafter%u;\n", t2->id, t->id, t->id, t2->id);
  407. } else {
  408. /* Either t is scheduled before t2, but then it
  409. needs to be scheduled before some t2 dep finishes */
  410. /* One of the t2 deps to give the maximum start time for t */
  411. if (t2->depsn > 1) {
  412. for (i = 0; i < t2->depsn; i++)
  413. fprintf(output, " + t%ut%ud%u", t->id, t2->id, i);
  414. fprintf(output, " = 1;\n");
  415. }
  416. for (i = 0; i < t2->depsn; i++) {
  417. fprintf(output, "c%u - s%u >= ", t2->deps[i]->id, t->id);
  418. if (t2->depsn > 1)
  419. /* Only checks this when it's this dependency that is chosen */
  420. fprintf(output, "-1e5 + 1e5 t%ut%ud%u", t->id, t2->id, i);
  421. /* Only check this if t2 is after t */
  422. fprintf(output, " - 1e5 t%uafter%u;\n", t->id, t2->id);
  423. }
  424. /* Or t is scheduled after t2 is. */
  425. fprintf(output, "s%u - s%u >= -1e5 + 1e5 t%uafter%u;\n", t->id, t2->id, t->id, t2->id);
  426. }
  427. }
  428. }
  429. }
  430. }
  431. for (t = tasks; t; t = t->next)
  432. for (t2 = t->next; t2; t2 = t2->next)
  433. if (!ancestor(t, t2) && !ancestor(t2, t)) {
  434. fprintf(output, "bin t%uafter%u;\n", t->id, t2->id);
  435. if (recordprio && t->priority != t2->priority) {
  436. if (t->priority > t2->priority) {
  437. if (t->depsn > 1)
  438. for (i = 0; i < t->depsn; i++)
  439. fprintf(output, "bin t%ut%ud%u;\n", t2->id, t->id, i);
  440. } else {
  441. if (t2->depsn > 1)
  442. for (i = 0; i < t2->depsn; i++)
  443. fprintf(output, "bin t%ut%ud%u;\n", t->id, t2->id, i);
  444. }
  445. }
  446. }
  447. for (t = tasks; t; t = t->next)
  448. for (w = 0; w < nw; w++)
  449. fprintf(output, "bin t%uw%u;\n", t->id, w);
  450. } else {
  451. struct bound_task_pool *tp;
  452. nt = 0;
  453. for (tp = task_pools; tp; tp = tp->next)
  454. nt++;
  455. {
  456. double times[nw][nt];
  457. _starpu_get_tasks_times(nw, nt, times);
  458. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  459. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  460. fprintf(output, "min: tmax;\n\n");
  461. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  462. for (w = 0; w < nw; w++) {
  463. char name[32];
  464. starpu_worker_get_name(w, name, sizeof(name));
  465. fprintf(output, "/* worker %s */\n", name);
  466. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  467. if (times[w][t] != -1.0)
  468. fprintf(output, "\t%+f * w%ut%un", (float) times[w][t], w, t);
  469. }
  470. fprintf(output, " <= tmax;\n");
  471. }
  472. fprintf(output, "\n");
  473. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  474. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  475. fprintf(output, "/* task %s key %x */\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  476. for (w = 0; w < nw; w++)
  477. if (times[w][t] != -1.0)
  478. fprintf(output, "\t+w%ut%un", w, t);
  479. fprintf(output, " = %ld;\n", tp->n);
  480. /* Show actual values */
  481. fprintf(output, "/*");
  482. for (w = 0; w < nw; w++)
  483. fprintf(output, "\t+%ld", tp->cl->per_worker_stats[w]);
  484. fprintf(output, "\t*/\n\n");
  485. }
  486. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  487. fprintf(output, "/* int ");
  488. int first = 1;
  489. for (w = 0; w < nw; w++)
  490. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  491. if (!first)
  492. fprintf(output, ",");
  493. else
  494. first = 0;
  495. fprintf(output, "w%ut%un", w, t);
  496. }
  497. fprintf(output, "; */\n");
  498. }
  499. }
  500. PTHREAD_MUTEX_UNLOCK(&mutex);
  501. }
  502. /*
  503. * MPS output format
  504. */
  505. void starpu_bound_print_mps(FILE *output)
  506. {
  507. struct bound_task_pool * tp;
  508. int nt; /* Number of different kinds of tasks */
  509. int nw; /* Number of different workers */
  510. int t, w;
  511. if (recorddeps) {
  512. fprintf(output, "Not supported\n");
  513. return;
  514. }
  515. PTHREAD_MUTEX_LOCK(&mutex);
  516. nw = starpu_worker_get_count();
  517. nt = 0;
  518. for (tp = task_pools; tp; tp = tp->next)
  519. nt++;
  520. {
  521. double times[nw][nt];
  522. _starpu_get_tasks_times(nw, nt, times);
  523. fprintf(output, "NAME StarPU theoretical bound\n");
  524. fprintf(output, "\nROWS\n");
  525. fprintf(output, "* We want to minimize total execution time (ms)\n");
  526. fprintf(output, " N TMAX\n");
  527. fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
  528. for (w = 0; w < nw; w++) {
  529. char name[32];
  530. starpu_worker_get_name(w, name, sizeof(name));
  531. fprintf(output, "* worker %s\n", name);
  532. fprintf(output, " L W%u\n", w);
  533. }
  534. fprintf(output, "\n* And we have to have computed exactly all tasks\n");
  535. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  536. fprintf(output, "* task %s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  537. fprintf(output, " E T%u\n", t);
  538. }
  539. fprintf(output, "\nCOLUMNS\n");
  540. fprintf(output, "\n* Execution times and completion of all tasks\n");
  541. for (w = 0; w < nw; w++)
  542. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  543. if (times[w][t] != -1.0) {
  544. char name[9];
  545. snprintf(name, sizeof(name), "W%uT%u", w, t);
  546. fprintf(stderr," %-8s W%-7u %12f\n", name, w, times[w][t]);
  547. fprintf(stderr," %-8s T%-7u %12u\n", name, t, 1);
  548. }
  549. fprintf(output, "\n* Total execution time\n");
  550. for (w = 0; w < nw; w++)
  551. fprintf(stderr," TMAX W%-2u %12u\n", w, -1);
  552. fprintf(stderr," TMAX TMAX %12u\n", 1);
  553. fprintf(output, "\nRHS\n");
  554. fprintf(output, "\n* Total number of tasks\n");
  555. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  556. fprintf(stderr," NT%-2u T%-7u %12lu\n", t, t, tp->n);
  557. fprintf(output, "ENDATA\n");
  558. }
  559. PTHREAD_MUTEX_UNLOCK(&mutex);
  560. }
  561. /*
  562. * GNU Linear Programming Kit backend
  563. */
  564. #ifdef HAVE_GLPK_H
  565. static glp_prob *_starpu_bound_glp_resolve(void)
  566. {
  567. struct bound_task_pool * tp;
  568. int nt; /* Number of different kinds of tasks */
  569. int nw; /* Number of different workers */
  570. int t, w;
  571. glp_prob *lp;
  572. int ret;
  573. nw = starpu_worker_get_count();
  574. nt = 0;
  575. for (tp = task_pools; tp; tp = tp->next)
  576. nt++;
  577. lp = glp_create_prob();
  578. glp_set_prob_name(lp, "StarPU theoretical bound");
  579. glp_set_obj_dir(lp, GLP_MIN);
  580. glp_set_obj_name(lp, "total execution time");
  581. {
  582. double times[nw][nt];
  583. int ne =
  584. nw * (nt+1) /* worker execution time */
  585. + nt * nw
  586. + 1; /* glp dumbness */
  587. int n = 1;
  588. int ia[ne], ja[ne];
  589. double ar[ne];
  590. _starpu_get_tasks_times(nw, nt, times);
  591. /* Variables: number of tasks i assigned to worker j, and tmax */
  592. glp_add_cols(lp, nw*nt+1);
  593. #define colnum(w, t) ((t)*nw+(w)+1)
  594. glp_set_obj_coef(lp, nw*nt+1, 1.);
  595. for (w = 0; w < nw; w++)
  596. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  597. char name[32];
  598. snprintf(name, sizeof(name), "w%ut%un", w, t);
  599. glp_set_col_name(lp, colnum(w, t), name);
  600. glp_set_col_kind(lp, colnum(w, t), GLP_IV);
  601. glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
  602. }
  603. glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
  604. /* Total worker execution time */
  605. glp_add_rows(lp, nw);
  606. for (w = 0; w < nw; w++) {
  607. char name[32], title[64];
  608. starpu_worker_get_name(w, name, sizeof(name));
  609. snprintf(title, sizeof(title), "worker %s", name);
  610. glp_set_row_name(lp, w+1, title);
  611. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  612. ia[n] = w+1;
  613. ja[n] = colnum(w, t);
  614. if (times[w][t] == -1.)
  615. ar[n] = INFINITY;
  616. else
  617. ar[n] = times[w][t];
  618. n++;
  619. }
  620. /* tmax */
  621. ia[n] = w+1;
  622. ja[n] = nw*nt+1;
  623. ar[n] = -1;
  624. n++;
  625. glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
  626. }
  627. /* Total task completion */
  628. glp_add_rows(lp, nt);
  629. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  630. char name[32], title[64];
  631. starpu_worker_get_name(w, name, sizeof(name));
  632. snprintf(title, sizeof(title), "task %s key %x", tp->cl->model->symbol, (unsigned) tp->footprint);
  633. glp_set_row_name(lp, nw+t+1, title);
  634. for (w = 0; w < nw; w++) {
  635. ia[n] = nw+t+1;
  636. ja[n] = colnum(w, t);
  637. ar[n] = 1;
  638. n++;
  639. }
  640. glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
  641. }
  642. STARPU_ASSERT(n == ne);
  643. glp_load_matrix(lp, ne-1, ia, ja, ar);
  644. }
  645. glp_smcp parm;
  646. glp_init_smcp(&parm);
  647. parm.msg_lev = GLP_MSG_OFF;
  648. ret = glp_simplex(lp, &parm);
  649. glp_iocp iocp;
  650. glp_init_iocp(&iocp);
  651. iocp.msg_lev = GLP_MSG_OFF;
  652. glp_intopt(lp, &iocp);
  653. if (ret) {
  654. glp_delete_prob(lp);
  655. lp = NULL;
  656. }
  657. return lp;
  658. }
  659. #endif /* HAVE_GLPK_H */
  /*
   * Solve the recorded problem and write to `output' the theoretical minimum
   * execution time, followed, for each kind of task, by the number of tasks
   * assigned to each worker in the solution.
   *
   * Writes "Not supported" when dependencies were recorded, and a rebuild
   * hint when StarPU was built without GLPK.  A solver failure is reported
   * on stderr.
   */
  void starpu_bound_print(FILE *output) {
  #ifdef HAVE_GLPK_H
      if (recorddeps) {
          fprintf(output, "Not supported\n");
          return;
      }

      PTHREAD_MUTEX_LOCK(&mutex);
      glp_prob *lp = _starpu_bound_glp_resolve();
      if (!lp) {
          fprintf(stderr, "Simplex failed\n");
      } else {
          struct bound_task_pool *pool;
          int kind, worker;
          /* Must be called nw: the colnum() macro expands to it */
          int nw = starpu_worker_get_count();
          double tmax = glp_get_obj_val(lp);

          fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);

          kind = 0;
          for (pool = task_pools; pool; pool = pool->next) {
              fprintf(output, "%s key %x\n", pool->cl->model->symbol, (unsigned) pool->footprint);
              worker = 0;
              while (worker < nw) {
                  fprintf(output, "\tw%ut%un %f", worker, kind, glp_mip_col_val(lp, colnum(worker, kind)));
                  worker++;
              }
              fprintf(output, "\n");
              kind++;
          }

          glp_delete_prob(lp);
      }
      PTHREAD_MUTEX_UNLOCK(&mutex);
  #else /* HAVE_GLPK_H */
      fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  #endif /* HAVE_GLPK_H */
  }
  /*
   * Solve the recorded problem and store the objective value reported by
   * GLPK into *res.
   *
   * 0 is stored when dependencies were recorded, when the solver fails, or
   * when StarPU was built without GLPK.
   */
  void starpu_bound_compute(double *res) {
  #ifdef HAVE_GLPK_H
      double bound = 0.;

      if (recorddeps) {
          *res = 0.;
          return;
      }

      PTHREAD_MUTEX_LOCK(&mutex);
      glp_prob *lp = _starpu_bound_glp_resolve();
      if (lp) {
          bound = glp_get_obj_val(lp);
          glp_delete_prob(lp);
      }
      PTHREAD_MUTEX_UNLOCK(&mutex);

      *res = bound;
  #else /* HAVE_GLPK_H */
      *res = 0.;
  #endif /* HAVE_GLPK_H */
  }