bound.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
  4. * Copyright (C) 2010, 2011 Université de Bordeaux 1
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. /*
  18. * Record which kinds of tasks have been executed, to later on compute an upper
  19. * bound of the performance that could have theoretically been achieved
  20. */
  21. #include <starpu.h>
  22. #include <starpu_config.h>
  23. #include <profiling/bound.h>
  24. #include <core/jobs.h>
  25. #ifdef HAVE_GLPK_H
  26. #include <glpk.h>
  27. #endif /* HAVE_GLPK_H */
  28. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
  29. /*
  30. * Record without dependencies: just count each kind of task
  31. *
  32. * The linear programming problem will just have as variables:
  33. * - the number of tasks of kind `t' executed by worker `w'
  34. * - the total duration
  35. *
  36. * and the constraints will be:
  37. * - the time taken by each worker to complete its assigned tasks is lower than
  38. * the total duration.
  39. * - the total numer of tasks of a given kind is equal to the number run by the
  40. * application.
  41. */
  42. struct bound_task_pool {
  43. /* Which codelet has been executed */
  44. struct starpu_codelet_t *cl;
  45. /* Task footprint key */
  46. uint32_t footprint;
  47. /* Number of tasks of this kind */
  48. unsigned long n;
  49. /* Other task kinds */
  50. struct bound_task_pool *next;
  51. };
  52. /*
  53. * Record with dependencies: each task is recorded separately
  54. *
  55. * The linear programming problem will have as variables:
  56. * - The start time of each task
  57. * - The completion time of each tag
  58. * - The total duration
  59. * - For each task and for each worker, whether the task is executing on that worker.
  60. * - For each pair of task, which task is scheduled first.
  61. *
  62. * and the constraints will be:
  63. * - All task start time plus duration are less than total duration
  64. * - Each task is executed on exactly one worker.
  65. * - Each task starts after all its task dependencies finish.
  66. * - Each task starts after all its tag dependencies finish.
  67. * - For each task pair and each worker, if both tasks are executed by that worker,
  68. * one is started after the other's completion.
  69. */
  70. /* Note: only task-task, implicit data dependencies or task-tag dependencies
  71. * are taken into account. Tags released in a callback or something like this
  72. * is not taken into account, only tags associated with a task are. */
  73. struct bound_task {
  74. /* Unique ID */
  75. unsigned long id;
  76. /* Tag ID, if any */
  77. starpu_tag_t tag_id;
  78. int use_tag;
  79. /* Which codelet has been executed */
  80. struct starpu_codelet_t *cl;
  81. /* Task footprint key */
  82. uint32_t footprint;
  83. /* Task priority */
  84. int priority;
  85. /* Tasks this one depends on */
  86. struct bound_task **deps;
  87. int depsn;
  88. /* Estimated duration */
  89. double duration[STARPU_NARCH_VARIATIONS];
  90. /* Other tasks */
  91. struct bound_task *next;
  92. };
  93. struct bound_tag_dep {
  94. starpu_tag_t tag;
  95. starpu_tag_t dep_tag;
  96. struct bound_tag_dep *next;
  97. };
  98. static struct bound_task_pool *task_pools, *last;
  99. static struct bound_task *tasks;
  100. static struct bound_tag_dep *tag_deps;
  101. int _starpu_bound_recording;
  102. static int recorddeps;
  103. static int recordprio;
  104. static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  105. void starpu_bound_start(int deps, int prio)
  106. {
  107. struct bound_task_pool *tp;
  108. struct bound_task *t;
  109. struct bound_tag_dep *td;
  110. PTHREAD_MUTEX_LOCK(&mutex);
  111. tp = task_pools;
  112. task_pools = NULL;
  113. last = NULL;
  114. t = tasks;
  115. tasks = NULL;
  116. td = tag_deps;
  117. tag_deps = NULL;
  118. _starpu_bound_recording = 1;
  119. recorddeps = deps;
  120. recordprio = prio;
  121. PTHREAD_MUTEX_UNLOCK(&mutex);
  122. for ( ; tp; tp = tp->next)
  123. free(tp);
  124. for ( ; t; t = t->next)
  125. free(t);
  126. for ( ; td; td = td->next)
  127. free(td);
  128. }
  129. static int good_job(starpu_job_t j)
  130. {
  131. /* No codelet, nothing to measure */
  132. if (j->exclude_from_dag)
  133. return 0;
  134. if (!j->task->cl)
  135. return 0;
  136. /* No performance model, no time duration estimation */
  137. if (!j->task->cl->model)
  138. return 0;
  139. /* Only support history based */
  140. if (j->task->cl->model->type != STARPU_HISTORY_BASED)
  141. return 0;
  142. return 1;
  143. }
  144. static void new_task(starpu_job_t j)
  145. {
  146. struct bound_task *t;
  147. if (j->bound_task)
  148. return;
  149. t = malloc(sizeof(*t));
  150. memset(t, 0, sizeof(*t));
  151. t->id = j->job_id;
  152. t->tag_id = j->task->tag_id;
  153. t->use_tag = j->task->use_tag;
  154. t->cl = j->task->cl;
  155. t->footprint = _starpu_compute_buffers_footprint(j);
  156. t->priority = j->task->priority;
  157. t->deps = NULL;
  158. t->depsn = 0;
  159. t->next = tasks;
  160. j->bound_task = t;
  161. tasks = t;
  162. }
  163. void _starpu_bound_record(starpu_job_t j)
  164. {
  165. if (!_starpu_bound_recording)
  166. return;
  167. if (!good_job(j))
  168. return;
  169. PTHREAD_MUTEX_LOCK(&mutex);
  170. /* Re-check, this time with mutex held */
  171. if (!_starpu_bound_recording) {
  172. PTHREAD_MUTEX_UNLOCK(&mutex);
  173. return;
  174. }
  175. if (recorddeps) {
  176. new_task(j);
  177. } else {
  178. struct bound_task_pool *tp;
  179. _starpu_compute_buffers_footprint(j);
  180. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  181. tp = last;
  182. else
  183. for (tp = task_pools; tp; tp = tp->next)
  184. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  185. break;
  186. if (!tp) {
  187. tp = malloc(sizeof(*tp));
  188. tp->cl = j->task->cl;
  189. tp->footprint = j->footprint;
  190. tp->n = 0;
  191. tp->next = task_pools;
  192. task_pools = tp;
  193. }
  194. /* One more task of this kind */
  195. tp->n++;
  196. }
  197. PTHREAD_MUTEX_UNLOCK(&mutex);
  198. }
  199. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  200. {
  201. struct bound_tag_dep *td;
  202. if (!_starpu_bound_recording || !recorddeps)
  203. return;
  204. PTHREAD_MUTEX_LOCK(&mutex);
  205. /* Re-check, this time with mutex held */
  206. if (!_starpu_bound_recording || !recorddeps) {
  207. PTHREAD_MUTEX_UNLOCK(&mutex);
  208. return;
  209. }
  210. td = malloc(sizeof(*td));
  211. td->tag = id;
  212. td->dep_tag = dep_id;
  213. td->next = tag_deps;
  214. tag_deps = td;
  215. PTHREAD_MUTEX_UNLOCK(&mutex);
  216. }
  217. void _starpu_bound_task_dep(starpu_job_t j, starpu_job_t dep_j)
  218. {
  219. struct bound_task *t;
  220. if (!_starpu_bound_recording || !recorddeps)
  221. return;
  222. if (!good_job(j) || !good_job(dep_j))
  223. return;
  224. PTHREAD_MUTEX_LOCK(&mutex);
  225. /* Re-check, this time with mutex held */
  226. if (!_starpu_bound_recording || !recorddeps) {
  227. PTHREAD_MUTEX_UNLOCK(&mutex);
  228. return;
  229. }
  230. new_task(j);
  231. new_task(dep_j);
  232. t = j->bound_task;
  233. t->deps = realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  234. t->deps[t->depsn-1] = dep_j->bound_task;
  235. PTHREAD_MUTEX_UNLOCK(&mutex);
  236. }
  237. static struct bound_task *find_job(unsigned long id)
  238. {
  239. struct bound_task *t;
  240. for (t = tasks; t; t = t->next)
  241. if (t->id == id)
  242. return t;
  243. return NULL;
  244. }
  245. void _starpu_bound_job_id_dep(starpu_job_t j, unsigned long id)
  246. {
  247. struct bound_task *t, *dep_t;
  248. if (!_starpu_bound_recording || !recorddeps)
  249. return;
  250. if (!good_job(j))
  251. return;
  252. PTHREAD_MUTEX_LOCK(&mutex);
  253. /* Re-check, this time with mutex held */
  254. if (!_starpu_bound_recording || !recorddeps) {
  255. PTHREAD_MUTEX_UNLOCK(&mutex);
  256. return;
  257. }
  258. new_task(j);
  259. dep_t = find_job(id);
  260. if (!dep_t) {
  261. fprintf(stderr,"dependency %lu not found !\n", id);
  262. PTHREAD_MUTEX_UNLOCK(&mutex);
  263. return;
  264. }
  265. t = j->bound_task;
  266. t->deps = realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  267. t->deps[t->depsn-1] = dep_t;
  268. PTHREAD_MUTEX_UNLOCK(&mutex);
  269. }
  270. void starpu_bound_stop(void)
  271. {
  272. PTHREAD_MUTEX_LOCK(&mutex);
  273. _starpu_bound_recording = 0;
  274. PTHREAD_MUTEX_UNLOCK(&mutex);
  275. }
  276. static void _starpu_get_tasks_times(int nw, int nt, double *times) {
  277. struct bound_task_pool *tp;
  278. int w, t;
  279. for (w = 0; w < nw; w++) {
  280. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  281. struct starpu_job_s j = {
  282. .footprint = tp->footprint,
  283. .footprint_is_computed = 1,
  284. };
  285. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  286. double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j);
  287. if (length == -1.0)
  288. times[w*nt+t] = -1.0;
  289. else
  290. times[w*nt+t] = length / 1000.;
  291. }
  292. }
  293. }
  294. static int ancestor(struct bound_task *child, struct bound_task *parent) {
  295. int i;
  296. for (i = 0; i < child->depsn; i++) {
  297. if (parent == child->deps[i])
  298. return 1;
  299. if (ancestor(child->deps[i], parent))
  300. return -1;
  301. }
  302. return 0;
  303. }
  304. void starpu_bound_print_dot(FILE *output) {
  305. struct bound_task *t;
  306. struct bound_tag_dep *td;
  307. int i;
  308. if (!recorddeps) {
  309. fprintf(output, "Not supported\n");
  310. return;
  311. }
  312. fprintf(output, "strict digraph bounddeps {\n");
  313. for (t = tasks; t; t = t->next) {
  314. fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, t->cl->model->symbol);
  315. for (i = 0; i < t->depsn; i++)
  316. fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i]->id, t->id);
  317. }
  318. for (td = tag_deps; td; td = td->next)
  319. fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
  320. fprintf(output, "}\n");
  321. }
  322. /*
  323. * lp_solve format
  324. */
  325. void starpu_bound_print_lp(FILE *output)
  326. {
  327. int nt; /* Number of different kinds of tasks */
  328. int nw; /* Number of different workers */
  329. int t, w;
  330. PTHREAD_MUTEX_LOCK(&mutex);
  331. nw = starpu_worker_get_count();
  332. if (recorddeps) {
  333. struct bound_task *t1, *t2;
  334. struct bound_tag_dep *td;
  335. int i;
  336. nt = 0;
  337. for (t1 = tasks; t1; t1 = t1->next) {
  338. struct starpu_job_s j = {
  339. .footprint = t1->footprint,
  340. .footprint_is_computed = 1,
  341. };
  342. for (w = 0; w < nw; w++) {
  343. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  344. if (t1->duration[arch] == 0.) {
  345. double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j);
  346. if (length == -1.0)
  347. /* Avoid problems with binary coding of doubles */
  348. t1->duration[arch] = -1.0;
  349. else
  350. t1->duration[arch] = length / 1000.;
  351. }
  352. }
  353. nt++;
  354. }
  355. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  356. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  357. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  358. fprintf(output, "min: tmax;\n\n");
  359. fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
  360. for (t1 = tasks; t1; t1 = t1->next)
  361. fprintf(output, "c%lu <= tmax;\n", t1->id);
  362. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  363. for (t1 = tasks; t1; t1 = t1->next) {
  364. for (w = 0; w < nw; w++) {
  365. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  366. if (t1->duration[arch] != -1.0)
  367. fprintf(output, " +t%luw%d", t1->id, w);
  368. }
  369. fprintf(output, " = 1;\n");
  370. }
  371. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  372. fprintf(output, "/* According to where the task is indeed executed */\n");
  373. for (t1 = tasks; t1; t1 = t1->next) {
  374. fprintf(output, "/* %s %x */\tc%lu = s%lu", t1->cl->model->symbol, (unsigned) t1->footprint, t1->id, t1->id);
  375. for (w = 0; w < nw; w++) {
  376. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  377. if (t1->duration[arch] != -1.0)
  378. fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
  379. }
  380. fprintf(output, ";\n");
  381. }
  382. fprintf(output, "\n/* Each task starts after all its task dependencies finish. */\n");
  383. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  384. for (t1 = tasks; t1; t1 = t1->next)
  385. for (i = 0; i < t1->depsn; i++)
  386. fprintf(output, "s%lu >= c%lu;\n", t1->id, t1->deps[i]->id);
  387. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
  388. for (t1 = tasks; t1; t1 = t1->next)
  389. if (t1->use_tag) {
  390. for (w = 0; w < nw; w++)
  391. fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
  392. }
  393. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  394. for (td = tag_deps; td; td = td->next)
  395. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  396. /* TODO: factorize ancestor calls */
  397. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  398. fprintf(output, " one is started after the other's completion */\n");
  399. for (t1 = tasks; t1; t1 = t1->next) {
  400. for (t2 = t1->next; t2; t2 = t2->next)
  401. {
  402. if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
  403. for (w = 0; w < nw; w++) {
  404. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  405. if (t1->duration[arch] != -1.0) {
  406. fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
  407. t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
  408. fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
  409. t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
  410. }
  411. }
  412. }
  413. }
  414. }
  415. #if 0
  416. /* Doesn't help at all to actually express what "after" means */
  417. for (t1 = tasks; t1; t1 = t1->next)
  418. for (t2 = t1->next; t2; t2 = t2->next)
  419. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  420. {
  421. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  422. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  423. }
  424. #endif
  425. if (recordprio) {
  426. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  427. fprintf(output, " * more prioritized first */\n");
  428. for (t1 = tasks; t1; t1 = t1->next) {
  429. for (t2 = t1->next; t2; t2 = t2->next)
  430. {
  431. if (!ancestor(t1, t2) && !ancestor(t2, t1)
  432. && t1->priority != t2->priority) {
  433. if (t1->priority > t2->priority) {
  434. /* Either t2 is scheduled before t1, but then it
  435. needs to be scheduled before some t dep finishes */
  436. /* One of the t1 deps to give the maximum start time for t2 */
  437. if (t1->depsn > 1) {
  438. for (i = 0; i < t1->depsn; i++)
  439. fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
  440. fprintf(output, " = 1;\n");
  441. }
  442. for (i = 0; i < t1->depsn; i++) {
  443. fprintf(output, "c%lu - s%lu >= ", t1->deps[i]->id, t2->id);
  444. if (t1->depsn > 1)
  445. /* Only checks this when it's this dependency that is chosen */
  446. fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
  447. else
  448. fprintf(output, "-1e5");
  449. /* Only check this if t1 is after t2 */
  450. fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
  451. fprintf(output, ";\n");
  452. }
  453. /* Or t2 is scheduled after t1 is. */
  454. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  455. } else {
  456. /* Either t1 is scheduled before t2, but then it
  457. needs to be scheduled before some t2 dep finishes */
  458. /* One of the t2 deps to give the maximum start time for t1 */
  459. if (t2->depsn > 1) {
  460. for (i = 0; i < t2->depsn; i++)
  461. fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
  462. fprintf(output, " = 1;\n");
  463. }
  464. for (i = 0; i < t2->depsn; i++) {
  465. fprintf(output, "c%lu - s%lu >= ", t2->deps[i]->id, t1->id);
  466. if (t2->depsn > 1)
  467. /* Only checks this when it's this dependency that is chosen */
  468. fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
  469. /* Only check this if t2 is after t1 */
  470. fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
  471. }
  472. /* Or t1 is scheduled after t2 is. */
  473. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  474. }
  475. }
  476. }
  477. }
  478. }
  479. for (t1 = tasks; t1; t1 = t1->next)
  480. for (t2 = t1->next; t2; t2 = t2->next)
  481. if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
  482. fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
  483. if (recordprio && t1->priority != t2->priority) {
  484. if (t1->priority > t2->priority) {
  485. if (t1->depsn > 1)
  486. for (i = 0; i < t1->depsn; i++)
  487. fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
  488. } else {
  489. if (t2->depsn > 1)
  490. for (i = 0; i < t2->depsn; i++)
  491. fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
  492. }
  493. }
  494. }
  495. for (t1 = tasks; t1; t1 = t1->next)
  496. for (w = 0; w < nw; w++)
  497. fprintf(output, "bin t%luw%d;\n", t1->id, w);
  498. } else {
  499. struct bound_task_pool *tp;
  500. nt = 0;
  501. for (tp = task_pools; tp; tp = tp->next)
  502. nt++;
  503. {
  504. double times[nw*nt];
  505. _starpu_get_tasks_times(nw, nt, times);
  506. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  507. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  508. fprintf(output, "min: tmax;\n\n");
  509. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  510. for (w = 0; w < nw; w++) {
  511. char name[32];
  512. starpu_worker_get_name(w, name, sizeof(name));
  513. fprintf(output, "/* worker %s */\n", name);
  514. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  515. if (times[w*nt+t] != -1.0)
  516. fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
  517. }
  518. fprintf(output, " <= tmax;\n");
  519. }
  520. fprintf(output, "\n");
  521. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  522. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  523. fprintf(output, "/* task %s key %x */\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  524. for (w = 0; w < nw; w++)
  525. if (times[w*nt+t] != -1.0)
  526. fprintf(output, "\t+w%dt%dn", w, t);
  527. fprintf(output, " = %lu;\n", tp->n);
  528. /* Show actual values */
  529. fprintf(output, "/*");
  530. for (w = 0; w < nw; w++)
  531. fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
  532. fprintf(output, "\t*/\n\n");
  533. }
  534. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  535. fprintf(output, "/* int ");
  536. int first = 1;
  537. for (w = 0; w < nw; w++)
  538. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  539. if (!first)
  540. fprintf(output, ",");
  541. else
  542. first = 0;
  543. fprintf(output, "w%dt%dn", w, t);
  544. }
  545. fprintf(output, "; */\n");
  546. }
  547. }
  548. PTHREAD_MUTEX_UNLOCK(&mutex);
  549. }
  550. /*
  551. * MPS output format
  552. */
  553. void starpu_bound_print_mps(FILE *output)
  554. {
  555. struct bound_task_pool * tp;
  556. int nt; /* Number of different kinds of tasks */
  557. int nw; /* Number of different workers */
  558. int t, w;
  559. if (recorddeps) {
  560. fprintf(output, "Not supported\n");
  561. return;
  562. }
  563. PTHREAD_MUTEX_LOCK(&mutex);
  564. nw = starpu_worker_get_count();
  565. nt = 0;
  566. for (tp = task_pools; tp; tp = tp->next)
  567. nt++;
  568. {
  569. double times[nw*nt];
  570. _starpu_get_tasks_times(nw, nt, times);
  571. fprintf(output, "NAME StarPU theoretical bound\n");
  572. fprintf(output, "\nROWS\n");
  573. fprintf(output, "* We want to minimize total execution time (ms)\n");
  574. fprintf(output, " N TMAX\n");
  575. fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
  576. for (w = 0; w < nw; w++) {
  577. char name[32];
  578. starpu_worker_get_name(w, name, sizeof(name));
  579. fprintf(output, "* worker %s\n", name);
  580. fprintf(output, " L W%d\n", w);
  581. }
  582. fprintf(output, "\n* And we have to have computed exactly all tasks\n");
  583. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  584. fprintf(output, "* task %s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  585. fprintf(output, " E T%d\n", t);
  586. }
  587. fprintf(output, "\nCOLUMNS\n");
  588. fprintf(output, "\n* Execution times and completion of all tasks\n");
  589. for (w = 0; w < nw; w++)
  590. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  591. if (times[w*nt+t] != -1.0) {
  592. char name[9];
  593. snprintf(name, sizeof(name), "W%dT%d", w, t);
  594. fprintf(stderr," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
  595. fprintf(stderr," %-8s T%-7d %12d\n", name, t, 1);
  596. }
  597. fprintf(output, "\n* Total execution time\n");
  598. for (w = 0; w < nw; w++)
  599. fprintf(stderr," TMAX W%-2d %12d\n", w, -1);
  600. fprintf(stderr," TMAX TMAX %12d\n", 1);
  601. fprintf(output, "\nRHS\n");
  602. fprintf(output, "\n* Total number of tasks\n");
  603. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  604. fprintf(stderr," NT%-2d T%-7d %12lu\n", t, t, tp->n);
  605. fprintf(output, "ENDATA\n");
  606. }
  607. PTHREAD_MUTEX_UNLOCK(&mutex);
  608. }
  609. /*
  610. * GNU Linear Programming Kit backend
  611. */
  612. #ifdef HAVE_GLPK_H
  613. static glp_prob *_starpu_bound_glp_resolve(int integer)
  614. {
  615. struct bound_task_pool * tp;
  616. int nt; /* Number of different kinds of tasks */
  617. int nw; /* Number of different workers */
  618. int t, w;
  619. glp_prob *lp;
  620. int ret;
  621. nw = starpu_worker_get_count();
  622. nt = 0;
  623. for (tp = task_pools; tp; tp = tp->next)
  624. nt++;
  625. lp = glp_create_prob();
  626. glp_set_prob_name(lp, "StarPU theoretical bound");
  627. glp_set_obj_dir(lp, GLP_MIN);
  628. glp_set_obj_name(lp, "total execution time");
  629. {
  630. double times[nw*nt];
  631. int ne =
  632. nw * (nt+1) /* worker execution time */
  633. + nt * nw
  634. + 1; /* glp dumbness */
  635. int n = 1;
  636. int ia[ne], ja[ne];
  637. double ar[ne];
  638. _starpu_get_tasks_times(nw, nt, times);
  639. /* Variables: number of tasks i assigned to worker j, and tmax */
  640. glp_add_cols(lp, nw*nt+1);
  641. #define colnum(w, t) ((t)*nw+(w)+1)
  642. glp_set_obj_coef(lp, nw*nt+1, 1.);
  643. for (w = 0; w < nw; w++)
  644. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  645. char name[32];
  646. snprintf(name, sizeof(name), "w%dt%dn", w, t);
  647. glp_set_col_name(lp, colnum(w, t), name);
  648. if (integer)
  649. glp_set_col_kind(lp, colnum(w, t), GLP_IV);
  650. glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
  651. }
  652. glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
  653. /* Total worker execution time */
  654. glp_add_rows(lp, nw);
  655. for (w = 0; w < nw; w++) {
  656. char name[32], title[64];
  657. starpu_worker_get_name(w, name, sizeof(name));
  658. snprintf(title, sizeof(title), "worker %s", name);
  659. glp_set_row_name(lp, w+1, title);
  660. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  661. ia[n] = w+1;
  662. ja[n] = colnum(w, t);
  663. if (times[w*nt+t] == -1.)
  664. ar[n] = 1000000000.;
  665. else
  666. ar[n] = times[w*nt+t];
  667. n++;
  668. }
  669. /* tmax */
  670. ia[n] = w+1;
  671. ja[n] = nw*nt+1;
  672. ar[n] = -1;
  673. n++;
  674. glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
  675. }
  676. /* Total task completion */
  677. glp_add_rows(lp, nt);
  678. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  679. char name[32], title[64];
  680. starpu_worker_get_name(w, name, sizeof(name));
  681. snprintf(title, sizeof(title), "task %s key %x", tp->cl->model->symbol, (unsigned) tp->footprint);
  682. glp_set_row_name(lp, nw+t+1, title);
  683. for (w = 0; w < nw; w++) {
  684. ia[n] = nw+t+1;
  685. ja[n] = colnum(w, t);
  686. ar[n] = 1;
  687. n++;
  688. }
  689. glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
  690. }
  691. STARPU_ASSERT(n == ne);
  692. glp_load_matrix(lp, ne-1, ia, ja, ar);
  693. }
  694. glp_smcp parm;
  695. glp_init_smcp(&parm);
  696. parm.msg_lev = GLP_MSG_OFF;
  697. ret = glp_simplex(lp, &parm);
  698. if (ret) {
  699. glp_delete_prob(lp);
  700. lp = NULL;
  701. return NULL;
  702. }
  703. if (integer) {
  704. glp_iocp iocp;
  705. glp_init_iocp(&iocp);
  706. iocp.msg_lev = GLP_MSG_OFF;
  707. glp_intopt(lp, &iocp);
  708. }
  709. return lp;
  710. }
  711. #endif /* HAVE_GLPK_H */
  712. void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
  713. #ifdef HAVE_GLPK_H
  714. if (recorddeps) {
  715. fprintf(output, "Not supported\n");
  716. return;
  717. }
  718. PTHREAD_MUTEX_LOCK(&mutex);
  719. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  720. if (lp) {
  721. struct bound_task_pool * tp;
  722. int t, w;
  723. int nw; /* Number of different workers */
  724. double tmax;
  725. nw = starpu_worker_get_count();
  726. if (integer)
  727. tmax = glp_mip_obj_val(lp);
  728. else
  729. tmax = glp_get_obj_val(lp);
  730. fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
  731. for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
  732. fprintf(output, "%s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
  733. for (w = 0; w < nw; w++)
  734. if (integer)
  735. fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
  736. else
  737. fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
  738. fprintf(output, "\n");
  739. }
  740. glp_delete_prob(lp);
  741. } else {
  742. fprintf(stderr, "Simplex failed\n");
  743. }
  744. PTHREAD_MUTEX_UNLOCK(&mutex);
  745. #else /* HAVE_GLPK_H */
  746. fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  747. #endif /* HAVE_GLPK_H */
  748. }
  749. void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused))) {
  750. #ifdef HAVE_GLPK_H
  751. double ret;
  752. if (recorddeps) {
  753. *res = 0.;
  754. return;
  755. }
  756. PTHREAD_MUTEX_LOCK(&mutex);
  757. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  758. if (lp) {
  759. ret = glp_get_obj_val(lp);
  760. if (integer)
  761. *integer_res = glp_mip_obj_val(lp);
  762. glp_delete_prob(lp);
  763. } else
  764. ret = 0.;
  765. PTHREAD_MUTEX_UNLOCK(&mutex);
  766. *res = ret;
  767. #else /* HAVE_GLPK_H */
  768. *res = 0.;
  769. #endif /* HAVE_GLPK_H */
  770. }