bound.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  4. * Copyright (C) 2010-2012 Université de Bordeaux 1
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /*
  19. * Record which kinds of tasks have been executed, to later on compute an upper
  20. * bound of the performance that could have theoretically been achieved
  21. */
#include <stdlib.h>
#include <starpu.h>
#include <starpu_config.h>
#include <profiling/bound.h>
#include <core/jobs.h>
#ifdef HAVE_GLPK_H
#include <glpk.h>
#endif /* HAVE_GLPK_H */
  29. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
/*
 * Record without dependencies: just count each kind of task
 *
 * The linear programming problem will just have as variables:
 * - the number of tasks of kind `t' executed by worker `w'
 * - the total duration
 *
 * and the constraints will be:
 * - the time taken by each worker to complete its assigned tasks is lower than
 *   the total duration.
 * - the total number of tasks of a given kind is equal to the number run by the
 *   application.
 */
/*
 * One entry per kind of task seen while recording without dependencies.
 * A "kind" is identified by the (codelet, footprint) pair; entries are chained
 * in a singly-linked list whose head is task_pools.
 */
struct bound_task_pool
{
	/* Which codelet has been executed */
	struct starpu_codelet *cl;
	/* Task footprint key */
	uint32_t footprint;
	/* Number of tasks of this kind */
	unsigned long n;
	/* Other task kinds */
	struct bound_task_pool *next;
};
  54. /*
  55. * Record with dependencies: each task is recorded separately
  56. *
  57. * The linear programming problem will have as variables:
  58. * - The start time of each task
  59. * - The completion time of each tag
  60. * - The total duration
  61. * - For each task and for each worker, whether the task is executing on that worker.
  62. * - For each pair of task, which task is scheduled first.
  63. *
  64. * and the constraints will be:
  65. * - All task start time plus duration are less than total duration
  66. * - Each task is executed on exactly one worker.
  67. * - Each task starts after all its task dependencies finish.
  68. * - Each task starts after all its tag dependencies finish.
  69. * - For each task pair and each worker, if both tasks are executed by that worker,
  70. * one is started after the other's completion.
  71. */
/* Note: only task-task, implicit data dependencies or task-tag dependencies
 * are taken into account. Tags released from a callback or by similar means
 * are not taken into account; only tags associated with a task are. */
/*
 * One entry per task seen while recording with dependencies.  Entries are
 * chained in a singly-linked list whose head is tasks.
 */
struct bound_task
{
	/* Unique ID (copied from the job's job_id) */
	unsigned long id;
	/* Tag ID, if any */
	starpu_tag_t tag_id;
	/* Non-zero when tag_id is meaningful (copied from the task's use_tag) */
	int use_tag;
	/* Which codelet has been executed */
	struct starpu_codelet *cl;
	/* Task footprint key */
	uint32_t footprint;
	/* Task priority */
	int priority;
	/* Tasks this one depends on: array of depsn pointers, grown with realloc */
	struct bound_task **deps;
	int depsn;
	/* Estimated duration, indexed by perf archtype; filled in lazily by
	 * starpu_bound_print_lp (zero means "not computed yet", NAN means "no
	 * estimation available") */
	double duration[STARPU_NARCH_VARIATIONS];
	/* Other tasks */
	struct bound_task *next;
};
/*
 * One recorded tag-to-tag dependency: `tag' depends on `dep_tag'.  Entries are
 * chained in a singly-linked list whose head is tag_deps.
 */
struct bound_tag_dep
{
	/* The dependent tag */
	starpu_tag_t tag;
	/* The tag it depends on */
	starpu_tag_t dep_tag;
	/* Other tag dependencies */
	struct bound_tag_dep *next;
};
/* Head of the per-kind counters list (recording without dependencies), and
 * the pool that _starpu_bound_record tries first before walking that list;
 * both are reset by starpu_bound_start. */
static struct bound_task_pool *task_pools, *last;
/* Head of the recorded tasks list (recording with dependencies) */
static struct bound_task *tasks;
/* Head of the recorded tag dependencies list */
static struct bound_tag_dep *tag_deps;
/* Non-zero between starpu_bound_start and starpu_bound_stop */
int _starpu_bound_recording;
/* Recording mode, as given to starpu_bound_start: whether dependencies are
 * recorded, and whether priorities are taken into account */
static int recorddeps;
static int recordprio;
/* Taken around every update of the lists and flags above */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  109. void starpu_bound_start(int deps, int prio)
  110. {
  111. struct bound_task_pool *tp;
  112. struct bound_task *t;
  113. struct bound_tag_dep *td;
  114. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  115. tp = task_pools;
  116. task_pools = NULL;
  117. last = NULL;
  118. t = tasks;
  119. tasks = NULL;
  120. td = tag_deps;
  121. tag_deps = NULL;
  122. _starpu_bound_recording = 1;
  123. recorddeps = deps;
  124. recordprio = prio;
  125. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  126. while (tp != NULL)
  127. {
  128. struct bound_task_pool *next = tp->next;
  129. free(tp);
  130. tp = next;
  131. }
  132. while (t != NULL)
  133. {
  134. struct bound_task *next = t->next;
  135. free(t);
  136. t = next;
  137. }
  138. while (td != NULL)
  139. {
  140. struct bound_tag_dep *next = td->next;
  141. free(td);
  142. td = next;
  143. }
  144. }
  145. static int good_job(struct _starpu_job *j)
  146. {
  147. /* No codelet, nothing to measure */
  148. if (j->exclude_from_dag)
  149. return 0;
  150. if (!j->task->cl)
  151. return 0;
  152. /* No performance model, no time duration estimation */
  153. if (!j->task->cl->model)
  154. return 0;
  155. /* Only support history based */
  156. if (j->task->cl->model->type != STARPU_HISTORY_BASED
  157. && j->task->cl->model->type != STARPU_NL_REGRESSION_BASED)
  158. return 0;
  159. return 1;
  160. }
  161. static void new_task(struct _starpu_job *j)
  162. {
  163. struct bound_task *t;
  164. if (j->bound_task)
  165. return;
  166. t = (struct bound_task *) malloc(sizeof(*t));
  167. memset(t, 0, sizeof(*t));
  168. t->id = j->job_id;
  169. t->tag_id = j->task->tag_id;
  170. t->use_tag = j->task->use_tag;
  171. t->cl = j->task->cl;
  172. t->footprint = _starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
  173. t->priority = j->task->priority;
  174. t->deps = NULL;
  175. t->depsn = 0;
  176. t->next = tasks;
  177. j->bound_task = t;
  178. tasks = t;
  179. }
  180. void _starpu_bound_record(struct _starpu_job *j)
  181. {
  182. if (!_starpu_bound_recording)
  183. return;
  184. if (!good_job(j))
  185. return;
  186. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  187. /* Re-check, this time with mutex held */
  188. if (!_starpu_bound_recording)
  189. {
  190. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  191. return;
  192. }
  193. if (recorddeps)
  194. {
  195. new_task(j);
  196. }
  197. else
  198. {
  199. struct bound_task_pool *tp;
  200. _starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
  201. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  202. tp = last;
  203. else
  204. for (tp = task_pools; tp; tp = tp->next)
  205. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  206. break;
  207. if (!tp)
  208. {
  209. tp = (struct bound_task_pool *) malloc(sizeof(*tp));
  210. tp->cl = j->task->cl;
  211. tp->footprint = j->footprint;
  212. tp->n = 0;
  213. tp->next = task_pools;
  214. task_pools = tp;
  215. }
  216. /* One more task of this kind */
  217. tp->n++;
  218. }
  219. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  220. }
  221. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  222. {
  223. struct bound_tag_dep *td;
  224. if (!_starpu_bound_recording || !recorddeps)
  225. return;
  226. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  227. /* Re-check, this time with mutex held */
  228. if (!_starpu_bound_recording || !recorddeps)
  229. {
  230. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  231. return;
  232. }
  233. td = (struct bound_tag_dep *) malloc(sizeof(*td));
  234. td->tag = id;
  235. td->dep_tag = dep_id;
  236. td->next = tag_deps;
  237. tag_deps = td;
  238. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  239. }
  240. void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
  241. {
  242. struct bound_task *t;
  243. if (!_starpu_bound_recording || !recorddeps)
  244. return;
  245. if (!good_job(j) || !good_job(dep_j))
  246. return;
  247. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  248. /* Re-check, this time with mutex held */
  249. if (!_starpu_bound_recording || !recorddeps)
  250. {
  251. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  252. return;
  253. }
  254. new_task(j);
  255. new_task(dep_j);
  256. t = j->bound_task;
  257. t->deps = (struct bound_task **) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  258. t->deps[t->depsn-1] = dep_j->bound_task;
  259. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  260. }
  261. static struct bound_task *find_job(unsigned long id)
  262. {
  263. struct bound_task *t;
  264. for (t = tasks; t; t = t->next)
  265. if (t->id == id)
  266. return t;
  267. return NULL;
  268. }
  269. void _starpu_bound_job_id_dep(struct _starpu_job *j, unsigned long id)
  270. {
  271. struct bound_task *t, *dep_t;
  272. if (!_starpu_bound_recording || !recorddeps)
  273. return;
  274. if (!good_job(j))
  275. return;
  276. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  277. /* Re-check, this time with mutex held */
  278. if (!_starpu_bound_recording || !recorddeps)
  279. {
  280. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  281. return;
  282. }
  283. new_task(j);
  284. dep_t = find_job(id);
  285. if (!dep_t)
  286. {
  287. fprintf(stderr,"dependency %lu not found !\n", id);
  288. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  289. return;
  290. }
  291. t = j->bound_task;
  292. t->deps = (struct bound_task **) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  293. t->deps[t->depsn-1] = dep_t;
  294. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  295. }
/*
 * Stop recording: clears _starpu_bound_recording under the mutex, so that the
 * recording hooks return early from then on.  What was recorded so far is
 * left untouched.
 */
void starpu_bound_stop(void)
{
	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
	_starpu_bound_recording = 0;
	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
}
  302. static void _starpu_get_tasks_times(int nw, int nt, double *times)
  303. {
  304. struct bound_task_pool *tp;
  305. int w, t;
  306. for (w = 0; w < nw; w++)
  307. {
  308. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  309. {
  310. struct _starpu_job j =
  311. {
  312. .footprint = tp->footprint,
  313. .footprint_is_computed = 1,
  314. };
  315. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  316. double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl);
  317. if (isnan(length))
  318. times[w*nt+t] = NAN;
  319. else
  320. times[w*nt+t] = length / 1000.;
  321. }
  322. }
  323. }
  324. static int ancestor(struct bound_task *child, struct bound_task *parent)
  325. {
  326. int i;
  327. for (i = 0; i < child->depsn; i++)
  328. {
  329. if (parent == child->deps[i])
  330. return 1;
  331. if (ancestor(child->deps[i], parent))
  332. return -1;
  333. }
  334. return 0;
  335. }
  336. void starpu_bound_print_dot(FILE *output)
  337. {
  338. struct bound_task *t;
  339. struct bound_tag_dep *td;
  340. int i;
  341. if (!recorddeps)
  342. {
  343. fprintf(output, "Not supported\n");
  344. return;
  345. }
  346. fprintf(output, "strict digraph bounddeps {\n");
  347. for (t = tasks; t; t = t->next)
  348. {
  349. fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, _starpu_codelet_get_model_name(t->cl));
  350. for (i = 0; i < t->depsn; i++)
  351. fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i]->id, t->id);
  352. }
  353. for (td = tag_deps; td; td = td->next)
  354. fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
  355. fprintf(output, "}\n");
  356. }
  357. /*
  358. * lp_solve format
  359. *
  360. * When dependencies are enabled, you can check the set of tasks and deps that
  361. * were recorded by using tools/lp2paje and vite.
  362. */
  363. void starpu_bound_print_lp(FILE *output)
  364. {
  365. int nt; /* Number of different kinds of tasks */
  366. int nw; /* Number of different workers */
  367. int t, w;
  368. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  369. nw = starpu_worker_get_count();
  370. if (recorddeps)
  371. {
  372. struct bound_task *t1, *t2;
  373. struct bound_tag_dep *td;
  374. int i;
  375. nt = 0;
  376. for (t1 = tasks; t1; t1 = t1->next)
  377. {
  378. if (t1->cl->model->type != STARPU_HISTORY_BASED &&
  379. t1->cl->model->type != STARPU_NL_REGRESSION_BASED)
  380. /* TODO: */
  381. fprintf(stderr, "Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl));
  382. struct _starpu_job j =
  383. {
  384. .footprint = t1->footprint,
  385. .footprint_is_computed = 1,
  386. };
  387. for (w = 0; w < nw; w++)
  388. {
  389. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  390. if (_STARPU_IS_ZERO(t1->duration[arch]))
  391. {
  392. double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
  393. if (isnan(length))
  394. /* Avoid problems with binary coding of doubles */
  395. t1->duration[arch] = NAN;
  396. else
  397. t1->duration[arch] = length / 1000.;
  398. }
  399. }
  400. nt++;
  401. }
  402. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  403. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  404. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  405. fprintf(output, "min: tmax;\n\n");
  406. fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
  407. for (t1 = tasks; t1; t1 = t1->next)
  408. fprintf(output, "c%lu <= tmax;\n", t1->id);
  409. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  410. for (t1 = tasks; t1; t1 = t1->next)
  411. {
  412. for (w = 0; w < nw; w++)
  413. {
  414. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  415. if (!isnan(t1->duration[arch]))
  416. fprintf(output, " +t%luw%d", t1->id, w);
  417. }
  418. fprintf(output, " = 1;\n");
  419. }
  420. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  421. fprintf(output, "/* According to where the task is indeed executed */\n");
  422. for (t1 = tasks; t1; t1 = t1->next)
  423. {
  424. fprintf(output, "/* %s %x */\tc%lu = s%lu", _starpu_codelet_get_model_name(t1->cl), (unsigned) t1->footprint, t1->id, t1->id);
  425. for (w = 0; w < nw; w++)
  426. {
  427. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  428. if (!isnan(t1->duration[arch]))
  429. fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
  430. }
  431. fprintf(output, ";\n");
  432. }
  433. fprintf(output, "\n/* Each task starts after all its task dependencies finish. */\n");
  434. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  435. for (t1 = tasks; t1; t1 = t1->next)
  436. for (i = 0; i < t1->depsn; i++)
  437. fprintf(output, "s%lu >= c%lu;\n", t1->id, t1->deps[i]->id);
  438. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
  439. for (t1 = tasks; t1; t1 = t1->next)
  440. if (t1->use_tag)
  441. {
  442. for (w = 0; w < nw; w++)
  443. fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
  444. }
  445. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  446. for (td = tag_deps; td; td = td->next)
  447. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  448. /* TODO: factorize ancestor calls */
  449. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  450. fprintf(output, " one is started after the other's completion */\n");
  451. for (t1 = tasks; t1; t1 = t1->next)
  452. {
  453. for (t2 = t1->next; t2; t2 = t2->next)
  454. {
  455. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  456. {
  457. for (w = 0; w < nw; w++)
  458. {
  459. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  460. if (!isnan(t1->duration[arch]))
  461. {
  462. fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
  463. t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
  464. fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
  465. t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
  466. }
  467. }
  468. }
  469. }
  470. }
  471. #if 0
  472. /* Doesn't help at all to actually express what "after" means */
  473. for (t1 = tasks; t1; t1 = t1->next)
  474. for (t2 = t1->next; t2; t2 = t2->next)
  475. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  476. {
  477. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  478. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  479. }
  480. #endif
  481. if (recordprio)
  482. {
  483. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  484. fprintf(output, " * more prioritized first */\n");
  485. for (t1 = tasks; t1; t1 = t1->next)
  486. {
  487. for (t2 = t1->next; t2; t2 = t2->next)
  488. {
  489. if (!ancestor(t1, t2) && !ancestor(t2, t1)
  490. && t1->priority != t2->priority)
  491. {
  492. if (t1->priority > t2->priority)
  493. {
  494. /* Either t2 is scheduled before t1, but then it
  495. needs to be scheduled before some t dep finishes */
  496. /* One of the t1 deps to give the maximum start time for t2 */
  497. if (t1->depsn > 1)
  498. {
  499. for (i = 0; i < t1->depsn; i++)
  500. fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
  501. fprintf(output, " = 1;\n");
  502. }
  503. for (i = 0; i < t1->depsn; i++)
  504. {
  505. fprintf(output, "c%lu - s%lu >= ", t1->deps[i]->id, t2->id);
  506. if (t1->depsn > 1)
  507. /* Only checks this when it's this dependency that is chosen */
  508. fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
  509. else
  510. fprintf(output, "-1e5");
  511. /* Only check this if t1 is after t2 */
  512. fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
  513. fprintf(output, ";\n");
  514. }
  515. /* Or t2 is scheduled after t1 is. */
  516. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  517. }
  518. else
  519. {
  520. /* Either t1 is scheduled before t2, but then it
  521. needs to be scheduled before some t2 dep finishes */
  522. /* One of the t2 deps to give the maximum start time for t1 */
  523. if (t2->depsn > 1)
  524. {
  525. for (i = 0; i < t2->depsn; i++)
  526. fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
  527. fprintf(output, " = 1;\n");
  528. }
  529. for (i = 0; i < t2->depsn; i++)
  530. {
  531. fprintf(output, "c%lu - s%lu >= ", t2->deps[i]->id, t1->id);
  532. if (t2->depsn > 1)
  533. /* Only checks this when it's this dependency that is chosen */
  534. fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
  535. /* Only check this if t2 is after t1 */
  536. fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
  537. }
  538. /* Or t1 is scheduled after t2 is. */
  539. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  540. }
  541. }
  542. }
  543. }
  544. }
  545. for (t1 = tasks; t1; t1 = t1->next)
  546. for (t2 = t1->next; t2; t2 = t2->next)
  547. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  548. {
  549. fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
  550. if (recordprio && t1->priority != t2->priority)
  551. {
  552. if (t1->priority > t2->priority)
  553. {
  554. if (t1->depsn > 1)
  555. for (i = 0; i < t1->depsn; i++)
  556. fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
  557. }
  558. else
  559. {
  560. if (t2->depsn > 1)
  561. for (i = 0; i < t2->depsn; i++)
  562. fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
  563. }
  564. }
  565. }
  566. for (t1 = tasks; t1; t1 = t1->next)
  567. for (w = 0; w < nw; w++)
  568. fprintf(output, "bin t%luw%d;\n", t1->id, w);
  569. }
  570. else
  571. {
  572. struct bound_task_pool *tp;
  573. nt = 0;
  574. for (tp = task_pools; tp; tp = tp->next)
  575. nt++;
  576. {
  577. double times[nw*nt];
  578. _starpu_get_tasks_times(nw, nt, times);
  579. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  580. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  581. fprintf(output, "min: tmax;\n\n");
  582. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  583. for (w = 0; w < nw; w++)
  584. {
  585. char name[32];
  586. starpu_worker_get_name(w, name, sizeof(name));
  587. fprintf(output, "/* worker %s */\n0", name);
  588. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  589. {
  590. if (!isnan(times[w*nt+t]))
  591. fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
  592. }
  593. fprintf(output, " <= tmax;\n");
  594. }
  595. fprintf(output, "\n");
  596. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  597. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  598. {
  599. int got_one = 0;
  600. fprintf(output, "/* task %s key %x */\n0", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  601. for (w = 0; w < nw; w++) {
  602. if (isnan(times[w*nt+t]))
  603. fprintf(stderr, "Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w);
  604. else {
  605. got_one = 1;
  606. fprintf(output, "\t+w%dt%dn", w, t);
  607. }
  608. }
  609. fprintf(output, " = %lu;\n", tp->n);
  610. if (!got_one)
  611. fprintf(stderr, "Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl));
  612. /* Show actual values */
  613. fprintf(output, "/*");
  614. for (w = 0; w < nw; w++)
  615. fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
  616. fprintf(output, "\t*/\n\n");
  617. }
  618. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  619. fprintf(output, "/* int ");
  620. int first = 1;
  621. for (w = 0; w < nw; w++)
  622. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  623. {
  624. if (!first)
  625. fprintf(output, ",");
  626. else
  627. first = 0;
  628. fprintf(output, "w%dt%dn", w, t);
  629. }
  630. fprintf(output, "; */\n");
  631. }
  632. }
  633. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  634. }
  635. /*
  636. * MPS output format
  637. */
  638. void starpu_bound_print_mps(FILE *output)
  639. {
  640. struct bound_task_pool * tp;
  641. int nt; /* Number of different kinds of tasks */
  642. int nw; /* Number of different workers */
  643. int t, w;
  644. if (recorddeps)
  645. {
  646. fprintf(output, "Not supported\n");
  647. return;
  648. }
  649. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  650. nw = starpu_worker_get_count();
  651. nt = 0;
  652. for (tp = task_pools; tp; tp = tp->next)
  653. nt++;
  654. {
  655. double times[nw*nt];
  656. _starpu_get_tasks_times(nw, nt, times);
  657. fprintf(output, "NAME StarPU theoretical bound\n");
  658. fprintf(output, "\nROWS\n");
  659. fprintf(output, "* We want to minimize total execution time (ms)\n");
  660. fprintf(output, " N TMAX\n");
  661. fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
  662. for (w = 0; w < nw; w++)
  663. {
  664. char name[32];
  665. starpu_worker_get_name(w, name, sizeof(name));
  666. fprintf(output, "* worker %s\n", name);
  667. fprintf(output, " L W%d\n", w);
  668. }
  669. fprintf(output, "\n* And we have to have computed exactly all tasks\n");
  670. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  671. {
  672. fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  673. fprintf(output, " E T%d\n", t);
  674. }
  675. fprintf(output, "\nCOLUMNS\n");
  676. fprintf(output, "\n* Execution times and completion of all tasks\n");
  677. for (w = 0; w < nw; w++)
  678. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  679. if (!isnan(times[w*nt+t]))
  680. {
  681. char name[9];
  682. snprintf(name, sizeof(name), "W%dT%d", w, t);
  683. fprintf(stderr," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
  684. fprintf(stderr," %-8s T%-7d %12d\n", name, t, 1);
  685. }
  686. fprintf(output, "\n* Total execution time\n");
  687. for (w = 0; w < nw; w++)
  688. fprintf(stderr," TMAX W%-2d %12d\n", w, -1);
  689. fprintf(stderr," TMAX TMAX %12d\n", 1);
  690. fprintf(output, "\nRHS\n");
  691. fprintf(output, "\n* Total number of tasks\n");
  692. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  693. fprintf(stderr," NT%-2d T%-7d %12lu\n", t, t, tp->n);
  694. fprintf(output, "ENDATA\n");
  695. }
  696. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  697. }
  698. /*
  699. * GNU Linear Programming Kit backend
  700. */
  701. #ifdef HAVE_GLPK_H
  702. static glp_prob *_starpu_bound_glp_resolve(int integer)
  703. {
  704. struct bound_task_pool * tp;
  705. int nt; /* Number of different kinds of tasks */
  706. int nw; /* Number of different workers */
  707. int t, w;
  708. glp_prob *lp;
  709. int ret;
  710. nw = starpu_worker_get_count();
  711. nt = 0;
  712. for (tp = task_pools; tp; tp = tp->next)
  713. nt++;
  714. lp = glp_create_prob();
  715. glp_set_prob_name(lp, "StarPU theoretical bound");
  716. glp_set_obj_dir(lp, GLP_MIN);
  717. glp_set_obj_name(lp, "total execution time");
  718. {
  719. double times[nw*nt];
  720. int ne =
  721. nw * (nt+1) /* worker execution time */
  722. + nt * nw
  723. + 1; /* glp dumbness */
  724. int n = 1;
  725. int ia[ne], ja[ne];
  726. double ar[ne];
  727. _starpu_get_tasks_times(nw, nt, times);
  728. /* Variables: number of tasks i assigned to worker j, and tmax */
  729. glp_add_cols(lp, nw*nt+1);
  730. #define colnum(w, t) ((t)*nw+(w)+1)
  731. glp_set_obj_coef(lp, nw*nt+1, 1.);
  732. for (w = 0; w < nw; w++)
  733. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  734. {
  735. char name[32];
  736. snprintf(name, sizeof(name), "w%dt%dn", w, t);
  737. glp_set_col_name(lp, colnum(w, t), name);
  738. if (integer)
  739. glp_set_col_kind(lp, colnum(w, t), GLP_IV);
  740. glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
  741. }
  742. glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
  743. /* Total worker execution time */
  744. glp_add_rows(lp, nw);
  745. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  746. {
  747. int someone = 0;
  748. for (w = 0; w < nw; w++)
  749. if (!isnan(times[w*nt+t]))
  750. someone = 1;
  751. if (!someone)
  752. {
  753. /* This task does not have any performance model at all, abort */
  754. glp_delete_prob(lp);
  755. return NULL;
  756. }
  757. }
  758. for (w = 0; w < nw; w++)
  759. {
  760. char name[32], title[64];
  761. starpu_worker_get_name(w, name, sizeof(name));
  762. snprintf(title, sizeof(title), "worker %s", name);
  763. glp_set_row_name(lp, w+1, title);
  764. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  765. {
  766. ia[n] = w+1;
  767. ja[n] = colnum(w, t);
  768. if (isnan(times[w*nt+t]))
  769. ar[n] = 1000000000.;
  770. else
  771. ar[n] = times[w*nt+t];
  772. n++;
  773. }
  774. /* tmax */
  775. ia[n] = w+1;
  776. ja[n] = nw*nt+1;
  777. ar[n] = -1;
  778. n++;
  779. glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
  780. }
  781. /* Total task completion */
  782. glp_add_rows(lp, nt);
  783. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  784. {
  785. char name[32], title[64];
  786. starpu_worker_get_name(w, name, sizeof(name));
  787. snprintf(title, sizeof(title), "task %s key %x", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  788. glp_set_row_name(lp, nw+t+1, title);
  789. for (w = 0; w < nw; w++)
  790. {
  791. ia[n] = nw+t+1;
  792. ja[n] = colnum(w, t);
  793. ar[n] = 1;
  794. n++;
  795. }
  796. glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
  797. }
  798. STARPU_ASSERT(n == ne);
  799. glp_load_matrix(lp, ne-1, ia, ja, ar);
  800. }
  801. glp_smcp parm;
  802. glp_init_smcp(&parm);
  803. parm.msg_lev = GLP_MSG_OFF;
  804. ret = glp_simplex(lp, &parm);
  805. if (ret)
  806. {
  807. glp_delete_prob(lp);
  808. lp = NULL;
  809. return NULL;
  810. }
  811. if (integer)
  812. {
  813. glp_iocp iocp;
  814. glp_init_iocp(&iocp);
  815. iocp.msg_lev = GLP_MSG_OFF;
  816. glp_intopt(lp, &iocp);
  817. }
  818. return lp;
  819. }
  820. #endif /* HAVE_GLPK_H */
  821. void starpu_bound_print(FILE *output, int integer __attribute__ ((unused)))
  822. {
  823. #ifdef HAVE_GLPK_H
  824. if (recorddeps)
  825. {
  826. fprintf(output, "Not supported\n");
  827. return;
  828. }
  829. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  830. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  831. if (lp)
  832. {
  833. struct bound_task_pool * tp;
  834. int t, w;
  835. int nw; /* Number of different workers */
  836. double tmax;
  837. nw = starpu_worker_get_count();
  838. if (integer)
  839. tmax = glp_mip_obj_val(lp);
  840. else
  841. tmax = glp_get_obj_val(lp);
  842. fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
  843. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  844. {
  845. fprintf(output, "%s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  846. for (w = 0; w < nw; w++)
  847. if (integer)
  848. fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
  849. else
  850. fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
  851. fprintf(output, "\n");
  852. }
  853. glp_delete_prob(lp);
  854. }
  855. else
  856. {
  857. fprintf(stderr, "Simplex failed\n");
  858. }
  859. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  860. #else /* HAVE_GLPK_H */
  861. fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  862. #endif /* HAVE_GLPK_H */
  863. }
  864. void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused)))
  865. {
  866. #ifdef HAVE_GLPK_H
  867. double ret;
  868. if (recorddeps)
  869. {
  870. *res = 0.;
  871. return;
  872. }
  873. _STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  874. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  875. if (lp)
  876. {
  877. ret = glp_get_obj_val(lp);
  878. if (integer)
  879. *integer_res = glp_mip_obj_val(lp);
  880. glp_delete_prob(lp);
  881. }
  882. else
  883. ret = 0.;
  884. _STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  885. *res = ret;
  886. #else /* HAVE_GLPK_H */
  887. *res = 0.;
  888. #endif /* HAVE_GLPK_H */
  889. }