bound.c 32 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011,2012,2014 Inria
  4. * Copyright (C) 2010-2017 Université de Bordeaux
  5. * Copyright (C) 2010-2017 CNRS
  6. * Copyright (C) 2013 Thibaut Lambert
  7. * Copyright (C) 2011 Télécom-SudParis
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. /*
  21. * Record which kinds of tasks have been executed, to later on compute an upper
  22. * bound of the performance that could have theoretically been achieved
  23. */
  24. #include <starpu.h>
  25. #include <starpu_config.h>
  26. #include <profiling/bound.h>
  27. #include <core/jobs.h>
  28. #include <core/workers.h>
  29. #ifdef STARPU_HAVE_GLPK_H
  30. #include <glpk.h>
  31. #endif /* STARPU_HAVE_GLPK_H */
  32. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
  33. /* TODO: compute critical path and introduce it in the LP */
  34. /*
  35. * Record without dependencies: just count each kind of task
  36. *
  37. * The linear programming problem will just have as variables:
  38. * - the number of tasks of kind `t' executed by worker `w'
  39. * - the total duration
  40. *
  41. * and the constraints will be:
  42. * - the time taken by each worker to complete its assigned tasks is lower than
  43. * the total duration.
  44. * - the total numer of tasks of a given kind is equal to the number run by the
  45. * application.
  46. */
  47. struct bound_task_pool
  48. {
  49. /* Which codelet has been executed */
  50. struct starpu_codelet *cl;
  51. /* Task footprint key (for history-based perfmodel) */
  52. uint32_t footprint;
  53. /* Number of tasks of this kind */
  54. unsigned long n;
  55. /* Other task kinds */
  56. struct bound_task_pool *next;
  57. };
  58. /*
  59. * Record with dependencies: each task is recorded separately
  60. *
  61. * The linear programming problem will have as variables:
  62. * - The start time of each task
  63. * - The completion time of each tag
  64. * - The total duration
  65. * - For each task and for each worker, whether the task is executing on that worker.
  66. * - For each pair of task, which task is scheduled first.
  67. *
  68. * and the constraints will be:
  69. * - All task start time plus duration are less than total duration
  70. * - Each task is executed on exactly one worker.
  71. * - Each task starts after all its task dependencies finish.
  72. * - Each task starts after all its tag dependencies finish.
  73. * - For each task pair and each worker, if both tasks are executed by that worker,
  74. * one is started after the other's completion.
  75. */
  76. struct task_dep
  77. {
  78. /* Task this depends on */
  79. struct bound_task *dep;
  80. /* Data transferred between tasks (i.e. implicit data dep size) */
  81. size_t size;
  82. };
  83. struct bound_task
  84. {
  85. /* Unique ID */
  86. unsigned long id;
  87. /* Tag ID, if any */
  88. starpu_tag_t tag_id;
  89. int use_tag;
  90. /* Which codelet has been executed */
  91. struct starpu_codelet *cl;
  92. /* Task footprint key */
  93. uint32_t footprint;
  94. /* Task priority */
  95. int priority;
  96. /* Tasks this one depends on */
  97. struct task_dep *deps;
  98. int depsn;
  99. /* Estimated duration */
  100. double** duration[STARPU_NARCH];
  101. /* Other tasks */
  102. struct bound_task *next;
  103. };
  104. struct bound_tag_dep
  105. {
  106. starpu_tag_t tag;
  107. starpu_tag_t dep_tag;
  108. struct bound_tag_dep *next;
  109. };
  110. static struct bound_task_pool *task_pools, *last;
  111. static struct bound_task *tasks;
  112. static struct bound_tag_dep *tag_deps;
  113. int _starpu_bound_recording;
  114. static int recorddeps;
  115. static int recordprio;
  116. static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  117. /* Initialization */
  118. void starpu_bound_start(int deps, int prio)
  119. {
  120. struct bound_task_pool *tp;
  121. struct bound_task *t;
  122. struct bound_tag_dep *td;
  123. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  124. tp = task_pools;
  125. task_pools = NULL;
  126. last = NULL;
  127. t = tasks;
  128. tasks = NULL;
  129. td = tag_deps;
  130. tag_deps = NULL;
  131. _starpu_bound_recording = 1;
  132. recorddeps = deps;
  133. recordprio = prio;
  134. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  135. while (tp != NULL)
  136. {
  137. struct bound_task_pool *next = tp->next;
  138. free(tp);
  139. tp = next;
  140. }
  141. while (t != NULL)
  142. {
  143. struct bound_task *next = t->next;
  144. free(t);
  145. t = next;
  146. }
  147. while (td != NULL)
  148. {
  149. struct bound_tag_dep *next = td->next;
  150. free(td);
  151. td = next;
  152. }
  153. }
  154. /* Whether we will include it in the computation */
  155. static int good_job(struct _starpu_job *j)
  156. {
  157. /* No codelet, nothing to measure */
  158. if (j->exclude_from_dag)
  159. return 0;
  160. if (!j->task->cl)
  161. return 0;
  162. /* No performance model, no time duration estimation */
  163. if (!j->task->cl->model)
  164. return 0;
  165. /* Only support history based */
  166. if (j->task->cl->model->type != STARPU_HISTORY_BASED
  167. && j->task->cl->model->type != STARPU_NL_REGRESSION_BASED)
  168. return 0;
  169. return 1;
  170. }
  171. static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table)
  172. {
  173. int devid, maxncore;
  174. double ** arch_model;
  175. _STARPU_MALLOC(arch_model, sizeof(*arch_model)*(maxdevid+1));
  176. arch_model[maxdevid] = NULL;
  177. for(devid=0; devid<maxdevid; devid++)
  178. {
  179. if(maxncore_table != NULL)
  180. maxncore = maxncore_table[devid];
  181. else
  182. maxncore = 1;
  183. _STARPU_CALLOC(arch_model[devid], maxncore+1,sizeof(*arch_model[devid]));
  184. }
  185. return arch_model;
  186. }
  187. static void initialize_duration(struct bound_task *task)
  188. {
  189. struct _starpu_machine_config *conf = _starpu_get_machine_config();
  190. task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.nhwcpus);
  191. task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.nhwcudagpus,NULL);
  192. task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nhwopenclgpus,NULL);
  193. task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nhwmicdevices,conf->topology.nmiccores);
  194. task->duration[STARPU_SCC_WORKER] = initialize_arch_duration(conf->topology.nhwscc,NULL);
  195. }
  196. static struct starpu_perfmodel_device device =
  197. {
  198. .type = STARPU_CPU_WORKER,
  199. .devid = 0,
  200. .ncores = 1,
  201. };
  202. static struct starpu_perfmodel_arch dumb_arch =
  203. {
  204. .ndevices = 1,
  205. .devices = &device,
  206. };
  207. /* Create a new task (either because it has just been submitted, or a
  208. * dependency was added before submission) */
  209. static void new_task(struct _starpu_job *j)
  210. {
  211. struct bound_task *t;
  212. if (j->bound_task)
  213. return;
  214. _STARPU_MALLOC(t, sizeof(*t));
  215. memset(t, 0, sizeof(*t));
  216. t->id = j->job_id;
  217. t->tag_id = j->task->tag_id;
  218. t->use_tag = j->task->use_tag;
  219. t->cl = j->task->cl;
  220. t->footprint = _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, &dumb_arch, 0, j);
  221. t->priority = j->task->priority;
  222. t->deps = NULL;
  223. t->depsn = 0;
  224. initialize_duration(t);
  225. t->next = tasks;
  226. j->bound_task = t;
  227. tasks = t;
  228. }
  229. /* A new task was submitted, record it */
  230. void _starpu_bound_record(struct _starpu_job *j)
  231. {
  232. if (!_starpu_bound_recording)
  233. return;
  234. if (!good_job(j))
  235. return;
  236. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  237. /* Re-check, this time with mutex held */
  238. if (!_starpu_bound_recording)
  239. {
  240. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  241. return;
  242. }
  243. if (recorddeps)
  244. {
  245. new_task(j);
  246. }
  247. else
  248. {
  249. struct bound_task_pool *tp;
  250. _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, NULL, 0, j);
  251. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  252. tp = last;
  253. else
  254. for (tp = task_pools; tp; tp = tp->next)
  255. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  256. break;
  257. if (!tp)
  258. {
  259. _STARPU_MALLOC(tp, sizeof(*tp));
  260. tp->cl = j->task->cl;
  261. tp->footprint = j->footprint;
  262. tp->n = 0;
  263. tp->next = task_pools;
  264. task_pools = tp;
  265. }
  266. /* One more task of this kind */
  267. tp->n++;
  268. }
  269. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  270. }
  271. /* A tag dependency was emitted, record it */
  272. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  273. {
  274. struct bound_tag_dep *td;
  275. if (!_starpu_bound_recording || !recorddeps)
  276. return;
  277. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  278. /* Re-check, this time with mutex held */
  279. if (!_starpu_bound_recording || !recorddeps)
  280. {
  281. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  282. return;
  283. }
  284. _STARPU_MALLOC(td, sizeof(*td));
  285. td->tag = id;
  286. td->dep_tag = dep_id;
  287. td->next = tag_deps;
  288. tag_deps = td;
  289. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  290. }
  291. /* A task dependency was emitted, record it */
  292. void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
  293. {
  294. struct bound_task *t;
  295. int i;
  296. if (!_starpu_bound_recording || !recorddeps)
  297. return;
  298. if (!good_job(j) || !good_job(dep_j))
  299. return;
  300. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  301. /* Re-check, this time with mutex held */
  302. if (!_starpu_bound_recording || !recorddeps)
  303. {
  304. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  305. return;
  306. }
  307. new_task(j);
  308. new_task(dep_j);
  309. t = j->bound_task;
  310. for (i = 0; i < t->depsn; i++)
  311. if (t->deps[i].dep == dep_j->bound_task)
  312. break;
  313. if (i == t->depsn)
  314. {
  315. /* Not already there, add */
  316. _STARPU_REALLOC(t->deps, ++t->depsn * sizeof(t->deps[0]));
  317. t->deps[t->depsn-1].dep = dep_j->bound_task;
  318. t->deps[t->depsn-1].size = 0; /* We don't have data information in that case */
  319. }
  320. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  321. }
  322. /* Look for job with id ID among our tasks */
  323. static struct bound_task *find_job(unsigned long id)
  324. {
  325. struct bound_task *t;
  326. for (t = tasks; t; t = t->next)
  327. if (t->id == id)
  328. return t;
  329. return NULL;
  330. }
  331. /* Job J depends on previous job of id ID (which is already finished) */
  332. void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *j, unsigned long id)
  333. {
  334. struct bound_task *t, *dep_t;
  335. int i;
  336. if (!_starpu_bound_recording || !recorddeps)
  337. return;
  338. if (!good_job(j))
  339. return;
  340. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  341. /* Re-check, this time with mutex held */
  342. if (!_starpu_bound_recording || !recorddeps)
  343. {
  344. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  345. return;
  346. }
  347. new_task(j);
  348. dep_t = find_job(id);
  349. if (!dep_t)
  350. {
  351. _STARPU_MSG("dependency %lu not found !\n", id);
  352. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  353. return;
  354. }
  355. t = j->bound_task;
  356. for (i = 0; i < t->depsn; i++)
  357. if (t->deps[i].dep == dep_t)
  358. {
  359. /* Found, just add size */
  360. t->deps[i].size += _starpu_data_get_size(handle);
  361. break;
  362. }
  363. if (i == t->depsn)
  364. {
  365. /* Not already there, add */
  366. _STARPU_REALLOC(t->deps, ++t->depsn * sizeof(t->deps[0]));
  367. t->deps[t->depsn-1].dep = dep_t;
  368. t->deps[t->depsn-1].size = _starpu_data_get_size(handle);
  369. }
  370. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  371. }
  372. void starpu_bound_stop(void)
  373. {
  374. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  375. _starpu_bound_recording = 0;
  376. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  377. }
  378. /* Compute all tasks times on all workers */
  379. static void _starpu_get_tasks_times(int nw, int nt, double *times)
  380. {
  381. struct bound_task_pool *tp;
  382. int w, t;
  383. for (w = 0; w < nw; w++)
  384. {
  385. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  386. {
  387. struct _starpu_job j =
  388. {
  389. .footprint = tp->footprint,
  390. .footprint_is_computed = 1,
  391. };
  392. struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
  393. double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl);
  394. if (isnan(length))
  395. times[w*nt+t] = NAN;
  396. else
  397. times[w*nt+t] = length / 1000.;
  398. }
  399. }
  400. }
  401. /* Return whether PARENT is an ancestor of CHILD */
  402. static int ancestor(struct bound_task *child, struct bound_task *parent)
  403. {
  404. int i;
  405. for (i = 0; i < child->depsn; i++)
  406. {
  407. if (parent == child->deps[i].dep)
  408. return 1;
  409. if (ancestor(child->deps[i].dep, parent))
  410. return -1;
  411. }
  412. return 0;
  413. }
  414. /* Print bound recording in .dot format */
  415. void starpu_bound_print_dot(FILE *output)
  416. {
  417. struct bound_task *t;
  418. struct bound_tag_dep *td;
  419. int i;
  420. if (!recorddeps)
  421. {
  422. fprintf(output, "Not supported\n");
  423. return;
  424. }
  425. fprintf(output, "strict digraph bounddeps {\n");
  426. for (t = tasks; t; t = t->next)
  427. {
  428. fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, _starpu_codelet_get_model_name(t->cl));
  429. for (i = 0; i < t->depsn; i++)
  430. fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i].dep->id, t->id);
  431. }
  432. for (td = tag_deps; td; td = td->next)
  433. fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
  434. fprintf(output, "}\n");
  435. }
  436. /*
  437. * Print bound system in lp_solve format
  438. *
  439. * When dependencies are enabled, you can check the set of tasks and deps that
  440. * were recorded by using tools/lp2paje and vite.
  441. */
  442. void starpu_bound_print_lp(FILE *output)
  443. {
  444. int nt; /* Number of different kinds of tasks */
  445. int nw; /* Number of different workers */
  446. int t;
  447. int w, w2; /* worker */
  448. unsigned n, n2;
  449. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  450. nw = starpu_worker_get_count();
  451. if (!nw)
  452. /* Make llvm happy about the VLA below */
  453. return;
  454. if (recorddeps)
  455. {
  456. struct bound_task *t1, *t2;
  457. struct bound_tag_dep *td;
  458. int i;
  459. nt = 0;
  460. for (t1 = tasks; t1; t1 = t1->next)
  461. {
  462. if (t1->cl->model->type != STARPU_HISTORY_BASED &&
  463. t1->cl->model->type != STARPU_NL_REGRESSION_BASED)
  464. /* TODO: */
  465. _STARPU_MSG("Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl));
  466. struct _starpu_job j =
  467. {
  468. .footprint = t1->footprint,
  469. .footprint_is_computed = 1,
  470. };
  471. for (w = 0; w < nw; w++)
  472. {
  473. struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
  474. if (_STARPU_IS_ZERO(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
  475. {
  476. double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
  477. if (isnan(length))
  478. /* Avoid problems with binary coding of doubles */
  479. t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = NAN;
  480. else
  481. t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = length / 1000.;
  482. }
  483. }
  484. nt++;
  485. }
  486. if (!nt)
  487. return;
  488. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  489. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  490. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  491. fprintf(output, "min: tmax;\n\n");
  492. fprintf(output, "/* Number of tasks */\n");
  493. fprintf(output, "nt = %d;\n", nt);
  494. fprintf(output, "/* Number of workers */\n");
  495. fprintf(output, "nw = %d;\n", nw);
  496. fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n");
  497. for (t1 = tasks; t1; t1 = t1->next)
  498. fprintf(output, "c%lu <= tmax;\n", t1->id);
  499. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  500. for (t1 = tasks; t1; t1 = t1->next)
  501. {
  502. for (w = 0; w < nw; w++)
  503. {
  504. struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
  505. if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
  506. fprintf(output, " +t%luw%d", t1->id, w);
  507. }
  508. fprintf(output, " = 1;\n");
  509. }
  510. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  511. fprintf(output, "/* According to where the task is indeed executed */\n");
  512. for (t1 = tasks; t1; t1 = t1->next)
  513. {
  514. fprintf(output, "/* %s %x */\tc%lu = s%lu", _starpu_codelet_get_model_name(t1->cl), (unsigned) t1->footprint, t1->id, t1->id);
  515. for (w = 0; w < nw; w++)
  516. {
  517. struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
  518. if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
  519. fprintf(output, " + %f t%luw%d", t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores], t1->id, w);
  520. }
  521. fprintf(output, ";\n");
  522. }
  523. fprintf(output, "\n/* Each task starts after all its task dependencies finish and data is transferred. */\n");
  524. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  525. for (t1 = tasks; t1; t1 = t1->next)
  526. for (i = 0; i < t1->depsn; i++)
  527. {
  528. fprintf(output, "/* %lu bytes transferred */\n", (unsigned long) t1->deps[i].size);
  529. fprintf(output, "s%lu >= c%lu", t1->id, t1->deps[i].dep->id);
  530. /* Transfer time: pick up one source node and a worker on it */
  531. for (n = 0; n < starpu_memory_nodes_get_count(); n++)
  532. for (w = 0; w < nw; w++)
  533. if (starpu_worker_get_memory_node(w) == n)
  534. {
  535. /* pick up another destination node and a worker on it */
  536. for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++)
  537. if (n2 != n)
  538. {
  539. for (w2 = 0; w2 < nw; w2++)
  540. if (starpu_worker_get_memory_node(w2) == n2)
  541. {
  542. /* If predecessor is on worker w and successor
  543. * on worker w2 on different nodes, we need to
  544. * transfer the data. */
  545. fprintf(output, " + d_t%luw%dt%luw%d", t1->deps[i].dep->id, w, t1->id, w2);
  546. }
  547. }
  548. }
  549. fprintf(output, ";\n");
  550. /* Transfer time: pick up one source node and a worker on it */
  551. for (n = 0; n < starpu_memory_nodes_get_count(); n++)
  552. for (w = 0; w < nw; w++)
  553. if (starpu_worker_get_memory_node(w) == n)
  554. {
  555. /* pick up another destination node and a worker on it */
  556. for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++)
  557. if (n2 != n)
  558. {
  559. for (w2 = 0; w2 < nw; w2++)
  560. if (starpu_worker_get_memory_node(w2) == n2)
  561. {
  562. /* The data transfer is at least 0ms */
  563. fprintf(output, "d_t%luw%dt%luw%d >= 0;\n", t1->deps[i].dep->id, w, t1->id, w2);
  564. /* The data transfer from w to w2 only happens if tasks run there */
  565. fprintf(output, "d_t%luw%dt%luw%d >= %f - 2e5 + 1e5 t%luw%d + 1e5 t%luw%d;\n",
  566. t1->deps[i].dep->id, w, t1->id, w2,
  567. starpu_transfer_predict(n, n2, t1->deps[i].size)/1000.,
  568. t1->deps[i].dep->id, w, t1->id, w2);
  569. }
  570. }
  571. }
  572. }
  573. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */\n");
  574. for (t1 = tasks; t1; t1 = t1->next)
  575. if (t1->use_tag)
  576. {
  577. for (w = 0; w < nw; w++)
  578. fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
  579. }
  580. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  581. for (td = tag_deps; td; td = td->next)
  582. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  583. /* TODO: factorize ancestor calls */
  584. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  585. fprintf(output, " one is started after the other's completion */\n");
  586. for (t1 = tasks; t1; t1 = t1->next)
  587. {
  588. for (t2 = t1->next; t2; t2 = t2->next)
  589. {
  590. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  591. {
  592. for (w = 0; w < nw; w++)
  593. {
  594. struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS);
  595. if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores]))
  596. {
  597. fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
  598. t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
  599. fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
  600. t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
  601. }
  602. }
  603. }
  604. }
  605. }
  606. #if 0
  607. /* Doesn't help at all to actually express what "after" means */
  608. for (t1 = tasks; t1; t1 = t1->next)
  609. for (t2 = t1->next; t2; t2 = t2->next)
  610. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  611. {
  612. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  613. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  614. }
  615. #endif
  616. if (recordprio)
  617. {
  618. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  619. fprintf(output, " * more prioritized first */\n");
  620. for (t1 = tasks; t1; t1 = t1->next)
  621. {
  622. for (t2 = t1->next; t2; t2 = t2->next)
  623. {
  624. if (!ancestor(t1, t2) && !ancestor(t2, t1)
  625. && t1->priority != t2->priority)
  626. {
  627. if (t1->priority > t2->priority)
  628. {
  629. /* Either t2 is scheduled before t1, but then it
  630. needs to be scheduled before some t dep finishes */
  631. /* One of the t1 deps to give the maximum start time for t2 */
  632. if (t1->depsn > 1)
  633. {
  634. for (i = 0; i < t1->depsn; i++)
  635. fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
  636. fprintf(output, " = 1;\n");
  637. }
  638. for (i = 0; i < t1->depsn; i++)
  639. {
  640. fprintf(output, "c%lu - s%lu >= ", t1->deps[i].dep->id, t2->id);
  641. if (t1->depsn > 1)
  642. /* Only checks this when it's this dependency that is chosen */
  643. fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
  644. else
  645. fprintf(output, "-1e5");
  646. /* Only check this if t1 is after t2 */
  647. fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
  648. fprintf(output, ";\n");
  649. }
  650. /* Or t2 is scheduled after t1 is. */
  651. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  652. }
  653. else
  654. {
  655. /* Either t1 is scheduled before t2, but then it
  656. needs to be scheduled before some t2 dep finishes */
  657. /* One of the t2 deps to give the maximum start time for t1 */
  658. if (t2->depsn > 1)
  659. {
  660. for (i = 0; i < t2->depsn; i++)
  661. fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
  662. fprintf(output, " = 1;\n");
  663. }
  664. for (i = 0; i < t2->depsn; i++)
  665. {
  666. fprintf(output, "c%lu - s%lu >= ", t2->deps[i].dep->id, t1->id);
  667. if (t2->depsn > 1)
  668. /* Only checks this when it's this dependency that is chosen */
  669. fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
  670. /* Only check this if t2 is after t1 */
  671. fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
  672. }
  673. /* Or t1 is scheduled after t2 is. */
  674. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  675. }
  676. }
  677. }
  678. }
  679. }
  680. for (t1 = tasks; t1; t1 = t1->next)
  681. for (t2 = t1->next; t2; t2 = t2->next)
  682. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  683. {
  684. fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
  685. if (recordprio && t1->priority != t2->priority)
  686. {
  687. if (t1->priority > t2->priority)
  688. {
  689. if (t1->depsn > 1)
  690. for (i = 0; i < t1->depsn; i++)
  691. fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
  692. }
  693. else
  694. {
  695. if (t2->depsn > 1)
  696. for (i = 0; i < t2->depsn; i++)
  697. fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
  698. }
  699. }
  700. }
  701. for (t1 = tasks; t1; t1 = t1->next)
  702. for (w = 0; w < nw; w++)
  703. fprintf(output, "bin t%luw%d;\n", t1->id, w);
  704. }
  705. else
  706. {
  707. struct bound_task_pool *tp;
  708. nt = 0;
  709. for (tp = task_pools; tp; tp = tp->next)
  710. nt++;
  711. if (!nt)
  712. return;
  713. {
  714. double times[nw*nt];
  715. _starpu_get_tasks_times(nw, nt, times);
  716. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  717. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  718. fprintf(output, "min: tmax;\n\n");
  719. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  720. for (w = 0; w < nw; w++)
  721. {
  722. char name[32];
  723. starpu_worker_get_name(w, name, sizeof(name));
  724. fprintf(output, "/* worker %s */\n0", name);
  725. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  726. {
  727. if (!isnan(times[w*nt+t]))
  728. fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
  729. }
  730. fprintf(output, " <= tmax;\n");
  731. }
  732. fprintf(output, "\n");
  733. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  734. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  735. {
  736. int got_one = 0;
  737. fprintf(output, "/* task %s key %x */\n0", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  738. for (w = 0; w < nw; w++)
  739. {
  740. if (isnan(times[w*nt+t]))
  741. _STARPU_MSG("Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w);
  742. else
  743. {
  744. got_one = 1;
  745. fprintf(output, "\t+w%dt%dn", w, t);
  746. }
  747. }
  748. fprintf(output, " = %lu;\n", tp->n);
  749. if (!got_one)
  750. _STARPU_MSG("Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl));
  751. /* Show actual values */
  752. fprintf(output, "/*");
  753. for (w = 0; w < nw; w++)
  754. fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
  755. fprintf(output, "\t*/\n\n");
  756. }
  757. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  758. fprintf(output, "/* int ");
  759. int first = 1;
  760. for (w = 0; w < nw; w++)
  761. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  762. {
  763. if (!first)
  764. fprintf(output, ",");
  765. else
  766. first = 0;
  767. fprintf(output, "w%dt%dn", w, t);
  768. }
  769. fprintf(output, "; */\n");
  770. }
  771. }
  772. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  773. }
  774. /*
  775. * Print bound system in MPS output format
  776. */
  777. void starpu_bound_print_mps(FILE *output)
  778. {
  779. struct bound_task_pool * tp;
  780. int nt; /* Number of different kinds of tasks */
  781. int nw; /* Number of different workers */
  782. int t, w;
  783. if (recorddeps)
  784. {
  785. fprintf(output, "Not supported\n");
  786. return;
  787. }
  788. nw = starpu_worker_get_count();
  789. if (!nw)
  790. /* Make llvm happy about the VLA below */
  791. return;
  792. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  793. nt = 0;
  794. for (tp = task_pools; tp; tp = tp->next)
  795. nt++;
  796. if (!nt)
  797. {
  798. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  799. return;
  800. }
  801. {
  802. double times[nw*nt];
  803. _starpu_get_tasks_times(nw, nt, times);
  804. fprintf(output, "NAME StarPU theoretical bound\n");
  805. fprintf(output, "*\nROWS\n");
  806. fprintf(output, "* We want to minimize total execution time (ms)\n");
  807. fprintf(output, " N TMAX\n");
  808. fprintf(output, "* Which is the maximum of all worker execution times (ms)\n");
  809. for (w = 0; w < nw; w++)
  810. {
  811. char name[32];
  812. starpu_worker_get_name(w, name, sizeof(name));
  813. fprintf(output, "* worker %s\n", name);
  814. fprintf(output, " L W%d\n", w);
  815. }
  816. fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n");
  817. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  818. {
  819. fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  820. fprintf(output, " E T%d\n", t);
  821. }
  822. fprintf(output, "*\nCOLUMNS\n*\n");
  823. fprintf(output, "*\n* Execution times and completion of all tasks\n*\n");
  824. for (w = 0; w < nw; w++)
  825. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  826. if (!isnan(times[w*nt+t]))
  827. {
  828. char name[23];
  829. snprintf(name, sizeof(name), "W%dT%d", w, t);
  830. fprintf(output," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
  831. fprintf(output," %-8s T%-7d %12d\n", name, t, 1);
  832. }
  833. fprintf(output, "*\n* Total execution time\n*\n");
  834. for (w = 0; w < nw; w++)
  835. fprintf(output," TMAX W%-2d %12d\n", w, -1);
  836. fprintf(output," TMAX TMAX %12d\n", 1);
  837. fprintf(output, "*\nRHS\n*\n");
  838. fprintf(output, "*\n* Total number of tasks\n*\n");
  839. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  840. fprintf(output," NT%-2d T%-7d %12lu\n", t, t, tp->n);
  841. fprintf(output, "ENDATA\n");
  842. }
  843. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  844. }
  845. /*
  846. * Solve bound system thanks to GNU Linear Programming Kit backend
  847. */
  848. #ifdef STARPU_HAVE_GLPK_H
  849. static glp_prob *_starpu_bound_glp_resolve(int integer)
  850. {
  851. struct bound_task_pool * tp;
  852. int nt; /* Number of different kinds of tasks */
  853. int nw; /* Number of different workers */
  854. int t, w;
  855. glp_prob *lp;
  856. int ret;
  857. nw = starpu_worker_get_count();
  858. if (!nw)
  859. /* Make llvm happy about the VLA below */
  860. return NULL;
  861. nt = 0;
  862. for (tp = task_pools; tp; tp = tp->next)
  863. nt++;
  864. if (!nt)
  865. return NULL;
  866. lp = glp_create_prob();
  867. glp_set_prob_name(lp, "StarPU theoretical bound");
  868. glp_set_obj_dir(lp, GLP_MIN);
  869. glp_set_obj_name(lp, "total execution time");
  870. {
  871. double times[nw*nt];
  872. int ne =
  873. nw * (nt+1) /* worker execution time */
  874. + nt * nw
  875. + 1; /* glp dumbness */
  876. int n = 1;
  877. int ia[ne], ja[ne];
  878. double ar[ne];
  879. _starpu_get_tasks_times(nw, nt, times);
  880. /* Variables: number of tasks i assigned to worker j, and tmax */
  881. glp_add_cols(lp, nw*nt+1);
  882. #define colnum(w, t) ((t)*nw+(w)+1)
  883. glp_set_obj_coef(lp, nw*nt+1, 1.);
  884. for (w = 0; w < nw; w++)
  885. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  886. {
  887. char name[32];
  888. snprintf(name, sizeof(name), "w%dt%dn", w, t);
  889. glp_set_col_name(lp, colnum(w, t), name);
  890. if (integer)
  891. glp_set_col_kind(lp, colnum(w, t), GLP_IV);
  892. glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
  893. }
  894. glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);
  895. /* Total worker execution time */
  896. glp_add_rows(lp, nw);
  897. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  898. {
  899. int someone = 0;
  900. for (w = 0; w < nw; w++)
  901. if (!isnan(times[w*nt+t]))
  902. someone = 1;
  903. if (!someone)
  904. {
  905. /* This task does not have any performance model at all, abort */
  906. glp_delete_prob(lp);
  907. return NULL;
  908. }
  909. }
  910. for (w = 0; w < nw; w++)
  911. {
  912. char name[32], title[64];
  913. starpu_worker_get_name(w, name, sizeof(name));
  914. snprintf(title, sizeof(title), "worker %s", name);
  915. glp_set_row_name(lp, w+1, title);
  916. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  917. {
  918. ia[n] = w+1;
  919. ja[n] = colnum(w, t);
  920. if (isnan(times[w*nt+t]))
  921. ar[n] = 1000000000.;
  922. else
  923. ar[n] = times[w*nt+t];
  924. n++;
  925. }
  926. /* tmax */
  927. ia[n] = w+1;
  928. ja[n] = nw*nt+1;
  929. ar[n] = -1;
  930. n++;
  931. glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
  932. }
  933. /* Total task completion */
  934. glp_add_rows(lp, nt);
  935. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  936. {
  937. char name[32], title[64];
  938. starpu_worker_get_name(w, name, sizeof(name));
  939. snprintf(title, sizeof(title), "task %s key %x", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  940. glp_set_row_name(lp, nw+t+1, title);
  941. for (w = 0; w < nw; w++)
  942. {
  943. ia[n] = nw+t+1;
  944. ja[n] = colnum(w, t);
  945. ar[n] = 1;
  946. n++;
  947. }
  948. glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
  949. }
  950. STARPU_ASSERT(n == ne);
  951. glp_load_matrix(lp, ne-1, ia, ja, ar);
  952. }
  953. glp_smcp parm;
  954. glp_init_smcp(&parm);
  955. parm.msg_lev = GLP_MSG_OFF;
  956. ret = glp_simplex(lp, &parm);
  957. if (ret)
  958. {
  959. glp_delete_prob(lp);
  960. lp = NULL;
  961. return NULL;
  962. }
  963. if (integer)
  964. {
  965. glp_iocp iocp;
  966. glp_init_iocp(&iocp);
  967. iocp.msg_lev = GLP_MSG_OFF;
  968. glp_intopt(lp, &iocp);
  969. }
  970. return lp;
  971. }
  972. #endif /* STARPU_HAVE_GLPK_H */
  973. /* Print the computed bound as well as the optimized distribution of tasks */
  974. void starpu_bound_print(FILE *output, int integer)
  975. {
  976. #ifdef STARPU_HAVE_GLPK_H
  977. if (recorddeps)
  978. {
  979. fprintf(output, "Not supported\n");
  980. return;
  981. }
  982. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  983. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  984. if (lp)
  985. {
  986. struct bound_task_pool * tp;
  987. int t, w;
  988. int nw; /* Number of different workers */
  989. double tmax;
  990. nw = starpu_worker_get_count();
  991. if (integer)
  992. tmax = glp_mip_obj_val(lp);
  993. else
  994. tmax = glp_get_obj_val(lp);
  995. fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
  996. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  997. {
  998. fprintf(output, "%s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  999. for (w = 0; w < nw; w++)
  1000. if (integer)
  1001. fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t)));
  1002. else
  1003. fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t)));
  1004. fprintf(output, "\n");
  1005. }
  1006. glp_delete_prob(lp);
  1007. }
  1008. else
  1009. {
  1010. _STARPU_MSG("Simplex failed\n");
  1011. }
  1012. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  1013. #else /* STARPU_HAVE_GLPK_H */
  1014. (void) integer;
  1015. fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  1016. #endif /* STARPU_HAVE_GLPK_H */
  1017. }
  1018. /* Compute and return the bound */
  1019. void starpu_bound_compute(double *res, double *integer_res, int integer)
  1020. {
  1021. #ifdef STARPU_HAVE_GLPK_H
  1022. double ret;
  1023. if (recorddeps)
  1024. {
  1025. *res = 0.;
  1026. return;
  1027. }
  1028. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  1029. glp_prob *lp = _starpu_bound_glp_resolve(integer);
  1030. if (lp)
  1031. {
  1032. ret = glp_get_obj_val(lp);
  1033. if (integer)
  1034. *integer_res = glp_mip_obj_val(lp);
  1035. glp_delete_prob(lp);
  1036. }
  1037. else
  1038. ret = 0.;
  1039. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  1040. *res = ret;
  1041. #else /* STARPU_HAVE_GLPK_H */
  1042. (void) integer_res;
  1043. (void) integer;
  1044. *res = 0.;
  1045. #endif /* STARPU_HAVE_GLPK_H */
  1046. }