bound.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  4. * Copyright (C) 2010-2013 Université de Bordeaux 1
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /*
  19. * Record which kinds of tasks have been executed, to later on compute an upper
  20. * bound of the performance that could have theoretically been achieved
  21. */
  22. #include <starpu.h>
  23. #include <starpu_config.h>
  24. #include <profiling/bound.h>
  25. #include <core/jobs.h>
  26. #ifdef STARPU_HAVE_GLPK_H
  27. #include <glpk.h>
  28. #endif /* STARPU_HAVE_GLPK_H */
  29. /* TODO: output duration between starpu_bound_start and starpu_bound_stop */
  30. /*
  31. * Record without dependencies: just count each kind of task
  32. *
  33. * The linear programming problem will just have as variables:
  34. * - the number of tasks of kind `t' executed by worker `w'
  35. * - the total duration
  36. *
  37. * and the constraints will be:
  38. * - the time taken by each worker to complete its assigned tasks is lower than
  39. * the total duration.
  40. * - the total number of tasks of a given kind is equal to the number run by the
  41. * application.
  42. */
  /*
   * One kind of task seen while recording without dependencies.
   * Kinds are told apart by their (codelet, footprint) pair and are chained
   * in a singly-linked list.
   */
  struct bound_task_pool
  {
      struct starpu_codelet *cl;      /* codelet that was executed */
      uint32_t footprint;             /* footprint key (for history-based perfmodel) */
      unsigned long n;                /* how many tasks of this kind were recorded */
      struct bound_task_pool *next;   /* next kind of task in the list */
  };
  54. /*
  55. * Record with dependencies: each task is recorded separately
  56. *
  57. * The linear programming problem will have as variables:
  58. * - The start time of each task
  59. * - The completion time of each tag
  60. * - The total duration
  61. * - For each task and for each worker, whether the task is executing on that worker.
  62. * - For each pair of task, which task is scheduled first.
  63. *
  64. * and the constraints will be:
  65. * - All task start time plus duration are less than total duration
  66. * - Each task is executed on exactly one worker.
  67. * - Each task starts after all its task dependencies finish.
  68. * - Each task starts after all its tag dependencies finish.
  69. * - For each task pair and each worker, if both tasks are executed by that worker,
  70. * one is started after the other's completion.
  71. */
  /* One dependency edge of a recorded task. */
  struct task_dep
  {
      struct bound_task *dep;   /* the task that must finish first */
      size_t size;              /* bytes transferred between both tasks
                                 * (i.e. implicit data dependency size) */
  };
  79. struct bound_task
  80. {
  81. /* Unique ID */
  82. unsigned long id;
  83. /* Tag ID, if any */
  84. starpu_tag_t tag_id;
  85. int use_tag;
  86. /* Which codelet has been executed */
  87. struct starpu_codelet *cl;
  88. /* Task footprint key */
  89. uint32_t footprint;
  90. /* Task priority */
  91. int priority;
  92. /* Tasks this one depends on */
  93. struct task_dep *deps;
  94. int depsn;
  95. /* Estimated duration */
  96. double duration[STARPU_NARCH_VARIATIONS];
  97. /* Other tasks */
  98. struct bound_task *next;
  99. };
  100. struct bound_tag_dep
  101. {
  102. starpu_tag_t tag;
  103. starpu_tag_t dep_tag;
  104. struct bound_tag_dep *next;
  105. };
  106. static struct bound_task_pool *task_pools, *last;
  107. static struct bound_task *tasks;
  108. static struct bound_tag_dep *tag_deps;
  109. int _starpu_bound_recording;
  110. static int recorddeps;
  111. static int recordprio;
  112. static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  113. /* Initialization */
  114. void starpu_bound_start(int deps, int prio)
  115. {
  116. struct bound_task_pool *tp;
  117. struct bound_task *t;
  118. struct bound_tag_dep *td;
  119. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  120. tp = task_pools;
  121. task_pools = NULL;
  122. last = NULL;
  123. t = tasks;
  124. tasks = NULL;
  125. td = tag_deps;
  126. tag_deps = NULL;
  127. _starpu_bound_recording = 1;
  128. recorddeps = deps;
  129. recordprio = prio;
  130. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  131. while (tp != NULL)
  132. {
  133. struct bound_task_pool *next = tp->next;
  134. free(tp);
  135. tp = next;
  136. }
  137. while (t != NULL)
  138. {
  139. struct bound_task *next = t->next;
  140. free(t);
  141. t = next;
  142. }
  143. while (td != NULL)
  144. {
  145. struct bound_tag_dep *next = td->next;
  146. free(td);
  147. td = next;
  148. }
  149. }
  150. /* Whether we will include it in the computation */
  151. static int good_job(struct _starpu_job *j)
  152. {
  153. /* No codelet, nothing to measure */
  154. if (j->exclude_from_dag)
  155. return 0;
  156. if (!j->task->cl)
  157. return 0;
  158. /* No performance model, no time duration estimation */
  159. if (!j->task->cl->model)
  160. return 0;
  161. /* Only support history based */
  162. if (j->task->cl->model->type != STARPU_HISTORY_BASED
  163. && j->task->cl->model->type != STARPU_NL_REGRESSION_BASED)
  164. return 0;
  165. return 1;
  166. }
  167. /* Create a new task (either because it has just been submitted, or a
  168. * dependency was added before submission) */
  169. static void new_task(struct _starpu_job *j)
  170. {
  171. struct bound_task *t;
  172. if (j->bound_task)
  173. return;
  174. t = (struct bound_task *) malloc(sizeof(*t));
  175. memset(t, 0, sizeof(*t));
  176. t->id = j->job_id;
  177. t->tag_id = j->task->tag_id;
  178. t->use_tag = j->task->use_tag;
  179. t->cl = j->task->cl;
  180. t->footprint = _starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
  181. t->priority = j->task->priority;
  182. t->deps = NULL;
  183. t->depsn = 0;
  184. t->next = tasks;
  185. j->bound_task = t;
  186. tasks = t;
  187. }
  188. /* A new task was submitted, record it */
  189. void _starpu_bound_record(struct _starpu_job *j)
  190. {
  191. if (!_starpu_bound_recording)
  192. return;
  193. if (!good_job(j))
  194. return;
  195. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  196. /* Re-check, this time with mutex held */
  197. if (!_starpu_bound_recording)
  198. {
  199. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  200. return;
  201. }
  202. if (recorddeps)
  203. {
  204. new_task(j);
  205. }
  206. else
  207. {
  208. struct bound_task_pool *tp;
  209. _starpu_compute_buffers_footprint(NULL, STARPU_CPU_DEFAULT, 0, j);
  210. if (last && last->cl == j->task->cl && last->footprint == j->footprint)
  211. tp = last;
  212. else
  213. for (tp = task_pools; tp; tp = tp->next)
  214. if (tp->cl == j->task->cl && tp->footprint == j->footprint)
  215. break;
  216. if (!tp)
  217. {
  218. tp = (struct bound_task_pool *) malloc(sizeof(*tp));
  219. tp->cl = j->task->cl;
  220. tp->footprint = j->footprint;
  221. tp->n = 0;
  222. tp->next = task_pools;
  223. task_pools = tp;
  224. }
  225. /* One more task of this kind */
  226. tp->n++;
  227. }
  228. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  229. }
  230. /* A tag dependency was emitted, record it */
  231. void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
  232. {
  233. struct bound_tag_dep *td;
  234. if (!_starpu_bound_recording || !recorddeps)
  235. return;
  236. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  237. /* Re-check, this time with mutex held */
  238. if (!_starpu_bound_recording || !recorddeps)
  239. {
  240. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  241. return;
  242. }
  243. td = (struct bound_tag_dep *) malloc(sizeof(*td));
  244. td->tag = id;
  245. td->dep_tag = dep_id;
  246. td->next = tag_deps;
  247. tag_deps = td;
  248. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  249. }
  250. /* A task dependency was emitted, record it */
  251. void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
  252. {
  253. struct bound_task *t;
  254. int i;
  255. if (!_starpu_bound_recording || !recorddeps)
  256. return;
  257. if (!good_job(j) || !good_job(dep_j))
  258. return;
  259. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  260. /* Re-check, this time with mutex held */
  261. if (!_starpu_bound_recording || !recorddeps)
  262. {
  263. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  264. return;
  265. }
  266. new_task(j);
  267. new_task(dep_j);
  268. t = j->bound_task;
  269. for (i = 0; i < t->depsn; i++)
  270. if (t->deps[i].dep == dep_j->bound_task)
  271. break;
  272. if (i == t->depsn)
  273. {
  274. /* Not already there, add */
  275. t->deps = (struct task_dep *) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  276. t->deps[t->depsn-1].dep = dep_j->bound_task;
  277. t->deps[t->depsn-1].size = 0; /* We don't have data information in that case */
  278. }
  279. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  280. }
  281. /* Look for job with id ID among our tasks */
  282. static struct bound_task *find_job(unsigned long id)
  283. {
  284. struct bound_task *t;
  285. for (t = tasks; t; t = t->next)
  286. if (t->id == id)
  287. return t;
  288. return NULL;
  289. }
  290. /* Job J depends on previous job of id ID (which is already finished) */
  291. void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *j, unsigned long id)
  292. {
  293. struct bound_task *t, *dep_t;
  294. int i;
  295. if (!_starpu_bound_recording || !recorddeps)
  296. return;
  297. if (!good_job(j))
  298. return;
  299. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  300. /* Re-check, this time with mutex held */
  301. if (!_starpu_bound_recording || !recorddeps)
  302. {
  303. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  304. return;
  305. }
  306. new_task(j);
  307. dep_t = find_job(id);
  308. if (!dep_t)
  309. {
  310. fprintf(stderr,"dependency %lu not found !\n", id);
  311. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  312. return;
  313. }
  314. t = j->bound_task;
  315. for (i = 0; i < t->depsn; i++)
  316. if (t->deps[i].dep == dep_t)
  317. {
  318. /* Found, just add size */
  319. t->deps[i].size += _starpu_data_get_size(handle);
  320. break;
  321. }
  322. if (i == t->depsn)
  323. {
  324. /* Not already there, add */
  325. t->deps = (struct task_dep *) realloc(t->deps, ++t->depsn * sizeof(t->deps[0]));
  326. t->deps[t->depsn-1].dep = dep_t;
  327. t->deps[t->depsn-1].size = _starpu_data_get_size(handle);
  328. }
  329. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  330. }
  331. void starpu_bound_stop(void)
  332. {
  333. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  334. _starpu_bound_recording = 0;
  335. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  336. }
  337. /* Compute all tasks times on all workers */
  338. static void _starpu_get_tasks_times(int nw, int nt, double *times)
  339. {
  340. struct bound_task_pool *tp;
  341. int w, t;
  342. for (w = 0; w < nw; w++)
  343. {
  344. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  345. {
  346. struct _starpu_job j =
  347. {
  348. .footprint = tp->footprint,
  349. .footprint_is_computed = 1,
  350. };
  351. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  352. double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl);
  353. if (isnan(length))
  354. times[w*nt+t] = NAN;
  355. else
  356. times[w*nt+t] = length / 1000.;
  357. }
  358. }
  359. }
  360. /* Return whether PARENT is an ancestor of CHILD */
  361. static int ancestor(struct bound_task *child, struct bound_task *parent)
  362. {
  363. int i;
  364. for (i = 0; i < child->depsn; i++)
  365. {
  366. if (parent == child->deps[i].dep)
  367. return 1;
  368. if (ancestor(child->deps[i].dep, parent))
  369. return -1;
  370. }
  371. return 0;
  372. }
  373. /* Print bound recording in .dot format */
  374. void starpu_bound_print_dot(FILE *output)
  375. {
  376. struct bound_task *t;
  377. struct bound_tag_dep *td;
  378. int i;
  379. if (!recorddeps)
  380. {
  381. fprintf(output, "Not supported\n");
  382. return;
  383. }
  384. fprintf(output, "strict digraph bounddeps {\n");
  385. for (t = tasks; t; t = t->next)
  386. {
  387. fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, _starpu_codelet_get_model_name(t->cl));
  388. for (i = 0; i < t->depsn; i++)
  389. fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i].dep->id, t->id);
  390. }
  391. for (td = tag_deps; td; td = td->next)
  392. fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag);
  393. fprintf(output, "}\n");
  394. }
  395. /*
  396. * Print bound system in lp_solve format
  397. *
  398. * When dependencies are enabled, you can check the set of tasks and deps that
  399. * were recorded by using tools/lp2paje and vite.
  400. */
  401. void starpu_bound_print_lp(FILE *output)
  402. {
  403. int nt; /* Number of different kinds of tasks */
  404. int nw; /* Number of different workers */
  405. int t;
  406. int w, w2; /* worker */
  407. unsigned n, n2;
  408. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  409. nw = starpu_worker_get_count();
  410. if (recorddeps)
  411. {
  412. struct bound_task *t1, *t2;
  413. struct bound_tag_dep *td;
  414. int i;
  415. nt = 0;
  416. for (t1 = tasks; t1; t1 = t1->next)
  417. {
  418. if (t1->cl->model->type != STARPU_HISTORY_BASED &&
  419. t1->cl->model->type != STARPU_NL_REGRESSION_BASED)
  420. /* TODO: */
  421. fprintf(stderr, "Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl));
  422. struct _starpu_job j =
  423. {
  424. .footprint = t1->footprint,
  425. .footprint_is_computed = 1,
  426. };
  427. for (w = 0; w < nw; w++)
  428. {
  429. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  430. if (_STARPU_IS_ZERO(t1->duration[arch]))
  431. {
  432. double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
  433. if (isnan(length))
  434. /* Avoid problems with binary coding of doubles */
  435. t1->duration[arch] = NAN;
  436. else
  437. t1->duration[arch] = length / 1000.;
  438. }
  439. }
  440. nt++;
  441. }
  442. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  443. fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
  444. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  445. fprintf(output, "min: tmax;\n\n");
  446. fprintf(output, "/* Number of tasks */\n");
  447. fprintf(output, "nt = %d;\n", nt);
  448. fprintf(output, "/* Number of workers */\n");
  449. fprintf(output, "nw = %d;\n", nw);
  450. fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n");
  451. for (t1 = tasks; t1; t1 = t1->next)
  452. fprintf(output, "c%lu <= tmax;\n", t1->id);
  453. fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
  454. for (t1 = tasks; t1; t1 = t1->next)
  455. {
  456. for (w = 0; w < nw; w++)
  457. {
  458. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  459. if (!isnan(t1->duration[arch]))
  460. fprintf(output, " +t%luw%d", t1->id, w);
  461. }
  462. fprintf(output, " = 1;\n");
  463. }
  464. fprintf(output, "\n/* Completion time is start time plus computation time */\n");
  465. fprintf(output, "/* According to where the task is indeed executed */\n");
  466. for (t1 = tasks; t1; t1 = t1->next)
  467. {
  468. fprintf(output, "/* %s %x */\tc%lu = s%lu", _starpu_codelet_get_model_name(t1->cl), (unsigned) t1->footprint, t1->id, t1->id);
  469. for (w = 0; w < nw; w++)
  470. {
  471. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  472. if (!isnan(t1->duration[arch]))
  473. fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
  474. }
  475. fprintf(output, ";\n");
  476. }
  477. fprintf(output, "\n/* Each task starts after all its task dependencies finish and data is transferred. */\n");
  478. fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n");
  479. for (t1 = tasks; t1; t1 = t1->next)
  480. for (i = 0; i < t1->depsn; i++)
  481. {
  482. fprintf(output, "/* %lu bytes transferred */\n", (unsigned long) t1->deps[i].size);
  483. fprintf(output, "s%lu >= c%lu", t1->id, t1->deps[i].dep->id);
  484. /* Transfer time: pick up one source node and a worker on it */
  485. for (n = 0; n < starpu_memory_nodes_get_count(); n++)
  486. for (w = 0; w < nw; w++)
  487. if (starpu_worker_get_memory_node(w) == n)
  488. {
  489. /* pick up another destination node and a worker on it */
  490. for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++)
  491. if (n2 != n)
  492. {
  493. for (w2 = 0; w2 < nw; w2++)
  494. if (starpu_worker_get_memory_node(w2) == n2)
  495. {
  496. /* If predecessor is on worker w and successor
  497. * on worker w2 on different nodes, we need to
  498. * transfer the data. */
  499. fprintf(output, " + d_t%luw%ut%luw%u", t1->deps[i].dep->id, w, t1->id, w2);
  500. }
  501. }
  502. }
  503. fprintf(output, ";\n");
  504. /* Transfer time: pick up one source node and a worker on it */
  505. for (n = 0; n < starpu_memory_nodes_get_count(); n++)
  506. for (w = 0; w < nw; w++)
  507. if (starpu_worker_get_memory_node(w) == n)
  508. {
  509. /* pick up another destination node and a worker on it */
  510. for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++)
  511. if (n2 != n)
  512. {
  513. for (w2 = 0; w2 < nw; w2++)
  514. if (starpu_worker_get_memory_node(w2) == n2)
  515. {
  516. /* The data transfer is at least 0ms */
  517. fprintf(output, "d_t%luw%ut%luw%u >= 0;\n", t1->deps[i].dep->id, w, t1->id, w2);
  518. /* The data transfer from w to w2 only happens if tasks run there */
  519. fprintf(output, "d_t%luw%ut%luw%u >= %f - 2e5 + 1e5 t%luw%u + 1e5 t%luw%u;\n",
  520. t1->deps[i].dep->id, w, t1->id, w2,
  521. _starpu_predict_transfer_time(n, n2, t1->deps[i].size)/1000.,
  522. t1->deps[i].dep->id, w, t1->id, w2);
  523. }
  524. }
  525. }
  526. }
  527. fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */\n");
  528. for (t1 = tasks; t1; t1 = t1->next)
  529. if (t1->use_tag)
  530. {
  531. for (w = 0; w < nw; w++)
  532. fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
  533. }
  534. fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n");
  535. for (td = tag_deps; td; td = td->next)
  536. fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag);
  537. /* TODO: factorize ancestor calls */
  538. fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
  539. fprintf(output, " one is started after the other's completion */\n");
  540. for (t1 = tasks; t1; t1 = t1->next)
  541. {
  542. for (t2 = t1->next; t2; t2 = t2->next)
  543. {
  544. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  545. {
  546. for (w = 0; w < nw; w++)
  547. {
  548. enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
  549. if (!isnan(t1->duration[arch]))
  550. {
  551. fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
  552. t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
  553. fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
  554. t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id);
  555. }
  556. }
  557. }
  558. }
  559. }
  560. #if 0
  561. /* Doesn't help at all to actually express what "after" means */
  562. for (t1 = tasks; t1; t1 = t1->next)
  563. for (t2 = t1->next; t2; t2 = t2->next)
  564. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  565. {
  566. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  567. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  568. }
  569. #endif
  570. if (recordprio)
  571. {
  572. fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
  573. fprintf(output, " * more prioritized first */\n");
  574. for (t1 = tasks; t1; t1 = t1->next)
  575. {
  576. for (t2 = t1->next; t2; t2 = t2->next)
  577. {
  578. if (!ancestor(t1, t2) && !ancestor(t2, t1)
  579. && t1->priority != t2->priority)
  580. {
  581. if (t1->priority > t2->priority)
  582. {
  583. /* Either t2 is scheduled before t1, but then it
  584. needs to be scheduled before some t dep finishes */
  585. /* One of the t1 deps to give the maximum start time for t2 */
  586. if (t1->depsn > 1)
  587. {
  588. for (i = 0; i < t1->depsn; i++)
  589. fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
  590. fprintf(output, " = 1;\n");
  591. }
  592. for (i = 0; i < t1->depsn; i++)
  593. {
  594. fprintf(output, "c%lu - s%lu >= ", t1->deps[i].dep->id, t2->id);
  595. if (t1->depsn > 1)
  596. /* Only checks this when it's this dependency that is chosen */
  597. fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i);
  598. else
  599. fprintf(output, "-1e5");
  600. /* Only check this if t1 is after t2 */
  601. fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id);
  602. fprintf(output, ";\n");
  603. }
  604. /* Or t2 is scheduled after t1 is. */
  605. fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
  606. }
  607. else
  608. {
  609. /* Either t1 is scheduled before t2, but then it
  610. needs to be scheduled before some t2 dep finishes */
  611. /* One of the t2 deps to give the maximum start time for t1 */
  612. if (t2->depsn > 1)
  613. {
  614. for (i = 0; i < t2->depsn; i++)
  615. fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
  616. fprintf(output, " = 1;\n");
  617. }
  618. for (i = 0; i < t2->depsn; i++)
  619. {
  620. fprintf(output, "c%lu - s%lu >= ", t2->deps[i].dep->id, t1->id);
  621. if (t2->depsn > 1)
  622. /* Only checks this when it's this dependency that is chosen */
  623. fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i);
  624. /* Only check this if t2 is after t1 */
  625. fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id);
  626. }
  627. /* Or t1 is scheduled after t2 is. */
  628. fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id);
  629. }
  630. }
  631. }
  632. }
  633. }
  634. for (t1 = tasks; t1; t1 = t1->next)
  635. for (t2 = t1->next; t2; t2 = t2->next)
  636. if (!ancestor(t1, t2) && !ancestor(t2, t1))
  637. {
  638. fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
  639. if (recordprio && t1->priority != t2->priority)
  640. {
  641. if (t1->priority > t2->priority)
  642. {
  643. if (t1->depsn > 1)
  644. for (i = 0; i < t1->depsn; i++)
  645. fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
  646. }
  647. else
  648. {
  649. if (t2->depsn > 1)
  650. for (i = 0; i < t2->depsn; i++)
  651. fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
  652. }
  653. }
  654. }
  655. for (t1 = tasks; t1; t1 = t1->next)
  656. for (w = 0; w < nw; w++)
  657. fprintf(output, "bin t%luw%d;\n", t1->id, w);
  658. }
  659. else
  660. {
  661. struct bound_task_pool *tp;
  662. nt = 0;
  663. for (tp = task_pools; tp; tp = tp->next)
  664. nt++;
  665. {
  666. double times[nw*nt];
  667. _starpu_get_tasks_times(nw, nt, times);
  668. fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
  669. fprintf(output, "/* We want to minimize total execution time (ms) */\n");
  670. fprintf(output, "min: tmax;\n\n");
  671. fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
  672. for (w = 0; w < nw; w++)
  673. {
  674. char name[32];
  675. starpu_worker_get_name(w, name, sizeof(name));
  676. fprintf(output, "/* worker %s */\n0", name);
  677. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  678. {
  679. if (!isnan(times[w*nt+t]))
  680. fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
  681. }
  682. fprintf(output, " <= tmax;\n");
  683. }
  684. fprintf(output, "\n");
  685. fprintf(output, "/* And we have to have computed exactly all tasks */\n");
  686. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  687. {
  688. int got_one = 0;
  689. fprintf(output, "/* task %s key %x */\n0", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  690. for (w = 0; w < nw; w++)
  691. {
  692. if (isnan(times[w*nt+t]))
  693. fprintf(stderr, "Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w);
  694. else
  695. {
  696. got_one = 1;
  697. fprintf(output, "\t+w%dt%dn", w, t);
  698. }
  699. }
  700. fprintf(output, " = %lu;\n", tp->n);
  701. if (!got_one)
  702. fprintf(stderr, "Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl));
  703. /* Show actual values */
  704. fprintf(output, "/*");
  705. for (w = 0; w < nw; w++)
  706. fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]);
  707. fprintf(output, "\t*/\n\n");
  708. }
  709. fprintf(output, "/* Optionally tell that tasks can not be divided */\n");
  710. fprintf(output, "/* int ");
  711. int first = 1;
  712. for (w = 0; w < nw; w++)
  713. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  714. {
  715. if (!first)
  716. fprintf(output, ",");
  717. else
  718. first = 0;
  719. fprintf(output, "w%dt%dn", w, t);
  720. }
  721. fprintf(output, "; */\n");
  722. }
  723. }
  724. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  725. }
  726. /*
  727. * Print bound system in MPS output format
  728. */
  729. void starpu_bound_print_mps(FILE *output)
  730. {
  731. struct bound_task_pool * tp;
  732. int nt; /* Number of different kinds of tasks */
  733. int nw; /* Number of different workers */
  734. int t, w;
  735. if (recorddeps)
  736. {
  737. fprintf(output, "Not supported\n");
  738. return;
  739. }
  740. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  741. nw = starpu_worker_get_count();
  742. nt = 0;
  743. for (tp = task_pools; tp; tp = tp->next)
  744. nt++;
  745. {
  746. double times[nw*nt];
  747. _starpu_get_tasks_times(nw, nt, times);
  748. fprintf(output, "NAME StarPU theoretical bound\n");
  749. fprintf(output, "*\nROWS\n");
  750. fprintf(output, "* We want to minimize total execution time (ms)\n");
  751. fprintf(output, " N TMAX\n");
  752. fprintf(output, "* Which is the maximum of all worker execution times (ms)\n");
  753. for (w = 0; w < nw; w++)
  754. {
  755. char name[32];
  756. starpu_worker_get_name(w, name, sizeof(name));
  757. fprintf(output, "* worker %s\n", name);
  758. fprintf(output, " L W%d\n", w);
  759. }
  760. fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n");
  761. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  762. {
  763. fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
  764. fprintf(output, " E T%d\n", t);
  765. }
  766. fprintf(output, "*\nCOLUMNS\n*\n");
  767. fprintf(output, "*\n* Execution times and completion of all tasks\n*\n");
  768. for (w = 0; w < nw; w++)
  769. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  770. if (!isnan(times[w*nt+t]))
  771. {
  772. char name[9];
  773. snprintf(name, sizeof(name), "W%dT%d", w, t);
  774. fprintf(output," %-8s W%-7d %12f\n", name, w, times[w*nt+t]);
  775. fprintf(output," %-8s T%-7d %12d\n", name, t, 1);
  776. }
  777. fprintf(output, "*\n* Total execution time\n*\n");
  778. for (w = 0; w < nw; w++)
  779. fprintf(output," TMAX W%-2d %12d\n", w, -1);
  780. fprintf(output," TMAX TMAX %12d\n", 1);
  781. fprintf(output, "*\nRHS\n*\n");
  782. fprintf(output, "*\n* Total number of tasks\n*\n");
  783. for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
  784. fprintf(output," NT%-2d T%-7d %12lu\n", t, t, tp->n);
  785. fprintf(output, "ENDATA\n");
  786. }
  787. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  788. }
  /*
   * Solve the bound system with the GNU Linear Programming Kit backend.
   *
   * Model built here:
   *  - one column per (worker, task pool) pair: how many tasks of that pool
   *    the worker runs; plus one last column, tmax;
   *  - one row per worker: sum of (count * time) - tmax <= 0;
   *  - one row per task pool: the per-worker counts sum to exactly tp->n;
   *  - objective: minimize tmax.
   *
   * integer: when non-zero, the per-pair columns are declared integer and
   *          glp_intopt() is run after the simplex.
   *
   * Returns the solved problem, which the caller must release with
   * glp_delete_prob(), or NULL when there is no worker or no task pool,
   * when some task pool has no timing on any worker, or when glp_simplex()
   * returns an error code.
   *
   * Reads the task_pools list; starpu_bound_print() and
   * starpu_bound_compute() both call this with `mutex` held.
   */
  #ifdef STARPU_HAVE_GLPK_H
  static glp_prob *_starpu_bound_glp_resolve(int integer)
  {
      struct bound_task_pool *tp;
      int nt; /* Number of different kinds of tasks */
      int nw; /* Number of different workers */
      int t, w;
      glp_prob *lp;
      glp_smcp parm;
      int ret;

      nw = starpu_worker_get_count();
      nt = 0;
      for (tp = task_pools; tp; tp = tp->next)
          nt++;

      /* Nothing to solve: the arrays below would be zero-length VLAs
       * (undefined behaviour) and GLPK refuses to add fewer than one row. */
      if (nw <= 0 || nt <= 0)
          return NULL;

      lp = glp_create_prob();
      glp_set_prob_name(lp, "StarPU theoretical bound");
      glp_set_obj_dir(lp, GLP_MIN);
      glp_set_obj_name(lp, "total execution time");

      {
          double times[nw*nt];
          int ne =
              nw * (nt+1)     /* worker execution time */
              + nt * nw       /* task completion */
              + 1;            /* GLPK reads entries 1..ne-1, slot 0 is unused */
          int n = 1;
          int ia[ne], ja[ne];
          double ar[ne];

          _starpu_get_tasks_times(nw, nt, times);

          /* Variables: number of tasks i assigned to worker j, and tmax */
          glp_add_cols(lp, nw*nt+1);
  /* Column of the (worker, task) count; expands to an expression using the
   * local `nw`, and is also used by starpu_bound_print(). */
  #define colnum(w, t) ((t)*nw+(w)+1)
          glp_set_obj_coef(lp, nw*nt+1, 1.);

          for (w = 0; w < nw; w++)
              for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
              {
                  char name[32];
                  snprintf(name, sizeof(name), "w%dt%dn", w, t);
                  glp_set_col_name(lp, colnum(w, t), name);
                  if (integer)
                      glp_set_col_kind(lp, colnum(w, t), GLP_IV);
                  glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.);
              }
          glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.);

          /* Total worker execution time */
          glp_add_rows(lp, nw);
          for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
          {
              int someone = 0;
              for (w = 0; w < nw; w++)
                  if (!isnan(times[w*nt+t]))
                      someone = 1;
              if (!someone)
              {
                  /* This task does not have any performance model at all, abort */
                  glp_delete_prob(lp);
                  return NULL;
              }
          }
          for (w = 0; w < nw; w++)
          {
              char name[32], title[64];
              starpu_worker_get_name(w, name, sizeof(name));
              snprintf(title, sizeof(title), "worker %s", name);
              glp_set_row_name(lp, w+1, title);
              for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
              {
                  ia[n] = w+1;
                  ja[n] = colnum(w, t);
                  if (isnan(times[w*nt+t]))
                      /* No timing for this pair: use a huge coefficient,
                       * presumably so the solver never assigns it. */
                      ar[n] = 1000000000.;
                  else
                      ar[n] = times[w*nt+t];
                  n++;
              }
              /* tmax */
              ia[n] = w+1;
              ja[n] = nw*nt+1;
              ar[n] = -1;
              n++;
              glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0);
          }

          /* Total task completion */
          glp_add_rows(lp, nt);
          for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
          {
              char title[64];
              /* The row title only depends on the task pool.  No worker name
               * is looked up here: `w` is not a valid worker id at this point. */
              snprintf(title, sizeof(title), "task %s key %x", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
              glp_set_row_name(lp, nw+t+1, title);
              for (w = 0; w < nw; w++)
              {
                  ia[n] = nw+t+1;
                  ja[n] = colnum(w, t);
                  ar[n] = 1;
                  n++;
              }
              glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n);
          }

          STARPU_ASSERT(n == ne);

          glp_load_matrix(lp, ne-1, ia, ja, ar);
      }

      glp_init_smcp(&parm);
      parm.msg_lev = GLP_MSG_OFF;
      ret = glp_simplex(lp, &parm);
      if (ret)
      {
          glp_delete_prob(lp);
          return NULL;
      }

      if (integer)
      {
          glp_iocp iocp;
          glp_init_iocp(&iocp);
          iocp.msg_lev = GLP_MSG_OFF;
          glp_intopt(lp, &iocp);
      }

      return lp;
  }
  #endif /* STARPU_HAVE_GLPK_H */
  /*
   * Print the computed bound as well as the optimized distribution of tasks.
   *
   * output:  stream receiving the report.
   * integer: when non-zero, the integer (MIP) solution values are reported
   *          instead of the continuous ones.
   *
   * With GLPK support compiled in:
   *  - prints "Not supported" and returns when recorddeps is set;
   *  - otherwise solves the system under `mutex` and prints the minimum
   *    execution time followed by, for each task pool, one value per worker;
   *  - prints "Simplex failed" on stderr when no solution is available.
   * Without GLPK support, only a message asking for a rebuild is printed.
   */
  void starpu_bound_print(FILE *output, int integer __attribute__ ((unused)))
  {
  #ifdef STARPU_HAVE_GLPK_H
      glp_prob *lp;

      if (recorddeps)
      {
          fprintf(output, "Not supported\n");
          return;
      }

      STARPU_PTHREAD_MUTEX_LOCK(&mutex);
      lp = _starpu_bound_glp_resolve(integer);
      if (!lp)
      {
          fprintf(stderr, "Simplex failed\n");
      }
      else
      {
          /* Must be called `nw`: colnum() expands to an expression using it */
          int nw = starpu_worker_get_count();
          double tmax = integer ? glp_mip_obj_val(lp) : glp_get_obj_val(lp);
          struct bound_task_pool *pool;
          int task = 0;

          fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);

          for (pool = task_pools; pool; pool = pool->next)
          {
              int worker;

              fprintf(output, "%s key %x\n", _starpu_codelet_get_model_name(pool->cl), (unsigned) pool->footprint);
              for (worker = 0; worker < nw; worker++)
              {
                  double amount = integer
                      ? glp_mip_col_val(lp, colnum(worker, task))
                      : glp_get_col_prim(lp, colnum(worker, task));
                  fprintf(output, "\tw%dt%dn %f", worker, task, amount);
              }
              fprintf(output, "\n");
              task++;
          }

          glp_delete_prob(lp);
      }
      STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  #else /* STARPU_HAVE_GLPK_H */
      fprintf(output, "Please rebuild StarPU with glpk installed.\n");
  #endif /* STARPU_HAVE_GLPK_H */
  }
  /*
   * Compute and return the bound.
   *
   * res:         receives the objective value of the continuous solution,
   *              or 0 when no bound could be computed.
   * integer_res: when `integer` is non-zero and this pointer is not NULL,
   *              receives the objective value of the integer solution, or 0
   *              when no bound could be computed.  Left untouched otherwise.
   * integer:     non-zero to also solve the integer problem.
   *
   * No bound is computed when recorddeps is set, when the solver returns no
   * problem, or when StarPU was built without GLPK.  In all those cases both
   * requested outputs are set to 0, so the caller never reads an
   * uninitialized value.
   */
  void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused)))
  {
      double bound = 0.;
      double integer_bound = 0.;

  #ifdef STARPU_HAVE_GLPK_H
      if (!recorddeps)
      {
          glp_prob *lp;

          STARPU_PTHREAD_MUTEX_LOCK(&mutex);
          lp = _starpu_bound_glp_resolve(integer);
          if (lp)
          {
              bound = glp_get_obj_val(lp);
              if (integer)
                  integer_bound = glp_mip_obj_val(lp);
              glp_delete_prob(lp);
          }
          STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
      }
  #endif /* STARPU_HAVE_GLPK_H */

      *res = bound;
      /* Only written when asked for, as before; now also on failure paths */
      if (integer && integer_res)
          *integer_res = integer_bound;
  }