perfmodel_history.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
#include <dirent.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <errno.h>
#include <common/config.h>
#include <common/utils.h>
#include <core/perfmodel/perfmodel.h>
#include <core/jobs.h>
#include <core/workers.h>
#include <pthread.h>
#include <datawizard/datawizard.h>
#include <core/perfmodel/regression.h>
#include <common/config.h>
#include <starpu_parameters.h>
#include <common/uthash.h>
#ifdef STARPU_HAVE_WINDOWS
#include <windows.h>
#endif
  36. #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
  37. #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
/* uthash node mapping a task footprint to its measured history entry,
 * giving O(1) lookup next to the per-arch linked list. */
struct starpu_perfmodel_history_table
{
	UT_hash_handle hh;	/* makes this struct hashable by uthash */
	uint32_t footprint;	/* hash key */
	struct starpu_perfmodel_history_entry *history_entry;	/* not owned: freed via the list */
};
  44. /* We want more than 10% variance on X to trust regression */
  45. #define VALID_REGRESSION(reg_model) \
  46. ((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= _STARPU_CALIBRATION_MINIMUM)
  47. static _starpu_pthread_rwlock_t registered_models_rwlock;
  48. static struct _starpu_perfmodel_list *registered_models = NULL;
  49. size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j)
  50. {
  51. struct starpu_task *task = j->task;
  52. if (model && model->per_arch[arch][nimpl].size_base)
  53. {
  54. return model->per_arch[arch][nimpl].size_base(task, arch, nimpl);
  55. }
  56. else if (model && model->size_base)
  57. {
  58. return model->size_base(task, nimpl);
  59. }
  60. else
  61. {
  62. unsigned nbuffers = task->cl->nbuffers;
  63. size_t size = 0;
  64. unsigned buffer;
  65. for (buffer = 0; buffer < nbuffers; buffer++)
  66. {
  67. starpu_data_handle_t handle = task->handles[buffer];
  68. size += _starpu_data_get_size(handle);
  69. }
  70. return size;
  71. }
  72. }
  73. /*
  74. * History based model
  75. */
  76. static void insert_history_entry(struct starpu_perfmodel_history_entry *entry, struct starpu_perfmodel_history_list **list, struct starpu_perfmodel_history_table **history_ptr)
  77. {
  78. struct starpu_perfmodel_history_list *link;
  79. struct starpu_perfmodel_history_table *table;
  80. link = (struct starpu_perfmodel_history_list *) malloc(sizeof(struct starpu_perfmodel_history_list));
  81. link->next = *list;
  82. link->entry = entry;
  83. *list = link;
  84. /* detect concurrency issue */
  85. //HASH_FIND_UINT32_T(*history_ptr, &entry->footprint, table);
  86. //STARPU_ASSERT(table == NULL);
  87. table = (struct starpu_perfmodel_history_table*) malloc(sizeof(*table));
  88. STARPU_ASSERT(table != NULL);
  89. table->footprint = entry->footprint;
  90. table->history_entry = entry;
  91. HASH_ADD_UINT32_T(*history_ptr, footprint, table);
  92. }
  93. static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
  94. {
  95. struct starpu_perfmodel_per_arch *per_arch_model;
  96. per_arch_model = &model->per_arch[arch][nimpl];
  97. struct starpu_perfmodel_regression_model *reg_model;
  98. reg_model = &per_arch_model->regression;
  99. /*
  100. * Linear Regression model
  101. */
  102. /* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */
  103. double alpha = nan(""), beta = nan("");
  104. if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  105. {
  106. if (reg_model->nsample > 1)
  107. {
  108. alpha = reg_model->alpha;
  109. beta = reg_model->beta;
  110. }
  111. }
  112. fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n");
  113. fprintf(f, "%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\t%-15lu\t%-15lu\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, alpha, beta, reg_model->nsample, reg_model->minx, reg_model->maxx);
  114. /*
  115. * Non-Linear Regression model
  116. */
  117. double a = nan(""), b = nan(""), c = nan("");
  118. if (model->type == STARPU_NL_REGRESSION_BASED)
  119. _starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c);
  120. fprintf(f, "# a\t\tb\t\tc\n");
  121. fprintf(f, "%-15le\t%-15le\t%-15le\n", a, b, c);
  122. }
  123. static void scan_reg_model(FILE *f, struct starpu_perfmodel_regression_model *reg_model)
  124. {
  125. int res;
  126. /*
  127. * Linear Regression model
  128. */
  129. _starpu_drop_comments(f);
  130. res = fscanf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%u\t%lu\t%lu\n",
  131. &reg_model->sumlnx, &reg_model->sumlnx2, &reg_model->sumlny,
  132. &reg_model->sumlnxlny, &reg_model->alpha, &reg_model->beta,
  133. &reg_model->nsample,
  134. &reg_model->minx, &reg_model->maxx);
  135. STARPU_ASSERT_MSG(res == 9, "Incorrect performance model file");
  136. /* If any of the parameters describing the linear regression model is NaN, the model is invalid */
  137. unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta));
  138. reg_model->valid = !invalid && VALID_REGRESSION(reg_model);
  139. /*
  140. * Non-Linear Regression model
  141. */
  142. _starpu_drop_comments(f);
  143. res = fscanf(f, "%le\t%le\t%le\n", &reg_model->a, &reg_model->b, &reg_model->c);
  144. STARPU_ASSERT_MSG(res == 3, "Incorrect performance model file");
  145. /* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */
  146. unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c));
  147. reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model);
  148. }
/* Write one history entry as a single line:
 * footprint size mean deviation sum sum2 nsample
 * (size is cast to unsigned long so the %lu conversion matches whether
 * the field is size_t or unsigned, depending on the platform). */
static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
{
	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
}
/* Read one history-entry line (the format written by dump_history_entry).
 * When entry is NULL the values are parsed and discarded, which is how
 * extra implementations/archs are skipped. */
static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
{
	int res;

	_starpu_drop_comments(f);

	/* In case entry is NULL, we just drop these values */
	unsigned nsample;
	uint32_t footprint;
#ifdef STARPU_HAVE_WINDOWS
	unsigned size; /* in bytes */
#else
	size_t size; /* in bytes */
#endif
	double mean;
	double deviation;
	double sum;
	double sum2;

	/* Read the values from the file.  The size conversion is spliced
	 * together with the preprocessor: "%zu" everywhere except Windows,
	 * whose scanf lacks %zu, hence plain "%u" and the unsigned above. */
	res = fscanf(f, "%x\t%"
#ifndef STARPU_HAVE_WINDOWS
		     "z"
#endif
		     "u\t%le\t%le\t%le\t%le\t%u\n", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
	STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");

	if (entry)
	{
		entry->footprint = footprint;
		entry->size = size;
		entry->mean = mean;
		entry->deviation = deviation;
		entry->sum = sum;
		entry->sum2 = sum2;
		entry->nsample = nsample;
	}
}
  187. static void parse_per_arch_model_file(FILE *f, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
  188. {
  189. unsigned nentries;
  190. _starpu_drop_comments(f);
  191. int res = fscanf(f, "%u\n", &nentries);
  192. STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
  193. scan_reg_model(f, &per_arch_model->regression);
  194. /* parse cpu entries */
  195. unsigned i;
  196. for (i = 0; i < nentries; i++)
  197. {
  198. struct starpu_perfmodel_history_entry *entry = NULL;
  199. if (scan_history)
  200. {
  201. entry = (struct starpu_perfmodel_history_entry *) malloc(sizeof(struct starpu_perfmodel_history_entry));
  202. STARPU_ASSERT(entry);
  203. }
  204. scan_history_entry(f, entry);
  205. /* insert the entry in the hashtable and the list structures */
  206. /* TODO: Insert it at the end of the list, to avoid reversing
  207. * the order... But efficiently! We may have a lot of entries */
  208. if (scan_history)
  209. insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
  210. }
  211. }
  212. static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned archmin, unsigned archmax, unsigned skiparch)
  213. {
  214. struct starpu_perfmodel_per_arch dummy;
  215. int nimpls, implmax, skipimpl, impl;
  216. unsigned ret, arch;
  217. for (arch = archmin; arch < archmax; arch++)
  218. {
  219. _STARPU_DEBUG("Parsing arch %u\n", arch);
  220. _starpu_drop_comments(f);
  221. ret = fscanf(f, "%d\n", &nimpls);
  222. _STARPU_DEBUG("%d implementations\n", nimpls);
  223. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  224. implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
  225. skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
  226. for (impl = 0; impl < implmax; impl++)
  227. {
  228. parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
  229. }
  230. if (skipimpl > 0)
  231. {
  232. for (impl = 0; impl < skipimpl; impl++)
  233. {
  234. parse_per_arch_model_file(f, &dummy, 0);
  235. }
  236. }
  237. }
  238. if (skiparch > 0)
  239. {
  240. _starpu_drop_comments(f);
  241. for (arch = 0; arch < skiparch; arch ++)
  242. {
  243. _STARPU_DEBUG("skipping arch %u\n", arch);
  244. ret = fscanf(f, "%d\n", &nimpls);
  245. _STARPU_DEBUG("%d implementations\n", nimpls);
  246. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  247. implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
  248. skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
  249. for (impl = 0; impl < implmax; impl++)
  250. {
  251. parse_per_arch_model_file(f, &dummy, 0);
  252. }
  253. if (skipimpl > 0)
  254. {
  255. for (impl = 0; impl < skipimpl; impl++)
  256. {
  257. parse_per_arch_model_file(f, &dummy, 0);
  258. }
  259. }
  260. }
  261. }
  262. }
/* Parse a whole model file: three sections in order (CPUs, CUDA devices,
 * OpenCL devices), each introduced by its arch count.  Archs beyond what
 * this build supports are handed to parse_arch as "skiparch" and dropped. */
static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
{
	unsigned ret;
	unsigned archmin = 0;
	unsigned narchs;

	/* We could probably write a clean loop here, but the code would not
	 * really be easier to read. */

	/* Parsing CPUs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");

	_STARPU_DEBUG("Parsing %u CPUs\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			   archmin,
			   STARPU_MIN(narchs, STARPU_MAXCPUS),
			   narchs > STARPU_MAXCPUS ? narchs - STARPU_MAXCPUS : 0);
	}

	/* Parsing CUDA devs: their arch numbers start after the CPU slots */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
	archmin += STARPU_MAXCPUS;
	_STARPU_DEBUG("Parsing %u CUDA devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			   archmin,
			   archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
			   narchs > STARPU_MAXCUDADEVS ? narchs - STARPU_MAXCUDADEVS : 0);
	}

	/* Parsing OpenCL devs: numbered after the CUDA slots */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
	archmin += STARPU_MAXCUDADEVS;
	_STARPU_DEBUG("Parsing %u OpenCL devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			   archmin,
			   archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
			   narchs > STARPU_MAXOPENCLDEVS ? narchs - STARPU_MAXOPENCLDEVS : 0);
	}
}
  309. static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
  310. {
  311. struct starpu_perfmodel_per_arch *per_arch_model;
  312. per_arch_model = &model->per_arch[arch][nimpl];
  313. /* count the number of elements in the lists */
  314. struct starpu_perfmodel_history_list *ptr = NULL;
  315. unsigned nentries = 0;
  316. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  317. {
  318. /* Dump the list of all entries in the history */
  319. ptr = per_arch_model->list;
  320. while(ptr)
  321. {
  322. nentries++;
  323. ptr = ptr->next;
  324. }
  325. }
  326. /* header */
  327. char archname[32];
  328. starpu_perfmodel_get_arch_name((enum starpu_perf_archtype) arch, archname, 32, nimpl);
  329. fprintf(f, "# Model for %s\n", archname);
  330. fprintf(f, "# number of entries\n%u\n", nentries);
  331. dump_reg_model(f, model, arch, nimpl);
  332. /* Dump the history into the model file in case it is necessary */
  333. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  334. {
  335. fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
  336. ptr = per_arch_model->list;
  337. while (ptr)
  338. {
  339. dump_history_entry(f, ptr->entry);
  340. ptr = ptr->next;
  341. }
  342. }
  343. fprintf(f, "\n##################\n");
  344. }
  345. static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, unsigned impl)
  346. {
  347. struct starpu_perfmodel_per_arch *per_arch_model;
  348. per_arch_model = &model->per_arch[arch][impl];
  349. /* count the number of elements in the lists */
  350. struct starpu_perfmodel_history_list *ptr = NULL;
  351. unsigned nentries = 0;
  352. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  353. {
  354. /* Dump the list of all entries in the history */
  355. ptr = per_arch_model->list;
  356. while(ptr)
  357. {
  358. nentries++;
  359. ptr = ptr->next;
  360. }
  361. }
  362. return nentries;
  363. }
/* Write the whole model to f.  Two passes over all arch variations:
 * first compute, per device class (CPU / CUDA / OpenCL), how many archs
 * actually contain data (narch[]); then emit one class header followed by
 * a section per non-empty arch and implementation. */
static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
{
	unsigned narch[4] = { 0, 0, 0, 0};	/* archs with data, per class */
	unsigned arch, arch_base = 0, my_narch = 0;
	unsigned nimpl;
	unsigned idx = 0;	/* index into narch[]: 0 CPU, 1 CUDA, 2 OpenCL */

	/* Finding the number of archs to write for each kind of device */
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		switch (arch)
		{
			/* entering a new device class: remember its first arch
			 * number so narch[] counts relative to it */
			case STARPU_CUDA_DEFAULT:
			case STARPU_OPENCL_DEFAULT:
				arch_base = arch;
				idx++;
				break;
			default:
				break;
		}

		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			/* an arch counts as soon as one impl has history entries */
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
				{
					narch[idx]=arch-arch_base+1;
					break;
				}
		}
		else if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_PER_ARCH || model->type == STARPU_COMMON)
		{
			/* an arch counts as soon as one impl has samples */
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
				{
					narch[idx]=arch-arch_base+1;
					break;
				}
		}
		else
		{
			STARPU_ASSERT_MSG(0, "Unknown history-based performance model %d", model->type);
		}
	}

	/* Writing stuff */
	char *name = "unknown";
	unsigned substract_to_arch = 0;	/* offset to print class-local arch ids */
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		/* on the first arch of each class, emit the class header */
		switch (arch)
		{
			case STARPU_CPU_DEFAULT:
				arch_base = arch;
				name = "CPU";
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# maximum number of %ss\n", name);
				fprintf(f, "%u\n", my_narch = narch[0]);
				break;
			case STARPU_CUDA_DEFAULT:
				arch_base = arch;
				name = "CUDA";
				substract_to_arch = STARPU_MAXCPUS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", my_narch = narch[1]);
				break;
			case STARPU_OPENCL_DEFAULT:
				arch_base = arch;
				name = "OPENCL";
				substract_to_arch += STARPU_MAXCUDADEVS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", my_narch = narch[2]);
				break;
			default:
				break;
		}

		/* highest implementation index that actually holds data */
		unsigned max_impl = 0;
		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
					max_impl = nimpl + 1;
		}
		else if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_PER_ARCH || model->type == STARPU_COMMON)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
					max_impl = nimpl + 1;
		}
		else
			STARPU_ASSERT_MSG(0, "Unknown history-based performance model %u", arch);

		/* skip archs beyond the count announced in the class header */
		if (arch >= my_narch + arch_base)
			continue;

		fprintf(f, "###########\n");
		if (substract_to_arch)
			fprintf(f, "# %s_%u\n", name, arch - substract_to_arch);
		else
			/* CPU */
			fprintf(f, "# %u CPU(s) in parallel\n", arch + 1);
		fprintf(f, "# number of implementations\n");
		fprintf(f, "%u\n", max_impl);
		for (nimpl = 0; nimpl < max_impl; nimpl++)
		{
			dump_per_arch_model_file(f, model, arch, nimpl);
		}
	}
}
  473. static void initialize_per_arch_model(struct starpu_perfmodel_per_arch *per_arch_model)
  474. {
  475. per_arch_model->history = NULL;
  476. per_arch_model->list = NULL;
  477. per_arch_model->regression.nsample = 0;
  478. per_arch_model->regression.valid = 0;
  479. per_arch_model->regression.nl_valid = 0;
  480. }
  481. static void initialize_model(struct starpu_perfmodel *model)
  482. {
  483. unsigned arch;
  484. unsigned nimpl;
  485. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
  486. {
  487. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  488. {
  489. initialize_per_arch_model(&model->per_arch[arch][nimpl]);
  490. }
  491. }
  492. }
  493. static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen)
  494. {
  495. STARPU_ASSERT(path);
  496. _starpu_get_perf_model_dir_debug(path, maxlen);
  497. strncat(path, model->symbol, maxlen);
  498. char hostname[65];
  499. _starpu_gethostname(hostname, sizeof(hostname));
  500. strncat(path, ".", maxlen);
  501. strncat(path, hostname, maxlen);
  502. strncat(path, ".", maxlen);
  503. strncat(path, arch, maxlen);
  504. strncat(path, ".debug", maxlen);
  505. }
/*
 * Returns 0 if the model was already loaded, 1 otherwise.
 */
  509. int _starpu_register_model(struct starpu_perfmodel *model)
  510. {
  511. /* If the model has already been loaded, there is nothing to do */
  512. _STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  513. if (model->is_loaded)
  514. {
  515. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  516. return 0;
  517. }
  518. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  519. /* We have to make sure the model has not been loaded since the
  520. * last time we took the lock */
  521. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  522. if (model->is_loaded)
  523. {
  524. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  525. return 0;
  526. }
  527. /* add the model to a linked list */
  528. struct _starpu_perfmodel_list *node = (struct _starpu_perfmodel_list *) malloc(sizeof(struct _starpu_perfmodel_list));
  529. node->model = model;
  530. //model->debug_modelid = debug_modelid++;
  531. /* put this model at the beginning of the list */
  532. node->next = registered_models;
  533. registered_models = node;
  534. #ifdef STARPU_MODEL_DEBUG
  535. _starpu_create_sampling_directory_if_needed();
  536. unsigned arch;
  537. unsigned nimpl;
  538. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
  539. {
  540. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  541. {
  542. starpu_perfmodel_debugfilepath(model, arch, model->per_arch[arch][nimpl].debug_path, 256, nimpl);
  543. }
  544. }
  545. #endif
  546. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  547. return 1;
  548. }
  549. static void get_model_path(struct starpu_perfmodel *model, char *path, size_t maxlen)
  550. {
  551. _starpu_get_perf_model_dir_codelets(path, maxlen);
  552. strncat(path, model->symbol, maxlen);
  553. char hostname[65];
  554. _starpu_gethostname(hostname, sizeof(hostname));
  555. strncat(path, ".", maxlen);
  556. strncat(path, hostname, maxlen);
  557. }
  558. static void save_history_based_model(struct starpu_perfmodel *model)
  559. {
  560. STARPU_ASSERT(model);
  561. STARPU_ASSERT(model->symbol);
  562. /* TODO checks */
  563. /* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */
  564. char path[256];
  565. get_model_path(model, path, 256);
  566. _STARPU_DEBUG("Opening performance model file %s for model %s\n", path, model->symbol);
  567. /* overwrite existing file, or create it */
  568. FILE *f;
  569. f = fopen(path, "w+");
  570. STARPU_ASSERT_MSG(f, "Could not save performance model %s\n", path);
  571. dump_model_file(f, model);
  572. fclose(f);
  573. }
  574. static void _starpu_dump_registered_models(void)
  575. {
  576. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  577. struct _starpu_perfmodel_list *node;
  578. node = registered_models;
  579. _STARPU_DEBUG("DUMP MODELS !\n");
  580. while (node)
  581. {
  582. save_history_based_model(node->model);
  583. node = node->next;
  584. }
  585. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  586. }
/* Set up the global list of registered models and the rwlock protecting
 * it.  Called once at StarPU initialization. */
void _starpu_initialize_registered_performance_models(void)
{
	registered_models = NULL;
	_STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
}
/* Tear down all registered models: optionally dump them to disk first
 * (when calibration is enabled), then free each model's hash table,
 * history list and list node, and finally destroy the global lock. */
void _starpu_deinitialize_registered_performance_models(void)
{
	if (_starpu_get_calibrate_flag())
		_starpu_dump_registered_models();

	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	struct _starpu_perfmodel_list *node, *pnode;
	node = registered_models;

	_STARPU_DEBUG("FREE MODELS !\n");

	while (node)
	{
		struct starpu_perfmodel *model = node->model;
		unsigned arch;
		unsigned nimpl;

		_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
		for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			{
				struct starpu_perfmodel_per_arch *archmodel = &model->per_arch[arch][nimpl];
				struct starpu_perfmodel_history_list *list, *plist;
				struct starpu_perfmodel_history_table *entry, *tmp;

				/* free the hash-table nodes; the history entries
				 * they point to are owned by the list below */
				HASH_ITER(hh, archmodel->history, entry, tmp)
				{
					HASH_DEL(archmodel->history, entry);
					free(entry);
				}
				archmodel->history = NULL;

				/* free the history list and the entries it owns */
				list = archmodel->list;
				while (list)
				{
					free(list->entry);
					plist = list;
					list = list->next;
					free(plist);
				}
				archmodel->list = NULL;
			}
		}

		model->is_loaded = 0;
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		pnode = node;
		node = node->next;
		free(pnode);
	}
	registered_models = NULL;

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
	_STARPU_PTHREAD_RWLOCK_DESTROY(&registered_models_rwlock);
}
  640. /*
  641. * XXX: We should probably factorize the beginning of the _starpu_load_*_model
  642. * functions. This is a bit tricky though, because we must be sure to unlock
  643. * registered_models_rwlock at the right place.
  644. */
/* Prepare a per-arch model for use: double-checked under the global lock,
 * the only initialization needed is the model's own rwlock (no file is
 * parsed for this model type). */
void _starpu_load_per_arch_based_model(struct starpu_perfmodel *model)
{
	STARPU_ASSERT(model && model->symbol);

	int already_loaded;

	/* Fast path: read lock only to check the flag */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	already_loaded = model->is_loaded;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);

	if (already_loaded)
		return;

	/* The model is still not loaded so we grab the lock in write mode, and
	 * if it's not loaded once we have the lock, we do load it. */
	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	/* Was the model initialized since the previous test ? */
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return;
	}

	/* NOTE(review): unlike _starpu_load_history_based_model, is_loaded is
	 * never set here, so this init path can run again — confirm whether
	 * that is intended. */
	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
}
/* Prepare a STARPU_COMMON model for use: identical double-checked-lock
 * scheme as _starpu_load_per_arch_based_model — only the model's rwlock
 * needs initializing, no file is parsed. */
void _starpu_load_common_based_model(struct starpu_perfmodel *model)
{
	STARPU_ASSERT(model && model->symbol);

	int already_loaded;

	/* Fast path: read lock only to check the flag */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	already_loaded = model->is_loaded;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);

	if (already_loaded)
		return;

	/* The model is still not loaded so we grab the lock in write mode, and
	 * if it's not loaded once we have the lock, we do load it. */
	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	/* Was the model initialized since the previous test ? */
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return;
	}

	/* NOTE(review): is_loaded is not set here either — see the per-arch
	 * loader; confirm whether repeated re-init is intended. */
	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
}
  687. /* We first try to grab the global lock in read mode to check whether the model
  688. * was loaded or not (this is very likely to have been already loaded). If the
  689. * model was not loaded yet, we take the lock in write mode, and if the model
  690. * is still not loaded once we have the lock, we do load it. */
/* Load (or initialize) a history-based model, once, under a double-checked
 * locking scheme on the global rwlock.  If a model file exists it is
 * parsed (unless calibration mode 2 asks to overwrite); otherwise the
 * model starts empty and calibration is forced on for this run. */
void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
{
	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	int already_loaded;

	/* Fast path: read lock only to check the flag */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	already_loaded = model->is_loaded;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);

	if (already_loaded)
		return;

	/* The model is still not loaded so we grab the lock in write mode, and
	 * if it's not loaded once we have the lock, we do load it. */
	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	/* Was the model initialized since the previous test ? */
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return;
	}

	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);

	_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

	/* make sure the performance model directory exists (or create it) */
	_starpu_create_sampling_directory_if_needed();

	char path[256];
	get_model_path(model, path, 256);

	_STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol);

	/* benchmarking mirrors the global calibration flag (2 = overwrite
	 * existing measurements) */
	unsigned calibrate_flag = _starpu_get_calibrate_flag();
	model->benchmarking = calibrate_flag;

	/* try to open an existing file and load it */
	int res;
	res = access(path, F_OK);
	if (res == 0)
	{
		if (calibrate_flag == 2)
		{
			/* The user specified that the performance model should
			 * be overwritten, so we don't load the existing file !
			 * */
			_STARPU_DEBUG("Overwrite existing file\n");
			initialize_model(model);
		}
		else
		{
			/* We load the available file */
			_STARPU_DEBUG("File exists\n");
			FILE *f;
			f = fopen(path, "r");
			STARPU_ASSERT(f);

			parse_model_file(f, model, scan_history);

			fclose(f);
		}
	}
	else
	{
		/* no file yet: calibration becomes mandatory for this run */
		_STARPU_DEBUG("File does not exists\n");
		if (!calibrate_flag)
		{
			_STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
		initialize_model(model);
	}

	_STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol);

	model->is_loaded = 1;

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
}
  759. /* This function is intended to be used by external tools that should read
  760. * the performance model files */
  761. int starpu_perfmodel_list(FILE *output)
  762. {
  763. char path[256];
  764. DIR *dp;
  765. struct dirent *ep;
  766. char perf_model_dir_codelets[256];
  767. _starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
  768. strncpy(path, perf_model_dir_codelets, 256);
  769. dp = opendir(path);
  770. if (dp != NULL)
  771. {
  772. while ((ep = readdir(dp)))
  773. {
  774. if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
  775. fprintf(output, "file: <%s>\n", ep->d_name);
  776. }
  777. closedir (dp);
  778. }
  779. else
  780. {
  781. _STARPU_DISP("Could not open the perfmodel directory <%s>\n", path);
  782. }
  783. return 0;
  784. }
  785. /* This function is intended to be used by external tools that should read the
  786. * performance model files */
  787. int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  788. {
  789. model->symbol = strdup(symbol);
  790. initialize_model(model);
  791. /* where is the file if it exists ? */
  792. char path[256];
  793. get_model_path(model, path, 256);
  794. // _STARPU_DEBUG("get_model_path -> %s\n", path);
  795. /* does it exist ? */
  796. int res;
  797. res = access(path, F_OK);
  798. if (res)
  799. {
  800. const char *dot = strrchr(symbol, '.');
  801. if (dot)
  802. {
  803. char *symbol2 = strdup(symbol);
  804. symbol2[dot-symbol] = '\0';
  805. int ret;
  806. _STARPU_DISP("note: loading history from %s instead of %s\n", symbol2, symbol);
  807. ret = starpu_perfmodel_load_symbol(symbol2,model);
  808. free(symbol2);
  809. return ret;
  810. }
  811. _STARPU_DISP("There is no performance model for symbol %s\n", symbol);
  812. return 1;
  813. }
  814. FILE *f = fopen(path, "r");
  815. STARPU_ASSERT(f);
  816. parse_model_file(f, model, 1);
  817. STARPU_ASSERT(fclose(f) == 0);
  818. return 0;
  819. }
  820. void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen,unsigned nimpl)
  821. {
  822. if (arch < STARPU_CUDA_DEFAULT)
  823. {
  824. if (arch == STARPU_CPU_DEFAULT)
  825. {
  826. /* NB: We could just use cpu_1 as well ... */
  827. snprintf(archname, maxlen, "cpu_impl_%u",nimpl);
  828. }
  829. else
  830. {
  831. /* For combined CPU workers */
  832. int cpu_count = arch - STARPU_CPU_DEFAULT + 1;
  833. snprintf(archname, maxlen, "cpu_%d_impl_%u", cpu_count,nimpl);
  834. }
  835. }
  836. else if ((STARPU_CUDA_DEFAULT <= arch)
  837. && (arch < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS))
  838. {
  839. int devid = arch - STARPU_CUDA_DEFAULT;
  840. snprintf(archname, maxlen, "cuda_%d_impl_%u", devid,nimpl);
  841. }
  842. else if ((STARPU_OPENCL_DEFAULT <= arch)
  843. && (arch < STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS))
  844. {
  845. int devid = arch - STARPU_OPENCL_DEFAULT;
  846. snprintf(archname, maxlen, "opencl_%d_impl_%u", devid,nimpl);
  847. }
  848. else
  849. {
  850. STARPU_ABORT();
  851. }
  852. }
  853. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
  854. enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
  855. {
  856. char archname[32];
  857. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  858. STARPU_ASSERT(path);
  859. get_model_debug_path(model, archname, path, maxlen);
  860. }
  861. double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl)
  862. {
  863. double exp = NAN;
  864. size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
  865. struct starpu_perfmodel_regression_model *regmodel;
  866. regmodel = &model->per_arch[arch][nimpl].regression;
  867. if (regmodel->valid)
  868. exp = regmodel->alpha*pow((double)size, regmodel->beta);
  869. return exp;
  870. }
/* Predict the execution time of job J on ARCH/NIMPL using the non-linear
 * regression a*size^b + c when it is valid and SIZE lies (within 10%) inside
 * the calibrated range; otherwise fall back to the per-footprint history
 * mean. Returns NAN when neither prediction is available; in that case
 * calibration is forced on unless already benchmarking. */
double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp = NAN;
	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
	struct starpu_perfmodel_regression_model *regmodel;

	regmodel = &model->per_arch[arch][nimpl].regression;

	/* Only extrapolate up to 10% outside the [minx, maxx] range that was
	 * actually measured during calibration. */
	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
	else
	{
		/* Fallback: look the job's footprint up in the history table */
		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[arch][nimpl];
		struct starpu_perfmodel_history_table *history;
		struct starpu_perfmodel_history_table *entry;

		_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
		history = per_arch_model->history;
		HASH_FIND_UINT32_T(history, &key, entry);
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		/* NOTE(review): entry is dereferenced after the lock is
		 * released — presumably history entries are never freed once
		 * inserted; confirm against the table's lifetime rules. */
		/* Only trust the mean once enough samples were collected */
		if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
			exp = entry->history_entry->mean;
		else if (!model->benchmarking)
		{
			/* No usable prediction: turn calibration on for the
			 * rest of this run so samples get collected. */
			char archname[32];

			starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
			_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
	}

	return exp;
}
/* Predict the execution time of job J on ARCH/NIMPL from the per-footprint
 * history: returns the recorded mean for the job's footprint, or NAN if the
 * footprint is unknown or not calibrated enough. When NAN is returned,
 * calibration is forced on unless already benchmarking. */
double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp;
	struct starpu_perfmodel_per_arch *per_arch_model;
	struct starpu_perfmodel_history_entry *entry;
	struct starpu_perfmodel_history_table *history, *elt;

	uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

	per_arch_model = &model->per_arch[arch][nimpl];

	/* Look the footprint up in the hash table under the read lock */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
	history = per_arch_model->history;
	HASH_FIND_UINT32_T(history, &key, elt);
	entry = (elt == NULL) ? NULL : elt->history_entry;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	/* NOTE(review): entry is read after releasing the lock — presumably
	 * history entries are never freed once inserted; confirm. */
	exp = entry?entry->mean:NAN;

	if (entry && entry->nsample < _STARPU_CALIBRATION_MINIMUM)
		/* TODO: report differently if we've scheduled really enough
		 * of that task and the scheduler should perhaps put it aside */
		/* Not calibrated enough */
		exp = NAN;

	if (isnan(exp) && !model->benchmarking)
	{
		/* No usable prediction: turn calibration on for the rest of
		 * this run so samples get collected. */
		char archname[32];

		starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
		_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
		_starpu_set_calibrate_flag(1);
		model->benchmarking = 1;
	}

	return exp;
}
  931. double starpu_history_based_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, uint32_t footprint)
  932. {
  933. struct _starpu_job j =
  934. {
  935. .footprint = footprint,
  936. .footprint_is_computed = 1,
  937. };
  938. return _starpu_history_based_job_expected_perf(model, arch, &j, j.nimpl);
  939. }
/* Feed one timing sample MEASURED (for job J run on ARCH/NIMPL, by worker
 * CPUID) into MODEL. Depending on the model type this updates the
 * per-footprint history statistics and/or the log-log regression
 * accumulators. A no-op when MODEL is NULL. The whole update runs under the
 * model's write lock. */
void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
{
	if (model)
	{
		_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[arch][nimpl];

		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_perfmodel_history_entry *entry;
			struct starpu_perfmodel_history_table *elt;
			struct starpu_perfmodel_history_list **list;
			uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

			list = &per_arch_model->list;
			HASH_FIND_UINT32_T(per_arch_model->history, &key, elt);
			entry = (elt == NULL) ? NULL : elt->history_entry;

			if (!entry)
			{
				/* this is the first entry with such a footprint */
				entry = (struct starpu_perfmodel_history_entry *) malloc(sizeof(struct starpu_perfmodel_history_entry));
				STARPU_ASSERT(entry);
				entry->mean = measured;
				entry->sum = measured;

				entry->deviation = 0.0;
				entry->sum2 = measured*measured;

				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);

				entry->footprint = key;
				entry->nsample = 1;

				insert_history_entry(entry, list, &per_arch_model->history);
			}
			else
			{
				/* there is already some entry with the same footprint:
				 * fold the new sample into the running statistics */
				entry->sum += measured;
				entry->sum2 += measured*measured;
				entry->nsample++;

				unsigned n = entry->nsample;
				entry->mean = entry->sum / n;
				/* population standard deviation from the running
				 * sum and sum of squares */
				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
			}
			STARPU_ASSERT(entry);
		}

		if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_perfmodel_regression_model *reg_model;
			reg_model = &per_arch_model->regression;

			/* update the regression model: least squares in log-log
			 * space, so time ~= alpha * size^beta */
			size_t job_size = _starpu_job_get_data_size(model, arch, nimpl, j);
			double logy, logx;
			logx = log((double)job_size);
			logy = log(measured);

			reg_model->sumlnx += logx;
			reg_model->sumlnx2 += logx*logx;
			reg_model->sumlny += logy;
			reg_model->sumlnxlny += logx*logy;

			/* track the calibrated size range (0 means "unset") */
			if (reg_model->minx == 0 || job_size < reg_model->minx)
				reg_model->minx = job_size;
			if (reg_model->maxx == 0 || job_size > reg_model->maxx)
				reg_model->maxx = job_size;

			reg_model->nsample++;

			if (VALID_REGRESSION(reg_model))
			{
				/* closed-form least-squares fit of
				 * ln(y) = ln(alpha) + beta * ln(x) */
				unsigned n = reg_model->nsample;
				double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
				double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);

				reg_model->beta = num/denom;
				reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);

				reg_model->valid = 1;
			}
		}

#ifdef STARPU_MODEL_DEBUG
		/* Append a raw trace line (footprint, size, measurement,
		 * predictions, worker, then each handle's display) to the
		 * per-arch debug file. */
		struct starpu_task *task = j->task;
		FILE *f = fopen(per_arch_model->debug_path, "a+");
		if (f == NULL)
		{
			_STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path);
			STARPU_ABORT();
		}

		if (!j->footprint_is_computed)
			(void) _starpu_compute_buffers_footprint(model, arch, nimpl, j);

		STARPU_ASSERT(j->footprint_is_computed);

		fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, nimpl, j), measured, task->predicted, task->predicted_transfer, cpuid);
		unsigned i;

		for (i = 0; i < task->cl->nbuffers; i++)
		{
			starpu_data_handle_t handle = task->handles[i];

			STARPU_ASSERT(handle->ops);
			STARPU_ASSERT(handle->ops->display);
			handle->ops->display(handle, f);
		}
		fprintf(f, "\n");

		fclose(f);
#endif
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
	}
}
  1035. void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured)
  1036. {
  1037. struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
  1038. _starpu_load_perfmodel(model);
  1039. /* Record measurement */
  1040. _starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
  1041. /* and save perfmodel on termination */
  1042. _starpu_set_calibrate_flag(1);
  1043. }