perfmodel_history.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2012 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <dirent.h>
  19. #include <unistd.h>
  20. #include <sys/stat.h>
  21. #include <errno.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/perfmodel/perfmodel.h>
  25. #include <core/jobs.h>
  26. #include <core/workers.h>
  27. #include <pthread.h>
  28. #include <datawizard/datawizard.h>
  29. #include <core/perfmodel/regression.h>
  30. #include <common/config.h>
  31. #include <starpu_parameters.h>
  32. #include <common/uthash.h>
  33. #ifdef STARPU_HAVE_WINDOWS
  34. #include <windows.h>
  35. #endif
  36. #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
  37. #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
/* Hash-table node (uthash) indexing history entries by data footprint,
 * giving O(1) lookup of a measurement for a given footprint.  The same
 * entries are also chained in a starpu_perfmodel_history_list. */
struct starpu_perfmodel_history_table
{
	UT_hash_handle hh;	/* makes this struct hashable by uthash */
	uint32_t footprint;	/* hash key: footprint of the task's data */
	struct starpu_perfmodel_history_entry *history_entry;	/* not owned: freed via the list */
};
/* We want more than 10% variance on X to trust regression: the regression is
 * only considered valid once the sampled sizes span a wide enough range
 * (minx < 0.9 * maxx) and enough samples were collected. */
#define VALID_REGRESSION(reg_model) \
	((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= _STARPU_CALIBRATION_MINIMUM)

/* Global list of registered models, protected by this rwlock */
static pthread_rwlock_t registered_models_rwlock;
static struct _starpu_perfmodel_list *registered_models = NULL;
  49. /*
  50. * History based model
  51. */
  52. static void insert_history_entry(struct starpu_perfmodel_history_entry *entry, struct starpu_perfmodel_history_list **list, struct starpu_perfmodel_history_table **history_ptr)
  53. {
  54. struct starpu_perfmodel_history_list *link;
  55. struct starpu_perfmodel_history_table *table;
  56. link = (struct starpu_perfmodel_history_list *) malloc(sizeof(struct starpu_perfmodel_history_list));
  57. link->next = *list;
  58. link->entry = entry;
  59. *list = link;
  60. /* detect concurrency issue */
  61. //HASH_FIND_UINT32_T(*history_ptr, &entry->footprint, table);
  62. //STARPU_ASSERT(table == NULL);
  63. table = (struct starpu_perfmodel_history_table*) malloc(sizeof(*table));
  64. STARPU_ASSERT(table != NULL);
  65. table->footprint = entry->footprint;
  66. table->history_entry = entry;
  67. HASH_ADD_UINT32_T(*history_ptr, footprint, table);
  68. }
/* Write the regression parameters of one (arch, impl) model to f: first the
 * log-space linear fit (sums, alpha, beta, sample count, size range), then
 * the non-linear fit (a, b, c).  Each section is preceded by a '#' header
 * line that parse/scan functions skip via _starpu_drop_comments(). */
static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
{
	struct starpu_perfmodel_per_arch *per_arch_model;
	per_arch_model = &model->per_arch[arch][nimpl];
	struct starpu_perfmodel_regression_model *reg_model;
	reg_model = &per_arch_model->regression;

	/*
	 * Linear Regression model
	 */

	/* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */
	double alpha = nan(""), beta = nan("");
	if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		if (reg_model->nsample > 1)
		{
			alpha = reg_model->alpha;
			beta = reg_model->beta;
		}
	}

	fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n");
	fprintf(f, "%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\t%-15lu\t%-15lu\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, alpha, beta, reg_model->nsample, reg_model->minx, reg_model->maxx);

	/*
	 * Non-Linear Regression model
	 */

	/* The non-linear coefficients are only recomputed for NL-regression
	 * models; for every other type NaNs are written, marking the fit
	 * as unavailable when the file is read back. */
	double a = nan(""), b = nan(""), c = nan("");

	if (model->type == STARPU_NL_REGRESSION_BASED)
		_starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c);

	fprintf(f, "# a\t\tb\t\tc\n");
	fprintf(f, "%-15le\t%-15le\t%-15le\n", a, b, c);
}
/* Read back the regression parameters written by dump_reg_model() and
 * derive the `valid`/`nl_valid` flags: a fit is usable only if none of its
 * coefficients is NaN and the sampled sizes satisfy VALID_REGRESSION. */
static void scan_reg_model(FILE *f, struct starpu_perfmodel_regression_model *reg_model)
{
	int res;

	/*
	 * Linear Regression model
	 */

	_starpu_drop_comments(f);

	res = fscanf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%u\t%lu\t%lu\n",
		&reg_model->sumlnx, &reg_model->sumlnx2, &reg_model->sumlny,
		&reg_model->sumlnxlny, &reg_model->alpha, &reg_model->beta,
		&reg_model->nsample,
		&reg_model->minx, &reg_model->maxx);
	STARPU_ASSERT_MSG(res == 9, "Incorrect performance model file");

	/* If any of the parameters describing the linear regression model is NaN, the model is invalid */
	unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta));
	reg_model->valid = !invalid && VALID_REGRESSION(reg_model);

	/*
	 * Non-Linear Regression model
	 */

	_starpu_drop_comments(f);

	res = fscanf(f, "%le\t%le\t%le\n", &reg_model->a, &reg_model->b, &reg_model->c);
	STARPU_ASSERT_MSG(res == 3, "Incorrect performance model file");

	/* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */
	unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c));
	reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model);
}
/* Write one history entry as a single tab-separated line: footprint (hex),
 * data size, mean execution time, deviation, sum, sum of squares, and
 * sample count — the exact format scan_history_entry() reads back. */
static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
{
	/* size is cast to unsigned long so the %lu specifier is portable
	 * whether entry->size is size_t or unsigned */
	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
}
/* Read one history-entry line (as written by dump_history_entry()) from f.
 * When `entry` is NULL the values are parsed and simply discarded — used
 * when skipping sections that exceed the compiled-in limits. */
static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
{
	int res;

	_starpu_drop_comments(f);

	/* In case entry is NULL, we just drop these values */
	unsigned nsample;
	uint32_t footprint;
#ifdef STARPU_HAVE_WINDOWS
	unsigned size; /* in bytes */
#else
	size_t size; /* in bytes */
#endif
	double mean;
	double deviation;
	double sum;
	double sum2;

	/* Read the values from the file; the "z" length modifier for size_t
	 * is spliced in at preprocessing time since it is unavailable on
	 * Windows (where `size` is plain unsigned). */
	res = fscanf(f, "%x\t%"
#ifndef STARPU_HAVE_WINDOWS
		"z"
#endif
		"u\t%le\t%le\t%le\t%le\t%u\n", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
	STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");

	if (entry)
	{
		entry->footprint = footprint;
		entry->size = size;
		entry->mean = mean;
		entry->deviation = deviation;
		entry->sum = sum;
		entry->sum2 = sum2;
		entry->nsample = nsample;
	}
}
  163. static void parse_per_arch_model_file(FILE *f, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
  164. {
  165. unsigned nentries;
  166. _starpu_drop_comments(f);
  167. int res = fscanf(f, "%u\n", &nentries);
  168. STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
  169. scan_reg_model(f, &per_arch_model->regression);
  170. /* parse cpu entries */
  171. unsigned i;
  172. for (i = 0; i < nentries; i++)
  173. {
  174. struct starpu_perfmodel_history_entry *entry = NULL;
  175. if (scan_history)
  176. {
  177. entry = (struct starpu_perfmodel_history_entry *) malloc(sizeof(struct starpu_perfmodel_history_entry));
  178. STARPU_ASSERT(entry);
  179. }
  180. scan_history_entry(f, entry);
  181. /* insert the entry in the hashtable and the list structures */
  182. if (scan_history)
  183. insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
  184. }
  185. }
/* Parse the sections for arch indices [archmin, archmax) from f, writing
 * them into model->per_arch, then parse-and-discard `skiparch` further
 * architectures that are present in the file but exceed the compiled-in
 * limits.  Each arch section starts with its implementation count;
 * implementations beyond STARPU_MAXIMPLEMENTATIONS are likewise parsed
 * into a throw-away structure so the file position stays consistent. */
static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned archmin, unsigned archmax, unsigned skiparch)
{
	struct starpu_perfmodel_per_arch dummy;	/* sink for skipped sections */
	int nimpls, implmax, skipimpl, impl;
	unsigned ret, arch;

	for (arch = archmin; arch < archmax; arch++)
	{
		_STARPU_DEBUG("Parsing arch %u\n", arch);
		_starpu_drop_comments(f);
		ret = fscanf(f, "%d\n", &nimpls);
		_STARPU_DEBUG("%d implementations\n", nimpls);
		STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
		/* keep at most STARPU_MAXIMPLEMENTATIONS; skipimpl may be
		 * negative, which the guard below treats as "nothing to skip" */
		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
		for (impl = 0; impl < implmax; impl++)
		{
			parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
		}
		if (skipimpl > 0)
		{
			/* consume the excess implementations without keeping them */
			for (impl = 0; impl < skipimpl; impl++)
			{
				parse_per_arch_model_file(f, &dummy, 0);
			}
		}
	}

	if (skiparch > 0)
	{
		_starpu_drop_comments(f);
		/* same structure as above, but everything goes to `dummy` */
		for (arch = 0; arch < skiparch; arch ++)
		{
			_STARPU_DEBUG("skipping arch %u\n", arch);
			ret = fscanf(f, "%d\n", &nimpls);
			_STARPU_DEBUG("%d implementations\n", nimpls);
			STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
			for (impl = 0; impl < implmax; impl++)
			{
				parse_per_arch_model_file(f, &dummy, 0);
			}
			if (skipimpl > 0)
			{
				for (impl = 0; impl < skipimpl; impl++)
				{
					parse_per_arch_model_file(f, &dummy, 0);
				}
			}
		}
	}
}
  237. static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
  238. {
  239. unsigned ret;
  240. unsigned archmin = 0;
  241. unsigned narchs;
  242. /* We could probably write a clean loop here, but the code would not
  243. * really be easier to read. */
  244. /* Parsing CPUs */
  245. _starpu_drop_comments(f);
  246. ret = fscanf(f, "%u\n", &narchs);
  247. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  248. _STARPU_DEBUG("Parsing %u CPUs\n", narchs);
  249. if (narchs > 0)
  250. {
  251. parse_arch(f, model, scan_history,
  252. archmin,
  253. STARPU_MIN(narchs, STARPU_MAXCPUS),
  254. narchs > STARPU_MAXCPUS ? narchs - STARPU_MAXCPUS : 0);
  255. }
  256. /* Parsing CUDA devs */
  257. _starpu_drop_comments(f);
  258. ret = fscanf(f, "%u\n", &narchs);
  259. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  260. archmin += STARPU_MAXCPUS;
  261. _STARPU_DEBUG("Parsing %u CUDA devices\n", narchs);
  262. if (narchs > 0)
  263. {
  264. parse_arch(f, model, scan_history,
  265. archmin,
  266. archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
  267. narchs > STARPU_MAXCUDADEVS ? narchs - STARPU_MAXCUDADEVS : 0);
  268. }
  269. /* Parsing OpenCL devs */
  270. _starpu_drop_comments(f);
  271. ret = fscanf(f, "%u\n", &narchs);
  272. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  273. archmin += STARPU_MAXCUDADEVS;
  274. _STARPU_DEBUG("Parsing %u OpenCL devices\n", narchs);
  275. if (narchs > 0)
  276. {
  277. parse_arch(f, model, scan_history,
  278. archmin,
  279. archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
  280. narchs > STARPU_MAXOPENCLDEVS ? narchs - STARPU_MAXOPENCLDEVS : 0);
  281. }
  282. /* Parsing Gordon implementations */
  283. _starpu_drop_comments(f);
  284. ret = fscanf(f, "%u\n", &narchs);
  285. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  286. archmin += STARPU_MAXOPENCLDEVS;
  287. _STARPU_DEBUG("Parsing %u Gordon devices\n", narchs);
  288. if (narchs > 0)
  289. {
  290. parse_arch(f, model, scan_history,
  291. archmin,
  292. archmin + STARPU_MAXGORDONDEVS,
  293. narchs > STARPU_MAXGORDONDEVS ? narchs - STARPU_MAXGORDONDEVS : 0);
  294. }
  295. }
/* Dump the model of one (arch, impl) pair to f: a header naming the arch,
 * the number of history entries, the regression parameters, and — only for
 * history-based and NL-regression models — every recorded history entry. */
static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
{
	struct starpu_perfmodel_per_arch *per_arch_model;
	per_arch_model = &model->per_arch[arch][nimpl];

	/* count the number of elements in the lists */
	struct starpu_perfmodel_history_list *ptr = NULL;
	unsigned nentries = 0;

	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		/* Dump the list of all entries in the history */
		ptr = per_arch_model->list;
		while(ptr)
		{
			nentries++;
			ptr = ptr->next;
		}
	}

	/* header */
	char archname[32];
	starpu_perfmodel_get_arch_name((enum starpu_perf_archtype) arch, archname, 32, nimpl);
	fprintf(f, "# Model for %s\n", archname);
	fprintf(f, "# number of entries\n%u\n", nentries);

	dump_reg_model(f, model, arch, nimpl);

	/* Dump the history into the model file in case it is necessary */
	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
		ptr = per_arch_model->list;
		while (ptr)
		{
			dump_history_entry(f, ptr->entry);
			ptr = ptr->next;
		}
	}

	fprintf(f, "\n##################\n");
}
  332. static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, unsigned impl)
  333. {
  334. struct starpu_perfmodel_per_arch *per_arch_model;
  335. per_arch_model = &model->per_arch[arch][impl];
  336. /* count the number of elements in the lists */
  337. struct starpu_perfmodel_history_list *ptr = NULL;
  338. unsigned nentries = 0;
  339. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  340. {
  341. /* Dump the list of all entries in the history */
  342. ptr = per_arch_model->list;
  343. while(ptr)
  344. {
  345. nentries++;
  346. ptr = ptr->next;
  347. }
  348. }
  349. return nentries;
  350. }
/* Write a complete performance model to f in the on-disk layout that
 * parse_model_file() reads back: for each device kind (CPU, CUDA, OpenCL,
 * Gordon) the number of architectures that actually hold data, then for
 * each such arch the number of implementations and each per-arch section. */
static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
{
	unsigned narch[4] = { 0, 0, 0, 0};	/* archs to dump per device kind */
	unsigned arch, arch_base = 0, my_narch = 0;
	unsigned nimpl;
	unsigned idx = 0;

	/* Finding the number of archs to write for each kind of device */
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		switch (arch)
		{
		case STARPU_CUDA_DEFAULT:
		case STARPU_OPENCL_DEFAULT:
		case STARPU_GORDON_DEFAULT:
			/* a new device kind starts at this arch index:
			 * remember its base and move to the next narch slot */
			arch_base = arch;
			idx++;
			break;
		default:
			break;
		}

		/* narch[idx] ends up as (highest arch with data) - base + 1,
		 * i.e. how many arch sections of this kind must be written */
		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
				{
					narch[idx]=arch-arch_base+1;
					break;
				}
		}
		else if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_PER_ARCH || model->type == STARPU_COMMON)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
				{
					narch[idx]=arch-arch_base+1;
					break;
				}
		}
		else
		{
			STARPU_ASSERT_MSG(0, "Unknown history-based performance model");
		}
	}

	/* Writing stuff */
	char *name = "unknown";
	/* offset subtracted from the absolute arch index to get the
	 * device-relative index printed in the section headers */
	unsigned substract_to_arch = 0;
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		switch (arch)
		{
		case STARPU_CPU_DEFAULT:
			arch_base = arch;
			name = "CPU";
			fprintf(f, "##################\n");
			fprintf(f, "# %ss\n", name);
			fprintf(f, "# maximum number of %ss\n", name);
			fprintf(f, "%u\n", my_narch = narch[0]);
			break;
		case STARPU_CUDA_DEFAULT:
			arch_base = arch;
			name = "CUDA";
			substract_to_arch = STARPU_MAXCPUS;
			fprintf(f, "##################\n");
			fprintf(f, "# %ss\n", name);
			fprintf(f, "# number of %s architectures\n", name);
			fprintf(f, "%u\n", my_narch = narch[1]);
			break;
		case STARPU_OPENCL_DEFAULT:
			arch_base = arch;
			name = "OPENCL";
			substract_to_arch += STARPU_MAXCUDADEVS;
			fprintf(f, "##################\n");
			fprintf(f, "# %ss\n", name);
			fprintf(f, "# number of %s architectures\n", name);
			fprintf(f, "%u\n", my_narch = narch[2]);
			break;
		case STARPU_GORDON_DEFAULT:
			arch_base = arch;
			name = "GORDON";
			substract_to_arch += STARPU_MAXOPENCLDEVS;
			fprintf(f, "##################\n");
			fprintf(f, "# %ss\n", name);
			fprintf(f, "# number of %s architectures\n", name);
			fprintf(f, "%u\n", my_narch = narch[3]);
			break;
		default:
			break;
		}

		/* highest implementation index with data, plus one */
		unsigned max_impl = 0;
		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
					max_impl = nimpl + 1;
		}
		else if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_PER_ARCH || model->type == STARPU_COMMON)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
					max_impl = nimpl + 1;
		}
		else
			STARPU_ASSERT_MSG(0, "Unknown history-based performance model");

		/* skip archs of this kind beyond the count announced above */
		if (arch >= my_narch + arch_base)
			continue;

		fprintf(f, "###########\n");
		if (substract_to_arch)
			fprintf(f, "# %s_%u\n", name, arch - substract_to_arch);
		else
			/* CPU */
			fprintf(f, "# %u CPU(s) in parallel\n", arch + 1);
		fprintf(f, "# number of implementations\n");
		fprintf(f, "%u\n", max_impl);
		for (nimpl = 0; nimpl < max_impl; nimpl++)
		{
			dump_per_arch_model_file(f, model, arch, nimpl);
		}
	}
}
  470. static void initialize_per_arch_model(struct starpu_perfmodel_per_arch *per_arch_model)
  471. {
  472. per_arch_model->history = NULL;
  473. per_arch_model->list = NULL;
  474. per_arch_model->regression.nsample = 0;
  475. per_arch_model->regression.valid = 0;
  476. per_arch_model->regression.nl_valid = 0;
  477. }
  478. static void initialize_model(struct starpu_perfmodel *model)
  479. {
  480. unsigned arch;
  481. unsigned nimpl;
  482. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
  483. {
  484. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  485. {
  486. initialize_per_arch_model(&model->per_arch[arch][nimpl]);
  487. }
  488. }
  489. }
  490. static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen)
  491. {
  492. STARPU_ASSERT(path);
  493. _starpu_get_perf_model_dir_debug(path, maxlen);
  494. strncat(path, model->symbol, maxlen);
  495. char hostname[32];
  496. char *forced_hostname = getenv("STARPU_HOSTNAME");
  497. if (forced_hostname && forced_hostname[0])
  498. snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
  499. else
  500. gethostname(hostname, sizeof(hostname));
  501. strncat(path, ".", maxlen);
  502. strncat(path, hostname, maxlen);
  503. strncat(path, ".", maxlen);
  504. strncat(path, arch, maxlen);
  505. strncat(path, ".debug", maxlen);
  506. }
  507. /*
  508. * Returns 0 is the model was already loaded, 1 otherwise.
  509. */
  510. int _starpu_register_model(struct starpu_perfmodel *model)
  511. {
  512. /* If the model has already been loaded, there is nothing to do */
  513. _STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  514. if (model->is_loaded)
  515. {
  516. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  517. return 0;
  518. }
  519. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  520. /* We have to make sure the model has not been loaded since the
  521. * last time we took the lock */
  522. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  523. if (model->is_loaded)
  524. {
  525. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  526. return 0;
  527. }
  528. /* add the model to a linked list */
  529. struct _starpu_perfmodel_list *node = (struct _starpu_perfmodel_list *) malloc(sizeof(struct _starpu_perfmodel_list));
  530. node->model = model;
  531. //model->debug_modelid = debug_modelid++;
  532. /* put this model at the beginning of the list */
  533. node->next = registered_models;
  534. registered_models = node;
  535. #ifdef STARPU_MODEL_DEBUG
  536. _starpu_create_sampling_directory_if_needed();
  537. unsigned arch;
  538. unsigned nimpl;
  539. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
  540. {
  541. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  542. {
  543. starpu_perfmodel_debugfilepath(model, arch, model->per_arch[arch][nimpl].debug_path, 256, nimpl);
  544. }
  545. }
  546. #endif
  547. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  548. return 1;
  549. }
  550. static void get_model_path(struct starpu_perfmodel *model, char *path, size_t maxlen)
  551. {
  552. _starpu_get_perf_model_dir_codelets(path, maxlen);
  553. strncat(path, model->symbol, maxlen);
  554. char hostname[32];
  555. char *forced_hostname = getenv("STARPU_HOSTNAME");
  556. if (forced_hostname && forced_hostname[0])
  557. snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
  558. else
  559. gethostname(hostname, sizeof(hostname));
  560. strncat(path, ".", maxlen);
  561. strncat(path, hostname, maxlen);
  562. }
  563. static void save_history_based_model(struct starpu_perfmodel *model)
  564. {
  565. STARPU_ASSERT(model);
  566. STARPU_ASSERT(model->symbol);
  567. /* TODO checks */
  568. /* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */
  569. char path[256];
  570. get_model_path(model, path, 256);
  571. _STARPU_DEBUG("Opening performance model file %s for model %s\n", path, model->symbol);
  572. /* overwrite existing file, or create it */
  573. FILE *f;
  574. f = fopen(path, "w+");
  575. STARPU_ASSERT(f);
  576. dump_model_file(f, model);
  577. fclose(f);
  578. }
  579. static void _starpu_dump_registered_models(void)
  580. {
  581. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  582. struct _starpu_perfmodel_list *node;
  583. node = registered_models;
  584. _STARPU_DEBUG("DUMP MODELS !\n");
  585. while (node)
  586. {
  587. save_history_based_model(node->model);
  588. node = node->next;
  589. }
  590. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  591. }
/* Set up the global model registry: an empty list plus its rwlock. */
void _starpu_initialize_registered_performance_models(void)
{
	registered_models = NULL;
	_STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
}
/* Tear down the global registry: optionally dump all models to disk (when
 * calibration is enabled), then free every model's history structures, the
 * registry list nodes, and the registry lock itself. */
void _starpu_deinitialize_registered_performance_models(void)
{
	if (_starpu_get_calibrate_flag())
		_starpu_dump_registered_models();

	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	struct _starpu_perfmodel_list *node, *pnode;
	node = registered_models;

	_STARPU_DEBUG("FREE MODELS !\n");

	while (node)
	{
		struct starpu_perfmodel *model = node->model;
		unsigned arch;
		unsigned nimpl;

		_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
		for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			{
				struct starpu_perfmodel_per_arch *archmodel = &model->per_arch[arch][nimpl];
				struct starpu_perfmodel_history_list *list, *plist;
				struct starpu_perfmodel_history_table *entry, *tmp;

				/* The hash table and the list reference the same
				 * history entries: free only the hash NODES here,
				 * the entries themselves are freed via the list. */
				HASH_ITER(hh, archmodel->history, entry, tmp)
				{
					HASH_DEL(archmodel->history, entry);
					free(entry);
				}
				archmodel->history = NULL;

				list = archmodel->list;
				while (list) {
					free(list->entry);
					plist = list;
					list = list->next;
					free(plist);
				}
				archmodel->list = NULL;
			}
		}
		model->is_loaded = 0;
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		pnode = node;
		node = node->next;
		free(pnode);
	}
	registered_models = NULL;

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
	_STARPU_PTHREAD_RWLOCK_DESTROY(&registered_models_rwlock);
}
  644. /*
  645. * XXX: We should probably factorize the beginning of the _starpu_load_*_model
  646. * functions. This is a bit tricky though, because we must be sure to unlock
  647. * registered_models_rwlock at the right place.
  648. */
  649. void _starpu_load_per_arch_based_model(struct starpu_perfmodel *model)
  650. {
  651. STARPU_ASSERT(model && model->symbol);
  652. int already_loaded;
  653. _STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  654. already_loaded = model->is_loaded;
  655. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  656. if (already_loaded)
  657. return;
  658. /* The model is still not loaded so we grab the lock in write mode, and
  659. * if it's not loaded once we have the lock, we do load it. */
  660. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  661. /* Was the model initialized since the previous test ? */
  662. if (model->is_loaded)
  663. {
  664. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  665. return;
  666. }
  667. _STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
  668. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  669. }
  670. void _starpu_load_common_based_model(struct starpu_perfmodel *model)
  671. {
  672. STARPU_ASSERT(model && model->symbol);
  673. int already_loaded;
  674. _STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  675. already_loaded = model->is_loaded;
  676. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  677. if (already_loaded)
  678. return;
  679. /* The model is still not loaded so we grab the lock in write mode, and
  680. * if it's not loaded once we have the lock, we do load it. */
  681. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  682. /* Was the model initialized since the previous test ? */
  683. if (model->is_loaded)
  684. {
  685. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  686. return;
  687. }
  688. _STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
  689. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  690. }
  691. /* We first try to grab the global lock in read mode to check whether the model
  692. * was loaded or not (this is very likely to have been already loaded). If the
  693. * model was not loaded yet, we take the lock in write mode, and if the model
  694. * is still not loaded once we have the lock, we do load it. */
/* Load (or initialize) a history-based model from its on-disk file.
 * Locking: registry read lock for the fast "already loaded" check, then the
 * registry write lock for the whole load, with the model's own rwlock
 * initialized and write-held while the file is parsed. */
void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
{
	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	int already_loaded;

	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	already_loaded = model->is_loaded;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);

	if (already_loaded)
		return;

	/* The model is still not loaded so we grab the lock in write mode, and
	 * if it's not loaded once we have the lock, we do load it. */
	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	/* Was the model initialized since the previous test ? */
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return;
	}

	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
	_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

	/* make sure the performance model directory exists (or create it) */
	_starpu_create_sampling_directory_if_needed();

	char path[256];
	get_model_path(model, path, 256);

	_STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol);

	/* calibrate_flag: 0 = use existing data, 1 = calibrate,
	 * 2 = discard existing data and recalibrate from scratch */
	unsigned calibrate_flag = _starpu_get_calibrate_flag();
	model->benchmarking = calibrate_flag;

	/* try to open an existing file and load it */
	int res;
	res = access(path, F_OK);
	if (res == 0)
	{
		if (calibrate_flag == 2)
		{
			/* The user specified that the performance model should
			 * be overwritten, so we don't load the existing file !
			 * */
			_STARPU_DEBUG("Overwrite existing file\n");
			initialize_model(model);
		}
		else
		{
			/* We load the available file */
			_STARPU_DEBUG("File exists\n");
			FILE *f;
			f = fopen(path, "r");
			STARPU_ASSERT(f);

			parse_model_file(f, model, scan_history);

			fclose(f);
		}
	}
	else
	{
		/* No file yet: force calibration so measurements get
		 * collected during this run. */
		_STARPU_DEBUG("File does not exists\n");
		if (!calibrate_flag)
		{
			_STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
		initialize_model(model);
	}

	_STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol);

	model->is_loaded = 1;

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
}
  763. /* This function is intended to be used by external tools that should read
  764. * the performance model files */
  765. int starpu_perfmodel_list(FILE *output)
  766. {
  767. char path[256];
  768. DIR *dp;
  769. struct dirent *ep;
  770. char perf_model_dir_codelets[256];
  771. _starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
  772. strncpy(path, perf_model_dir_codelets, 256);
  773. dp = opendir(path);
  774. if (dp != NULL)
  775. {
  776. while ((ep = readdir(dp)))
  777. {
  778. if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
  779. fprintf(output, "file: <%s>\n", ep->d_name);
  780. }
  781. closedir (dp);
  782. }
  783. else
  784. {
  785. _STARPU_DISP("Could not open the perfmodel directory <%s>\n", path);
  786. }
  787. return 0;
  788. }
  789. /* This function is intended to be used by external tools that should read the
  790. * performance model files */
  791. int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  792. {
  793. model->symbol = strdup(symbol);
  794. initialize_model(model);
  795. /* where is the file if it exists ? */
  796. char path[256];
  797. get_model_path(model, path, 256);
  798. // _STARPU_DEBUG("get_model_path -> %s\n", path);
  799. /* does it exist ? */
  800. int res;
  801. res = access(path, F_OK);
  802. if (res)
  803. {
  804. const char *dot = strrchr(symbol, '.');
  805. if (dot)
  806. {
  807. char *symbol2 = strdup(symbol);
  808. symbol2[dot-symbol] = '\0';
  809. int ret;
  810. fprintf(stderr,"note: loading history from %s instead of %s\n", symbol2, symbol);
  811. ret = starpu_perfmodel_load_symbol(symbol2,model);
  812. free(symbol2);
  813. return ret;
  814. }
  815. _STARPU_DISP("There is no performance model for symbol %s\n", symbol);
  816. return 1;
  817. }
  818. FILE *f = fopen(path, "r");
  819. STARPU_ASSERT(f);
  820. parse_model_file(f, model, 1);
  821. STARPU_ASSERT(fclose(f) == 0);
  822. return 0;
  823. }
  824. void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen,unsigned nimpl)
  825. {
  826. if (arch < STARPU_CUDA_DEFAULT)
  827. {
  828. if (arch == STARPU_CPU_DEFAULT)
  829. {
  830. /* NB: We could just use cpu_1 as well ... */
  831. snprintf(archname, maxlen, "cpu_impl_%u",nimpl);
  832. }
  833. else
  834. {
  835. /* For combined CPU workers */
  836. int cpu_count = arch - STARPU_CPU_DEFAULT + 1;
  837. snprintf(archname, maxlen, "cpu_%d_impl_%u", cpu_count,nimpl);
  838. }
  839. }
  840. else if ((STARPU_CUDA_DEFAULT <= arch)
  841. && (arch < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS))
  842. {
  843. int devid = arch - STARPU_CUDA_DEFAULT;
  844. snprintf(archname, maxlen, "cuda_%d_impl_%u", devid,nimpl);
  845. }
  846. else if ((STARPU_OPENCL_DEFAULT <= arch)
  847. && (arch < STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS))
  848. {
  849. int devid = arch - STARPU_OPENCL_DEFAULT;
  850. snprintf(archname, maxlen, "opencl_%d_impl_%u", devid,nimpl);
  851. }
  852. else if (arch == STARPU_GORDON_DEFAULT)
  853. {
  854. snprintf(archname, maxlen, "gordon_impl_%u",nimpl);
  855. }
  856. else
  857. {
  858. STARPU_ABORT();
  859. }
  860. }
  861. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
  862. enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
  863. {
  864. char archname[32];
  865. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  866. STARPU_ASSERT(path);
  867. get_model_debug_path(model, archname, path, maxlen);
  868. }
  869. double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl)
  870. {
  871. double exp = NAN;
  872. size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
  873. struct starpu_perfmodel_regression_model *regmodel;
  874. regmodel = &model->per_arch[arch][nimpl].regression;
  875. if (regmodel->valid)
  876. exp = regmodel->alpha*pow((double)size, regmodel->beta);
  877. return exp;
  878. }
  879. double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
  880. {
  881. double exp = NAN;
  882. size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
  883. struct starpu_perfmodel_regression_model *regmodel;
  884. regmodel = &model->per_arch[arch][nimpl].regression;
  885. if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
  886. exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
  887. else
  888. {
  889. uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
  890. struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[arch][nimpl];
  891. struct starpu_perfmodel_history_table *history;
  892. struct starpu_perfmodel_history_table *entry;
  893. _STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
  894. history = per_arch_model->history;
  895. HASH_FIND_UINT32_T(history, &key, entry);
  896. _STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
  897. if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
  898. exp = entry->history_entry->mean;
  899. else if (!model->benchmarking)
  900. {
  901. char archname[32];
  902. starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
  903. _STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
  904. _starpu_set_calibrate_flag(1);
  905. model->benchmarking = 1;
  906. }
  907. }
  908. return exp;
  909. }
/* Predict the duration of job "j" on (arch, nimpl) as the mean of the past
 * measurements recorded for its buffer footprint.  Returns NAN when the
 * footprint was never sampled, or sampled fewer than
 * _STARPU_CALIBRATION_MINIMUM times; a NAN result also switches calibration
 * on if it was off. */
double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp;
	struct starpu_perfmodel_per_arch *per_arch_model;
	struct starpu_perfmodel_history_entry *entry;
	struct starpu_perfmodel_history_table *history, *elt;

	/* the footprint hashes the job's buffer layout/sizes */
	uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

	per_arch_model = &model->per_arch[arch][nimpl];

	/* look the footprint up in the per-arch hash table under the read lock */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
	history = per_arch_model->history;
	HASH_FIND_UINT32_T(history, &key, elt);
	entry = (elt == NULL) ? NULL : elt->history_entry;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	/* NOTE(review): entry is dereferenced after the lock is released;
	 * presumably history entries are never freed while the model is
	 * loaded — confirm. */
	exp = entry?entry->mean:NAN;

	if (entry && entry->nsample < _STARPU_CALIBRATION_MINIMUM)
		/* TODO: report differently if we've scheduled really enough
		 * of that task and the scheduler should perhaps put it aside */
		/* Not calibrated enough */
		exp = NAN;

	if (isnan(exp) && !model->benchmarking)
	{
		char archname[32];

		starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
		_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);

		_starpu_set_calibrate_flag(1);
		model->benchmarking = 1;
	}

	return exp;
}
/* Record one new measured execution time for job "j" into "model" on
 * (arch, nimpl).  Depending on model->type this updates the per-footprint
 * history entry, the regression accumulators, or both (NL_REGRESSION updates
 * both).  The whole update is serialized against readers by taking
 * model->model_rwlock in write mode.  No-op when model is NULL. */
void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
{
	if (model)
	{
		_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[arch][nimpl];

		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_perfmodel_history_entry *entry;
			struct starpu_perfmodel_history_table *elt;
			struct starpu_perfmodel_history_list **list;
			/* the footprint identifies the class of jobs this sample
			 * belongs to */
			uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

			list = &per_arch_model->list;
			HASH_FIND_UINT32_T(per_arch_model->history, &key, elt);
			entry = (elt == NULL) ? NULL : elt->history_entry;

			if (!entry)
			{
				/* this is the first entry with such a footprint */
				entry = (struct starpu_perfmodel_history_entry *) malloc(sizeof(struct starpu_perfmodel_history_entry));
				STARPU_ASSERT(entry);
				entry->mean = measured;
				entry->sum = measured;

				entry->deviation = 0.0;
				entry->sum2 = measured*measured;

				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);

				entry->footprint = key;
				entry->nsample = 1;

				insert_history_entry(entry, list, &per_arch_model->history);
			}
			else
			{
				/* there is already some entry with the same footprint:
				 * fold the new sample into the running mean and
				 * standard deviation */
				entry->sum += measured;
				entry->sum2 += measured*measured;
				entry->nsample++;

				unsigned n = entry->nsample;
				entry->mean = entry->sum / n;
				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
			}
			STARPU_ASSERT(entry);
		}

		if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_perfmodel_regression_model *reg_model;
			reg_model = &per_arch_model->regression;

			/* update the regression model: least-squares fit of
			 * ln(measured) = ln(alpha) + beta * ln(size) */
			size_t job_size = _starpu_job_get_data_size(model, arch, nimpl, j);
			double logy, logx;
			logx = log((double)job_size);
			logy = log(measured);

			reg_model->sumlnx += logx;
			reg_model->sumlnx2 += logx*logx;
			reg_model->sumlny += logy;
			reg_model->sumlnxlny += logx*logy;

			/* track the sampled size range; the NL predictor only
			 * trusts the fit within (a margin around) [minx, maxx] */
			if (reg_model->minx == 0 || job_size < reg_model->minx)
				reg_model->minx = job_size;
			if (reg_model->maxx == 0 || job_size > reg_model->maxx)
				reg_model->maxx = job_size;

			reg_model->nsample++;

			unsigned n = reg_model->nsample;

			/* NOTE(review): with a single sample denom is 0 and
			 * beta/alpha become NaN; presumably VALID_REGRESSION
			 * rejects that case — confirm. */
			double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
			double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);

			reg_model->beta = num/denom;
			reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);

			if (VALID_REGRESSION(reg_model))
				reg_model->valid = 1;
		}

#ifdef STARPU_MODEL_DEBUG
		/* append the raw sample and the buffer descriptions to the
		 * per-arch debug file */
		struct starpu_task *task = j->task;
		FILE *f = fopen(per_arch_model->debug_path, "a+");
		if (f == NULL)
		{
			_STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path);
			STARPU_ABORT();
		}

		if (!j->footprint_is_computed)
			(void) _starpu_compute_buffers_footprint(model, arch, nimpl, j);

		STARPU_ASSERT(j->footprint_is_computed);

		fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, nimpl, j), measured, task->predicted, task->predicted_transfer, cpuid);
		unsigned i;
		for (i = 0; i < task->cl->nbuffers; i++)
		{
			starpu_data_handle_t handle = task->handles[i];
			STARPU_ASSERT(handle->ops);
			STARPU_ASSERT(handle->ops->display);
			handle->ops->display(handle, f);
		}
		fprintf(f, "\n");
		fclose(f);
#endif
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
	}
}
  1032. void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured) {
  1033. struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
  1034. _starpu_load_perfmodel(model);
  1035. /* Record measurement */
  1036. _starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
  1037. /* and save perfmodel on termination */
  1038. _starpu_set_calibrate_flag(1);
  1039. }