perfmodel_history.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2012 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <dirent.h>
  19. #include <unistd.h>
  20. #include <sys/stat.h>
  21. #include <errno.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/perfmodel/perfmodel.h>
  25. #include <core/jobs.h>
  26. #include <core/workers.h>
  27. #include <pthread.h>
  28. #include <datawizard/datawizard.h>
  29. #include <core/perfmodel/regression.h>
  30. #include <common/config.h>
  31. #include <starpu_parameters.h>
  32. #include <common/uthash.h>
  33. #ifdef STARPU_HAVE_WINDOWS
  34. #include <windows.h>
  35. #endif
  36. #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
  37. #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
/* Hash-table node (uthash) mapping a data footprint to its measured history
 * entry; one such table exists per (arch, implementation) model slot. */
struct starpu_history_table
{
	UT_hash_handle hh;	/* makes this structure hashable by uthash */
	uint32_t footprint;	/* key: footprint hash of the task's data */
	struct starpu_history_entry *history_entry;	/* timing statistics for that footprint */
};
  44. /* We want more than 10% variance on X to trust regression */
  45. #define VALID_REGRESSION(reg_model) \
  46. ((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= _STARPU_CALIBRATION_MINIMUM)
  47. static pthread_rwlock_t registered_models_rwlock;
  48. static struct starpu_model_list *registered_models = NULL;
  49. /*
  50. * History based model
  51. */
  52. static void insert_history_entry(struct starpu_history_entry *entry, struct starpu_history_list **list, struct starpu_history_table **history_ptr)
  53. {
  54. struct starpu_history_list *link;
  55. struct starpu_history_table *table;
  56. link = (struct starpu_history_list *) malloc(sizeof(struct starpu_history_list));
  57. link->next = *list;
  58. link->entry = entry;
  59. *list = link;
  60. /* detect concurrency issue */
  61. //HASH_FIND_UINT32_T(*history_ptr, &entry->footprint, table);
  62. //STARPU_ASSERT(table == NULL);
  63. table = (struct starpu_history_table*) malloc(sizeof(*table));
  64. STARPU_ASSERT(table != NULL);
  65. table->footprint = entry->footprint;
  66. table->history_entry = entry;
  67. HASH_ADD_UINT32_T(*history_ptr, footprint, table);
  68. }
/* Write the regression section of the (arch, nimpl) model slot to file F:
 * first the linear-regression accumulators and coefficients, then the
 * non-linear a/b/c coefficients.  NaN coefficients mark an invalid model. */
static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
{
	struct starpu_per_arch_perfmodel *per_arch_model;
	per_arch_model = &model->per_arch[arch][nimpl];
	struct starpu_regression_model *reg_model;
	reg_model = &per_arch_model->regression;

	/*
	 * Linear Regression model
	 */

	/* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */
	double alpha = nan(""), beta = nan("");
	if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		/* at least two samples are needed to fit a line */
		if (reg_model->nsample > 1)
		{
			alpha = reg_model->alpha;
			beta = reg_model->beta;
		}
	}

	fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n");
	fprintf(f, "%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\t%-15lu\t%-15lu\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, alpha, beta, reg_model->nsample, reg_model->minx, reg_model->maxx);

	/*
	 * Non-Linear Regression model
	 */

	double a = nan(""), b = nan(""), c = nan("");
	/* the non-linear coefficients are recomputed from the full history list */
	if (model->type == STARPU_NL_REGRESSION_BASED)
		_starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c);

	fprintf(f, "# a\t\tb\t\tc\n");
	fprintf(f, "%-15le\t%-15le\t%-15le\n", a, b, c);
}
/* Read back from F the regression data written by dump_reg_model().
 * Sets reg_model->valid / ->nl_valid to 0 when coefficients are NaN or the
 * sampled sizes do not span a wide enough range (see VALID_REGRESSION). */
static void scan_reg_model(FILE *f, struct starpu_regression_model *reg_model)
{
	int res;

	/*
	 * Linear Regression model
	 */

	_starpu_drop_comments(f);
	/* format must mirror dump_reg_model() exactly */
	res = fscanf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%u\t%lu\t%lu\n",
		&reg_model->sumlnx, &reg_model->sumlnx2, &reg_model->sumlny,
		&reg_model->sumlnxlny, &reg_model->alpha, &reg_model->beta,
		&reg_model->nsample,
		&reg_model->minx, &reg_model->maxx);
	STARPU_ASSERT(res == 9);

	/* If any of the parameters describing the linear regression model is NaN, the model is invalid */
	unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta));
	reg_model->valid = !invalid && VALID_REGRESSION(reg_model);

	/*
	 * Non-Linear Regression model
	 */

	_starpu_drop_comments(f);
	res = fscanf(f, "%le\t%le\t%le\n", &reg_model->a, &reg_model->b, &reg_model->c);
	STARPU_ASSERT(res == 3);

	/* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */
	unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c));
	reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model);
}
/* Write one history entry as a single line: footprint hash, data size, and
 * timing statistics (mean, deviation, raw sums, sample count).  The size is
 * cast to unsigned long so the on-disk format is the same on every platform. */
static void dump_history_entry(FILE *f, struct starpu_history_entry *entry)
{
	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
}
/* Read one history-entry line (the format written by dump_history_entry())
 * from F.  ENTRY may be NULL, in which case the values are parsed and
 * discarded — used to skip sections beyond the compiled-in limits. */
static void scan_history_entry(FILE *f, struct starpu_history_entry *entry)
{
	int res;

	_starpu_drop_comments(f);

	/* In case entry is NULL, we just drop these values */
	unsigned nsample;
	uint32_t footprint;
#ifdef STARPU_HAVE_WINDOWS
	unsigned size; /* in bytes */
#else
	size_t size; /* in bytes */
#endif
	double mean;
	double deviation;
	double sum;
	double sum2;

	/* Read the values from the file */
	/* NOTE: the "z" length modifier for size_t is spliced in at compile
	 * time on non-Windows platforms; Windows reads a plain unsigned. */
	res = fscanf(f, "%x\t%"
#ifndef STARPU_HAVE_WINDOWS
		"z"
#endif
		"u\t%le\t%le\t%le\t%le\t%u\n", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
	STARPU_ASSERT(res == 7);

	if (entry)
	{
		entry->footprint = footprint;
		entry->size = size;
		entry->mean = mean;
		entry->deviation = deviation;
		entry->sum = sum;
		entry->sum2 = sum2;
		entry->nsample = nsample;
	}
}
/* Parse one per-(arch, impl) section of a model file: the number of history
 * entries, the regression data, then the entries themselves.
 * When SCAN_HISTORY is 0, entries are parsed but thrown away (used when the
 * section belongs to an arch/impl beyond the compiled-in limits). */
static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel *per_arch_model, unsigned scan_history)
{
	unsigned nentries;

	_starpu_drop_comments(f);

	int res = fscanf(f, "%u\n", &nentries);
	STARPU_ASSERT(res == 1);

	scan_reg_model(f, &per_arch_model->regression);

	/* parse cpu entries */
	unsigned i;
	for (i = 0; i < nentries; i++)
	{
		struct starpu_history_entry *entry = NULL;
		if (scan_history)
		{
			entry = (struct starpu_history_entry *) malloc(sizeof(struct starpu_history_entry));
			STARPU_ASSERT(entry);
		}

		/* entry == NULL makes scan_history_entry() drop the values */
		scan_history_entry(f, entry);

		/* insert the entry in the hashtable and the list structures */
		if (scan_history)
			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
	}
}
/* Parse the sections for archs [archmin, archmax) into MODEL, then parse and
 * discard SKIPARCH additional archs that were recorded in the file but exceed
 * the limits this build was compiled with.  For each arch, implementations
 * beyond STARPU_MAXIMPLEMENTATIONS are likewise parsed into a dummy slot. */
static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned archmin, unsigned archmax, unsigned skiparch)
{
	struct starpu_per_arch_perfmodel dummy;	/* sink for skipped sections */
	int nimpls, implmax, skipimpl, impl;
	unsigned ret, arch;

	for (arch = archmin; arch < archmax; arch++)
	{
		_STARPU_DEBUG("Parsing arch %u\n", arch);
		_starpu_drop_comments(f);
		ret = fscanf(f, "%d\n", &nimpls);
		_STARPU_DEBUG("%d implementations\n", nimpls);
		STARPU_ASSERT(ret == 1);
		/* keep at most STARPU_MAXIMPLEMENTATIONS, skip the rest */
		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
		for (impl = 0; impl < implmax; impl++)
		{
			parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
		}
		if (skipimpl > 0)
		{
			for (impl = 0; impl < skipimpl; impl++)
			{
				parse_per_arch_model_file(f, &dummy, 0);
			}
		}
	}

	/* drain the sections of archs the file has but this build cannot hold */
	if (skiparch > 0)
	{
		_starpu_drop_comments(f);
		for (arch = 0; arch < skiparch; arch ++)
		{
			_STARPU_DEBUG("skipping arch %u\n", arch);
			ret = fscanf(f, "%d\n", &nimpls);
			_STARPU_DEBUG("%d implementations\n", nimpls);
			STARPU_ASSERT(ret == 1);
			implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
			for (impl = 0; impl < implmax; impl++)
			{
				parse_per_arch_model_file(f, &dummy, 0);
			}
			if (skipimpl > 0)
			{
				for (impl = 0; impl < skipimpl; impl++)
				{
					parse_per_arch_model_file(f, &dummy, 0);
				}
			}
		}
	}
}
/* Parse a whole performance-model file into MODEL.  The file lists, in order,
 * the CPU, CUDA, OpenCL and Gordon sections; each starts with the number of
 * architectures it contains.  Archs beyond the compiled-in maxima are parsed
 * and discarded so the stream stays in sync. */
static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
{
	unsigned ret;
	unsigned archmin = 0;	/* running base index into model->per_arch */
	unsigned max_gordondevs = 1; /* XXX : we need a STARPU_MAXGORDONDEVS cst */
	unsigned narchs;

	/* We could probably write a clean loop here, but the code would not
	 * really be easier to read. */

	/* Parsing CPUs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);

	_STARPU_DEBUG("Parsing %u CPUs\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			archmin,
			STARPU_MIN(narchs, STARPU_MAXCPUS),
			narchs > STARPU_MAXCPUS ? narchs - STARPU_MAXCPUS : 0);
	}

	/* Parsing CUDA devs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);
	archmin += STARPU_MAXCPUS;
	_STARPU_DEBUG("Parsing %u CUDA devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			archmin,
			archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
			narchs > STARPU_MAXCUDADEVS ? narchs - STARPU_MAXCUDADEVS : 0);
	}

	/* Parsing OpenCL devs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);

	archmin += STARPU_MAXCUDADEVS;
	_STARPU_DEBUG("Parsing %u OpenCL devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			archmin,
			archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
			narchs > STARPU_MAXOPENCLDEVS ? narchs - STARPU_MAXOPENCLDEVS : 0);
	}

	/* Parsing Gordon implementations */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);

	archmin += STARPU_MAXOPENCLDEVS;
	_STARPU_DEBUG("Parsing %u Gordon devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
			archmin,
			archmin + max_gordondevs,
			narchs > max_gordondevs ? narchs - max_gordondevs : 0);
	}
}
/* Write the full section for one (arch, nimpl) slot of MODEL to F: a header
 * naming the arch, the entry count, the regression data, and — for history
 * and non-linear regression models — every recorded history entry. */
static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
{
	struct starpu_per_arch_perfmodel *per_arch_model;

	per_arch_model = &model->per_arch[arch][nimpl];

	/* count the number of elements in the lists */
	struct starpu_history_list *ptr = NULL;
	unsigned nentries = 0;

	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		/* Dump the list of all entries in the history */
		ptr = per_arch_model->list;
		while(ptr)
		{
			nentries++;
			ptr = ptr->next;
		}
	}

	/* header */
	char archname[32];
	starpu_perfmodel_get_arch_name((enum starpu_perf_archtype) arch, archname, 32, nimpl);
	fprintf(f, "# Model for %s\n", archname);
	fprintf(f, "# number of entries\n%u\n", nentries);

	dump_reg_model(f, model, arch, nimpl);

	/* Dump the history into the model file in case it is necessary */
	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
		ptr = per_arch_model->list;
		while (ptr)
		{
			dump_history_entry(f, ptr->entry);
			ptr = ptr->next;
		}
	}

	fprintf(f, "\n##################\n");
}
  333. static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, unsigned impl)
  334. {
  335. struct starpu_per_arch_perfmodel *per_arch_model;
  336. per_arch_model = &model->per_arch[arch][impl];
  337. /* count the number of elements in the lists */
  338. struct starpu_history_list *ptr = NULL;
  339. unsigned nentries = 0;
  340. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  341. {
  342. /* Dump the list of all entries in the history */
  343. ptr = per_arch_model->list;
  344. while(ptr)
  345. {
  346. nentries++;
  347. ptr = ptr->next;
  348. }
  349. }
  350. return nentries;
  351. }
/* Write the whole performance model MODEL to file F in the format parsed by
 * parse_model_file(): for each device kind (CPU, CUDA, OpenCL, Gordon) the
 * number of archs actually populated, then one section per arch/impl. */
static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
{
	/* populated arch counts per device kind: [CPU, CUDA, OpenCL, Gordon] */
	unsigned narch[4] = { 0, 0, 0, 0};
	unsigned arch, arch_base = 0, my_narch = 0;
	unsigned nimpl;
	unsigned idx = 0;

	/* Finding the number of archs to write for each kind of device */
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		switch (arch)
		{
			/* each *_DEFAULT value starts a new device kind */
			case STARPU_CUDA_DEFAULT:
			case STARPU_OPENCL_DEFAULT:
			case STARPU_GORDON_DEFAULT:
				arch_base = arch;
				idx++;
				break;
			default:
				break;
		}

		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			/* a single non-empty impl makes this arch (and all before
			 * it in this kind) worth writing */
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
				{
					narch[idx]=arch-arch_base+1;
					break;
				}
		}
		else if (model->type == STARPU_REGRESSION_BASED)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
				{
					narch[idx]=arch-arch_base+1;
					break;
				}
		}
		else
			STARPU_ASSERT_MSG(0, "Unknown history-based performance model");
	}

	/* Writing stuff */
	char *name = "unknown";
	/* offset to turn a global arch index into a per-kind device id */
	unsigned substract_to_arch = 0;
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		switch (arch)
		{
			case STARPU_CPU_DEFAULT:
				arch_base = arch;
				name = "CPU";
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# maximum number of %ss\n", name);
				fprintf(f, "%u\n", my_narch = narch[0]);
				break;
			case STARPU_CUDA_DEFAULT:
				arch_base = arch;
				name = "CUDA";
				substract_to_arch = STARPU_MAXCPUS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", my_narch = narch[1]);
				break;
			case STARPU_OPENCL_DEFAULT:
				arch_base = arch;
				name = "OPENCL";
				substract_to_arch += STARPU_MAXCUDADEVS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", my_narch = narch[2]);
				break;
			case STARPU_GORDON_DEFAULT:
				arch_base = arch;
				name = "GORDON";
				substract_to_arch += STARPU_MAXOPENCLDEVS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", my_narch = narch[3]);
				break;
			default:
				break;
		}

		/* number of implementations that actually hold data for this arch */
		unsigned max_impl = 0;
		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
					max_impl = nimpl + 1;
		}
		else if (model->type == STARPU_REGRESSION_BASED)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
					max_impl = nimpl + 1;
		}
		else
			STARPU_ASSERT_MSG(0, "Unknown history-based performance model");

		/* skip archs beyond the populated count for this device kind */
		if (arch >= my_narch + arch_base)
			continue;

		fprintf(f, "###########\n");
		if (substract_to_arch)
			fprintf(f, "# %s_%u\n", name, arch - substract_to_arch);
		else
			/* CPU */
			fprintf(f, "# %u CPU(s) in parallel\n", arch + 1);
		fprintf(f, "# number of implementations\n");
		fprintf(f, "%u\n", max_impl);
		for (nimpl = 0; nimpl < max_impl; nimpl++)
		{
			dump_per_arch_model_file(f, model, arch, nimpl);
		}
	}
}
  469. static void initialize_per_arch_model(struct starpu_per_arch_perfmodel *per_arch_model)
  470. {
  471. per_arch_model->history = NULL;
  472. per_arch_model->list = NULL;
  473. }
  474. static void initialize_model(struct starpu_perfmodel *model)
  475. {
  476. unsigned arch;
  477. unsigned nimpl;
  478. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
  479. {
  480. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  481. {
  482. initialize_per_arch_model(&model->per_arch[arch][nimpl]);
  483. }
  484. }
  485. }
  486. static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen)
  487. {
  488. STARPU_ASSERT(path);
  489. _starpu_get_perf_model_dir_debug(path, maxlen);
  490. strncat(path, model->symbol, maxlen);
  491. char hostname[32];
  492. char *forced_hostname = getenv("STARPU_HOSTNAME");
  493. if (forced_hostname && forced_hostname[0])
  494. snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
  495. else
  496. gethostname(hostname, sizeof(hostname));
  497. strncat(path, ".", maxlen);
  498. strncat(path, hostname, maxlen);
  499. strncat(path, ".", maxlen);
  500. strncat(path, arch, maxlen);
  501. strncat(path, ".debug", maxlen);
  502. }
/*
 * Returns 0 if the model was already loaded, 1 otherwise.
 */
int _starpu_register_model(struct starpu_perfmodel *model)
{
	/* If the model has already been loaded, there is nothing to do */
	/* fast path: check under the read lock first */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return 0;
	}
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);

	/* We have to make sure the model has not been loaded since the
	 * last time we took the lock */
	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return 0;
	}

	/* add the model to a linked list */
	struct starpu_model_list *node = (struct starpu_model_list *) malloc(sizeof(struct starpu_model_list));

	node->model = model;
	//model->debug_modelid = debug_modelid++;

	/* put this model at the beginning of the list */
	node->next = registered_models;
	registered_models = node;

#ifdef STARPU_MODEL_DEBUG
	/* precompute the per-(arch, impl) debug file paths */
	_starpu_create_sampling_directory_if_needed();

	unsigned arch;
	unsigned nimpl;

	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
		{
			starpu_perfmodel_debugfilepath(model, arch, model->per_arch[arch][nimpl].debug_path, 256, nimpl);
		}
	}
#endif

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
	return 1;
}
  546. static void get_model_path(struct starpu_perfmodel *model, char *path, size_t maxlen)
  547. {
  548. _starpu_get_perf_model_dir_codelets(path, maxlen);
  549. strncat(path, model->symbol, maxlen);
  550. char hostname[32];
  551. char *forced_hostname = getenv("STARPU_HOSTNAME");
  552. if (forced_hostname && forced_hostname[0])
  553. snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
  554. else
  555. gethostname(hostname, sizeof(hostname));
  556. strncat(path, ".", maxlen);
  557. strncat(path, hostname, maxlen);
  558. }
/* Write MODEL to its per-host file under the codelet sampling directory,
 * overwriting any previous content. */
static void save_history_based_model(struct starpu_perfmodel *model)
{
	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	/* TODO checks */

	/* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */
	char path[256];
	get_model_path(model, path, 256);

	_STARPU_DEBUG("Opening performance model file %s for model %s\n", path, model->symbol);

	/* overwrite existing file, or create it */
	FILE *f;
	f = fopen(path, "w+");
	STARPU_ASSERT(f);

	dump_model_file(f, model);

	fclose(f);
}
  575. static void _starpu_dump_registered_models(void)
  576. {
  577. _STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  578. struct starpu_model_list *node;
  579. node = registered_models;
  580. _STARPU_DEBUG("DUMP MODELS !\n");
  581. while (node)
  582. {
  583. save_history_based_model(node->model);
  584. node = node->next;
  585. }
  586. _STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  587. }
  588. void _starpu_initialize_registered_performance_models(void)
  589. {
  590. registered_models = NULL;
  591. _STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
  592. }
/* Tear down the model registry: dump the models to disk if calibration is
 * enabled, then free every model's history structures and the registry list
 * itself, and destroy the registry lock. */
void _starpu_deinitialize_registered_performance_models(void)
{
	if (_starpu_get_calibrate_flag())
		_starpu_dump_registered_models();

	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	struct starpu_model_list *node, *pnode;
	node = registered_models;

	_STARPU_DEBUG("FREE MODELS !\n");

	while (node)
	{
		struct starpu_perfmodel *model = node->model;
		unsigned arch;
		unsigned nimpl;

		_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
		for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
		{
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			{
				struct starpu_per_arch_perfmodel *archmodel = &model->per_arch[arch][nimpl];
				struct starpu_history_list *list, *plist;
				struct starpu_history_table *entry, *tmp;

				/* free the hash-table nodes; the entries themselves
				 * are owned by the list and freed below */
				HASH_ITER(hh, archmodel->history, entry, tmp)
				{
					HASH_DEL(archmodel->history, entry);
					free(entry);
				}
				archmodel->history = NULL;

				/* free the list nodes and the history entries */
				list = archmodel->list;
				while (list) {
					free(list->entry);
					plist = list;
					list = list->next;
					free(plist);
				}
				archmodel->list = NULL;
			}
		}

		model->is_loaded = 0;
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		pnode = node;
		node = node->next;
		free(pnode);
	}
	registered_models = NULL;

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
	_STARPU_PTHREAD_RWLOCK_DESTROY(&registered_models_rwlock);
}
/* We first try to grab the global lock in read mode to check whether the model
 * was loaded or not (this is very likely to have been already loaded). If the
 * model was not loaded yet, we take the lock in write mode, and if the model
 * is still not loaded once we have the lock, we do load it. */
void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
{
	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	int already_loaded;

	/* fast path under the read lock */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	already_loaded = model->is_loaded;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);

	if (already_loaded)
		return;

	/* The model is still not loaded so we grab the lock in write mode, and
	 * if it's not loaded once we have the lock, we do load it. */

	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	/* Was the model initialized since the previous test ? */
	if (model->is_loaded)
	{
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return;
	}

	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);

	_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

	/* make sure the performance model directory exists (or create it) */
	_starpu_create_sampling_directory_if_needed();

	char path[256];
	get_model_path(model, path, 256);

	_STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol);

	unsigned calibrate_flag = _starpu_get_calibrate_flag();
	model->benchmarking = calibrate_flag;

	/* try to open an existing file and load it */
	int res;
	res = access(path, F_OK);
	if (res == 0)
	{
		/* calibrate_flag == 2 means "recalibrate from scratch" */
		if (calibrate_flag == 2)
		{
			/* The user specified that the performance model should
			 * be overwritten, so we don't load the existing file !
			 * */
			_STARPU_DEBUG("Overwrite existing file\n");
			initialize_model(model);
		}
		else
		{
			/* We load the available file */
			_STARPU_DEBUG("File exists\n");
			FILE *f;
			f = fopen(path, "r");
			STARPU_ASSERT(f);

			parse_model_file(f, model, scan_history);

			fclose(f);
		}
	}
	else
	{
		_STARPU_DEBUG("File does not exists\n");
		/* no model on disk: force calibration for this run */
		if (!calibrate_flag)
		{
			_STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
		initialize_model(model);
	}

	_STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol);

	model->is_loaded = 1;

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
}
  712. /* This function is intended to be used by external tools that should read
  713. * the performance model files */
  714. int starpu_list_models(FILE *output)
  715. {
  716. char path[256];
  717. DIR *dp;
  718. struct dirent *ep;
  719. char perf_model_dir_codelets[256];
  720. _starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
  721. strncpy(path, perf_model_dir_codelets, 256);
  722. dp = opendir(path);
  723. if (dp != NULL)
  724. {
  725. while ((ep = readdir(dp)))
  726. {
  727. if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
  728. fprintf(output, "file: <%s>\n", ep->d_name);
  729. }
  730. closedir (dp);
  731. return 0;
  732. }
  733. else
  734. {
  735. perror("Couldn't open the directory");
  736. return 1;
  737. }
  738. }
  739. /* This function is intended to be used by external tools that should read the
  740. * performance model files */
  741. int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel *model)
  742. {
  743. model->symbol = strdup(symbol);
  744. /* where is the file if it exists ? */
  745. char path[256];
  746. get_model_path(model, path, 256);
  747. // _STARPU_DEBUG("get_model_path -> %s\n", path);
  748. /* does it exist ? */
  749. int res;
  750. res = access(path, F_OK);
  751. if (res)
  752. {
  753. const char *dot = strrchr(symbol, '.');
  754. if (dot)
  755. {
  756. char *symbol2 = strdup(symbol);
  757. symbol2[dot-symbol] = '\0';
  758. int ret;
  759. fprintf(stderr,"note: loading history from %s instead of %s\n", symbol2, symbol);
  760. ret = starpu_load_history_debug(symbol2,model);
  761. free(symbol2);
  762. return ret;
  763. }
  764. _STARPU_DISP("There is no performance model for symbol %s\n", symbol);
  765. return 1;
  766. }
  767. FILE *f = fopen(path, "r");
  768. STARPU_ASSERT(f);
  769. parse_model_file(f, model, 1);
  770. STARPU_ASSERT(fclose(f) == 0);
  771. return 0;
  772. }
  773. void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen,unsigned nimpl)
  774. {
  775. if (arch < STARPU_CUDA_DEFAULT)
  776. {
  777. if (arch == STARPU_CPU_DEFAULT)
  778. {
  779. /* NB: We could just use cpu_1 as well ... */
  780. snprintf(archname, maxlen, "cpu_impl_%u",nimpl);
  781. }
  782. else
  783. {
  784. /* For combined CPU workers */
  785. int cpu_count = arch - STARPU_CPU_DEFAULT + 1;
  786. snprintf(archname, maxlen, "cpu_%d_impl_%u", cpu_count,nimpl);
  787. }
  788. }
  789. else if ((STARPU_CUDA_DEFAULT <= arch)
  790. && (arch < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS))
  791. {
  792. int devid = arch - STARPU_CUDA_DEFAULT;
  793. snprintf(archname, maxlen, "cuda_%d_impl_%u", devid,nimpl);
  794. }
  795. else if ((STARPU_OPENCL_DEFAULT <= arch)
  796. && (arch < STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS))
  797. {
  798. int devid = arch - STARPU_OPENCL_DEFAULT;
  799. snprintf(archname, maxlen, "opencl_%d_impl_%u", devid,nimpl);
  800. }
  801. else if (arch == STARPU_GORDON_DEFAULT)
  802. {
  803. snprintf(archname, maxlen, "gordon_impl_%u",nimpl);
  804. }
  805. else
  806. {
  807. STARPU_ABORT();
  808. }
  809. }
  810. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
  811. enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
  812. {
  813. char archname[32];
  814. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  815. STARPU_ASSERT(path);
  816. get_model_debug_path(model, archname, path, maxlen);
  817. }
  818. double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl)
  819. {
  820. double exp = NAN;
  821. size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
  822. struct starpu_regression_model *regmodel;
  823. regmodel = &model->per_arch[arch][nimpl].regression;
  824. if (regmodel->valid)
  825. exp = regmodel->alpha*pow((double)size, regmodel->beta);
  826. return exp;
  827. }
/* Predict the expected duration of job <j> with the non-linear regression
 * model a*size^b + c.  When the regression is not valid, or the data size
 * lies too far outside the fitted range, fall back to the per-footprint
 * history table.  Returns NAN when no estimate is available, and in that
 * case may force calibration for the rest of the run. */
double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp = NAN;
	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
	struct starpu_regression_model *regmodel;

	regmodel = &model->per_arch[arch][nimpl].regression;

	/* Only trust the fitted curve near the size range it was fitted on:
	 * 10% extrapolation below minx / above maxx is tolerated. */
	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
	else
	{
		/* Fallback: look this job's footprint up in the history table. */
		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
		struct starpu_history_table *history;
		struct starpu_history_table *entry;

		/* The table may be modified concurrently (see
		 * _starpu_update_perfmodel_history), so lock around the lookup. */
		_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
		history = per_arch_model->history;
		HASH_FIND_UINT32_T(history, &key, entry);
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		/* Only use the recorded mean once enough samples were taken. */
		if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
			exp = entry->history_entry->mean;
		else if (!model->benchmarking)
		{
			_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);

			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
	}

	return exp;
}
/* Predict the expected duration of job <j> from the per-footprint history
 * of past measurements.  Returns NAN when there is no history table for this
 * architecture/implementation, no entry for this footprint, or too few
 * samples; whenever NAN is returned, calibration is forced for this run. */
double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp;
	struct starpu_per_arch_perfmodel *per_arch_model;
	struct starpu_history_entry *entry;
	struct starpu_history_table *history, *elt;

	uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

	per_arch_model = &model->per_arch[arch][nimpl];

	/* The table may be updated concurrently; hold the read lock only
	 * around the lookup itself. */
	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
	history = per_arch_model->history;
	if (!history) {
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
		return NAN;
	}

	HASH_FIND_UINT32_T(history, &key, elt);
	entry = (elt == NULL) ? NULL : elt->history_entry;
	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	exp = entry?entry->mean:NAN;

	if (entry && entry->nsample < _STARPU_CALIBRATION_MINIMUM)
		/* TODO: report differently if we've scheduled really enough
		 * of that task and the scheduler should perhaps put it aside */
		/* Not calibrated enough */
		exp = NAN;

	/* No usable estimate: ask for calibration (unless already doing so). */
	if (isnan(exp) && !model->benchmarking)
	{
		char archname[32];

		starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
		_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
		_starpu_set_calibrate_flag(1);
		model->benchmarking = 1;
	}

	return exp;
}
/* Feed the measured execution time <measured> of job <j> back into the
 * performance model, under the model write lock:
 *  - history-based and non-linear regression models: update (or create) the
 *    per-footprint history entry, maintaining mean and deviation from the
 *    running sums;
 *  - (non-)linear regression models: accumulate the log-log sums and refit
 *    alpha/beta by least squares;
 *  - with STARPU_MODEL_DEBUG, additionally append a trace line to the
 *    per-architecture debug file. */
void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
{
	if (model)
	{
		_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];

		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_history_entry *entry;
			struct starpu_history_table *history, *elt;
			struct starpu_history_list **list;
			uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

			history = per_arch_model->history;
			list = &per_arch_model->list;

			HASH_FIND_UINT32_T(history, &key, elt);
			entry = (elt == NULL) ? NULL : elt->history_entry;

			if (!entry)
			{
				/* this is the first entry with such a footprint */
				entry = (struct starpu_history_entry *) malloc(sizeof(struct starpu_history_entry));
				STARPU_ASSERT(entry);
				entry->mean = measured;
				entry->sum = measured;

				entry->deviation = 0.0;
				entry->sum2 = measured*measured;

				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);

				entry->footprint = key;
				entry->nsample = 1;

				insert_history_entry(entry, list, &history);
			}
			else
			{
				/* there is already some entry with the same footprint */
				entry->sum += measured;
				entry->sum2 += measured*measured;
				entry->nsample++;

				unsigned n = entry->nsample;
				entry->mean = entry->sum / n;
				/* population standard deviation from the running sums */
				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
			}
			STARPU_ASSERT(entry);
		}

		if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_regression_model *reg_model;
			reg_model = &per_arch_model->regression;

			/* update the regression model: least squares on
			 * ln(time) = ln(alpha) + beta * ln(size) */
			size_t job_size = _starpu_job_get_data_size(model, arch, nimpl, j);
			double logy, logx;
			logx = log((double)job_size);
			logy = log(measured);

			reg_model->sumlnx += logx;
			reg_model->sumlnx2 += logx*logx;
			reg_model->sumlny += logy;
			reg_model->sumlnxlny += logx*logy;

			/* track the size range covered by the samples (used by the
			 * non-linear predictor to bound extrapolation) */
			if (reg_model->minx == 0 || job_size < reg_model->minx)
				reg_model->minx = job_size;
			if (reg_model->maxx == 0 || job_size > reg_model->maxx)
				reg_model->maxx = job_size;

			reg_model->nsample++;

			unsigned n = reg_model->nsample;

			double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
			double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);

			reg_model->beta = num/denom;
			reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);

			if (VALID_REGRESSION(reg_model))
				reg_model->valid = 1;
		}

#ifdef STARPU_MODEL_DEBUG
		struct starpu_task *task = j->task;
		FILE *f = fopen(per_arch_model->debug_path, "a+");
		if (f == NULL)
		{
			_STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path);
			STARPU_ASSERT(0);
		}

		/* make sure j->footprint is available for the trace line */
		if (!j->footprint_is_computed)
			(void) _starpu_compute_buffers_footprint(model, arch, nimpl, j);

		STARPU_ASSERT(j->footprint_is_computed);

		fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, nimpl, j), measured, task->predicted, task->predicted_transfer, cpuid);
		unsigned i;

		for (i = 0; i < task->cl->nbuffers; i++)
		{
			starpu_data_handle_t handle = task->handles[i];
			STARPU_ASSERT(handle->ops);
			STARPU_ASSERT(handle->ops->display);
			handle->ops->display(handle, f);
		}
		fprintf(f, "\n");

		fclose(f);
#endif
		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
	}
}
  984. void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perf_archtype arch, unsigned cpuid, unsigned nimpl, double measured) {
  985. struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
  986. _starpu_load_perfmodel(model);
  987. /* Record measurement */
  988. _starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
  989. /* and save perfmodel on termination */
  990. _starpu_set_calibrate_flag(1);
  991. }