perfmodel_history.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009, 2010, 2011 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <dirent.h>
  19. #include <unistd.h>
  20. #include <sys/stat.h>
  21. #include <errno.h>
  22. #include <common/config.h>
  23. #include <core/perfmodel/perfmodel.h>
  24. #include <core/jobs.h>
  25. #include <core/workers.h>
  26. #include <pthread.h>
  27. #include <datawizard/datawizard.h>
  28. #include <core/perfmodel/regression.h>
  29. #include <common/config.h>
  30. #include <starpu_parameters.h>
  31. #ifdef STARPU_HAVE_WINDOWS
  32. #include <windows.h>
  33. #endif
  34. /* We want more than 10% variance on X to trust regression */
  35. #define VALID_REGRESSION(reg_model) \
  36. ((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= STARPU_CALIBRATION_MINIMUM)
  37. static pthread_rwlock_t registered_models_rwlock;
  38. static struct starpu_model_list_t *registered_models = NULL;
  39. /*
  40. * History based model
  41. */
  42. static void insert_history_entry(struct starpu_history_entry_t *entry, struct starpu_history_list_t **list, struct starpu_htbl32_node_s **history_ptr)
  43. {
  44. struct starpu_history_list_t *link;
  45. struct starpu_history_entry_t *old;
  46. link = (struct starpu_history_list_t *) malloc(sizeof(struct starpu_history_list_t));
  47. link->next = *list;
  48. link->entry = entry;
  49. *list = link;
  50. old = (struct starpu_history_entry_t *) _starpu_htbl_insert_32(history_ptr, entry->footprint, entry);
  51. /* that may fail in case there is some concurrency issue */
  52. STARPU_ASSERT(old == NULL);
  53. }
/* Write the regression parameters of one (arch, nimpl) slot of 'model' into
 * the performance model file 'f'.  Must stay in sync with scan_reg_model(). */
static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
{
	struct starpu_per_arch_perfmodel_t *per_arch_model;
	per_arch_model = &model->per_arch[arch][nimpl];
	struct starpu_regression_model_t *reg_model;
	reg_model = &per_arch_model->regression;

	/*
	 * Linear Regression model
	 */

	/* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */
	double alpha = nan(""), beta = nan("");
	if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
	{
		if (reg_model->nsample > 1)
		{
			alpha = reg_model->alpha;
			beta = reg_model->beta;
		}
	}

	fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n");
	fprintf(f, "%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\t%-15lu\t%-15lu\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, alpha, beta, reg_model->nsample, reg_model->minx, reg_model->maxx);

	/*
	 * Non-Linear Regression model
	 */

	/* NaN a/b/c mark an invalid non-linear fit; only NL models compute one */
	double a = nan(""), b = nan(""), c = nan("");

	if (model->type == STARPU_NL_REGRESSION_BASED)
		_starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c);

	fprintf(f, "# a\t\tb\t\tc\n");
	fprintf(f, "%-15le\t%-15le\t%-15le\n", a, b, c);
}
/* Read back the regression parameters written by dump_reg_model() and decide
 * whether the linear and non-linear fits may be trusted. */
static void scan_reg_model(FILE *f, struct starpu_regression_model_t *reg_model)
{
	int res;

	/*
	 * Linear Regression model
	 */

	_starpu_drop_comments(f);

	res = fscanf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%u\t%lu\t%lu\n",
		&reg_model->sumlnx, &reg_model->sumlnx2, &reg_model->sumlny,
		&reg_model->sumlnxlny, &reg_model->alpha, &reg_model->beta,
		&reg_model->nsample,
		&reg_model->minx, &reg_model->maxx);
	STARPU_ASSERT(res == 9);

	/* If any of the parameters describing the linear regression model is NaN, the model is invalid */
	unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta));
	/* and it must also pass the sample-count / x-spread check of VALID_REGRESSION */
	reg_model->valid = !invalid && VALID_REGRESSION(reg_model);

	/*
	 * Non-Linear Regression model
	 */

	_starpu_drop_comments(f);

	res = fscanf(f, "%le\t%le\t%le\n", &reg_model->a, &reg_model->b, &reg_model->c);
	STARPU_ASSERT(res == 3);

	/* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */
	unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c));
	reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model);
}
/* Append one history entry to 'f': footprint hash, data size, mean and
 * deviation of the measured times, raw sums (kept for incremental updates)
 * and sample count.  Must stay in sync with scan_history_entry(). */
static void dump_history_entry(FILE *f, struct starpu_history_entry_t *entry)
{
	fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
}
/* Read one history entry line (as written by dump_history_entry()) from 'f'.
 * When 'entry' is NULL the values are parsed and discarded, which is how
 * callers skip entries they cannot store. */
static void scan_history_entry(FILE *f, struct starpu_history_entry_t *entry)
{
	int res;

	_starpu_drop_comments(f);

	/* In case entry is NULL, we just drop these values */
	unsigned nsample;
	uint32_t footprint;
#ifdef STARPU_HAVE_WINDOWS
	unsigned size; /* in bytes */
#else
	size_t size; /* in bytes */
#endif
	double mean;
	double deviation;
	double sum;
	double sum2;

	/* Read the values from the file */
	/* the "z" length modifier (for size_t) is only spliced in where the
	 * platform supports it; Windows reads a plain unsigned instead */
	res = fscanf(f, "%x\t%"
#ifndef STARPU_HAVE_WINDOWS
	"z"
#endif
	"u\t%le\t%le\t%le\t%le\t%u\n", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
	STARPU_ASSERT(res == 7);

	if (entry)
	{
		entry->footprint = footprint;
		entry->size = size;
		entry->mean = mean;
		entry->deviation = deviation;
		entry->sum = sum;
		entry->sum2 = sum2;
		entry->nsample = nsample;
	}
}
  148. static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model, unsigned scan_history)
  149. {
  150. unsigned nentries;
  151. _starpu_drop_comments(f);
  152. int res = fscanf(f, "%u\n", &nentries);
  153. STARPU_ASSERT(res == 1);
  154. scan_reg_model(f, &per_arch_model->regression);
  155. /* parse cpu entries */
  156. unsigned i;
  157. for (i = 0; i < nentries; i++) {
  158. struct starpu_history_entry_t *entry = NULL;
  159. if (scan_history)
  160. {
  161. entry = (struct starpu_history_entry_t *) malloc(sizeof(struct starpu_history_entry_t));
  162. STARPU_ASSERT(entry);
  163. }
  164. scan_history_entry(f, entry);
  165. /* insert the entry in the hashtable and the list structures */
  166. if (scan_history)
  167. insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
  168. }
  169. }
  170. static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned archmin, unsigned archmax, int skiparch)
  171. {
  172. struct starpu_per_arch_perfmodel_t dummy;
  173. int nimpls, implmax, skipimpl, impl;
  174. unsigned ret, arch;
  175. for (arch = archmin; arch < archmax; arch++) {
  176. _starpu_drop_comments(f);
  177. ret = fscanf(f, "%d\n", &nimpls);
  178. STARPU_ASSERT(ret == 1);
  179. implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
  180. skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
  181. for (impl = 0; impl < implmax; impl++) {
  182. parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
  183. }
  184. if (skipimpl > 0) {
  185. for (impl = 0; impl < skipimpl; impl++) {
  186. parse_per_arch_model_file(f, &dummy, 0);
  187. }
  188. }
  189. }
  190. if (skiparch > 0) {
  191. _starpu_drop_comments(f);
  192. ret = fscanf(f, "%d\n", &nimpls);
  193. STARPU_ASSERT(ret == 1);
  194. implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
  195. skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
  196. for (arch = 0; arch < skiparch; arch ++) {
  197. for (impl = 0; impl < implmax; impl++) {
  198. parse_per_arch_model_file(f, &dummy, 0);
  199. }
  200. if (skipimpl > 0) {
  201. for (impl = 0; impl < skipimpl; impl++) {
  202. parse_per_arch_model_file(f, &dummy, 0);
  203. }
  204. }
  205. }
  206. }
  207. }
/* Read a whole performance model from 'f' into 'model'.  The file holds four
 * sections in a fixed order (CPU, CUDA, OpenCL, Gordon), each preceded by the
 * number of architectures dumped for that device kind.  'scan_history'
 * selects whether per-footprint history entries are loaded too. */
static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
{
	unsigned ret;
	unsigned archmin = 0;
	unsigned max_gordondevs = 1; /* XXX : we need a STARPU_MAXGORDONDEVS cst */
	unsigned narchs;

	/* We could probably write a clean loop here, but the code would not
	 * really be easier to read. */

	/* Parsing CPUs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);

	_STARPU_DEBUG("Parsing %u CPUs\n", narchs);
	if (narchs > 0)
	{
		/* NOTE(review): when narchs < STARPU_MAXCPUS the unsigned
		 * subtraction wraps and the int skiparch parameter receives a
		 * negative value, i.e. nothing to skip — presumably intentional,
		 * but worth confirming. */
		parse_arch(f, model, scan_history,
				archmin,
				STARPU_MIN(narchs, STARPU_MAXCPUS),
				narchs - STARPU_MAXCPUS);
	}

	/* Parsing CUDA devs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);
	/* arch indices for CUDA start right after the CPU combinations */
	archmin += STARPU_MAXCPUS;

	_STARPU_DEBUG("Parsing %u CUDA devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
				archmin,
				archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
				narchs - STARPU_MAXCUDADEVS);
	}

	/* Parsing OpenCL devs */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);
	archmin += STARPU_MAXCUDADEVS;

	_STARPU_DEBUG("Parsing %u OpenCL devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
				archmin,
				archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
				narchs - STARPU_MAXOPENCLDEVS);
	}

	/* Parsing Gordon implementations */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &narchs);
	STARPU_ASSERT(ret == 1);
	archmin += STARPU_MAXOPENCLDEVS;

	_STARPU_DEBUG("Parsing %u Gordon devices\n", narchs);
	if (narchs > 0)
	{
		parse_arch(f, model, scan_history,
				archmin,
				archmin + max_gordondevs,
				narchs - max_gordondevs);
	}
}
  268. static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
  269. {
  270. struct starpu_per_arch_perfmodel_t *per_arch_model;
  271. per_arch_model = &model->per_arch[arch][nimpl];
  272. /* count the number of elements in the lists */
  273. struct starpu_history_list_t *ptr = NULL;
  274. unsigned nentries = 0;
  275. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  276. {
  277. /* Dump the list of all entries in the history */
  278. ptr = per_arch_model->list;
  279. while(ptr) {
  280. nentries++;
  281. ptr = ptr->next;
  282. }
  283. }
  284. /* header */
  285. char archname[32];
  286. starpu_perfmodel_get_arch_name((enum starpu_perf_archtype) arch, archname, 32, nimpl);
  287. fprintf(f, "# Model for %s\n", archname);
  288. fprintf(f, "# number of entries\n%u\n", nentries);
  289. dump_reg_model(f, model, arch, nimpl);
  290. /* Dump the history into the model file in case it is necessary */
  291. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  292. {
  293. fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
  294. ptr = per_arch_model->list;
  295. while (ptr) {
  296. dump_history_entry(f, ptr->entry);
  297. ptr = ptr->next;
  298. }
  299. }
  300. fprintf(f, "\n##################\n");
  301. }
  302. static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, unsigned impl)
  303. {
  304. struct starpu_per_arch_perfmodel_t *per_arch_model;
  305. per_arch_model = &model->per_arch[arch][impl];
  306. /* count the number of elements in the lists */
  307. struct starpu_history_list_t *ptr = NULL;
  308. unsigned nentries = 0;
  309. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  310. {
  311. /* Dump the list of all entries in the history */
  312. ptr = per_arch_model->list;
  313. while(ptr) {
  314. nentries++;
  315. ptr = ptr->next;
  316. }
  317. }
  318. return nentries;
  319. }
/* Write the whole performance model to 'f' in the layout that
 * parse_model_file() expects: one section per device kind (CPU, CUDA,
 * OpenCL, Gordon), each preceded by the number of archs actually dumped. */
static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
{
	/* per-device-kind arch counters: [0]=CPU, [1]=CUDA, [2]=OpenCL, [3]=Gordon */
	unsigned number_of_archs[4] = { 0, 0, 0, 0};
	unsigned arch;
	unsigned nimpl;
	unsigned idx = 0;

	/* Finding the number of archs to write for each kind of device */
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		/* 'idx' advances to the next device-kind counter each time the
		 * arch enumeration crosses a *_DEFAULT boundary */
		switch (arch)
		{
			case STARPU_CUDA_DEFAULT:
			case STARPU_OPENCL_DEFAULT:
			case STARPU_GORDON_DEFAULT:
				idx++;
				break;
			default:
				break;
		}

		/* an arch counts as soon as at least one implementation has data */
		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED) {
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
				{
					number_of_archs[idx]++;
					break;
				}
		} else if (model->type == STARPU_REGRESSION_BASED) {
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
				{
					number_of_archs[idx]++;
					break;
				}
		} else
			STARPU_ASSERT(!"Unknown history-based performance model");
	}

	/* Writing stuff */
	char *name = "unknown";
	/* offset subtracted from 'arch' to recover the device id within its kind */
	unsigned substract_to_arch = 0;
	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
	{
		/* emit the section header when entering a new device kind */
		switch (arch)
		{
			case STARPU_CPU_DEFAULT:
				name = "CPU";
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", number_of_archs[0]);
				break;
			case STARPU_CUDA_DEFAULT:
				name = "CUDA";
				substract_to_arch = STARPU_MAXCPUS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", number_of_archs[1]);
				break;
			case STARPU_OPENCL_DEFAULT:
				name = "OPENCL";
				substract_to_arch += STARPU_MAXCUDADEVS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", number_of_archs[2]);
				break;
			case STARPU_GORDON_DEFAULT:
				name = "GORDON";
				substract_to_arch += STARPU_MAXOPENCLDEVS;
				fprintf(f, "##################\n");
				fprintf(f, "# %ss\n", name);
				fprintf(f, "# number of %s architectures\n", name);
				fprintf(f, "%u\n", number_of_archs[3]);
				break;
			default:
				break;
		}

		/* highest implementation index (+1) that actually holds data */
		unsigned max_impl = 0;
		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED) {
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (get_n_entries(model, arch, nimpl))
					max_impl = nimpl + 1;
		} else if (model->type == STARPU_REGRESSION_BASED) {
			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
				if (model->per_arch[arch][nimpl].regression.nsample)
					max_impl = nimpl + 1;
		} else
			STARPU_ASSERT(!"Unknown history-based performance model");

		/* archs with no data at all are omitted from the file */
		if (max_impl == 0)
			continue;

		fprintf(f, "###########\n");
		fprintf(f, "# %s_%u\n", name, arch - substract_to_arch);
		fprintf(f, "# number of implementations\n");
		fprintf(f, "%u\n", max_impl);
		for (nimpl = 0; nimpl < max_impl; nimpl++)
		{
			dump_per_arch_model_file(f, model, arch, nimpl);
		}
	}
}
/* Reset one (arch, impl) slot: no history hash table, no entry list yet. */
static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_arch_model)
{
	per_arch_model->history = NULL;
	per_arch_model->list = NULL;
}
  425. static void initialize_model(struct starpu_perfmodel *model)
  426. {
  427. unsigned arch;
  428. unsigned nimpl;
  429. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
  430. {
  431. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  432. {
  433. initialize_per_arch_model(&model->per_arch[arch][nimpl]);
  434. }
  435. }
  436. }
  437. static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen)
  438. {
  439. STARPU_ASSERT(path);
  440. _starpu_get_perf_model_dir_debug(path, maxlen);
  441. strncat(path, model->symbol, maxlen);
  442. char hostname[32];
  443. char *forced_hostname = getenv("STARPU_HOSTNAME");
  444. if (forced_hostname && forced_hostname[0])
  445. snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
  446. else
  447. gethostname(hostname, sizeof(hostname));
  448. strncat(path, ".", maxlen);
  449. strncat(path, hostname, maxlen);
  450. strncat(path, ".", maxlen);
  451. strncat(path, arch, maxlen);
  452. strncat(path, ".debug", maxlen);
  453. }
  454. /*
  455. * Returns 0 is the model was already loaded, 1 otherwise.
  456. */
  457. int _starpu_register_model(struct starpu_perfmodel *model)
  458. {
  459. /* If the model has already been loaded, there is nothing to do */
  460. PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  461. if (model->is_loaded) {
  462. PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  463. return 0;
  464. }
  465. PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  466. /* We have to make sure the model has not been loaded since the
  467. * last time we took the lock */
  468. PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  469. if (model->is_loaded) {
  470. PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  471. return 0;
  472. }
  473. /* add the model to a linked list */
  474. struct starpu_model_list_t *node = (struct starpu_model_list_t *) malloc(sizeof(struct starpu_model_list_t));
  475. node->model = model;
  476. //model->debug_modelid = debug_modelid++;
  477. /* put this model at the beginning of the list */
  478. node->next = registered_models;
  479. registered_models = node;
  480. #ifdef STARPU_MODEL_DEBUG
  481. _starpu_create_sampling_directory_if_needed();
  482. unsigned arch;
  483. unsigned nimpl;
  484. for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++) {
  485. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
  486. char debugpath[256];
  487. starpu_perfmodel_debugfilepath(model, arch, debugpath, 256, nimpl);
  488. model->per_arch[arch][nimpl].debug_file = fopen(debugpath, "a+");
  489. STARPU_ASSERT(model->per_arch[arch][nimpl].debug_file);
  490. }
  491. }
  492. #endif
  493. PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  494. return 1;
  495. }
  496. static void get_model_path(struct starpu_perfmodel *model, char *path, size_t maxlen)
  497. {
  498. _starpu_get_perf_model_dir_codelets(path, maxlen);
  499. strncat(path, model->symbol, maxlen);
  500. char hostname[32];
  501. char *forced_hostname = getenv("STARPU_HOSTNAME");
  502. if (forced_hostname && forced_hostname[0])
  503. snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
  504. else
  505. gethostname(hostname, sizeof(hostname));
  506. strncat(path, ".", maxlen);
  507. strncat(path, hostname, maxlen);
  508. }
  509. static void save_history_based_model(struct starpu_perfmodel *model)
  510. {
  511. STARPU_ASSERT(model);
  512. STARPU_ASSERT(model->symbol);
  513. /* TODO checks */
  514. /* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */
  515. char path[256];
  516. get_model_path(model, path, 256);
  517. _STARPU_DEBUG("Opening performance model file %s for model %s\n", path, model->symbol);
  518. /* overwrite existing file, or create it */
  519. FILE *f;
  520. f = fopen(path, "w+");
  521. STARPU_ASSERT(f);
  522. dump_model_file(f, model);
  523. fclose(f);
  524. }
  525. static void _starpu_dump_registered_models(void)
  526. {
  527. PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  528. struct starpu_model_list_t *node;
  529. node = registered_models;
  530. _STARPU_DEBUG("DUMP MODELS !\n");
  531. while (node) {
  532. save_history_based_model(node->model);
  533. node = node->next;
  534. /* XXX free node */
  535. }
  536. PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  537. }
/* Set up the (empty) global list of registered models and the rwlock that
 * protects it. */
void _starpu_initialize_registered_performance_models(void)
{
	registered_models = NULL;

	PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
}
/* Tear-down counterpart of _starpu_initialize_registered_performance_models():
 * dump the models to disk first when calibration is enabled. */
void _starpu_deinitialize_registered_performance_models(void)
{
	if (_starpu_get_calibrate_flag())
		_starpu_dump_registered_models();

	PTHREAD_RWLOCK_DESTROY(&registered_models_rwlock);
}
  549. /* We first try to grab the global lock in read mode to check whether the model
  550. * was loaded or not (this is very likely to have been already loaded). If the
  551. * model was not loaded yet, we take the lock in write mode, and if the model
  552. * is still not loaded once we have the lock, we do load it. */
void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
{
	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	/* Fast path: check under the read lock whether the model was already loaded */
	int already_loaded;

	PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
	already_loaded = model->is_loaded;
	PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
	if (already_loaded)
		return;

	/* The model is still not loaded so we grab the lock in write mode, and
	 * if it's not loaded once we have the lock, we do load it. */

	PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);

	/* Was the model initialized since the previous test ? */
	if (model->is_loaded)
	{
		PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
		return;
	}

	/* the per-model lock is created — and held — before the model becomes
	 * visible as loaded */
	PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);

	PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

	/* make sure the performance model directory exists (or create it) */
	_starpu_create_sampling_directory_if_needed();

	char path[256];
	get_model_path(model, path, 256);

	_STARPU_DEBUG("Opening performance model file %s for model %s ... ", path, model->symbol);

	unsigned calibrate_flag = _starpu_get_calibrate_flag();
	model->benchmarking = calibrate_flag;

	/* try to open an existing file and load it */
	int res;
	res = access(path, F_OK);
	if (res == 0) {
		/* calibrate_flag == 2 means "recalibrate from scratch" */
		if (calibrate_flag == 2)
		{
			/* The user specified that the performance model should
			 * be overwritten, so we don't load the existing file !
			 * */
			_STARPU_DEBUG("Overwrite existing file\n");
			initialize_model(model);
		}
		else {
			/* We load the available file */
			_STARPU_DEBUG("File exists\n");
			FILE *f;
			f = fopen(path, "r");
			STARPU_ASSERT(f);

			parse_model_file(f, model, scan_history);

			fclose(f);
		}
	}
	else {
		/* no file yet: start empty and force calibration for this run */
		_STARPU_DEBUG("File does not exists\n");
		if (!calibrate_flag) {
			_STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
		initialize_model(model);
	}

	_STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol);

	model->is_loaded = 1;

	PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
}
  617. /* This function is intended to be used by external tools that should read
  618. * the performance model files */
  619. int starpu_list_models(FILE *output)
  620. {
  621. char path[256];
  622. DIR *dp;
  623. struct dirent *ep;
  624. char perf_model_dir_codelets[256];
  625. _starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
  626. strncpy(path, perf_model_dir_codelets, 256);
  627. dp = opendir(path);
  628. if (dp != NULL) {
  629. while ((ep = readdir(dp))) {
  630. if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
  631. fprintf(output, "file: <%s>\n", ep->d_name);
  632. }
  633. closedir (dp);
  634. return 0;
  635. }
  636. else {
  637. perror ("Couldn't open the directory");
  638. return 1;
  639. }
  640. }
  641. /* This function is intended to be used by external tools that should read the
  642. * performance model files */
  643. int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel *model)
  644. {
  645. model->symbol = strdup(symbol);
  646. /* where is the file if it exists ? */
  647. char path[256];
  648. get_model_path(model, path, 256);
  649. // _STARPU_DEBUG("get_model_path -> %s\n", path);
  650. /* does it exist ? */
  651. int res;
  652. res = access(path, F_OK);
  653. if (res) {
  654. char *dot = strrchr(symbol, '.');
  655. if (dot) {
  656. char *symbol2 = strdup(symbol);
  657. symbol2[dot-symbol] = '\0';
  658. int ret;
  659. fprintf(stderr,"note: loading history from %s instead of %s\n", symbol2, symbol);
  660. ret = starpu_load_history_debug(symbol2,model);
  661. free(symbol2);
  662. return ret;
  663. }
  664. _STARPU_DISP("There is no performance model for symbol %s\n", symbol);
  665. return 1;
  666. }
  667. FILE *f = fopen(path, "r");
  668. STARPU_ASSERT(f);
  669. parse_model_file(f, model, 1);
  670. STARPU_ASSERT(fclose(f) == 0);
  671. return 0;
  672. }
  673. void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen,unsigned nimpl)
  674. {
  675. if (arch < STARPU_CUDA_DEFAULT)
  676. {
  677. if (arch == STARPU_CPU_DEFAULT)
  678. {
  679. /* NB: We could just use cpu_1 as well ... */
  680. snprintf(archname, maxlen, "cpu_impl_%u",nimpl);
  681. }
  682. else
  683. {
  684. /* For combined CPU workers */
  685. int cpu_count = arch - STARPU_CPU_DEFAULT + 1;
  686. snprintf(archname, maxlen, "cpu_%d_impl_%u", cpu_count,nimpl);
  687. }
  688. }
  689. else if ((STARPU_CUDA_DEFAULT <= arch)
  690. && (arch < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS))
  691. {
  692. int devid = arch - STARPU_CUDA_DEFAULT;
  693. snprintf(archname, maxlen, "cuda_%d_impl_%u", devid,nimpl);
  694. }
  695. else if ((STARPU_OPENCL_DEFAULT <= arch)
  696. && (arch < STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS))
  697. {
  698. int devid = arch - STARPU_OPENCL_DEFAULT;
  699. snprintf(archname, maxlen, "opencl_%d_impl_%u", devid,nimpl);
  700. }
  701. else if (arch == STARPU_GORDON_DEFAULT)
  702. {
  703. snprintf(archname, maxlen, "gordon_impl_%u",nimpl);
  704. }
  705. else
  706. {
  707. STARPU_ABORT();
  708. }
  709. }
  710. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
  711. enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
  712. {
  713. char archname[32];
  714. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  715. STARPU_ASSERT(path);
  716. get_model_debug_path(model, archname, path, maxlen);
  717. }
  718. double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_job_s *j, unsigned nimpl)
  719. {
  720. double exp = -1.0;
  721. size_t size = _starpu_job_get_data_size(j);
  722. struct starpu_regression_model_t *regmodel;
  723. regmodel = &model->per_arch[arch][nimpl].regression;
  724. if (regmodel->valid)
  725. exp = regmodel->alpha*pow((double)size, regmodel->beta);
  726. return exp;
  727. }
/* Expected duration of job 'j' on (arch, nimpl) from the non-linear fit
 * a * size^b + c.  Falls back to the history mean when the job size lies
 * outside the calibrated [minx, maxx] range (with 10% slack) or the fit is
 * invalid.  Returns -1.0 when no estimation is available. */
double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_job_s *j,unsigned nimpl)
{
	double exp = -1.0;
	size_t size = _starpu_job_get_data_size(j);
	struct starpu_regression_model_t *regmodel;

	regmodel = &model->per_arch[arch][nimpl].regression;

	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
	else {
		/* outside the calibrated range: look the footprint up in the history */
		uint32_t key = _starpu_compute_buffers_footprint(j);
		struct starpu_per_arch_perfmodel_t *per_arch_model = &model->per_arch[arch][nimpl];
		struct starpu_htbl32_node_s *history = per_arch_model->history;
		struct starpu_history_entry_t *entry;

		PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
		entry = (struct starpu_history_entry_t *) _starpu_htbl_search_32(history, key);
		PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		/* only trust the mean once enough samples were gathered */
		if (entry && entry->nsample >= STARPU_CALIBRATION_MINIMUM)
			exp = entry->mean;
		else if (!model->benchmarking) {
			_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);

			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
	}

	return exp;
}
  754. double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_job_s *j,unsigned nimpl)
  755. {
  756. double exp;
  757. struct starpu_per_arch_perfmodel_t *per_arch_model;
  758. struct starpu_history_entry_t *entry;
  759. struct starpu_htbl32_node_s *history;
  760. uint32_t key = _starpu_compute_buffers_footprint(j);
  761. per_arch_model = &model->per_arch[arch][nimpl];
  762. history = per_arch_model->history;
  763. if (!history)
  764. return -1.0;
  765. PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
  766. entry = (struct starpu_history_entry_t *) _starpu_htbl_search_32(history, key);
  767. PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
  768. exp = entry?entry->mean:-1.0;
  769. if (entry && entry->nsample < STARPU_CALIBRATION_MINIMUM)
  770. /* TODO: report differently if we've scheduled really enough
  771. * of that task and the scheduler should perhaps put it aside */
  772. /* Not calibrated enough */
  773. exp = -1.0;
  774. if (exp == -1.0 && !model->benchmarking) {
  775. _STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
  776. _starpu_set_calibrate_flag(1);
  777. model->benchmarking = 1;
  778. }
  779. return exp;
  780. }
  781. void _starpu_update_perfmodel_history(starpu_job_t j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
  782. {
  783. if (model)
  784. {
  785. PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
  786. struct starpu_per_arch_perfmodel_t *per_arch_model = &model->per_arch[arch][nimpl];
  787. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  788. {
  789. uint32_t key = _starpu_compute_buffers_footprint(j);
  790. struct starpu_history_entry_t *entry;
  791. struct starpu_htbl32_node_s *history;
  792. struct starpu_htbl32_node_s **history_ptr;
  793. struct starpu_history_list_t **list;
  794. history = per_arch_model->history;
  795. history_ptr = &per_arch_model->history;
  796. list = &per_arch_model->list;
  797. entry = (struct starpu_history_entry_t *) _starpu_htbl_search_32(history, key);
  798. if (!entry)
  799. {
  800. /* this is the first entry with such a footprint */
  801. entry = (struct starpu_history_entry_t *) malloc(sizeof(struct starpu_history_entry_t));
  802. STARPU_ASSERT(entry);
  803. entry->mean = measured;
  804. entry->sum = measured;
  805. entry->deviation = 0.0;
  806. entry->sum2 = measured*measured;
  807. entry->size = _starpu_job_get_data_size(j);
  808. entry->footprint = key;
  809. entry->nsample = 1;
  810. insert_history_entry(entry, list, history_ptr);
  811. }
  812. else {
  813. /* there is already some entry with the same footprint */
  814. entry->sum += measured;
  815. entry->sum2 += measured*measured;
  816. entry->nsample++;
  817. unsigned n = entry->nsample;
  818. entry->mean = entry->sum / n;
  819. entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
  820. }
  821. STARPU_ASSERT(entry);
  822. }
  823. if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  824. {
  825. struct starpu_regression_model_t *reg_model;
  826. reg_model = &per_arch_model->regression;
  827. /* update the regression model */
  828. size_t job_size = _starpu_job_get_data_size(j);
  829. double logy, logx;
  830. logx = log((double)job_size);
  831. logy = log(measured);
  832. reg_model->sumlnx += logx;
  833. reg_model->sumlnx2 += logx*logx;
  834. reg_model->sumlny += logy;
  835. reg_model->sumlnxlny += logx*logy;
  836. if (reg_model->minx == 0 || job_size < reg_model->minx)
  837. reg_model->minx = job_size;
  838. if (reg_model->maxx == 0 || job_size > reg_model->maxx)
  839. reg_model->maxx = job_size;
  840. reg_model->nsample++;
  841. unsigned n = reg_model->nsample;
  842. double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
  843. double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);
  844. reg_model->beta = num/denom;
  845. reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);
  846. if (VALID_REGRESSION(reg_model))
  847. reg_model->valid = 1;
  848. }
  849. #ifdef STARPU_MODEL_DEBUG
  850. struct starpu_task *task = j->task;
  851. FILE * debug_file = per_arch_model->debug_file;
  852. if (!j->footprint_is_computed)
  853. (void) _starpu_compute_buffers_footprint(j);
  854. STARPU_ASSERT(j->footprint_is_computed);
  855. fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(j), measured, task->predicted, task->predicted_transfer, cpuid);
  856. unsigned i;
  857. for (i = 0; i < task->cl->nbuffers; i++)
  858. {
  859. starpu_data_handle handle = task->buffers[i].handle;
  860. STARPU_ASSERT(handle->ops);
  861. STARPU_ASSERT(handle->ops->display);
  862. handle->ops->display(handle, debug_file);
  863. }
  864. fprintf(debug_file, "\n");
  865. #endif
  866. PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
  867. }
  868. }