perfmodel_history.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <dirent.h>
  17. #include <unistd.h>
  18. #include <sys/stat.h>
  19. #include <errno.h>
  20. #include <common/config.h>
  21. #include <core/perfmodel/perfmodel.h>
  22. #include <core/jobs.h>
  23. #include <core/workers.h>
  24. #include <pthread.h>
  25. #include <datawizard/datawizard.h>
  26. #include <core/perfmodel/regression.h>
  27. #include <common/config.h>
  28. /*
  29. * History based model
  30. */
  31. static void insert_history_entry(struct starpu_history_entry_t *entry, struct starpu_history_list_t **list, struct starpu_htbl32_node_s **history_ptr)
  32. {
  33. struct starpu_history_list_t *link;
  34. struct starpu_history_entry_t *old;
  35. link = malloc(sizeof(struct starpu_history_list_t));
  36. link->next = *list;
  37. link->entry = entry;
  38. *list = link;
  39. old = htbl_insert_32(history_ptr, entry->footprint, entry);
  40. /* that may fail in case there is some concurrency issue */
  41. STARPU_ASSERT(old == NULL);
  42. }
  43. static void drop_comments(FILE *f)
  44. {
  45. while(1) {
  46. int c = getc(f);
  47. switch (c) {
  48. case '#':
  49. {
  50. char s[128];
  51. do {
  52. fgets(s, sizeof(s), f);
  53. } while (!strchr(s, '\n'));
  54. }
  55. case '\n':
  56. continue;
  57. default:
  58. ungetc(c, f);
  59. return;
  60. }
  61. }
  62. }
  63. static void dump_reg_model(FILE *f, struct starpu_regression_model_t *reg_model)
  64. {
  65. fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\n");
  66. fprintf(f, "%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, reg_model->alpha, reg_model->beta, reg_model->nsample);
  67. }
  68. static void scan_reg_model(FILE *f, struct starpu_regression_model_t *reg_model)
  69. {
  70. int res;
  71. drop_comments(f);
  72. res = fscanf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%u\n", &reg_model->sumlnx, &reg_model->sumlnx2, &reg_model->sumlny, &reg_model->sumlnxlny, &reg_model->alpha, &reg_model->beta, &reg_model->nsample);
  73. STARPU_ASSERT(res == 7);
  74. }
  75. static void dump_history_entry(FILE *f, struct starpu_history_entry_t *entry)
  76. {
  77. fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
  78. }
  79. static void scan_history_entry(FILE *f, struct starpu_history_entry_t *entry)
  80. {
  81. int res;
  82. drop_comments(f);
  83. res = fscanf(f, "%x\t%zu\t%le\t%le\t%le\t%le\t%u\n", &entry->footprint, &entry->size, &entry->mean, &entry->deviation, &entry->sum, &entry->sum2, &entry->nsample);
  84. STARPU_ASSERT(res == 7);
  85. }
  86. static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model, unsigned scan_history)
  87. {
  88. unsigned nentries;
  89. drop_comments(f);
  90. int res = fscanf(f, "%u\n", &nentries);
  91. STARPU_ASSERT(res == 1);
  92. scan_reg_model(f, &per_arch_model->regression);
  93. drop_comments(f);
  94. res = fscanf(f, "%le\t%le\t%le\n",
  95. &per_arch_model->regression.a,
  96. &per_arch_model->regression.b,
  97. &per_arch_model->regression.c);
  98. STARPU_ASSERT(res == 3);
  99. if (isnan(per_arch_model->regression.a)||isnan(per_arch_model->regression.b)||isnan(per_arch_model->regression.c))
  100. {
  101. per_arch_model->regression.valid = 0;
  102. }
  103. else {
  104. per_arch_model->regression.valid = 1;
  105. }
  106. if (!scan_history)
  107. return;
  108. /* parse core entries */
  109. unsigned i;
  110. for (i = 0; i < nentries; i++) {
  111. struct starpu_history_entry_t *entry = malloc(sizeof(struct starpu_history_entry_t));
  112. STARPU_ASSERT(entry);
  113. scan_history_entry(f, entry);
  114. /* insert the entry in the hashtable and the list structures */
  115. insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
  116. }
  117. }
  118. static void parse_model_file(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history)
  119. {
  120. unsigned arch;
  121. for (arch = 0; arch < NARCH_VARIATIONS; arch++)
  122. parse_per_arch_model_file(f, &model->per_arch[arch], scan_history);
  123. }
  124. static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model)
  125. {
  126. /* count the number of elements in the lists */
  127. struct starpu_history_list_t *ptr;
  128. unsigned nentries = 0;
  129. ptr = per_arch_model->list;
  130. while(ptr) {
  131. nentries++;
  132. ptr = ptr->next;
  133. }
  134. /* header */
  135. fprintf(f, "# number of entries\n%u\n", nentries);
  136. dump_reg_model(f, &per_arch_model->regression);
  137. double a,b,c;
  138. regression_non_linear_power(per_arch_model->list, &a, &b, &c);
  139. fprintf(f, "# a\t\tb\t\tc\n");
  140. fprintf(f, "%-15le\t%-15le\t%-15le\n", a, b, c);
  141. fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
  142. ptr = per_arch_model->list;
  143. while (ptr) {
  144. //memcpy(&entries_array[i++], ptr->entry, sizeof(struct starpu_history_entry_t));
  145. dump_history_entry(f, ptr->entry);
  146. ptr = ptr->next;
  147. }
  148. }
  149. static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
  150. {
  151. fprintf(f, "#################\n");
  152. unsigned arch;
  153. for (arch = 0; arch < NARCH_VARIATIONS; arch++)
  154. {
  155. char archname[32];
  156. starpu_perfmodel_get_arch_name(arch, archname, 32);
  157. fprintf(f, "# Model for %s\n", archname);
  158. dump_per_arch_model_file(f, &model->per_arch[arch]);
  159. fprintf(f, "\n##################\n");
  160. }
  161. }
  162. static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_arch_model)
  163. {
  164. per_arch_model->history = NULL;
  165. per_arch_model->list = NULL;
  166. }
  167. static void initialize_model(struct starpu_perfmodel_t *model)
  168. {
  169. unsigned arch;
  170. for (arch = 0; arch < NARCH_VARIATIONS; arch++)
  171. initialize_per_arch_model(&model->per_arch[arch]);
  172. }
  173. static struct starpu_model_list_t *registered_models = NULL;
  174. //static unsigned debug_modelid = 0;
  175. static void get_model_debug_path(struct starpu_perfmodel_t *model, const char *arch, char *path, size_t maxlen)
  176. {
  177. STARPU_ASSERT(path);
  178. _starpu_get_perf_model_dir_debug(path, maxlen);
  179. strncat(path, model->symbol, maxlen);
  180. char hostname[32];
  181. gethostname(hostname, 32);
  182. strncat(path, ".", maxlen);
  183. strncat(path, hostname, maxlen);
  184. strncat(path, ".", maxlen);
  185. strncat(path, arch, maxlen);
  186. strncat(path, ".debug", maxlen);
  187. }
  188. void register_model(struct starpu_perfmodel_t *model)
  189. {
  190. /* add the model to a linked list */
  191. struct starpu_model_list_t *node = malloc(sizeof(struct starpu_model_list_t));
  192. node->model = model;
  193. //model->debug_modelid = debug_modelid++;
  194. /* put this model at the beginning of the list */
  195. node->next = registered_models;
  196. registered_models = node;
  197. #ifdef MODEL_DEBUG
  198. create_sampling_directory_if_needed();
  199. unsigned arch;
  200. for (arch = 0; arch < NARCH_VARIATIONS; arch++)
  201. {
  202. char debugpath[256];
  203. starpu_perfmodel_debugfilepath(model, arch, debugpath, 256);
  204. model->per_arch[arch].debug_file = fopen(debugpath, "a+");
  205. STARPU_ASSERT(model->per_arch[arch].debug_file);
  206. }
  207. #endif
  208. return;
  209. }
  210. static void get_model_path(struct starpu_perfmodel_t *model, char *path, size_t maxlen)
  211. {
  212. _starpu_get_perf_model_dir_codelets(path, maxlen);
  213. strncat(path, model->symbol, maxlen);
  214. char hostname[32];
  215. gethostname(hostname, 32);
  216. strncat(path, ".", maxlen);
  217. strncat(path, hostname, maxlen);
  218. }
  219. static void save_history_based_model(struct starpu_perfmodel_t *model)
  220. {
  221. STARPU_ASSERT(model);
  222. STARPU_ASSERT(model->symbol);
  223. /* TODO checks */
  224. /* filename = $PERF_MODEL_DIR/codelets/symbol.hostname */
  225. char path[256];
  226. get_model_path(model, path, 256);
  227. #ifdef VERBOSE
  228. fprintf(stderr, "Opening performance model file %s for model %s\n", path, model->symbol);
  229. #endif
  230. /* overwrite existing file, or create it */
  231. FILE *f;
  232. f = fopen(path, "w+");
  233. STARPU_ASSERT(f);
  234. dump_model_file(f, model);
  235. fclose(f);
  236. #ifdef DEBUG_MODEL
  237. fclose(model->gordon_debug_file);
  238. fclose(model->cuda_debug_file);
  239. fclose(model->core_debug_file);
  240. #endif
  241. }
  242. void dump_registered_models(void)
  243. {
  244. struct starpu_model_list_t *node;
  245. node = registered_models;
  246. #ifdef VERBOSE
  247. fprintf(stderr, "DUMP MODELS !\n");
  248. #endif
  249. while (node) {
  250. save_history_based_model(node->model);
  251. node = node->next;
  252. /* XXX free node */
  253. }
  254. }
  255. static void load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history)
  256. {
  257. STARPU_ASSERT(model);
  258. STARPU_ASSERT(model->symbol);
  259. unsigned have_to_load;
  260. have_to_load = __sync_bool_compare_and_swap (&model->is_loaded,
  261. STARPU_PERFMODEL_NOT_LOADED,
  262. STARPU_PERFMODEL_LOADING);
  263. if (!have_to_load)
  264. {
  265. /* someone is already loading the model, we wait until it's finished */
  266. while (model->is_loaded != STARPU_PERFMODEL_LOADED)
  267. {
  268. __sync_synchronize();
  269. }
  270. return;
  271. }
  272. int res;
  273. res = pthread_rwlock_init(&model->model_rwlock, NULL);
  274. if (STARPU_UNLIKELY(res))
  275. {
  276. perror("pthread_rwlock_init failed");
  277. STARPU_ABORT();
  278. }
  279. res = pthread_rwlock_wrlock(&model->model_rwlock);
  280. if (STARPU_UNLIKELY(res))
  281. {
  282. perror("pthread_rwlock_wrlock failed");
  283. STARPU_ABORT();
  284. }
  285. /* make sure the performance model directory exists (or create it) */
  286. create_sampling_directory_if_needed();
  287. /*
  288. * We need to keep track of all the model that were opened so that we can
  289. * possibly update them at runtime termination ...
  290. */
  291. register_model(model);
  292. char path[256];
  293. get_model_path(model, path, 256);
  294. #ifdef VERBOSE
  295. fprintf(stderr, "Opening performance model file %s for model %s ... ", path, model->symbol);
  296. #endif
  297. /* try to open an existing file and load it */
  298. res = access(path, F_OK);
  299. if (res == 0) {
  300. #ifdef VERBOSE
  301. fprintf(stderr, "File exists !\n");
  302. #endif
  303. FILE *f;
  304. f = fopen(path, "r");
  305. STARPU_ASSERT(f);
  306. parse_model_file(f, model, scan_history);
  307. fclose(f);
  308. }
  309. else {
  310. #ifdef VERBOSE
  311. fprintf(stderr, "File does not exists !\n");
  312. #endif
  313. initialize_model(model);
  314. }
  315. if (starpu_get_env_number("CALIBRATE") != -1)
  316. {
  317. fprintf(stderr, "CALIBRATE model %s\n", model->symbol);
  318. model->benchmarking = 1;
  319. }
  320. else {
  321. model->benchmarking = 0;
  322. }
  323. model->is_loaded = STARPU_PERFMODEL_LOADED;
  324. res = pthread_rwlock_unlock(&model->model_rwlock);
  325. if (STARPU_UNLIKELY(res))
  326. {
  327. perror("pthread_rwlock_unlock");
  328. STARPU_ABORT();
  329. }
  330. }
  331. /* This function is intended to be used by external tools that should read
  332. * the performance model files */
  333. int starpu_list_models(void)
  334. {
  335. char path[256];
  336. DIR *dp;
  337. struct dirent *ep;
  338. char perf_model_dir_codelets[256];
  339. _starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
  340. strncpy(path, perf_model_dir_codelets, 256);
  341. dp = opendir(path);
  342. if (dp != NULL) {
  343. while ((ep = readdir(dp))) {
  344. if (ep->d_type == DT_REG) {
  345. fprintf(stdout, "file: <%s>\n", ep->d_name);
  346. }
  347. }
  348. closedir (dp);
  349. return 0;
  350. }
  351. else {
  352. perror ("Couldn't open the directory");
  353. return 1;
  354. }
  355. }
  356. /* This function is intended to be used by external tools that should read the
  357. * performance model files */
  358. int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *model)
  359. {
  360. model->symbol = symbol;
  361. /* where is the file if it exists ? */
  362. char path[256];
  363. get_model_path(model, path, 256);
  364. // fprintf(stderr, "get_model_path -> %s\n", path);
  365. /* does it exist ? */
  366. int res;
  367. res = access(path, F_OK);
  368. if (res) {
  369. fprintf(stderr, "There is no performance model for symbol %s\n", symbol);
  370. return 1;
  371. }
  372. FILE *f = fopen(path, "r");
  373. STARPU_ASSERT(f);
  374. parse_model_file(f, model, 1);
  375. return 0;
  376. }
  377. void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen)
  378. {
  379. if (arch == STARPU_CORE_DEFAULT)
  380. {
  381. snprintf(archname, maxlen, "core");
  382. }
  383. else if ((STARPU_CUDA_DEFAULT <= arch)
  384. && (arch < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS))
  385. {
  386. int devid = arch - STARPU_CUDA_DEFAULT;
  387. snprintf(archname, maxlen, "cuda_%d", devid);
  388. }
  389. else if (arch == STARPU_GORDON_DEFAULT)
  390. {
  391. snprintf(archname, maxlen, "gordon");
  392. }
  393. else
  394. {
  395. STARPU_ABORT();
  396. }
  397. }
  398. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
  399. enum starpu_perf_archtype arch, char *path, size_t maxlen)
  400. {
  401. char archname[32];
  402. starpu_perfmodel_get_arch_name(arch, archname, 32);
  403. STARPU_ASSERT(path);
  404. get_model_debug_path(model, archname, path, maxlen);
  405. }
  406. double regression_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)
  407. {
  408. double exp = -1.0;
  409. size_t size = _starpu_job_get_data_size(j);
  410. struct starpu_regression_model_t *regmodel;
  411. if (STARPU_UNLIKELY(model->is_loaded != STARPU_PERFMODEL_LOADED))
  412. load_history_based_model(model, 0);
  413. regmodel = &model->per_arch[arch].regression;
  414. if (regmodel->valid)
  415. exp = regmodel->a*pow(size, regmodel->b) + regmodel->c;
  416. return exp;
  417. }
  418. double history_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)
  419. {
  420. double exp;
  421. struct starpu_per_arch_perfmodel_t *per_arch_model;
  422. struct starpu_history_entry_t *entry;
  423. struct starpu_htbl32_node_s *history;
  424. if (STARPU_UNLIKELY(model->is_loaded != STARPU_PERFMODEL_LOADED))
  425. load_history_based_model(model, 1);
  426. if (STARPU_UNLIKELY(!j->footprint_is_computed))
  427. compute_buffers_footprint(j);
  428. uint32_t key = j->footprint;
  429. per_arch_model = &model->per_arch[arch];
  430. history = per_arch_model->history;
  431. if (!history)
  432. return -1.0;
  433. pthread_rwlock_rdlock(&model->model_rwlock);
  434. entry = htbl_search_32(history, key);
  435. pthread_rwlock_unlock(&model->model_rwlock);
  436. exp = entry?entry->mean:-1.0;
  437. return exp;
  438. }
  439. void _starpu_update_perfmodel_history(job_t j, enum starpu_perf_archtype arch, unsigned cpuid __attribute__((unused)), double measured)
  440. {
  441. struct starpu_perfmodel_t *model = j->task->cl->model;
  442. if (model)
  443. {
  444. struct starpu_per_arch_perfmodel_t *per_arch_model = &model->per_arch[arch];
  445. if (model->type == HISTORY_BASED || model->type == REGRESSION_BASED)
  446. {
  447. uint32_t key = j->footprint;
  448. struct starpu_history_entry_t *entry;
  449. struct starpu_htbl32_node_s *history;
  450. struct starpu_htbl32_node_s **history_ptr;
  451. struct starpu_regression_model_t *reg_model;
  452. struct starpu_history_list_t **list;
  453. history = per_arch_model->history;
  454. history_ptr = &per_arch_model->history;
  455. reg_model = &per_arch_model->regression;
  456. list = &per_arch_model->list;
  457. pthread_rwlock_wrlock(&model->model_rwlock);
  458. entry = htbl_search_32(history, key);
  459. if (!entry)
  460. {
  461. /* this is the first entry with such a footprint */
  462. entry = malloc(sizeof(struct starpu_history_entry_t));
  463. STARPU_ASSERT(entry);
  464. entry->mean = measured;
  465. entry->sum = measured;
  466. entry->deviation = 0.0;
  467. entry->sum2 = measured*measured;
  468. entry->size = _starpu_job_get_data_size(j);
  469. entry->footprint = key;
  470. entry->nsample = 1;
  471. insert_history_entry(entry, list, history_ptr);
  472. }
  473. else {
  474. /* there is already some entry with the same footprint */
  475. entry->sum += measured;
  476. entry->sum2 += measured*measured;
  477. entry->nsample++;
  478. unsigned n = entry->nsample;
  479. entry->mean = entry->sum / n;
  480. entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
  481. }
  482. STARPU_ASSERT(entry);
  483. /* update the regression model as well */
  484. double logy, logx;
  485. logx = log(entry->size);
  486. logy = log(measured);
  487. reg_model->sumlnx += logx;
  488. reg_model->sumlnx2 += logx*logx;
  489. reg_model->sumlny += logy;
  490. reg_model->sumlnxlny += logx*logy;
  491. reg_model->nsample++;
  492. unsigned n = reg_model->nsample;
  493. double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
  494. double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);
  495. reg_model->beta = num/denom;
  496. reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);
  497. pthread_rwlock_unlock(&model->model_rwlock);
  498. }
  499. #ifdef MODEL_DEBUG
  500. FILE * debug_file = per_arch_model->debug_file;
  501. pthread_rwlock_wrlock(&model->model_rwlock);
  502. STARPU_ASSERT(j->footprint_is_computed);
  503. fprintf(debug_file, "0x%x\t%lu\t%lf\t%lf\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(j), measured, j->predicted, cpuid);
  504. unsigned i;
  505. struct starpu_task *task = j->task;
  506. for (i = 0; i < task->cl->nbuffers; i++)
  507. {
  508. struct starpu_data_state_t *state = task->buffers[i].handle;
  509. STARPU_ASSERT(state->ops);
  510. STARPU_ASSERT(state->ops->display);
  511. state->ops->display(state, debug_file);
  512. }
  513. fprintf(debug_file, "\n");
  514. pthread_rwlock_unlock(&model->model_rwlock);
  515. #endif
  516. }
  517. }