perfmodel_history.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011 Télécom-SudParis
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <dirent.h>
  19. #include <unistd.h>
  20. #include <sys/stat.h>
  21. #include <errno.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/perfmodel/perfmodel.h>
  25. #include <core/jobs.h>
  26. #include <core/workers.h>
  27. #include <datawizard/datawizard.h>
  28. #include <core/perfmodel/regression.h>
  29. #include <common/config.h>
  30. #include <starpu_parameters.h>
  31. #include <common/uthash.h>
  32. #ifdef STARPU_HAVE_WINDOWS
  33. #include <windows.h>
  34. #endif
  35. #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
  36. #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
/* Hash-table node mapping a task footprint to its history entry.
 * Indexed with uthash via HASH_ADD_UINT32_T/HASH_FIND_UINT32_T above. */
struct starpu_perfmodel_history_table
{
	UT_hash_handle hh;	/* makes this structure hashable by uthash */
	uint32_t footprint;	/* hash key: footprint of the task's data */
	struct starpu_perfmodel_history_entry *history_entry;	/* measured timing record for this footprint */
};
  43. /* We want more than 10% variance on X to trust regression */
  44. #define VALID_REGRESSION(reg_model) \
  45. ((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= _STARPU_CALIBRATION_MINIMUM)
  46. static starpu_pthread_rwlock_t registered_models_rwlock;
  47. static struct _starpu_perfmodel_list *registered_models = NULL;
  48. size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, struct _starpu_job *j)
  49. {
  50. struct starpu_task *task = j->task;
  51. if (model && model->per_arch[arch->type][arch->devid][arch->ncore][nimpl].size_base)
  52. {
  53. return model->per_arch[arch->type][arch->devid][arch->ncore][nimpl].size_base(task, arch, nimpl);
  54. }
  55. else if (model && model->size_base)
  56. {
  57. return model->size_base(task, nimpl);
  58. }
  59. else
  60. {
  61. unsigned nbuffers = task->cl->nbuffers;
  62. size_t size = 0;
  63. unsigned buffer;
  64. for (buffer = 0; buffer < nbuffers; buffer++)
  65. {
  66. starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
  67. size += _starpu_data_get_size(handle);
  68. }
  69. return size;
  70. }
  71. }
  72. /*
  73. * History based model
  74. */
  75. static void insert_history_entry(struct starpu_perfmodel_history_entry *entry, struct starpu_perfmodel_history_list **list, struct starpu_perfmodel_history_table **history_ptr)
  76. {
  77. struct starpu_perfmodel_history_list *link;
  78. struct starpu_perfmodel_history_table *table;
  79. link = (struct starpu_perfmodel_history_list *) malloc(sizeof(struct starpu_perfmodel_history_list));
  80. link->next = *list;
  81. link->entry = entry;
  82. *list = link;
  83. /* detect concurrency issue */
  84. //HASH_FIND_UINT32_T(*history_ptr, &entry->footprint, table);
  85. //STARPU_ASSERT(table == NULL);
  86. table = (struct starpu_perfmodel_history_table*) malloc(sizeof(*table));
  87. STARPU_ASSERT(table != NULL);
  88. table->footprint = entry->footprint;
  89. table->history_entry = entry;
  90. HASH_ADD_UINT32_T(*history_ptr, footprint, table);
  91. }
  92. static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  93. {
  94. struct starpu_perfmodel_per_arch *per_arch_model;
  95. per_arch_model = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl];
  96. struct starpu_perfmodel_regression_model *reg_model;
  97. reg_model = &per_arch_model->regression;
  98. /*
  99. * Linear Regression model
  100. */
  101. /* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */
  102. double alpha = nan(""), beta = nan("");
  103. if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  104. {
  105. if (reg_model->nsample > 1)
  106. {
  107. alpha = reg_model->alpha;
  108. beta = reg_model->beta;
  109. }
  110. }
  111. fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n");
  112. fprintf(f, "%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\t%-15lu\t%-15lu\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, alpha, beta, reg_model->nsample, reg_model->minx, reg_model->maxx);
  113. /*
  114. * Non-Linear Regression model
  115. */
  116. double a = nan(""), b = nan(""), c = nan("");
  117. if (model->type == STARPU_NL_REGRESSION_BASED)
  118. _starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c);
  119. fprintf(f, "# a\t\tb\t\tc\n");
  120. fprintf(f, "%-15le\t%-15le\t%-15le\n", a, b, c);
  121. }
/* Read back from F the regression coefficients written by dump_reg_model,
 * and mark the linear/non-linear regressions valid only when none of their
 * parameters is NaN and the sampled size range is wide enough
 * (VALID_REGRESSION).  The read order must exactly mirror the dump order. */
static void scan_reg_model(FILE *f, struct starpu_perfmodel_regression_model *reg_model)
{
	int res;

	/*
	 * Linear Regression model
	 */

	_starpu_drop_comments(f);

	res = fscanf(f, "%le\t%le\t%le\t%le", &reg_model->sumlnx, &reg_model->sumlnx2, &reg_model->sumlny, &reg_model->sumlnxlny);
	STARPU_ASSERT_MSG(res == 4, "Incorrect performance model file");
	/* alpha/beta may be "nan": _starpu_read_double handles that case */
	res = _starpu_read_double(f, "\t%le", &reg_model->alpha);
	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
	res = _starpu_read_double(f, "\t%le", &reg_model->beta);
	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
	res = fscanf(f, "\t%u\t%lu\t%lu\n", &reg_model->nsample, &reg_model->minx, &reg_model->maxx);
	STARPU_ASSERT_MSG(res == 3, "Incorrect performance model file");

	/* If any of the parameters describing the linear regression model is NaN, the model is invalid */
	unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta));
	reg_model->valid = !invalid && VALID_REGRESSION(reg_model);

	/*
	 * Non-Linear Regression model
	 */

	_starpu_drop_comments(f);

	res = _starpu_read_double(f, "%le\t", &reg_model->a);
	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
	res = _starpu_read_double(f, "%le\t", &reg_model->b);
	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
	res = _starpu_read_double(f, "%le\n", &reg_model->c);
	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");

	/* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */
	unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c));
	reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model);
}
  154. static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
  155. {
  156. fprintf(f, "%08x\t%-15lu\t%-15le\t%-15le\t%-15le\t%-15le\t%-15le\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
  157. }
/* Read one history record from F into ENTRY.  If ENTRY is NULL the record
 * is parsed and discarded (used to skip sections).  Falls back to the older
 * 7-field format (without the flops column) when the 8-field parse fails. */
static void scan_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry)
{
	int res;

	_starpu_drop_comments(f);

	/* In case entry is NULL, we just drop these values */
	unsigned nsample;
	uint32_t footprint;
	unsigned long size; /* in bytes */
	double flops;
	double mean;
	double deviation;
	double sum;
	double sum2;

	char line[256];
	char *ret;

	/* Read a full line first so we can retry the parse with the old
	 * format without re-reading the stream */
	ret = fgets(line, sizeof(line), f);
	STARPU_ASSERT(ret);
	/* The line must fit entirely in the buffer */
	STARPU_ASSERT(strchr(line, '\n'));

	/* Read the values from the file */
	res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &flops, &mean, &deviation, &sum, &sum2, &nsample);

	if (res != 8)
	{
		/* Fall back to the legacy format which lacked the flops field */
		flops = 0.;
		/* Read the values from the file */
		res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample);
		STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file");
	}

	if (entry)
	{
		entry->footprint = footprint;
		entry->size = size;
		entry->flops = flops;
		entry->mean = mean;
		entry->deviation = deviation;
		entry->sum = sum;
		entry->sum2 = sum2;
		entry->nsample = nsample;
	}
}
  197. static void parse_per_arch_model_file(FILE *f, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
  198. {
  199. unsigned nentries;
  200. _starpu_drop_comments(f);
  201. int res = fscanf(f, "%u\n", &nentries);
  202. STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
  203. scan_reg_model(f, &per_arch_model->regression);
  204. /* parse entries */
  205. unsigned i;
  206. for (i = 0; i < nentries; i++)
  207. {
  208. struct starpu_perfmodel_history_entry *entry = NULL;
  209. if (scan_history)
  210. {
  211. entry = (struct starpu_perfmodel_history_entry *) malloc(sizeof(struct starpu_perfmodel_history_entry));
  212. STARPU_ASSERT(entry);
  213. /* Tell helgrind that we do not care about
  214. * racing access to the sampling, we only want a
  215. * good-enough estimation */
  216. STARPU_HG_DISABLE_CHECKING(entry->nsample);
  217. STARPU_HG_DISABLE_CHECKING(entry->mean);
  218. }
  219. scan_history_entry(f, entry);
  220. /* insert the entry in the hashtable and the list structures */
  221. /* TODO: Insert it at the end of the list, to avoid reversing
  222. * the order... But efficiently! We may have a lot of entries */
  223. if (scan_history)
  224. insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
  225. }
  226. }
  227. static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history,struct starpu_perfmodel_arch* arch)
  228. {
  229. struct starpu_perfmodel_per_arch dummy;
  230. unsigned nimpls, implmax, impl, i, ret;
  231. //_STARPU_DEBUG("Parsing %s_%u_ncore_%u\n",
  232. // starpu_perfmodel_get_archtype_name(arch->type),
  233. // arch->devid,
  234. // arch->ncore);
  235. /* Parsing number of implementation */
  236. _starpu_drop_comments(f);
  237. ret = fscanf(f, "%u\n", &nimpls);
  238. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  239. if( model != NULL)
  240. {
  241. /* Parsing each implementation */
  242. implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
  243. for (impl = 0; impl < implmax; impl++)
  244. parse_per_arch_model_file(f, &model->per_arch[arch->type][arch->devid][arch->ncore][impl], scan_history);
  245. }
  246. else
  247. {
  248. impl = 0;
  249. }
  250. /* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS
  251. * we skip the last implementation */
  252. for (i = impl; i < nimpls; i++)
  253. parse_per_arch_model_file(f, &dummy, 0);
  254. }
/* Parse the section of F describing one device of ARCHTYPE.  With a
 * non-NULL MODEL, fill the per-arch slots the current machine actually has
 * (the per_arch array is NULL-terminated); the remaining core counts stored
 * in the file are parsed into the void so the stream stays in sync. */
static void parse_device(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, enum starpu_worker_archtype archtype, unsigned devid)
{
	unsigned maxncore, ncore, ret, i;
	struct starpu_perfmodel_arch arch;
	arch.type = archtype;
	arch.devid = devid;

	/* Parsing maximun number of worker for this device */
	_starpu_drop_comments(f);
	ret = fscanf(f, "%u\n", &maxncore);
	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");

	/* Parsing each arch */
	if(model !=NULL)
	{
		/* Stop at the first core count the current machine lacks */
		for(ncore=0; ncore < maxncore && model->per_arch[archtype][devid][ncore] != NULL; ncore++)
		{
			arch.ncore = ncore;
			parse_arch(f,model,scan_history,&arch);
		}
	}
	else
	{
		ncore=0;
	}
	/* Skip the rest of the device section (NULL model: parse-and-drop) */
	for(i=ncore; i < maxncore; i++)
	{
		arch.ncore = i;
		parse_arch(f,NULL,scan_history,&arch);
	}
}
  287. static void parse_archtype(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, enum starpu_worker_archtype archtype)
  288. {
  289. unsigned ndevice, devid, ret, i;
  290. //_STARPU_DEBUG("Parsing %s arch\n", starpu_perfmodel_get_archtype_name(archtype));
  291. /* Parsing number of device for this archtype */
  292. _starpu_drop_comments(f);
  293. ret = fscanf(f, "%u\n", &ndevice);
  294. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  295. /* Parsing each device for this archtype*/
  296. if(model != NULL)
  297. {
  298. for(devid=0; devid < ndevice && model->per_arch[archtype][devid] != NULL; devid++)
  299. {
  300. parse_device(f,model,scan_history,archtype,devid);
  301. }
  302. }
  303. else
  304. {
  305. devid=0;
  306. }
  307. for(i=devid; i < ndevice; i++)
  308. {
  309. parse_device(f,NULL,scan_history,archtype,i);
  310. }
  311. }
  312. static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
  313. {
  314. unsigned archtype;
  315. int ret, version;
  316. //_STARPU_DEBUG("Start parsing\n");
  317. /* Parsing performance model version */
  318. _starpu_drop_comments(f);
  319. ret = fscanf(f, "%d\n", &version);
  320. STARPU_ASSERT_MSG(version == _STARPU_PERFMODEL_VERSION, "Incorrect performance model file with a model version %d not being the current model version (%d)\n",
  321. version, _STARPU_PERFMODEL_VERSION);
  322. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  323. /* Parsing each kind of archtype */
  324. for(archtype=0; archtype<STARPU_NARCH; archtype++)
  325. {
  326. parse_archtype(f, model, scan_history, archtype);
  327. }
  328. }
  329. static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned nimpl)
  330. {
  331. struct starpu_perfmodel_per_arch *per_arch_model;
  332. per_arch_model = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl];
  333. /* count the number of elements in the lists */
  334. struct starpu_perfmodel_history_list *ptr = NULL;
  335. unsigned nentries = 0;
  336. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  337. {
  338. /* Dump the list of all entries in the history */
  339. ptr = per_arch_model->list;
  340. while(ptr)
  341. {
  342. nentries++;
  343. ptr = ptr->next;
  344. }
  345. }
  346. /* header */
  347. char archname[32];
  348. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  349. fprintf(f, "#####\n");
  350. fprintf(f, "# Model for %s\n", archname);
  351. fprintf(f, "# number of entries\n%u\n", nentries);
  352. dump_reg_model(f, model, arch, nimpl);
  353. /* Dump the history into the model file in case it is necessary */
  354. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  355. {
  356. fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\tdev (us)\t\tsum\t\tsum2\t\tn\n");
  357. ptr = per_arch_model->list;
  358. while (ptr)
  359. {
  360. dump_history_entry(f, ptr->entry);
  361. ptr = ptr->next;
  362. }
  363. }
  364. fprintf(f, "\n");
  365. }
  366. static unsigned get_n_entries(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned impl)
  367. {
  368. struct starpu_perfmodel_per_arch *per_arch_model;
  369. per_arch_model = &model->per_arch[arch->type][arch->devid][arch->ncore][impl];
  370. /* count the number of elements in the lists */
  371. struct starpu_perfmodel_history_list *ptr = NULL;
  372. unsigned nentries = 0;
  373. if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
  374. {
  375. /* Dump the list of all entries in the history */
  376. ptr = per_arch_model->list;
  377. while(ptr)
  378. {
  379. nentries++;
  380. ptr = ptr->next;
  381. }
  382. }
  383. return nentries;
  384. }
/* Write MODEL to the already-open file F in the on-disk format read back by
 * parse_model_file / initialize_model_with_file: version header, then for
 * each archtype the device count, per-device worker counts, and for each
 * worker count the per-implementation sections. */
static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
{
	struct _starpu_machine_config *conf = _starpu_get_machine_config();
	char *name = "unknown";
	unsigned archtype, ndevice, *ncore, devid, nc, nimpl;
	struct starpu_perfmodel_arch arch;

	fprintf(f, "##################\n");
	fprintf(f, "# Performance Model Version\n");
	fprintf(f, "%d\n\n", _STARPU_PERFMODEL_VERSION);

	for(archtype=0; archtype<STARPU_NARCH; archtype++)
	{
		arch.type = archtype;
		/* Device and core counts come from the detected topology;
		 * ncore == NULL means one worker per device */
		switch (archtype)
		{
		case STARPU_CPU_WORKER:
			ndevice = 1;
			ncore = &conf->topology.nhwcpus;
			name = "CPU";
			break;
		case STARPU_CUDA_WORKER:
			ndevice = conf->topology.nhwcudagpus;
			ncore = NULL;
			name = "CUDA";
			break;
		case STARPU_OPENCL_WORKER:
			ndevice = conf->topology.nhwopenclgpus;
			ncore = NULL;
			name = "OPENCL";
			break;
		case STARPU_MIC_WORKER:
			ndevice = conf->topology.nhwmicdevices;
			ncore = conf->topology.nhwmiccores;
			name = "MIC";
			break;
		case STARPU_SCC_WORKER:
			ndevice = conf->topology.nhwscc;
			ncore = NULL;
			name = "SCC";
			break;
		default:
			/* Unknown arch */
			STARPU_ABORT();
			break;
		}

		fprintf(f, "####################\n");
		fprintf(f, "# %ss\n", name);
		fprintf(f, "# number of %s devices\n", name);
		fprintf(f, "%u\n", ndevice);

		for(devid=0; devid<ndevice; devid++)
		{
			arch.devid = devid;
			fprintf(f, "###############\n");
			fprintf(f, "# %s_%u\n", name, devid);
			fprintf(f, "# number of workers on device %s_%d\n", name, devid);
			if(ncore != NULL)
				fprintf(f, "%u\n", ncore[devid]);
			else
				fprintf(f, "1\n");
			/* per_arch[archtype][devid] is NULL-terminated */
			for(nc=0; model->per_arch[archtype][devid][nc] != NULL; nc++)
			{
				arch.ncore = nc;
				/* Only dump implementations that actually have
				 * data, depending on the model type */
				unsigned max_impl = 0;
				if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
				{
					for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
						if (get_n_entries(model, &arch, nimpl))
							max_impl = nimpl + 1;
				}
				else if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_PER_ARCH || model->type == STARPU_COMMON)
				{
					for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
						if (model->per_arch[archtype][devid][nc][nimpl].regression.nsample)
							max_impl = nimpl + 1;
				}
				else
					STARPU_ASSERT_MSG(0, "Unknown history-based performance model %u", archtype);

				fprintf(f, "##########\n");
				fprintf(f, "# %u worker(s) in parallel\n", nc+1);

				fprintf(f, "# number of implementations\n");
				fprintf(f, "%u\n", max_impl);
				for (nimpl = 0; nimpl < max_impl; nimpl++)
				{
					dump_per_arch_model_file(f, model, &arch, nimpl);
				}
			}
		}
	}
}
  473. static void initialize_per_arch_model(struct starpu_perfmodel_per_arch *per_arch_model)
  474. {
  475. memset(per_arch_model, 0, sizeof(struct starpu_perfmodel_per_arch));
  476. }
  477. static struct starpu_perfmodel_per_arch*** initialize_arch_model(int maxdevid, unsigned* maxncore_table)
  478. {
  479. int devid, ncore, nimpl;
  480. struct starpu_perfmodel_per_arch *** arch_model = malloc(sizeof(*arch_model)*(maxdevid+1));
  481. arch_model[maxdevid] = NULL;
  482. for(devid=0; devid<maxdevid; devid++)
  483. {
  484. int maxncore;
  485. if(maxncore_table != NULL)
  486. maxncore = maxncore_table[devid];
  487. else
  488. maxncore = 1;
  489. arch_model[devid] = malloc(sizeof(*arch_model[devid])*(maxncore+1));
  490. arch_model[devid][maxncore] = NULL;
  491. for(ncore=0; ncore<maxncore; ncore++)
  492. {
  493. arch_model[devid][ncore] = malloc(sizeof(*arch_model[devid][ncore])*STARPU_MAXIMPLEMENTATIONS);
  494. for(nimpl=0; nimpl<STARPU_MAXIMPLEMENTATIONS; nimpl++)
  495. {
  496. initialize_per_arch_model(&arch_model[devid][ncore][nimpl]);
  497. }
  498. }
  499. }
  500. return arch_model;
  501. }
  502. static void initialize_model(struct starpu_perfmodel *model)
  503. {
  504. struct _starpu_machine_config *conf = _starpu_get_machine_config();
  505. model->per_arch = malloc(sizeof(*model->per_arch)*(STARPU_NARCH));
  506. model->per_arch[STARPU_CPU_WORKER] = initialize_arch_model(1,&conf->topology.nhwcpus);
  507. model->per_arch[STARPU_CUDA_WORKER] = initialize_arch_model(conf->topology.nhwcudagpus,NULL);
  508. model->per_arch[STARPU_OPENCL_WORKER] = initialize_arch_model(conf->topology.nhwopenclgpus,NULL);
  509. model->per_arch[STARPU_MIC_WORKER] = initialize_arch_model(conf->topology.nhwmicdevices,conf->topology.nhwmiccores);
  510. model->per_arch[STARPU_SCC_WORKER] = initialize_arch_model(conf->topology.nhwscc,NULL);
  511. }
  512. static void initialize_model_with_file(FILE*f, struct starpu_perfmodel *model)
  513. {
  514. unsigned ret, archtype, devid, i, ndevice, * maxncore;
  515. struct starpu_perfmodel_arch arch;
  516. int version;
  517. /* Parsing performance model version */
  518. _starpu_drop_comments(f);
  519. ret = fscanf(f, "%d\n", &version);
  520. STARPU_ASSERT_MSG(version == _STARPU_PERFMODEL_VERSION, "Incorrect performance model file with a model version %d not being the current model version (%d)\n",
  521. version, _STARPU_PERFMODEL_VERSION);
  522. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  523. model->per_arch = malloc(sizeof(*model->per_arch)*(STARPU_NARCH));
  524. for(archtype=0; archtype<STARPU_NARCH; archtype++)
  525. {
  526. arch.type = archtype;
  527. _starpu_drop_comments(f);
  528. ret = fscanf(f, "%u\n", &ndevice);
  529. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  530. if(ndevice != 0)
  531. maxncore = malloc(sizeof((*maxncore)*ndevice));
  532. else
  533. maxncore = NULL;
  534. for(devid=0; devid < ndevice; devid++)
  535. {
  536. arch.devid = devid;
  537. _starpu_drop_comments(f);
  538. ret = fscanf(f, "%u\n", &maxncore[devid]);
  539. STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
  540. for(i=0; i<maxncore[devid]; i++)
  541. {
  542. arch.ncore = i;
  543. parse_arch(f,NULL,0,&arch);
  544. }
  545. }
  546. model->per_arch[archtype] = initialize_arch_model(ndevice,maxncore);
  547. if(maxncore != NULL)
  548. free(maxncore);
  549. }
  550. }
  551. void starpu_perfmodel_init(struct starpu_perfmodel *model)
  552. {
  553. STARPU_ASSERT(model && model->symbol);
  554. int already_init;
  555. STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  556. already_init = model->is_init;
  557. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  558. if (already_init)
  559. return;
  560. /* The model is still not loaded so we grab the lock in write mode, and
  561. * if it's not loaded once we have the lock, we do load it. */
  562. STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  563. /* Was the model initialized since the previous test ? */
  564. if (model->is_init)
  565. {
  566. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  567. return;
  568. }
  569. STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
  570. if(model->type != STARPU_COMMON)
  571. initialize_model(model);
  572. model->is_init = 1;
  573. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  574. }
  575. void starpu_perfmodel_init_with_file(FILE*f, struct starpu_perfmodel *model)
  576. {
  577. STARPU_ASSERT(model && model->symbol);
  578. int already_init;
  579. STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  580. already_init = model->is_init;
  581. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  582. if (already_init)
  583. return;
  584. /* The model is still not loaded so we grab the lock in write mode, and
  585. * if it's not loaded once we have the lock, we do load it. */
  586. STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  587. /* Was the model initialized since the previous test ? */
  588. if (model->is_init)
  589. {
  590. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  591. return;
  592. }
  593. STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
  594. if(model->type != STARPU_COMMON)
  595. initialize_model_with_file(f,model);
  596. model->is_init = 1;
  597. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  598. }
  599. static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen)
  600. {
  601. STARPU_ASSERT(path);
  602. _starpu_get_perf_model_dir_debug(path, maxlen);
  603. strncat(path, model->symbol, maxlen);
  604. char hostname[65];
  605. _starpu_gethostname(hostname, sizeof(hostname));
  606. strncat(path, ".", maxlen);
  607. strncat(path, hostname, maxlen);
  608. strncat(path, ".", maxlen);
  609. strncat(path, arch, maxlen);
  610. strncat(path, ".debug", maxlen);
  611. }
/*
 * Returns 0 if the model was already loaded, 1 otherwise.
 */
  615. int _starpu_register_model(struct starpu_perfmodel *model)
  616. {
  617. starpu_perfmodel_init(model);
  618. /* If the model has already been loaded, there is nothing to do */
  619. STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
  620. if (model->is_loaded)
  621. {
  622. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  623. return 0;
  624. }
  625. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  626. /* We have to make sure the model has not been loaded since the
  627. * last time we took the lock */
  628. STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  629. if (model->is_loaded)
  630. {
  631. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  632. return 0;
  633. }
  634. /* add the model to a linked list */
  635. struct _starpu_perfmodel_list *node = (struct _starpu_perfmodel_list *) malloc(sizeof(struct _starpu_perfmodel_list));
  636. node->model = model;
  637. //model->debug_modelid = debug_modelid++;
  638. /* put this model at the beginning of the list */
  639. node->next = registered_models;
  640. registered_models = node;
  641. #ifdef STARPU_MODEL_DEBUG
  642. _starpu_create_sampling_directory_if_needed();
  643. unsigned archtype, devid, ncore, nimpl;
  644. struct starpu_perfmodel_arch arch;
  645. _STARPU_DEBUG("\n\n ###\nHere\n ###\n\n");
  646. if(model->is_init)
  647. {
  648. _STARPU_DEBUG("Init\n");
  649. for (archtype = 0; archtype < STARPU_NARCH; archtype++)
  650. {
  651. _STARPU_DEBUG("Archtype\n");
  652. arch.type = archtype;
  653. if(model->per_arch[archtype] != NULL)
  654. {
  655. for(devid=0; model->per_arch[archtype][devid] != NULL; devid++)
  656. {
  657. _STARPU_DEBUG("Devid\n");
  658. arch.devid = devid;
  659. for(ncore=0; model->per_arch[archtype][devid][ncore] != NULL; ncore++)
  660. {
  661. _STARPU_DEBUG("Ncore\n");
  662. arch.ncore = ncore;
  663. for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
  664. {
  665. starpu_perfmodel_debugfilepath(model, &arch, model->per_arch[archtype][devid][ncore][nimpl].debug_path, 256, nimpl);
  666. }
  667. }
  668. }
  669. }
  670. }
  671. }
  672. #endif
  673. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  674. return 1;
  675. }
  676. static void get_model_path(struct starpu_perfmodel *model, char *path, size_t maxlen)
  677. {
  678. _starpu_get_perf_model_dir_codelets(path, maxlen);
  679. strncat(path, model->symbol, maxlen);
  680. char hostname[65];
  681. _starpu_gethostname(hostname, sizeof(hostname));
  682. strncat(path, ".", maxlen);
  683. strncat(path, hostname, maxlen);
  684. }
  685. static void save_history_based_model(struct starpu_perfmodel *model)
  686. {
  687. STARPU_ASSERT(model);
  688. STARPU_ASSERT(model->symbol);
  689. /* TODO checks */
  690. /* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */
  691. char path[256];
  692. get_model_path(model, path, 256);
  693. _STARPU_DEBUG("Opening performance model file %s for model %s\n", path, model->symbol);
  694. /* overwrite existing file, or create it */
  695. FILE *f;
  696. f = fopen(path, "w+");
  697. STARPU_ASSERT_MSG(f, "Could not save performance model %s\n", path);
  698. dump_model_file(f, model);
  699. fclose(f);
  700. }
  701. static void _starpu_dump_registered_models(void)
  702. {
  703. STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  704. struct _starpu_perfmodel_list *node;
  705. node = registered_models;
  706. _STARPU_DEBUG("DUMP MODELS !\n");
  707. while (node)
  708. {
  709. save_history_based_model(node->model);
  710. node = node->next;
  711. }
  712. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  713. }
  714. void _starpu_initialize_registered_performance_models(void)
  715. {
  716. registered_models = NULL;
  717. STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
  718. }
/* Free every piece of measurement data attached to <model>: for each
 * (arch, device, ncore, implementation) combination, release both the
 * footprint hash table and the history list, then the per-arch arrays
 * themselves.  Finally mark the model as neither initialized nor loaded.
 * NOTE(review): callers appear to hold model->model_rwlock around this
 * (see _starpu_deinitialize_registered_performance_models) — confirm that
 * all call sites do. */
void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
{
	unsigned arch, devid, ncore, nimpl;
	if(model->is_init && model->per_arch != NULL)
	{
		for (arch = 0; arch < STARPU_NARCH; arch++)
		{
			if( model->per_arch[arch] != NULL)
			{
				/* The per-device and per-core sub-arrays are
				 * NULL-terminated: iterate until the sentinel. */
				for(devid=0; model->per_arch[arch][devid] != NULL; devid++)
				{
					for(ncore=0; model->per_arch[arch][devid][ncore] != NULL; ncore++)
					{
						for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
						{
							struct starpu_perfmodel_per_arch *archmodel = &model->per_arch[arch][devid][ncore][nimpl];
							struct starpu_perfmodel_history_list *list, *plist;
							struct starpu_perfmodel_history_table *entry, *tmp;

							/* Free the hash-table wrapper cells only: the
							 * history entries they point to are also
							 * reachable from the list below, where they
							 * are freed exactly once. */
							HASH_ITER(hh, archmodel->history, entry, tmp)
							{
								HASH_DEL(archmodel->history, entry);
								free(entry);
							}
							archmodel->history = NULL;

							/* Free the history entries themselves along
							 * with their list cells. */
							list = archmodel->list;
							while (list)
							{
								free(list->entry);
								plist = list;
								list = list->next;
								free(plist);
							}
							archmodel->list = NULL;
						}
						free(model->per_arch[arch][devid][ncore]);
						model->per_arch[arch][devid][ncore] = NULL;
					}
					free(model->per_arch[arch][devid]);
					model->per_arch[arch][devid] = NULL;
				}
				free(model->per_arch[arch]);
				model->per_arch[arch] = NULL;
			}
		}
		free(model->per_arch);
		model->per_arch = NULL;
	}
	/* The model can now be re-initialized / re-loaded from scratch. */
	model->is_init = 0;
	model->is_loaded = 0;
}
  769. void _starpu_deinitialize_registered_performance_models(void)
  770. {
  771. if (_starpu_get_calibrate_flag())
  772. _starpu_dump_registered_models();
  773. STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
  774. struct _starpu_perfmodel_list *node, *pnode;
  775. node = registered_models;
  776. _STARPU_DEBUG("FREE MODELS !\n");
  777. while (node)
  778. {
  779. struct starpu_perfmodel *model = node->model;
  780. STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
  781. _starpu_deinitialize_performance_model(model);
  782. STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
  783. pnode = node;
  784. node = node->next;
  785. free(pnode);
  786. }
  787. registered_models = NULL;
  788. STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
  789. STARPU_PTHREAD_RWLOCK_DESTROY(&registered_models_rwlock);
  790. }
  791. /*
  792. * XXX: We should probably factorize the beginning of the _starpu_load_*_model
  793. * functions. This is a bit tricky though, because we must be sure to unlock
  794. * registered_models_rwlock at the right place.
  795. */
  796. void _starpu_load_per_arch_based_model(struct starpu_perfmodel *model)
  797. {
  798. starpu_perfmodel_init(model);
  799. }
  800. void _starpu_load_common_based_model(struct starpu_perfmodel *model)
  801. {
  802. starpu_perfmodel_init(model);
  803. }
  804. /* We first try to grab the global lock in read mode to check whether the model
  805. * was loaded or not (this is very likely to have been already loaded). If the
  806. * model was not loaded yet, we take the lock in write mode, and if the model
  807. * is still not loaded once we have the lock, we do load it. */
  808. void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
  809. {
  810. starpu_perfmodel_init(model);
  811. STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
  812. if(!model->is_loaded)
  813. {
  814. /* make sure the performance model directory exists (or create it) */
  815. _starpu_create_sampling_directory_if_needed();
  816. char path[256];
  817. get_model_path(model, path, 256);
  818. _STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol);
  819. unsigned calibrate_flag = _starpu_get_calibrate_flag();
  820. model->benchmarking = calibrate_flag;
  821. /* try to open an existing file and load it */
  822. int res;
  823. res = access(path, F_OK);
  824. if (res == 0)
  825. {
  826. if (calibrate_flag == 2)
  827. {
  828. /* The user specified that the performance model should
  829. * be overwritten, so we don't load the existing file !
  830. * */
  831. _STARPU_DEBUG("Overwrite existing file\n");
  832. }
  833. else
  834. {
  835. /* We load the available file */
  836. _STARPU_DEBUG("File exists\n");
  837. FILE *f;
  838. f = fopen(path, "r");
  839. STARPU_ASSERT(f);
  840. parse_model_file(f, model, scan_history);
  841. fclose(f);
  842. }
  843. }
  844. else
  845. {
  846. _STARPU_DEBUG("File does not exists\n");
  847. }
  848. _STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol);
  849. model->is_loaded = 1;
  850. }
  851. STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
  852. }
  853. void starpu_perfmodel_directory(FILE *output)
  854. {
  855. char perf_model_dir[256];
  856. _starpu_get_perf_model_dir_codelets(perf_model_dir, 256);
  857. fprintf(output, "directory: <%s>\n", perf_model_dir);
  858. }
  859. /* This function is intended to be used by external tools that should read
  860. * the performance model files */
  861. int starpu_perfmodel_list(FILE *output)
  862. {
  863. char path[256];
  864. DIR *dp;
  865. struct dirent *ep;
  866. char perf_model_dir_codelets[256];
  867. _starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
  868. strncpy(path, perf_model_dir_codelets, 256);
  869. dp = opendir(path);
  870. if (dp != NULL)
  871. {
  872. while ((ep = readdir(dp)))
  873. {
  874. if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
  875. fprintf(output, "file: <%s>\n", ep->d_name);
  876. }
  877. closedir (dp);
  878. }
  879. else
  880. {
  881. _STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", path, strerror(errno));
  882. }
  883. return 0;
  884. }
  885. /* This function is intended to be used by external tools that should read the
  886. * performance model files */
/* TODO: write a clear function, to free symbol and history */
  888. int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  889. {
  890. model->symbol = strdup(symbol);
  891. starpu_perfmodel_init(model);
  892. /* where is the file if it exists ? */
  893. char path[256];
  894. get_model_path(model, path, 256);
  895. // _STARPU_DEBUG("get_model_path -> %s\n", path);
  896. /* does it exist ? */
  897. int res;
  898. res = access(path, F_OK);
  899. if (res)
  900. {
  901. const char *dot = strrchr(symbol, '.');
  902. if (dot)
  903. {
  904. char *symbol2 = strdup(symbol);
  905. symbol2[dot-symbol] = '\0';
  906. int ret;
  907. _STARPU_DISP("note: loading history from %s instead of %s\n", symbol2, symbol);
  908. ret = starpu_perfmodel_load_symbol(symbol2,model);
  909. free(symbol2);
  910. return ret;
  911. }
  912. _STARPU_DISP("There is no performance model for symbol %s\n", symbol);
  913. return 1;
  914. }
  915. FILE *f = fopen(path, "r");
  916. STARPU_ASSERT(f);
  917. starpu_perfmodel_init_with_file(f, model);
  918. rewind(f);
  919. parse_model_file(f, model, 1);
  920. STARPU_ASSERT(fclose(f) == 0);
  921. return 0;
  922. }
  923. int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  924. {
  925. free((char *)model->symbol);
  926. _starpu_deinitialize_performance_model(model);
  927. return 0;
  928. }
  929. char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
  930. {
  931. switch(archtype)
  932. {
  933. case(STARPU_CPU_WORKER):
  934. return "cpu";
  935. break;
  936. case(STARPU_CUDA_WORKER):
  937. return "cuda";
  938. break;
  939. case(STARPU_OPENCL_WORKER):
  940. return "opencl";
  941. break;
  942. case(STARPU_MIC_WORKER):
  943. return "mic";
  944. break;
  945. case(STARPU_SCC_WORKER):
  946. return "scc";
  947. break;
  948. default:
  949. STARPU_ABORT();
  950. break;
  951. }
  952. }
  953. void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned nimpl)
  954. {
  955. snprintf(archname, maxlen, "%s_%dncore_%dimpl_%u",
  956. starpu_perfmodel_get_archtype_name(arch->type),
  957. arch->devid,
  958. arch->ncore,
  959. nimpl);
  960. }
  961. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
  962. struct starpu_perfmodel_arch* arch, char *path, size_t maxlen, unsigned nimpl)
  963. {
  964. char archname[32];
  965. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  966. STARPU_ASSERT(path);
  967. get_model_debug_path(model, archname, path, maxlen);
  968. }
  969. double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl)
  970. {
  971. double exp = NAN;
  972. size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
  973. struct starpu_perfmodel_regression_model *regmodel;
  974. regmodel = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl].regression;
  975. if (regmodel->valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
  976. exp = regmodel->alpha*pow((double)size, regmodel->beta);
  977. return exp;
  978. }
/* Estimate the duration of job <j> on <arch> with the non-linear
 * regression model exp = a * size^b + c.  When that regression is not
 * valid (or the size is outside the calibrated range, with a 10% margin),
 * fall back to the mean of history entries with the same footprint; when
 * no reliable estimation exists at all, force calibration for this run
 * and return NAN. */
double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp = NAN;
	size_t size = _starpu_job_get_data_size(model, arch, nimpl, j);
	struct starpu_perfmodel_regression_model *regmodel;

	regmodel = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl].regression;

	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
	else
	{
		/* Fallback: look up the per-footprint history mean.  The hash
		 * table is only consulted under the model lock. */
		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl];
		struct starpu_perfmodel_history_table *history;
		struct starpu_perfmodel_history_table *entry;

		STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
		history = per_arch_model->history;
		HASH_FIND_UINT32_T(history, &key, entry);
		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

		/* Here helgrind would shout that this is unprotected access.
		 * We do not care about racing access to the mean, we only want
		 * a good-enough estimation */

		/* Only trust the mean once enough samples were gathered. */
		if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
			exp = entry->history_entry->mean;

		STARPU_HG_DISABLE_CHECKING(model->benchmarking);
		if (isnan(exp) && !model->benchmarking)
		{
			/* No usable estimation: warn and switch the whole run
			 * into calibration mode. */
			char archname[32];

			starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
			_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
			_starpu_set_calibrate_flag(1);
			model->benchmarking = 1;
		}
	}
	return exp;
}
/* Estimate the duration of job <j> on <arch> from recorded history: look
 * up the entry with the same buffer footprint and return its mean.  When
 * not enough samples are available, force calibration for this run and
 * return NAN. */
double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl)
{
	double exp = NAN;
	struct starpu_perfmodel_per_arch *per_arch_model;
	struct starpu_perfmodel_history_entry *entry;
	struct starpu_perfmodel_history_table *history, *elt;

	uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

	per_arch_model = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl];

	/* The footprint hash table is only consulted under the model lock. */
	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
	history = per_arch_model->history;
	HASH_FIND_UINT32_T(history, &key, elt);
	entry = (elt == NULL) ? NULL : elt->history_entry;
	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);

	/* Here helgrind would shout that this is unprotected access.
	 * We do not care about racing access to the mean, we only want
	 * a good-enough estimation */

	if (entry && entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
		/* TODO: report differently if we've scheduled really enough
		 * of that task and the scheduler should perhaps put it aside */
		/* Calibrated enough */
		exp = entry->mean;

	STARPU_HG_DISABLE_CHECKING(model->benchmarking);
	if (isnan(exp) && !model->benchmarking)
	{
		/* No usable estimation: warn and switch the whole run into
		 * calibration mode. */
		char archname[32];

		starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
		_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
		_starpu_set_calibrate_flag(1);
		model->benchmarking = 1;
	}

	return exp;
}
/* Public entry point: estimate the expected duration of a task with the
 * given <footprint> on <arch>, using the history-based model only.
 * (The "permodel" spelling is part of the public API and kept as is.) */
double starpu_permodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, uint32_t footprint)
{
	/* Build a minimal fake job carrying just the footprint; all other
	 * fields — including nimpl — are zero-initialized by the designated
	 * initializer, so the lookup below uses implementation 0. */
	struct _starpu_job j =
	{
		.footprint = footprint,
		.footprint_is_computed = 1,
	};
	return _starpu_history_based_job_expected_perf(model, arch, &j, j.nimpl);
}
/* Feed one timing measurement <measured> (for implementation <nimpl> of
 * job <j> on <arch>) into <model>, under the model's write lock:
 *  - for history-based and non-linear-regression models, create or update
 *    the per-footprint history entry (mean, deviation, sample count);
 *  - for (non-)linear-regression models, accumulate the log-log sums and
 *    refresh alpha/beta once the regression becomes valid;
 *  - with STARPU_MODEL_DEBUG, also append a trace line to the per-arch
 *    debug file.
 * Does nothing when <model> is NULL. */
void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned nimpl)
{
	if (model)
	{
		STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);

		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[arch->type][arch->devid][arch->ncore][nimpl];

		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_perfmodel_history_entry *entry;
			struct starpu_perfmodel_history_table *elt;
			struct starpu_perfmodel_history_list **list;
			uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);

			list = &per_arch_model->list;
			HASH_FIND_UINT32_T(per_arch_model->history, &key, elt);
			entry = (elt == NULL) ? NULL : elt->history_entry;

			if (!entry)
			{
				/* this is the first entry with such a footprint */
				entry = (struct starpu_perfmodel_history_entry *) malloc(sizeof(struct starpu_perfmodel_history_entry));
				STARPU_ASSERT(entry);

				/* Tell helgrind that we do not care about
				 * racing access to the sampling, we only want a
				 * good-enough estimation */
				STARPU_HG_DISABLE_CHECKING(entry->nsample);
				STARPU_HG_DISABLE_CHECKING(entry->mean);

				/* Seed the statistics with this first sample. */
				entry->mean = measured;
				entry->sum = measured;

				entry->deviation = 0.0;
				entry->sum2 = measured*measured;

				entry->size = _starpu_job_get_data_size(model, arch, nimpl, j);
				entry->flops = j->task->flops;

				entry->footprint = key;
				entry->nsample = 1;

				insert_history_entry(entry, list, &per_arch_model->history);
			}
			else
			{
				/* there is already some entry with the same footprint */
				entry->sum += measured;
				entry->sum2 += measured*measured;
				entry->nsample++;

				unsigned n = entry->nsample;
				entry->mean = entry->sum / n;
				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);

				if (j->task->flops != 0.)
				{
					if (entry->flops == 0.)
						entry->flops = j->task->flops;
					else if (entry->flops != j->task->flops)
						/* Incoherent flops! forget about trying to record flops */
						entry->flops = NAN;
				}
			}
			STARPU_ASSERT(entry);
		}

		if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
		{
			struct starpu_perfmodel_regression_model *reg_model;
			reg_model = &per_arch_model->regression;

			/* update the regression model: accumulate the log-log sums
			 * used for the alpha/beta least-squares fit. */
			size_t job_size = _starpu_job_get_data_size(model, arch, nimpl, j);
			double logy, logx;
			logx = log((double)job_size);
			logy = log(measured);

			reg_model->sumlnx += logx;
			reg_model->sumlnx2 += logx*logx;
			reg_model->sumlny += logy;
			reg_model->sumlnxlny += logx*logy;

			/* Track the calibrated size range. */
			if (reg_model->minx == 0 || job_size < reg_model->minx)
				reg_model->minx = job_size;
			if (reg_model->maxx == 0 || job_size > reg_model->maxx)
				reg_model->maxx = job_size;

			reg_model->nsample++;

			if (VALID_REGRESSION(reg_model))
			{
				unsigned n = reg_model->nsample;

				/* Closed-form least-squares fit of ln(y) = ln(alpha) + beta*ln(x). */
				double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
				double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);

				reg_model->beta = num/denom;
				reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);
				reg_model->valid = 1;
			}
		}

#ifdef STARPU_MODEL_DEBUG
		/* Append a raw trace line (footprint, size, timing, predictions,
		 * cpu, and each handle's display) to the per-arch debug file. */
		struct starpu_task *task = j->task;
		FILE *f = fopen(per_arch_model->debug_path, "a+");
		if (f == NULL)
		{
			_STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path);
			STARPU_ABORT();
		}

		if (!j->footprint_is_computed)
			(void) _starpu_compute_buffers_footprint(model, arch, nimpl, j);

		STARPU_ASSERT(j->footprint_is_computed);

		fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(model, arch, nimpl, j), measured, task->predicted, task->predicted_transfer, cpuid);
		unsigned i;

		for (i = 0; i < task->cl->nbuffers; i++)
		{
			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);

			STARPU_ASSERT(handle->ops);
			STARPU_ASSERT(handle->ops->display);
			handle->ops->display(handle, f);
		}
		fprintf(f, "\n");
		fclose(f);
#endif
		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
	}
}
  1164. void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured)
  1165. {
  1166. struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
  1167. _starpu_load_perfmodel(model);
  1168. /* Record measurement */
  1169. _starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl);
  1170. /* and save perfmodel on termination */
  1171. _starpu_set_calibrate_flag(1);
  1172. }