perfmodel_print.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2011 Télécom-SudParis
  5. * Copyright (C) 2013 Thibaut Lambert
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <starpu.h>
  19. #include <starpu_perfmodel.h>
  20. #include <common/config.h>
  21. #include <core/workers.h>
  22. #include "perfmodel.h"
  23. static
  24. void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per_arch_model, char *parameter, uint32_t *footprint, FILE *output)
  25. {
  26. struct starpu_perfmodel_history_list *ptr;
  27. ptr = per_arch_model->list;
  28. if (!parameter && ptr)
  29. fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n");
  30. while (ptr)
  31. {
  32. struct starpu_perfmodel_history_entry *entry = ptr->entry;
  33. if (!footprint || entry->footprint == *footprint)
  34. {
  35. if (!parameter)
  36. {
  37. /* There isn't a parameter that is explicitely requested, so we display all parameters */
  38. fprintf(output, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint,
  39. (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample);
  40. }
  41. else
  42. {
  43. /* only display the parameter that was specifically requested */
  44. if (strcmp(parameter, "mean") == 0)
  45. {
  46. fprintf(output, "%-15e\n", entry->mean);
  47. }
  48. if (strcmp(parameter, "stddev") == 0)
  49. {
  50. fprintf(output, "%-15e\n", entry->deviation);
  51. return;
  52. }
  53. }
  54. }
  55. ptr = ptr->next;
  56. }
  57. }
  58. void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  59. {
  60. int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
  61. STARPU_ASSERT(comb != -1);
  62. struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][nimpl];
  63. if (arch_model->regression.nsample || arch_model->regression.valid || arch_model->regression.nl_valid || arch_model->list)
  64. {
  65. char archname[32];
  66. starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
  67. fprintf(output, "# performance model for %s\n", archname);
  68. }
  69. if (parameter == NULL)
  70. {
  71. /* no specific parameter was requested, so we display everything */
  72. if (arch_model->regression.nsample)
  73. {
  74. fprintf(output, "\tRegression : #sample = %u\n", arch_model->regression.nsample);
  75. }
  76. /* Only display the regression model if we could actually build a model */
  77. if (arch_model->regression.valid)
  78. {
  79. fprintf(output, "\tLinear: y = alpha size ^ beta\n");
  80. fprintf(output, "\t\talpha = %e\n", arch_model->regression.alpha);
  81. fprintf(output, "\t\tbeta = %e\n", arch_model->regression.beta);
  82. }
  83. else
  84. {
  85. //fprintf(output, "\tLinear model is INVALID\n");
  86. }
  87. if (arch_model->regression.nl_valid)
  88. {
  89. fprintf(output, "\tNon-Linear: y = a size ^b + c\n");
  90. fprintf(output, "\t\ta = %e\n", arch_model->regression.a);
  91. fprintf(output, "\t\tb = %e\n", arch_model->regression.b);
  92. fprintf(output, "\t\tc = %e\n", arch_model->regression.c);
  93. }
  94. else
  95. {
  96. //fprintf(output, "\tNon-Linear model is INVALID\n");
  97. }
  98. _starpu_perfmodel_print_history_based(arch_model, parameter, footprint, output);
  99. #if 0
  100. char debugname[1024];
  101. starpu_perfmodel_debugfilepath(model, arch, debugname, 1024, nimpl);
  102. _STARPU_MSG("\t debug file path : %s\n", debugname);
  103. #endif
  104. }
  105. else
  106. {
  107. /* only display the parameter that was specifically requested */
  108. if (strcmp(parameter, "a") == 0)
  109. {
  110. fprintf(output, "%e\n", arch_model->regression.a);
  111. return;
  112. }
  113. if (strcmp(parameter, "b") == 0)
  114. {
  115. fprintf(output, "%e\n", arch_model->regression.b);
  116. return;
  117. }
  118. if (strcmp(parameter, "c") == 0)
  119. {
  120. fprintf(output, "%e\n", arch_model->regression.c);
  121. return;
  122. }
  123. if (strcmp(parameter, "alpha") == 0)
  124. {
  125. fprintf(output, "%e\n", arch_model->regression.alpha);
  126. return;
  127. }
  128. if (strcmp(parameter, "beta") == 0)
  129. {
  130. fprintf(output, "%e\n", arch_model->regression.beta);
  131. return;
  132. }
  133. if (strcmp(parameter, "path-file-debug") == 0)
  134. {
  135. char debugname[256];
  136. starpu_perfmodel_debugfilepath(model, arch, debugname, 256, nimpl);
  137. fprintf(output, "%s\n", debugname);
  138. return;
  139. }
  140. if ((strcmp(parameter, "mean") == 0) || (strcmp(parameter, "stddev") == 0))
  141. {
  142. _starpu_perfmodel_print_history_based(arch_model, parameter, footprint, output);
  143. return;
  144. }
  145. /* TODO display if it's valid ? */
  146. _STARPU_ERROR("Unknown parameter requested, aborting.\n");
  147. }
  148. }
  149. int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  150. {
  151. _starpu_init_and_load_perfmodel(model);
  152. if (arch == NULL)
  153. {
  154. int comb, impl;
  155. for(comb = 0; comb < starpu_perfmodel_get_narch_combs(); comb++)
  156. {
  157. struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb);
  158. int nimpls = model->state ? model->state->nimpls[comb] : 0;
  159. for(impl = 0; impl < nimpls; impl++)
  160. starpu_perfmodel_print(model, arch_comb, impl, parameter, footprint, output);
  161. }
  162. }
  163. else
  164. {
  165. if (strcmp(arch, "cpu") == 0)
  166. {
  167. int implid;
  168. struct starpu_perfmodel_arch perf_arch;
  169. perf_arch.ndevices = 1;
  170. _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  171. perf_arch.devices[0].type = STARPU_CPU_WORKER;
  172. perf_arch.devices[0].devid = 0;
  173. perf_arch.devices[0].ncores = 1;
  174. int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
  175. STARPU_ASSERT(comb != -1);
  176. int nimpls = model->state->nimpls[comb];
  177. for (implid = 0; implid < nimpls; implid++)
  178. starpu_perfmodel_print(model, &perf_arch,implid, parameter, footprint, output); /* Display all codelets on cpu */
  179. free(perf_arch.devices);
  180. return 0;
  181. }
  182. int k;
  183. if (sscanf(arch, "cpu:%d", &k) == 1)
  184. {
  185. /* For combined CPU workers */
  186. if ((k < 1) || (k > STARPU_MAXCPUS))
  187. {
  188. _STARPU_ERROR("Invalid CPU size\n");
  189. }
  190. int implid;
  191. struct starpu_perfmodel_arch perf_arch;
  192. perf_arch.ndevices = 1;
  193. _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  194. perf_arch.devices[0].type = STARPU_CPU_WORKER;
  195. perf_arch.devices[0].devid = 0;
  196. perf_arch.devices[0].ncores = k;
  197. int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
  198. STARPU_ASSERT(comb != -1);
  199. int nimpls = model->state->nimpls[comb];
  200. for (implid = 0; implid < nimpls; implid++)
  201. starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
  202. free(perf_arch.devices);
  203. return 0;
  204. }
  205. if (strcmp(arch, "cuda") == 0)
  206. {
  207. int implid;
  208. struct starpu_perfmodel_arch perf_arch;
  209. perf_arch.ndevices = 1;
  210. _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  211. perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  212. perf_arch.devices[0].ncores = 1;
  213. int comb;
  214. for(comb = 0; comb < starpu_perfmodel_get_narch_combs(); comb++)
  215. {
  216. struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb);
  217. if(arch_comb->ndevices == 1 && arch_comb->devices[0].type == STARPU_CUDA_WORKER)
  218. {
  219. perf_arch.devices[0].devid = arch_comb->devices[0].devid;
  220. int nimpls = model->state->nimpls[comb];
  221. for (implid = 0; implid < nimpls; implid++)
  222. starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
  223. }
  224. }
  225. free(perf_arch.devices);
  226. return 0;
  227. }
  228. /* TODO: There must be a cleaner way ! */
  229. int gpuid;
  230. int nmatched;
  231. nmatched = sscanf(arch, "cuda_%d", &gpuid);
  232. if (nmatched == 0)
  233. nmatched = sscanf(arch, "cuda%d", &gpuid);
  234. if (nmatched == 1)
  235. {
  236. struct starpu_perfmodel_arch perf_arch;
  237. perf_arch.ndevices = 1;
  238. _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device));
  239. perf_arch.devices[0].type = STARPU_CUDA_WORKER;
  240. perf_arch.devices[0].devid = gpuid;
  241. perf_arch.devices[0].ncores = 1;
  242. int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
  243. STARPU_ASSERT(comb != -1);
  244. int nimpls = model->state->nimpls[comb];
  245. int implid;
  246. for (implid = 0; implid < nimpls; implid++)
  247. starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
  248. return 0;
  249. }
  250. _STARPU_MSG("Unknown architecture requested\n");
  251. return -1;
  252. }
  253. return 0;
  254. }
  255. int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
  256. {
  257. unsigned workerid;
  258. for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
  259. {
  260. struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS);
  261. int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
  262. struct starpu_perfmodel_per_arch *arch_model;
  263. struct starpu_perfmodel_history_list *ptr = NULL;
  264. if (comb >= 0 && model->state->per_arch[comb])
  265. {
  266. arch_model = &model->state->per_arch[comb][0];
  267. for (ptr = arch_model->list; ptr; ptr = ptr->next)
  268. {
  269. struct starpu_perfmodel_history_entry *entry = ptr->entry;
  270. if (entry->footprint == footprint)
  271. {
  272. fprintf(output, "%s%e", workerid?" ":"", entry->mean);
  273. break;
  274. }
  275. }
  276. }
  277. if (!ptr)
  278. {
  279. /* Didn't find any entry :/ */
  280. fprintf(output, "%sinf", workerid?" ":"");
  281. }
  282. }
  283. return 0;
  284. }