starpu_perfmodel.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2011 Télécom-SudParis
  5. * Copyright (C) 2013 Thibaut Lambert
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #ifndef __STARPU_PERFMODEL_H__
  19. #define __STARPU_PERFMODEL_H__
  20. #include <starpu.h>
  21. #include <stdio.h>
  22. #ifdef __cplusplus
  23. extern "C"
  24. {
  25. #endif
  26. /**
  27. @defgroup API_Performance_Model Performance Model
  28. @{
  29. */
  30. struct starpu_task;
  31. struct starpu_data_descr;
  32. #define STARPU_NARCH STARPU_ANY_WORKER
  33. /**
  34. Describe one device of an architecture: its type, its identifier and its degree of parallelism.
  35. */
  36. struct starpu_perfmodel_device
  37. {
  38. enum starpu_worker_archtype type; /**< type of the device */
  39. int devid; /**< identifier of the precise device */
  40. int ncores; /**< number of executions in parallel, minus 1 */
  41. };
  42. /**
  43. Describe an architecture as a set of devices (see starpu_perfmodel_device).
  44. */
  45. struct starpu_perfmodel_arch
  46. {
  47. int ndevices; /**< number of devices for the given arch */
  48. struct starpu_perfmodel_device *devices; /**< list of the devices for the given arch */
  49. };
  /**
  One entry of the measurement history: running statistics over the
  recorded samples, together with the data footprint and size they refer to.
  */
  50. struct starpu_perfmodel_history_entry
  51. {
  52. double mean; /**< mean_n = 1/n sum */
  53. double deviation; /**< n dev_n = sum2 - 1/n (sum)^2 */
  54. double sum; /**< sum of samples (in µs) */
  55. double sum2; /**< sum of samples^2 */
  56. unsigned nsample; /**< number of samples */
  57. unsigned nerror; /**< NOTE(review): undocumented; presumably a count of erroneous measurements - confirm */
  58. uint32_t footprint; /**< data footprint */
  59. size_t size; /**< in bytes */
  60. double flops; /**< Provided by the application */
  61. double duration; /**< NOTE(review): undocumented; meaning and unit to be confirmed */
  62. starpu_tag_t tag; /**< NOTE(review): undocumented; purpose of this tag to be confirmed */
  63. double *parameters; /**< NOTE(review): undocumented; presumably the parameter values used by ::STARPU_MULTIPLE_REGRESSION_BASED models - confirm */
  64. };
  /**
  Node of a singly-linked list of history entries.
  */
  65. struct starpu_perfmodel_history_list
  66. {
  67. struct starpu_perfmodel_history_list *next; /**< next node of the list */
  68. struct starpu_perfmodel_history_entry *entry; /**< history entry referenced by this node */
  69. };
  70. /**
  71. Accumulated sums and fitted coefficients of the regression-based models: linear (alpha * size ^ beta), non-linear (a * size ^ b + c) and multiple linear (coeff).
  72. */
  73. struct starpu_perfmodel_regression_model
  74. {
  75. double sumlny; /**< sum of ln(measured) */
  76. double sumlnx; /**< sum of ln(size) */
  77. double sumlnx2; /**< sum of ln(size)^2 */
  78. unsigned long minx; /**< minimum size */
  79. unsigned long maxx; /**< maximum size */
  80. double sumlnxlny; /**< sum of ln(size)*ln(measured) */
  81. double alpha; /**< estimated = alpha * size ^ beta */
  82. double beta; /**< estimated = alpha * size ^ beta */
  83. unsigned valid; /**< whether the linear regression model is valid (i.e. enough measures) */
  84. double a; /**< estimated = a size ^b + c */
  85. double b; /**< estimated = a size ^b + c */
  86. double c; /**< estimated = a size ^b + c */
  87. unsigned nl_valid; /**< whether the non-linear regression model is valid (i.e. enough measures) */
  88. unsigned nsample; /**< number of sample values for non-linear regression */
  89. double *coeff; /**< list of computed coefficients for multiple linear regression model */
  90. unsigned ncoeff; /**< number of coefficients for multiple linear regression model */
  91. unsigned multi_valid; /**< whether the multiple linear regression model is valid */
  92. };
  93. struct starpu_perfmodel_history_table;
  94. #define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED
  95. typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
  96. typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
  97. /**
  98. Information about the performance model of a given arch.
  99. */
  100. struct starpu_perfmodel_per_arch
  101. {
  102. /**
  103. Used by ::STARPU_PER_ARCH, must point to functions which take a
  104. task, the target arch and implementation number (as mere
  105. convenience, since the array is already indexed by these), and
  106. must return a task duration estimation in micro-seconds.
  107. */
  108. starpu_perfmodel_per_arch_cost_function cost_function;
  109. /**
  110. Same as in structure starpu_perfmodel, but per-arch, in case it
  111. depends on the architecture-specific implementation.
  112. */
  113. starpu_perfmodel_per_arch_size_base size_base;
  114. /**
  115. \private
  116. The history of performance measurements.
  117. */
  118. struct starpu_perfmodel_history_table *history;
  119. /**
  120. \private
  121. Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
  122. ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
  123. measures.
  124. */
  125. struct starpu_perfmodel_history_list *list;
  126. /**
  127. \private
  128. Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
  129. and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
  130. factors of the regression.
  131. */
  132. struct starpu_perfmodel_regression_model regression;
  133. char debug_path[256]; /**< NOTE(review): undocumented; presumably the path of the debug file, cf. starpu_perfmodel_debugfilepath() - confirm */
  134. };
  135. /**
  136. Kinds of performance model; each value selects how the cost of a task is estimated.
  137. */
  138. enum starpu_perfmodel_type
  139. {
  140. STARPU_PERFMODEL_INVALID=0,
  141. STARPU_PER_WORKER, /**< Application-provided per-worker cost model function */
  142. STARPU_PER_ARCH, /**< Application-provided per-arch cost model function */
  143. STARPU_COMMON, /**< Application-provided common cost model function, with per-arch factor */
  144. STARPU_HISTORY_BASED, /**< Automatic history-based cost model */
  145. STARPU_REGRESSION_BASED, /**< Automatic linear regression-based cost model (alpha * size ^ beta) */
  146. STARPU_NL_REGRESSION_BASED, /**< Automatic non-linear regression-based cost model (a * size ^ b + c) */
  147. STARPU_MULTIPLE_REGRESSION_BASED /**< Automatic multiple linear regression-based cost model. Application
  148. provides parameters, their combinations and exponents. */
  149. };
  150. struct _starpu_perfmodel_state;
  151. typedef struct _starpu_perfmodel_state* starpu_perfmodel_state_t;
  152. /**
  153. Contain all information about a performance model. At least the
  154. type and symbol fields have to be filled when defining a performance
  155. model for a codelet. For compatibility, make sure to initialize the
  156. whole structure to zero, either by using explicit memset, or by
  157. letting the compiler implicitly do it in e.g. static storage case. If
  158. not provided, other fields have to be zero.
  159. */
  160. struct starpu_perfmodel
  161. {
  162. /**
  163. type of performance model
  164. <ul>
  165. <li>
  166. ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  167. ::STARPU_NL_REGRESSION_BASED: no other field needs to be
  168. provided, this is purely history-based.
  169. </li>
  170. <li>
  171. ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
  172. starpu_perfmodel::nparameters (number of different parameters),
  173. starpu_perfmodel::ncombinations (number of parameters
  174. combinations-tuples) and table starpu_perfmodel::combinations
  175. which defines exponents of the equation. Function cl_perf_func
  176. also needs to define how to extract parameters from the task.
  177. </li>
  178. <li>
  179. ::STARPU_PER_ARCH: either field
  180. starpu_perfmodel::arch_cost_function has to be filled with a
  181. function that returns the cost in micro-seconds on the arch given
  182. as parameter, or field starpu_perfmodel::per_arch has to be filled
  183. with functions which return the cost in micro-seconds.
  184. </li>
  185. <li>
  186. ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  187. filled with a function that returns the cost in micro-seconds on a
  188. CPU, timing on other archs will be determined by multiplying by an
  189. arch-specific factor.
  190. </li>
  191. </ul>
  192. */
  193. enum starpu_perfmodel_type type;
  194. /**
  195. Used by ::STARPU_COMMON. Take a task and implementation number,
  196. and must return a task duration estimation in micro-seconds.
  197. */
  198. double (*cost_function)(struct starpu_task *, unsigned nimpl);
  199. /**
  200. Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
  201. number, and must return a task duration estimation in
  202. micro-seconds on that arch.
  203. */
  204. double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
  205. /**
  206. Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation
  207. number, and must return a task duration estimation in
  208. micro-seconds on that worker.
  209. */
  210. double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl);
  211. /**
  212. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  213. ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
  214. implementation number, and return the size to be used as index to
  215. distinguish histories and as a base for regressions.
  216. */
  217. size_t (*size_base)(struct starpu_task *, unsigned nimpl);
  218. /**
  219. Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
  220. and return the footprint to be used as index to distinguish
  221. histories. The default is to use the starpu_task_data_footprint()
  222. function.
  223. */
  224. uint32_t (*footprint)(struct starpu_task *);
  225. /**
  226. symbol name for the performance model, which will be used as file
  227. name to store the model. It must be set otherwise the model will
  228. be ignored.
  229. */
  230. const char *symbol;
  231. /**
  232. \private
  233. Whether the performance model is already loaded from the disk.
  234. */
  235. unsigned is_loaded;
  236. /**
  237. \private
  238. */
  239. unsigned benchmarking;
  240. /**
  241. \private
  242. */
  243. unsigned is_init;
  244. void (*parameters)(struct starpu_task * task, double *parameters); /**< NOTE(review): undocumented; presumably fills \p parameters from \p task for ::STARPU_MULTIPLE_REGRESSION_BASED models - confirm */
  245. /**
  246. \private
  247. Names of parameters used for multiple linear regression models (M,
  248. N, K)
  249. */
  250. const char **parameters_names;
  251. /**
  252. \private
  253. Number of parameters used for multiple linear regression models
  254. */
  255. unsigned nparameters;
  256. /**
  257. \private
  258. Table of combinations of parameters (and the exponents) used for
  259. multiple linear regression models
  260. */
  261. unsigned **combinations;
  262. /**
  263. \private
  264. Number of combination of parameters used for multiple linear
  265. regression models
  266. */
  267. unsigned ncombinations;
  268. /**
  269. \private
  270. */
  271. starpu_perfmodel_state_t state;
  272. };
  273. /**
  274. Initialize the \p model performance model structure. This is automatically
  275. called when e.g. submitting a task using a codelet using this performance model.
  276. */
  277. void starpu_perfmodel_init(struct starpu_perfmodel *model);
  278. /**
  279. Deinitialize the \p model performance model structure. You need to call this
  280. before deallocating the structure. You will probably want to call
  281. starpu_perfmodel_unload_model() before calling this function, to save the perfmodel.
  282. */
  283. int starpu_perfmodel_deinit(struct starpu_perfmodel *model);
  284. /**
  285. starpu_energy_start - start counting hardware events in an event set
  286. - \p workerid is the worker on which calibration is to be performed (give the worker id for GPUs; use -1 for CPUs)
  287. - \p archi is the type of architecture on which calibration will be run
  288. */
  289. int starpu_energy_start(int workerid, enum starpu_worker_archtype archi);
  290. /**
  291. starpu_energy_stop - stop counting hardware events in an event set
  292. - \p model is the energy performance model to be filled with the result
  293. - \p task is a task specimen, so the performance model folds the result according to the parameter sizes of the task.
  294. - \p nimpl is the implementation number run during calibration
  295. - \p ntasks is the number of tasks run during calibration
  296. - \p workerid is the worker on which calibration was performed (give the worker id for GPUs; use -1 for CPUs)
  297. - \p archi is the type of architecture on which calibration was run
  298. */
  299. int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi);
  300. /**
  301. Load the performance model found in the file named \p filename. \p model has to be
  302. completely zero, and will be filled with the information stored in the given file.
  303. */
  304. int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model);
  305. /**
  306. Load a given performance model. \p model has to be
  307. completely zero, and will be filled with the information stored in
  308. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  309. external tools that want to read the performance model files.
  310. */
  311. int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model);
  312. /**
  313. Unload \p model which has been previously loaded
  314. through the function starpu_perfmodel_load_symbol()
  315. */
  316. int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
  317. /**
  318. Save the performance model in its file.
  319. */
  320. void starpu_save_history_based_model(struct starpu_perfmodel *model);
  321. /**
  322. Fill \p path (supposed to be \p maxlen long) with the full path to the
  323. performance model file for symbol \p symbol. This path can later on be used
  324. for instance with starpu_perfmodel_load_file().
  325. */
  326. void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
  327. /**
  328. Dump performance model \p model to output stream \p output, in XML format.
  329. */
  330. void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model);
  331. /**
  332. Free internal memory used for sampling
  333. management. It should only be called by an application which is not
  334. calling starpu_shutdown(), since starpu_shutdown() already calls it. See for
  335. example <c>tools/starpu_perfmodel_display.c</c>.
  336. */
  337. void starpu_perfmodel_free_sampling(void);
  338. /**
  339. Return the architecture type of the worker \p workerid.
  340. */
  341. struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id);
  342. int starpu_perfmodel_get_narch_combs(void);
  343. int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
  344. int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
  345. struct starpu_perfmodel_arch *starpu_perfmodel_arch_comb_fetch(int comb);
  346. struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl);
  347. struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...);
  348. int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...);
  349. int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...);
  350. /**
  351. Return the path to the debugging information for the performance model.
  352. */
  353. void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
  354. char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
  355. /**
  356. Return the architecture name for \p arch
  357. */
  358. void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
  359. /**
  360. Return the estimated time in µs of a task with the given model and the given footprint.
  361. */
  362. double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint);
  363. /**
  364. If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.
  365. */
  366. void starpu_perfmodel_initialize(void);
  367. /**
  368. Print a list of all performance models on \p output
  369. */
  370. int starpu_perfmodel_list(FILE *output);
  371. void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output);
  372. int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
  373. int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output);
  374. int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model);
  375. /**
  376. Feed the performance model \p model with one explicit
  377. measurement (in µs or J), in addition to measurements done by StarPU
  378. itself. This can be useful when the application already has an
  379. existing set of measurements done in good conditions, that StarPU
  380. could benefit from instead of doing on-line measurements. An example
  381. of use can be seen in \ref PerformanceModelExample.
  382. Note that this records only one measurement, and StarPU would ignore
  383. the first measurement (since it is usually disturbed by library loading
  384. etc.). Make sure to call this function several times to record all your
  385. measurements.
  386. You can also call starpu_perfmodel_update_history_n() to directly provide an
  387. average performed on several tasks.
  388. */
  389. void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
  390. /**
  391. Feed the performance model \p model with an explicit average measurement (in µs or J).
  392. This is similar to starpu_perfmodel_update_history(), but records a batch of
  393. \p number measurements provided as the average of the measurements \p average_measured.
  394. */
  395. void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double average_measured, unsigned number);
  396. /**
  397. Print the directory name storing performance models on \p output
  398. */
  399. void starpu_perfmodel_directory(FILE *output);
  400. /**
  401. Print a matrix of bus bandwidths on \p f.
  402. */
  403. void starpu_bus_print_bandwidth(FILE *f);
  404. /**
  405. Print the affinity devices on \p f.
  406. */
  407. void starpu_bus_print_affinity(FILE *f);
  408. /**
  409. Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
  410. */
  411. void starpu_bus_print_filenames(FILE *f);
  412. /**
  413. Return the bandwidth of data transfer between two memory nodes
  414. */
  415. double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node);
  416. /**
  417. Return the latency of data transfer between two memory nodes
  418. */
  419. double starpu_transfer_latency(unsigned src_node, unsigned dst_node);
  420. /**
  421. Return the estimated time to transfer a given size between two memory nodes.
  422. */
  423. double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
  424. /**
  425. Performance model which always returns 1µs.
  426. */
  427. extern struct starpu_perfmodel starpu_perfmodel_nop;
  428. /** @} */
  429. #ifdef __cplusplus
  430. }
  431. #endif
  432. #endif /* __STARPU_PERFMODEL_H__ */