performance_model.doxy 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2013,2016 Inria
  4. * Copyright (C) 2010-2017 CNRS
  5. * Copyright (C) 2009-2011,2013-2017 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /*! \defgroup API_Performance_Model Performance Model
  19. \enum starpu_perfmodel_type
  20. \ingroup API_Performance_Model
  21. TODO
  22. \var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
  23. todo
  24. \var starpu_perfmodel_type::STARPU_PER_ARCH
  25. Application-provided per-arch cost model function
  26. \var starpu_perfmodel_type::STARPU_COMMON
  27. Application-provided common cost model function, with per-arch
  28. factor
  29. \var starpu_perfmodel_type::STARPU_HISTORY_BASED
  30. Automatic history-based cost model
  31. \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
  32. Automatic linear regression-based cost model (alpha * size ^
  33. beta)
  34. \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
  35. Automatic non-linear regression-based cost model (a * size ^ b +
  36. c)
  37. \var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
  38. Automatic multiple linear regression-based cost model. Application
  39. provides parameters, their combinations and exponents.
  40. \struct starpu_perfmodel_device
  41. todo
  42. \ingroup API_Performance_Model
  43. \var enum starpu_worker_archtype starpu_perfmodel_device::type
  44. type of the device
  45. \var int starpu_perfmodel_device::devid
  46. identifier of the precise device
  47. \var int starpu_perfmodel_device::ncore
  48. number of executions in parallel, minus 1
  49. \struct starpu_perfmodel_arch
  50. todo
  51. \ingroup API_Performance_Model
  52. \var int starpu_perfmodel_arch::ndevices
  53. number of the devices for the given arch
  54. \var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
  55. list of the devices for the given arch
  56. \struct starpu_perfmodel
  57. Contain all information about a performance model. At least the
  58. type and symbol fields have to be filled when defining a performance
  59. model for a codelet. For compatibility, make sure to initialize the
  60. whole structure to zero, either by using explicit memset, or by
  61. letting the compiler implicitly do it in e.g. static storage case. If
  62. not provided, other fields have to be zero.
  63. \ingroup API_Performance_Model
  64. \var enum starpu_perfmodel_type starpu_perfmodel::type
  65. type of performance model
  66. <ul>
  67. <li>
  68. ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  69. ::STARPU_NL_REGRESSION_BASED: No other field needs to be
  70. provided, this is purely history-based.
  71. </li>
  72. <li>
  73. ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
  74. starpu_perfmodel::nparameters (number of different parameters),
  75. starpu_perfmodel::ncombinations (number of parameters
  76. combinations-tuples) and table starpu_perfmodel::combinations
  77. which defines exponents of the equation. Function cl_perf_func
  78. also needs to define how to extract parameters from the task.
  79. </li>
  80. <li>
  81. ::STARPU_PER_ARCH: either field
  82. starpu_perfmodel::arch_cost_function has to be filled with a
  83. function that returns the cost in micro-seconds on the arch given
  84. as parameter, or field starpu_perfmodel::per_arch has to be filled
  85. with functions which return the cost in micro-seconds.
  86. </li>
  87. <li>
  88. ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  89. filled with a function that returns the cost in micro-seconds on a
  90. CPU, timing on other archs will be determined by multiplying by an
  91. arch-specific factor.
  92. </li>
  93. </ul>
  94. \var const char *starpu_perfmodel::symbol
  95. symbol name for the performance model, which will be used as file
  96. name to store the model. It must be set otherwise the model will
  97. be ignored.
  98. \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
  99. Used by ::STARPU_COMMON. Take a task and implementation number,
  100. and must return a task duration estimation in micro-seconds.
  101. \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  102. Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
  103. number, and must return a task duration estimation in
  104. micro-seconds on that arch.
  105. \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
  106. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  107. ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
  108. implementation number, and return the size to be used as index to
  109. distinguish histories and as a base for regressions.
  110. \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
  111. Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
  112. and return the footprint to be used as index to distinguish
  113. histories. The default is to use the starpu_task_data_footprint()
  114. function.
  115. \var unsigned starpu_perfmodel::is_loaded
  116. \private
  117. Whether the performance model is already loaded from the disk.
  118. \var unsigned starpu_perfmodel::benchmarking
  119. \private
  120. todo
  121. \var unsigned starpu_perfmodel::is_init
  122. todo
  123. \var starpu_perfmodel_state_t starpu_perfmodel::state
  124. \private
  125. todo
  126. \var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
  127. todo
  128. \var const char ** starpu_perfmodel::parameters_names
  129. \private
  130. Names of parameters used for multiple linear regression models (M,
  131. N, K)
  132. \var unsigned starpu_perfmodel::nparameters
  133. \private
  134. Number of parameters used for multiple linear regression models
  135. \var unsigned ** starpu_perfmodel::combinations
  136. \private
  137. Table of combinations of parameters (and the exponents) used for
  138. multiple linear regression models
  139. \var unsigned starpu_perfmodel::ncombinations
  140. \private
  141. Number of combinations of parameters used for multiple linear
  142. regression models
  143. \struct starpu_perfmodel_regression_model
  144. todo
  145. \ingroup API_Performance_Model
  146. \var double starpu_perfmodel_regression_model::sumlny
  147. sum of ln(measured)
  148. \var double starpu_perfmodel_regression_model::sumlnx
  149. sum of ln(size)
  150. \var double starpu_perfmodel_regression_model::sumlnx2
  151. sum of ln(size)^2
  152. \var unsigned long starpu_perfmodel_regression_model::minx
  153. minimum size
  154. \var unsigned long starpu_perfmodel_regression_model::maxx
  155. maximum size
  156. \var double starpu_perfmodel_regression_model::sumlnxlny
  157. sum of ln(size)*ln(measured)
  158. \var double starpu_perfmodel_regression_model::alpha
  159. estimated = alpha * size ^ beta
  160. \var double starpu_perfmodel_regression_model::beta
  161. estimated = alpha * size ^ beta
  162. \var unsigned starpu_perfmodel_regression_model::valid
  163. whether the linear regression model is valid (i.e. enough measures)
  164. \var double starpu_perfmodel_regression_model::a
  165. estimated = a * size ^ b + c
  166. \var double starpu_perfmodel_regression_model::b
  167. estimated = a * size ^ b + c
  168. \var double starpu_perfmodel_regression_model::c
  169. estimated = a * size ^ b + c
  170. \var unsigned starpu_perfmodel_regression_model::nl_valid
  171. whether the non-linear regression model is valid (i.e. enough measures)
  172. \var unsigned starpu_perfmodel_regression_model::nsample
  173. number of sample values for non-linear regression
  174. \var double starpu_perfmodel_regression_model::coeff[]
  175. list of computed coefficients for multiple linear regression model
  176. \var double starpu_perfmodel_regression_model::ncoeff
  177. number of coefficients for multiple linear regression model
  178. \var double starpu_perfmodel_regression_model::multi_valid
  179. whether the multiple linear regression model is valid
  180. \struct starpu_perfmodel_per_arch
  181. contains information about the performance model of a given
  182. arch.
  183. \ingroup API_Performance_Model
  184. \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
  185. Used by ::STARPU_PER_ARCH, must point to functions which take a
  186. task, the target arch and implementation number (as a mere
  187. convenience, since the array is already indexed by these), and
  188. must return a task duration estimation in micro-seconds.
  189. \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
  190. Same as in structure starpu_perfmodel, but per-arch, in case it
  191. depends on the architecture-specific implementation.
  192. \var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
  193. \private
  194. The history of performance measurements.
  195. \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
  196. \private
  197. Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
  198. ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
  199. measures.
  200. \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
  201. \private
  202. Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
  203. and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
  204. factors of the regression.
  205. \struct starpu_perfmodel_history_list
  206. todo
  207. \ingroup API_Performance_Model
  208. \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
  209. todo
  210. \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
  211. todo
  212. \struct starpu_perfmodel_history_entry
  213. todo
  214. \ingroup API_Performance_Model
  215. \var double starpu_perfmodel_history_entry::mean
  216. mean_n = 1/n sum
  217. \var double starpu_perfmodel_history_entry::deviation
  218. n dev_n = sum2 - 1/n (sum)^2
  219. \var double starpu_perfmodel_history_entry::sum
  220. sum of samples (in µs)
  221. \var double starpu_perfmodel_history_entry::sum2
  222. sum of samples^2
  223. \var unsigned starpu_perfmodel_history_entry::nsample
  224. number of samples
  225. \var uint32_t starpu_perfmodel_history_entry::footprint
  226. data footprint
  227. \var size_t starpu_perfmodel_history_entry::size
  228. in bytes
  229. \var double starpu_perfmodel_history_entry::flops
  230. Provided by the application
  231. \fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
  232. \ingroup API_Performance_Model
  233. todo
  234. \fn void starpu_perfmodel_free_sampling_directories(void)
  235. \ingroup API_Performance_Model
  236. Free internal memory used for sampling directory
  237. management. It should only be called by an application which is not
  238. calling starpu_shutdown(), as starpu_shutdown() already calls it. See for
  239. example <c>tools/starpu_perfmodel_display.c</c>.
  240. \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
  241. \ingroup API_Performance_Model
  242. Load the performance model found in the file named \p filename. \p model has to be
  243. completely zero, and will be filled with the information stored in the given file.
  244. \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  245. \ingroup API_Performance_Model
  246. Load a given performance model. \p model has to be
  247. completely zero, and will be filled with the information stored in
  248. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  249. external tools that want to read the performance model files.
  250. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  251. \ingroup API_Performance_Model
  252. Unload \p model which has been previously loaded
  253. through the function starpu_perfmodel_load_symbol()
  254. \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
  255. \ingroup API_Performance_Model
  256. Return the path to the debugging information for the performance model.
  257. \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
  258. \ingroup API_Performance_Model
  259. todo
  260. \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
  261. \ingroup API_Performance_Model
  262. Return the architecture name for \p arch
  263. \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
  264. \ingroup API_Performance_Model
  265. Return the architecture type of the worker \p workerid.
  266. \fn void starpu_perfmodel_initialize(void)
  267. \ingroup API_Performance_Model
  268. If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.
  269. \fn int starpu_perfmodel_list(FILE *output)
  270. \ingroup API_Performance_Model
  271. Print a list of all performance models on \p output
  272. \fn void starpu_perfmodel_directory(FILE *output)
  273. \ingroup API_Performance_Model
  274. Print the directory name storing performance models on \p output
  275. \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  276. \ingroup API_Performance_Model
  277. todo
  278. \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  279. \ingroup API_Performance_Model
  280. todo
  281. \fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
  282. \ingroup API_Performance_Model
  283. todo
  284. \fn void starpu_bus_print_bandwidth(FILE *f)
  285. \ingroup API_Performance_Model
  286. Print a matrix of bus bandwidths on \p f.
  287. \fn void starpu_bus_print_affinity(FILE *f)
  288. \ingroup API_Performance_Model
  289. Print the affinity devices on \p f.
  290. \fn void starpu_bus_print_filenames(FILE *f)
  291. \ingroup API_Performance_Model
  292. Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
  293. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
  294. \ingroup API_Performance_Model
  295. Feed the performance model \p model with an explicit
  296. measurement \p measured (in µs), in addition to measurements done by StarPU
  297. itself. This can be useful when the application already has an
  298. existing set of measurements done in good conditions, that StarPU
  299. could benefit from instead of doing on-line measurements. An example
  300. of use can be seen in \ref PerformanceModelExample.
  301. \fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
  302. \ingroup API_Performance_Model
  303. Return the bandwidth of data transfer between two memory nodes
  304. \fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
  305. \ingroup API_Performance_Model
  306. Return the latency of data transfer between two memory nodes
  307. \fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
  308. \ingroup API_Performance_Model
  309. Return the estimated time to transfer a given size between two memory nodes.
  310. \fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
  311. \ingroup API_Performance_Model
  312. Return the estimated time of a task with the given model and the given footprint.
  313. \var starpu_perfmodel_nop
  314. Performance model which just always returns 1µs.
  315. */