performance_model.doxy 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /*
  2. * This file is part of the StarPU Handbook.
  3. * Copyright (C) 2009--2011 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 CNRS
  5. * Copyright (C) 2011, 2012, 2016 INRIA
  6. * See the file version.doxy for copying conditions.
  7. */
  8. /*! \defgroup API_Performance_Model Performance Model
  9. \enum starpu_perfmodel_type
  10. \ingroup API_Performance_Model
  11. TODO
  12. \var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
  13. todo
  14. \var starpu_perfmodel_type::STARPU_PER_ARCH
  15. Application-provided per-arch cost model function
  16. \var starpu_perfmodel_type::STARPU_COMMON
  17. Application-provided common cost model function, with per-arch
  18. factor
  19. \var starpu_perfmodel_type::STARPU_HISTORY_BASED
  20. Automatic history-based cost model
  21. \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
  22. Automatic linear regression-based cost model (alpha * size ^
  23. beta)
  24. \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
  25. Automatic non-linear regression-based cost model (a * size ^ b +
  26. c)
  27. \var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
  28. Automatic multiple linear regression-based cost model. Application
  29. provides parameters, their combinations and exponents.
  30. \struct starpu_perfmodel_device
  31. todo
  32. \ingroup API_Performance_Model
  33. \var enum starpu_worker_archtype starpu_perfmodel_device::type
  34. type of the device
  35. \var int starpu_perfmodel_device::devid
  36. identifier of the precise device
  37. \var int starpu_perfmodel_device::ncore
  38. number of executions in parallel, minus 1
  39. \struct starpu_perfmodel_arch
  40. todo
  41. \ingroup API_Performance_Model
  42. \var int starpu_perfmodel_arch::ndevices
  43. number of the devices for the given arch
  44. \var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
  45. list of the devices for the given arch
  46. \struct starpu_perfmodel
  47. Contain all information about a performance model. At least the
  48. type and symbol fields have to be filled when defining a performance
  49. model for a codelet. For compatibility, make sure to initialize the
  50. whole structure to zero, either by using explicit memset, or by
  51. letting the compiler implicitly do it in e.g. static storage case. If
  52. not provided, other fields have to be zero.
  53. \ingroup API_Performance_Model
  54. \var enum starpu_perfmodel_type starpu_perfmodel::type
  55. type of performance model
  56. <ul>
  57. <li>
  58. ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  59. ::STARPU_NL_REGRESSION_BASED: No other field needs to be
  60. provided; this is purely history-based.
  61. </li>
  62. <li>
  63. ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
  64. starpu_perfmodel::nparameters (number of different parameters),
  65. starpu_perfmodel::ncombinations (number of parameters
  66. combinations-tuples) and table starpu_perfmodel::combinations
  67. which defines exponents of the equation. Function cl_perf_func
  68. also needs to define how to extract parameters from the task.
  69. </li>
  70. <li>
  71. ::STARPU_PER_ARCH: either field
  72. starpu_perfmodel::arch_cost_function has to be filled with a
  73. function that returns the cost in micro-seconds on the arch given
  74. as parameter, or field starpu_perfmodel::per_arch has to be filled
  75. with functions which return the cost in micro-seconds.
  76. </li>
  77. <li>
  78. ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  79. filled with a function that returns the cost in micro-seconds on a
  80. CPU, timing on other archs will be determined by multiplying by an
  81. arch-specific factor.
  82. </li>
  83. </ul>
  84. \var const char *starpu_perfmodel::symbol
  85. symbol name for the performance model, which will be used as file
  86. name to store the model. It must be set otherwise the model will
  87. be ignored.
  88. \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
  89. Used by ::STARPU_COMMON. Take a task and implementation number,
  90. and must return a task duration estimation in micro-seconds.
  91. \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  92. Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
  93. number, and must return a task duration estimation in
  94. micro-seconds on that arch.
  95. \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
  96. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  97. ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
  98. implementation number, and return the size to be used as index to
  99. distinguish histories and as a base for regressions.
  100. \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
  101. Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
  102. and return the footprint to be used as index to distinguish
  103. histories. The default is to use the starpu_task_data_footprint()
  104. function.
  105. \var unsigned starpu_perfmodel::is_loaded
  106. \private
  107. Whether the performance model is already loaded from the disk.
  108. \var unsigned starpu_perfmodel::benchmarking
  109. \private
  110. todo
  111. \var unsigned starpu_perfmodel::is_init
  112. todo
  113. \var starpu_perfmodel_state_t starpu_perfmodel::state
  114. \private
  115. todo
  116. \var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters)
  117. todo
  118. \var const char ** starpu_perfmodel::parameters_names
  119. \private
  120. Names of parameters used for multiple linear regression models (M,
  121. N, K)
  122. \var unsigned starpu_perfmodel::nparameters
  123. \private
  124. Number of parameters used for multiple linear regression models
  125. \var unsigned ** starpu_perfmodel::combinations
  126. \private
  127. Table of combinations of parameters (and the exponents) used for
  128. multiple linear regression models
  129. \var unsigned starpu_perfmodel::ncombinations
  130. \private
  131. Number of combinations of parameters used for multiple linear
  132. regression models
  133. \struct starpu_perfmodel_regression_model
  134. todo
  135. \ingroup API_Performance_Model
  136. \var double starpu_perfmodel_regression_model::sumlny
  137. sum of ln(measured)
  138. \var double starpu_perfmodel_regression_model::sumlnx
  139. sum of ln(size)
  140. \var double starpu_perfmodel_regression_model::sumlnx2
  141. sum of ln(size)^2
  142. \var unsigned long starpu_perfmodel_regression_model::minx
  143. minimum size
  144. \var unsigned long starpu_perfmodel_regression_model::maxx
  145. maximum size
  146. \var double starpu_perfmodel_regression_model::sumlnxlny
  147. sum of ln(size)*ln(measured)
  148. \var double starpu_perfmodel_regression_model::alpha
  149. estimated = alpha * size ^ beta
  150. \var double starpu_perfmodel_regression_model::beta
  151. estimated = alpha * size ^ beta
  152. \var unsigned starpu_perfmodel_regression_model::valid
  153. whether the linear regression model is valid (i.e. enough measures)
  154. \var double starpu_perfmodel_regression_model::a
  155. estimated = a * size ^ b + c
  156. \var double starpu_perfmodel_regression_model::b
  157. estimated = a * size ^ b + c
  158. \var double starpu_perfmodel_regression_model::c
  159. estimated = a * size ^ b + c
  160. \var unsigned starpu_perfmodel_regression_model::nl_valid
  161. whether the non-linear regression model is valid (i.e. enough measures)
  162. \var unsigned starpu_perfmodel_regression_model::nsample
  163. number of sample values for non-linear regression
  164. \var double starpu_perfmodel_regression_model::coeff[]
  165. list of computed coefficients for multiple linear regression model
  166. \var double starpu_perfmodel_regression_model::ncoeff
  167. number of coefficients for multiple linear regression model
  168. \var double starpu_perfmodel_regression_model::multi_valid
  169. whether the multiple linear regression model is valid
  170. \struct starpu_perfmodel_per_arch
  171. contains information about the performance model of a given
  172. arch.
  173. \ingroup API_Performance_Model
  174. \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
  175. Used by ::STARPU_PER_ARCH, must point to functions which take a
  176. task, the target arch and implementation number (as a mere
  177. convenience, since the array is already indexed by these), and
  178. must return a task duration estimation in micro-seconds.
  179. \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
  180. Same as in structure starpu_perfmodel, but per-arch, in case it
  181. depends on the architecture-specific implementation.
  182. \var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
  183. \private
  184. The history of performance measurements.
  185. \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
  186. \private
  187. Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
  188. ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
  189. measures.
  190. \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
  191. \private
  192. Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
  193. and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
  194. factors of the regression.
  195. \struct starpu_perfmodel_history_list
  196. todo
  197. \ingroup API_Performance_Model
  198. \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
  199. todo
  200. \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
  201. todo
  202. \struct starpu_perfmodel_history_entry
  203. todo
  204. \ingroup API_Performance_Model
  205. \var double starpu_perfmodel_history_entry::mean
  206. mean_n = 1/n sum
  207. \var double starpu_perfmodel_history_entry::deviation
  208. n dev_n = sum2 - 1/n (sum)^2
  209. \var double starpu_perfmodel_history_entry::sum
  210. sum of samples (in µs)
  211. \var double starpu_perfmodel_history_entry::sum2
  212. sum of samples^2
  213. \var unsigned starpu_perfmodel_history_entry::nsample
  214. number of samples
  215. \var uint32_t starpu_perfmodel_history_entry::footprint
  216. data footprint
  217. \var size_t starpu_perfmodel_history_entry::size
  218. in bytes
  219. \var double starpu_perfmodel_history_entry::flops
  220. Provided by the application
  221. \fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
  222. \ingroup API_Performance_Model
  223. todo
  224. \fn void starpu_perfmodel_free_sampling_directories(void)
  225. \ingroup API_Performance_Model
  226. Free internal memory used for sampling directory
  227. management. It should only be called by an application which does
  228. not call starpu_shutdown(), since starpu_shutdown() already calls
  229. this function. See for example <c>tools/starpu_perfmodel_display.c</c>.
  230. \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
  231. \ingroup API_Performance_Model
  232. Load the performance model found in the file named \p filename. \p model has to be
  233. completely zero, and will be filled with the information stored in the given file.
  234. \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  235. \ingroup API_Performance_Model
  236. Load a given performance model. \p model has to be
  237. completely zero, and will be filled with the information stored in
  238. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  239. external tools that want to read the performance model files.
  240. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  241. \ingroup API_Performance_Model
  242. Unload \p model which has been previously loaded
  243. through the function starpu_perfmodel_load_symbol()
  244. \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
  245. \ingroup API_Performance_Model
  246. Return the path to the debugging information for the performance model.
  247. \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
  248. \ingroup API_Performance_Model
  249. todo
  250. \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
  251. \ingroup API_Performance_Model
  252. Return the architecture name for \p arch
  253. \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
  254. \ingroup API_Performance_Model
  255. Return the architecture type of the worker \p workerid.
  256. \fn int starpu_perfmodel_list(FILE *output)
  257. \ingroup API_Performance_Model
  258. Print a list of all performance models on \p output
  259. \fn void starpu_perfmodel_directory(FILE *output)
  260. \ingroup API_Performance_Model
  261. Print the directory name storing performance models on \p output
  262. \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  263. \ingroup API_Performance_Model
  264. todo
  265. \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  266. \ingroup API_Performance_Model
  267. todo
  268. \fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
  269. \ingroup API_Performance_Model
  270. todo
  271. \fn void starpu_bus_print_bandwidth(FILE *f)
  272. \ingroup API_Performance_Model
  273. Print a matrix of bus bandwidths on \p f.
  274. \fn void starpu_bus_print_affinity(FILE *f)
  275. \ingroup API_Performance_Model
  276. Print the affinity devices on \p f.
  277. \fn void starpu_bus_print_filenames(FILE *f)
  278. \ingroup API_Performance_Model
  279. Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
  280. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured)
  281. \ingroup API_Performance_Model
  282. Feed the performance model \p model with an explicit
  283. measurement \p measured (in µs), in addition to measurements done by StarPU
  284. itself. This can be useful when the application already has an
  285. existing set of measurements done in good conditions, that StarPU
  286. could benefit from instead of doing on-line measurements. An example
  287. of use can be seen in \ref PerformanceModelExample.
  288. \fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
  289. \ingroup API_Performance_Model
  290. Return the bandwidth of data transfer between two memory nodes
  291. \fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
  292. \ingroup API_Performance_Model
  293. Return the latency of data transfer between two memory nodes
  294. \fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
  295. \ingroup API_Performance_Model
  296. Return the estimated time to transfer a given size between two memory nodes.
  297. \fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
  298. \ingroup API_Performance_Model
  299. Return the estimated time of a task with the given model and the given footprint.
  300. */