performance_model.doxy 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /*
  2. * This file is part of the StarPU Handbook.
  3. * Copyright (C) 2009--2011 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 CNRS
  5. * Copyright (C) 2011, 2012, 2016 INRIA
  6. * See the file version.doxy for copying conditions.
  7. */
  8. /*! \defgroup API_Performance_Model Performance Model
  9. \enum starpu_perfmodel_type
  10. \ingroup API_Performance_Model
  11. TODO
  12. \var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
  13. todo
  14. \var starpu_perfmodel_type::STARPU_PER_ARCH
  15. \ingroup API_Performance_Model
  16. Application-provided per-arch cost model function
  17. \var starpu_perfmodel_type::STARPU_COMMON
  18. \ingroup API_Performance_Model
  19. Application-provided common cost model function, with per-arch factor
  20. \var starpu_perfmodel_type::STARPU_HISTORY_BASED
  21. \ingroup API_Performance_Model
  22. Automatic history-based cost model
  23. \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
  24. \ingroup API_Performance_Model
  25. Automatic linear regression-based cost model (alpha * size ^ beta)
  26. \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
  27. \ingroup API_Performance_Model
  28. Automatic non-linear regression-based cost model (a * size ^ b + c)
  29. \var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
  30. \ingroup API_Performance_Model
  31. Automatic multiple linear regression-based cost model. Application provides parameters, their combinations and exponents
  32. \struct starpu_perfmodel_device
  33. todo
  34. \ingroup API_Performance_Model
  35. \var enum starpu_worker_archtype starpu_perfmodel_device::type
  36. is the type of the device
  37. \var int starpu_perfmodel_device::devid
  38. is the identifier of the precise device
  39. \var int starpu_perfmodel_device::ncore
  40. is the number of parallel executions, minus 1
  41. \struct starpu_perfmodel_arch
  42. todo
  43. \ingroup API_Performance_Model
  44. \var int starpu_perfmodel_arch::ndevices
  45. is the number of the devices for the given arch
  46. \var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
  47. is the list of the devices for the given arch
  48. \struct starpu_perfmodel
  49. Contains all information about a performance model. At least the
  50. type and symbol fields have to be filled when defining a performance
  51. model for a codelet. For compatibility, make sure to initialize the
  52. whole structure to zero, either by using explicit memset, or by
  53. letting the compiler implicitly do it in e.g. static storage case. If
  54. not provided, other fields have to be zero.
  55. \ingroup API_Performance_Model
  56. \var enum starpu_perfmodel_type starpu_perfmodel::type
  57. is the type of performance model
  58. <ul>
  59. <li>::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  60. ::STARPU_NL_REGRESSION_BASED: No other field needs to be provided,
  61. this is purely history-based.
  62. </li>
  63. <li> ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields starpu_perfmodel::nparameters (number of different parameters), starpu_perfmodel::ncombinations (number of parameters combinations-tuples) and table starpu_perfmodel::combinations which defines exponents of the equation. Function cl_perf_func also needs to define how to extract parameters from the task.
  64. </li>
  65. <li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
  66. filled with a function that returns the cost in micro-seconds on the arch given
  67. as parameter, or field starpu_perfmodel::per_arch has to be
  68. filled with functions which return the cost in micro-seconds.
  69. </li>
  70. <li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  71. filled with a function that returns the cost in micro-seconds on a
  72. CPU, timing on other archs will be determined by multiplying by an
  73. arch-specific factor.
  74. </li>
  75. </ul>
  76. \var const char *starpu_perfmodel::symbol
  77. is the symbol name for the performance model, which will be used as
  78. file name to store the model. It must be set otherwise the model will
  79. be ignored.
  80. \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
  81. Used by ::STARPU_COMMON. Takes a task and implementation number, and
  82. must return a task duration estimation in micro-seconds.
  83. \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  84. Used by ::STARPU_PER_ARCH. Takes a task, an arch and implementation number, and
  85. must return a task duration estimation in micro-seconds on that arch.
  86. \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
  87. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  88. ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, takes a task and
  89. implementation number, and returns the size to be used as index to distinguish
  90. histories and as a base for regressions.
  91. \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
  92. Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, takes a task and returns the
  93. footprint to be used as index to distinguish histories. The default is to use
  94. the starpu_task_data_footprint() function.
  95. \var unsigned starpu_perfmodel::is_loaded
  96. \private
  97. Whether the performance model is already loaded from the disk.
  98. \var unsigned starpu_perfmodel::benchmarking
  99. \private
  100. todo
  101. \var unsigned starpu_perfmodel::is_init
  102. todo
  103. \var starpu_perfmodel_state_t starpu_perfmodel::state
  104. \private
  105. todo
  106. \var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
  107. todo
  108. \var const char ** starpu_perfmodel::parameters_names
  109. \private
  110. Names of parameters used for multiple linear regression models (M, N, K)
  111. \var unsigned starpu_perfmodel::nparameters
  112. \private
  113. Number of parameters used for multiple linear regression models
  114. \var unsigned ** starpu_perfmodel::combinations
  115. \private
  116. Table of combinations of parameters (and the exponents) used for multiple linear regression models
  117. \var unsigned starpu_perfmodel::ncombinations
  118. \private
  119. Number of combinations of parameters used for multiple linear regression models
  120. \struct starpu_perfmodel_regression_model
  121. ...
  122. \ingroup API_Performance_Model
  123. \var double starpu_perfmodel_regression_model::sumlny
  124. sum of ln(measured)
  125. \var double starpu_perfmodel_regression_model::sumlnx
  126. sum of ln(size)
  127. \var double starpu_perfmodel_regression_model::sumlnx2
  128. sum of ln(size)^2
  129. \var unsigned long starpu_perfmodel_regression_model::minx
  130. minimum size
  131. \var unsigned long starpu_perfmodel_regression_model::maxx
  132. maximum size
  133. \var double starpu_perfmodel_regression_model::sumlnxlny
  134. sum of ln(size)*ln(measured)
  135. \var double starpu_perfmodel_regression_model::alpha
  136. estimated = alpha * size ^ beta
  137. \var double starpu_perfmodel_regression_model::beta
  138. estimated = alpha * size ^ beta
  139. \var unsigned starpu_perfmodel_regression_model::valid
  140. whether the linear regression model is valid (i.e. enough measures)
  141. \var double starpu_perfmodel_regression_model::a
  142. estimated = a * size ^ b + c
  143. \var double starpu_perfmodel_regression_model::b
  144. estimated = a * size ^ b + c
  145. \var double starpu_perfmodel_regression_model::c
  146. estimated = a * size ^ b + c
  147. \var unsigned starpu_perfmodel_regression_model::nl_valid
  148. whether the non-linear regression model is valid (i.e. enough measures)
  149. \var unsigned starpu_perfmodel_regression_model::nsample
  150. number of sample values for non-linear regression
  151. \var double starpu_perfmodel_regression_model::coeff[]
  152. list of computed coefficients for multiple linear regression model
  153. \var double starpu_perfmodel_regression_model::ncoeff
  154. number of coefficients for multiple linear regression model
  155. \var double starpu_perfmodel_regression_model::multi_valid
  156. whether the multiple linear regression model is valid
  157. \struct starpu_perfmodel_per_arch
  158. contains information about the performance model of a given
  159. arch.
  160. \ingroup API_Performance_Model
  161. \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
  162. Used by ::STARPU_PER_ARCH, must point to functions which take a task,
  163. the target arch and implementation number (as a mere convenience, since
  164. the array is already indexed by these), and must return a task
  165. duration estimation in micro-seconds.
  166. \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
  167. Same as in structure starpu_perfmodel, but per-arch, in case it
  168. depends on the architecture-specific implementation.
  169. \var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
  170. \private
  171. The history of performance measurements.
  172. \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
  173. \private
  174. Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED,
  175. records all execution history measures.
  176. \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
  177. \private
  178. Used by ::STARPU_REGRESSION_BASED,
  179. ::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated factors of the
  180. regression.
  181. \struct starpu_perfmodel_history_list
  182. todo
  183. \ingroup API_Performance_Model
  184. \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
  185. todo
  186. \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
  187. todo
  188. \struct starpu_perfmodel_history_entry
  189. todo
  190. \ingroup API_Performance_Model
  191. \var double starpu_perfmodel_history_entry::mean
  192. mean_n = 1/n sum
  193. \var double starpu_perfmodel_history_entry::deviation
  194. n dev_n = sum2 - 1/n (sum)^2
  195. \var double starpu_perfmodel_history_entry::sum
  196. sum of samples (in µs)
  197. \var double starpu_perfmodel_history_entry::sum2
  198. sum of samples^2
  199. \var unsigned starpu_perfmodel_history_entry::nsample
  200. number of samples
  201. \var uint32_t starpu_perfmodel_history_entry::footprint
  202. data footprint
  203. \var size_t starpu_perfmodel_history_entry::size
  204. in bytes
  205. \var double starpu_perfmodel_history_entry::flops
  206. Provided by the application
  207. \fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
  208. \ingroup API_Performance_Model
  209. todo
  210. \fn void starpu_perfmodel_free_sampling_directories(void)
  211. \ingroup API_Performance_Model
  212. this function frees internal memory used for sampling directory
  213. management. It should only be called by an application which is not
  214. calling starpu_shutdown(), as starpu_shutdown() already calls it. See for
  215. example <c>tools/starpu_perfmodel_display.c</c>.
  216. \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
  217. \ingroup API_Performance_Model
  218. loads the performance model found in the given file. The model structure has to be
  219. completely zero, and will be filled with the information stored in the given file.
  220. \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  221. \ingroup API_Performance_Model
  222. loads a given performance model. The model structure has to be
  223. completely zero, and will be filled with the information saved in
  224. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  225. external tools that should read the performance model files.
  226. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  227. \ingroup API_Performance_Model
  228. unloads the given model which has been previously loaded
  229. through the function starpu_perfmodel_load_symbol()
  230. \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
  231. \ingroup API_Performance_Model
  232. returns the path to the debugging information for the performance model.
  233. \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
  234. \ingroup API_Performance_Model
  235. todo
  236. \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
  237. \ingroup API_Performance_Model
  238. returns the architecture name for \p arch
  239. \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
  240. \ingroup API_Performance_Model
  241. returns the architecture type of a given worker.
  242. \fn int starpu_perfmodel_list(FILE *output)
  243. \ingroup API_Performance_Model
  244. prints a list of all performance models on \p output
  245. \fn void starpu_perfmodel_directory(FILE *output)
  246. \ingroup API_Performance_Model
  247. prints the directory name storing performance models on \p output
  248. \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  249. \ingroup API_Performance_Model
  250. todo
  251. \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  252. \ingroup API_Performance_Model
  253. todo
  254. \fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
  255. \ingroup API_Performance_Model
  256. todo
  257. \fn void starpu_bus_print_bandwidth(FILE *f)
  258. \ingroup API_Performance_Model
  259. prints a matrix of bus bandwidths on \p f.
  260. \fn void starpu_bus_print_affinity(FILE *f)
  261. \ingroup API_Performance_Model
  262. prints the affinity devices on \p f.
  263. \fn void starpu_bus_print_filenames(FILE *f)
  264. \ingroup API_Performance_Model
  265. prints on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
  266. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
  267. \ingroup API_Performance_Model
  268. This feeds the performance model \p model with an explicit
  269. measurement \p measured (in µs), in addition to measurements done by StarPU
  270. itself. This can be useful when the application already has an
  271. existing set of measurements done in good conditions, that StarPU
  272. could benefit from instead of doing on-line measurements. An example
  273. of use can be seen in \ref PerformanceModelExample.
  274. \fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
  275. \ingroup API_Performance_Model
  276. Return the bandwidth of data transfer between two memory nodes
  277. \fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
  278. \ingroup API_Performance_Model
  279. Return the latency of data transfer between two memory nodes
  280. \fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
  281. \ingroup API_Performance_Model
  282. Return the estimated time to transfer a given size between two memory nodes.
  283. \fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
  284. \ingroup API_Performance_Model
  285. Return the estimated time of a task with the given model and the given footprint.
  286. */