performance_model.doxy 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /*
  2. * This file is part of the StarPU Handbook.
  3. * Copyright (C) 2009--2011 Universit@'e de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016 CNRS
  5. * Copyright (C) 2011, 2012 INRIA
  6. * See the file version.doxy for copying conditions.
  7. */
  8. /*! \defgroup API_Performance_Model Performance Model
  9. \enum starpu_perfmodel_type
  10. \ingroup API_Performance_Model
  11. TODO
  12. \var starpu_perfmodel_type::STARPU_PER_ARCH
  13. \ingroup API_Performance_Model
  14. Application-provided per-arch cost model function
  15. \var starpu_perfmodel_type::STARPU_COMMON
  16. \ingroup API_Performance_Model
  17. Application-provided common cost model function, with per-arch factor
  18. \var starpu_perfmodel_type::STARPU_HISTORY_BASED
  19. \ingroup API_Performance_Model
  20. Automatic history-based cost model
  21. \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
  22. \ingroup API_Performance_Model
  23. Automatic linear regression-based cost model (alpha * size ^ beta)
  24. \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
  25. \ingroup API_Performance_Model
  26. Automatic non-linear regression-based cost model (a * size ^ b + c)
  27. \struct starpu_perfmodel_device
  28. todo
  29. \ingroup API_Performance_Model
  30. \var enum starpu_worker_archtype starpu_perfmodel_device::type
  31. is the type of the device
  32. \var int starpu_perfmodel_device::devid
  33. is the identifier of the precise device
  34. \var int starpu_perfmodel_device::ncore
  35. is the number of parallel executions, minus 1
  36. \struct starpu_perfmodel_arch
  37. todo
  38. \ingroup API_Performance_Model
  39. \var int starpu_perfmodel_arch::ndevices
  40. is the number of the devices for the given arch
  41. \var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
  42. is the list of the devices for the given arch
  43. \struct starpu_perfmodel
  44. Contains all information about a performance model. At least the
  45. type and symbol fields have to be filled when defining a performance
  46. model for a codelet. For compatibility, make sure to initialize the
  47. whole structure to zero, either by using explicit memset, or by
  48. letting the compiler implicitly do it in e.g. static storage case. If
  49. not provided, other fields have to be zero.
  50. \ingroup API_Performance_Model
  51. \var enum starpu_perfmodel_type starpu_perfmodel::type
  52. is the type of performance model
  53. <ul>
  54. <li>::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  55. ::STARPU_NL_REGRESSION_BASED: No other field needs to be provided,
  56. this is purely history-based.
  57. </li>
  58. <li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
  59. filled with a function that returns the cost in micro-seconds on the arch given
  60. as parameter, or field starpu_perfmodel::per_arch has to be
  61. filled with functions which return the cost in micro-seconds.
  62. </li>
  63. <li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  64. filled with a function that returns the cost in micro-seconds on a
  65. CPU, timing on other archs will be determined by multiplying by an
  66. arch-specific factor.
  67. </li>
  68. </ul>
  69. \var const char *starpu_perfmodel::symbol
  70. is the symbol name for the performance model, which will be used as
  71. file name to store the model. It must be set otherwise the model will
  72. be ignored.
  73. \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
  74. Used by ::STARPU_COMMON: takes a task and implementation number, and
  75. must return a task duration estimation in micro-seconds.
  76. \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  77. Used by ::STARPU_PER_ARCH: takes a task, an arch and implementation number, and
  78. must return a task duration estimation in micro-seconds on that arch.
  79. \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
  80. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  81. ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
  82. implementation number, and returns the size to be used as index to distinguish
  83. histories and as a base for regressions.
  84. \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
  85. Used by ::STARPU_HISTORY_BASED. If not NULL, takes a task and returns the
  86. footprint to be used as index to distinguish histories. The default is to use
  87. the starpu_task_data_footprint function.
  88. \var unsigned starpu_perfmodel::is_loaded
  89. \private
  90. Whether the performance model is already loaded from the disk.
  91. \var unsigned starpu_perfmodel::benchmarking
  92. \private
  93. \var unsigned starpu_perfmodel::is_init
  94. todo
  95. \var starpu_perfmodel_state_t starpu_perfmodel::state
  96. \private
  97. \struct starpu_perfmodel_regression_model
  98. ...
  99. \ingroup API_Performance_Model
  100. \var double starpu_perfmodel_regression_model::sumlny
  101. sum of ln(measured)
  102. \var double starpu_perfmodel_regression_model::sumlnx
  103. sum of ln(size)
  104. \var double starpu_perfmodel_regression_model::sumlnx2
  105. sum of ln(size)^2
  106. \var unsigned long starpu_perfmodel_regression_model::minx
  107. minimum size
  108. \var unsigned long starpu_perfmodel_regression_model::maxx
  109. maximum size
  110. \var double starpu_perfmodel_regression_model::sumlnxlny
  111. sum of ln(size)*ln(measured)
  112. \var double starpu_perfmodel_regression_model::alpha
  113. estimated = alpha * size ^ beta
  114. \var double starpu_perfmodel_regression_model::beta
  115. estimated = alpha * size ^ beta
  116. \var unsigned starpu_perfmodel_regression_model::valid
  117. whether the linear regression model is valid (i.e. enough measures)
  118. \var double starpu_perfmodel_regression_model::a
  119. estimated = a * size ^ b + c
  120. \var double starpu_perfmodel_regression_model::b
  121. estimated = a * size ^ b + c
  122. \var double starpu_perfmodel_regression_model::c
  123. estimated = a * size ^ b + c
  124. \var unsigned starpu_perfmodel_regression_model::nl_valid
  125. whether the non-linear regression model is valid (i.e. enough measures)
  126. \var unsigned starpu_perfmodel_regression_model::nsample
  127. number of sample values for non-linear regression
  128. \struct starpu_perfmodel_per_arch
  129. contains information about the performance model of a given
  130. arch.
  131. \ingroup API_Performance_Model
  132. \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
  133. Used by ::STARPU_PER_ARCH, must point to functions which take a task,
  134. the target arch and implementation number (as a mere convenience, since
  135. the array is already indexed by these), and must return a task
  136. duration estimation in micro-seconds.
  137. \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
  138. Same as in structure starpu_perfmodel, but per-arch, in case it
  139. depends on the architecture-specific implementation.
  140. \var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
  141. \private
  142. The history of performance measurements.
  143. \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
  144. \private
  145. Used by ::STARPU_HISTORY_BASED and ::STARPU_NL_REGRESSION_BASED,
  146. records all execution history measures.
  147. \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
  148. \private
  149. Used by ::STARPU_REGRESSION_BASED and
  150. ::STARPU_NL_REGRESSION_BASED, contains the estimated factors of the
  151. regression.
  152. \struct starpu_perfmodel_history_list
  153. todo
  154. \ingroup API_Performance_Model
  155. \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
  156. todo
  157. \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
  158. todo
  159. \struct starpu_perfmodel_history_entry
  160. todo
  161. \ingroup API_Performance_Model
  162. \var double starpu_perfmodel_history_entry::mean
  163. mean_n = 1/n sum
  164. \var double starpu_perfmodel_history_entry::deviation
  165. n dev_n = sum2 - 1/n (sum)^2
  166. \var double starpu_perfmodel_history_entry::sum
  167. sum of samples (in µs)
  168. \var double starpu_perfmodel_history_entry::sum2
  169. sum of samples^2
  170. \var unsigned starpu_perfmodel_history_entry::nsample
  171. number of samples
  172. \var uint32_t starpu_perfmodel_history_entry::footprint
  173. data footprint
  174. \var size_t starpu_perfmodel_history_entry::size
  175. in bytes
  176. \var double starpu_perfmodel_history_entry::flops
  177. Provided by the application
  178. \fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
  179. \ingroup API_Performance_Model
  180. todo
  181. \fn void starpu_perfmodel_free_sampling_directories(void)
  182. \ingroup API_Performance_Model
  183. this function frees internal memory used for sampling directory
  184. management. It should only be called by an application which is not
  185. calling starpu_shutdown(), since starpu_shutdown() already calls it. See for
  186. example <c>tools/starpu_perfmodel_display.c</c>.
  187. \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
  188. \ingroup API_Performance_Model
  189. loads the performance model found in the given file. The model structure has to be
  190. completely zero, and will be filled with the information stored in the given file.
  191. \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  192. \ingroup API_Performance_Model
  193. loads a given performance model. The model structure has to be
  194. completely zero, and will be filled with the information saved in
  195. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  196. external tools that should read the performance model files.
  197. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  198. \ingroup API_Performance_Model
  199. unloads the given model which has been previously loaded
  200. through the function starpu_perfmodel_load_symbol()
  201. \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
  202. \ingroup API_Performance_Model
  203. returns the path to the debugging information for the performance model.
  204. \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
  205. \ingroup API_Performance_Model
  206. todo
  207. \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
  208. \ingroup API_Performance_Model
  209. returns the architecture name for \p arch
  210. \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
  211. \ingroup API_Performance_Model
  212. returns the architecture type of a given worker.
  213. \fn int starpu_perfmodel_list(FILE *output)
  214. \ingroup API_Performance_Model
  215. prints a list of all performance models on \p output
  216. \fn void starpu_perfmodel_directory(FILE *output)
  217. \ingroup API_Performance_Model
  218. prints the directory name storing performance models on \p output
  219. \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  220. \ingroup API_Performance_Model
  221. todo
  222. \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  223. \ingroup API_Performance_Model
  224. todo
  225. \fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
  226. \ingroup API_Performance_Model
  227. todo
  228. \fn void starpu_bus_print_bandwidth(FILE *f)
  229. \ingroup API_Performance_Model
  230. prints a matrix of bus bandwidths on \p f.
  231. \fn void starpu_bus_print_affinity(FILE *f)
  232. \ingroup API_Performance_Model
  233. prints the affinity devices on \p f.
  234. \fn void starpu_bus_print_filenames(FILE *f)
  235. \ingroup API_Performance_Model
  236. prints on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
  237. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured)
  238. \ingroup API_Performance_Model
  239. This feeds the performance model \p model with an explicit
  240. measurement \p measured (in µs), in addition to measurements done by StarPU
  241. itself. This can be useful when the application already has an
  242. existing set of measurements done in good conditions, that StarPU
  243. could benefit from instead of doing on-line measurements. An example
  244. of use can be seen in \ref PerformanceModelExample.
  245. \fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
  246. \ingroup API_Performance_Model
  247. Return the bandwidth of data transfer between two memory nodes
  248. \fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
  249. \ingroup API_Performance_Model
  250. Return the latency of data transfer between two memory nodes
  251. \fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
  252. \ingroup API_Performance_Model
  253. Return the estimated time to transfer a given size between two memory nodes.
  254. \fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
  255. \ingroup API_Performance_Model
  256. Return the estimated time of a task whose model is \p model and whose footprint is \p footprint
  257. */