performance_model.doxy 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*
  2. * This file is part of the StarPU Handbook.
  3. * Copyright (C) 2009--2011 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015 CNRS
  5. * Copyright (C) 2011, 2012 INRIA
  6. * See the file version.doxy for copying conditions.
  7. */
  8. /*! \defgroup API_Performance_Model Performance Model
  9. \enum starpu_perfmodel_archtype
  10. \ingroup API_Performance_Model
  11. Enumerates the various types of architectures.
  12. It is possible to have multiple versions of the same kind of
  13. worker, for instance multiple GPUs or even different CPUs within
  14. the same machine, so the archtype enum type is not used directly
  15. for performance models.
  16. <ul>
  17. <li> CPU types range within ::STARPU_CPU_DEFAULT (1 CPU),
  18. ::STARPU_CPU_DEFAULT+1 (2 CPUs), ... ::STARPU_CPU_DEFAULT +
  19. STARPU_MAXCPUS - 1 (STARPU_MAXCPUS CPUs).
  20. </li>
  21. <li> CUDA types range within ::STARPU_CUDA_DEFAULT (GPU number 0),
  22. ::STARPU_CUDA_DEFAULT + 1 (GPU number 1), ..., ::STARPU_CUDA_DEFAULT +
  23. STARPU_MAXCUDADEVS - 1 (GPU number STARPU_MAXCUDADEVS - 1).
  24. </li>
  25. <li> OpenCL types range within ::STARPU_OPENCL_DEFAULT (GPU number
  26. 0), ::STARPU_OPENCL_DEFAULT + 1 (GPU number 1), ...,
  27. ::STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS - 1 (GPU number
  28. STARPU_MAXOPENCLDEVS - 1).</li>
  29. </ul>
  30. \var starpu_perfmodel_archtype::STARPU_CPU_DEFAULT
  31. \ingroup API_Performance_Model
  32. CPU combined workers between 0 and STARPU_MAXCPUS-1
  33. \var starpu_perfmodel_archtype::STARPU_CUDA_DEFAULT
  34. \ingroup API_Performance_Model
  35. CUDA workers
  36. \var starpu_perfmodel_archtype::STARPU_OPENCL_DEFAULT
  37. \ingroup API_Performance_Model
  38. OpenCL workers
  39. \var starpu_perfmodel_archtype::STARPU_MIC_DEFAULT
  40. \ingroup API_Performance_Model
  41. MIC workers
  42. \var starpu_perfmodel_archtype::STARPU_SCC_DEFAULT
  43. \ingroup API_Performance_Model
  44. SCC workers
  45. \enum starpu_perfmodel_type
  46. \ingroup API_Performance_Model
  47. Enumerates the types of performance models.
  48. \var starpu_perfmodel_type::STARPU_PER_ARCH
  49. \ingroup API_Performance_Model
  50. Application-provided per-arch cost model function
  51. \var starpu_perfmodel_type::STARPU_COMMON
  52. \ingroup API_Performance_Model
  53. Application-provided common cost model function, with per-arch factor
  54. \var starpu_perfmodel_type::STARPU_HISTORY_BASED
  55. \ingroup API_Performance_Model
  56. Automatic history-based cost model
  57. \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
  58. \ingroup API_Performance_Model
  59. Automatic linear regression-based cost model (alpha * size ^ beta)
  60. \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
  61. \ingroup API_Performance_Model
  62. Automatic non-linear regression-based cost model (a * size ^ b + c)
  63. \struct starpu_perfmodel
  64. Contains all information about a performance model. At least the
  65. type and symbol fields have to be filled when defining a performance
  66. model for a codelet. For compatibility, make sure to initialize the
  67. whole structure to zero, either by using an explicit memset, or by
  68. letting the compiler do it implicitly, e.g. for static storage. Fields
  69. that are not provided have to be zero.
  70. \ingroup API_Performance_Model
  71. \var enum starpu_perfmodel_type starpu_perfmodel::type
  72. is the type of performance model
  73. <ul>
  74. <li>::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  75. ::STARPU_NL_REGRESSION_BASED: No other field needs to be provided,
  76. this is purely history-based.
  77. </li>
  78. <li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
  79. filled with a function that returns the cost in micro-seconds on the arch given
  80. as parameter, or field starpu_perfmodel::per_arch has to be
  81. filled with functions which return the cost in micro-seconds.
  82. </li>
  83. <li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  84. filled with a function that returns the cost in micro-seconds on a
  85. CPU, timing on other archs will be determined by multiplying by an
  86. arch-specific factor.
  87. </li>
  88. </ul>
  89. \var const char *starpu_perfmodel::symbol
  90. is the symbol name for the performance model, which will be used as
  91. file name to store the model. It must be set otherwise the model will
  92. be ignored.
  93. \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
  94. Used by ::STARPU_COMMON: takes a task and implementation number, and
  95. must return a task duration estimation in micro-seconds.
  96. \var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
  97. Used by ::STARPU_PER_ARCH: takes a task, an arch and implementation number, and
  98. must return a task duration estimation in micro-seconds on that arch.
  99. \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
  100. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  101. ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
  102. implementation number, and returns the size to be used as index to distinguish
  103. histories and as a base for regressions.
  104. \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
  105. Used by ::STARPU_HISTORY_BASED. If not NULL, takes a task and returns the
  106. footprint to be used as index to distinguish histories. The default is to use
  107. the starpu_task_data_footprint function.
  108. \var unsigned starpu_perfmodel::is_loaded
  109. \private
  110. Whether the performance model is already loaded from the disk.
  111. \var unsigned starpu_perfmodel::benchmarking
  112. \private
  113. \var unsigned starpu_perfmodel::is_init
  114. todo
  115. \var starpu_perfmodel_state_t starpu_perfmodel::state
  116. \private
  117. \struct starpu_perfmodel_regression_model
  118. ...
  119. \ingroup API_Performance_Model
  120. \var double starpu_perfmodel_regression_model::sumlny
  121. sum of ln(measured)
  122. \var double starpu_perfmodel_regression_model::sumlnx
  123. sum of ln(size)
  124. \var double starpu_perfmodel_regression_model::sumlnx2
  125. sum of ln(size)^2
  126. \var unsigned long starpu_perfmodel_regression_model::minx
  127. minimum size
  128. \var unsigned long starpu_perfmodel_regression_model::maxx
  129. maximum size
  130. \var double starpu_perfmodel_regression_model::sumlnxlny
  131. sum of ln(size)*ln(measured)
  132. \var double starpu_perfmodel_regression_model::alpha
  133. estimated = alpha * size ^ beta
  134. \var double starpu_perfmodel_regression_model::beta
  135. estimated = alpha * size ^ beta
  136. \var unsigned starpu_perfmodel_regression_model::valid
  137. whether the linear regression model is valid (i.e. enough measures)
  138. \var double starpu_perfmodel_regression_model::a
  139. estimated = a * size ^ b + c
  140. \var double starpu_perfmodel_regression_model::b
  141. estimated = a * size ^ b + c
  142. \var double starpu_perfmodel_regression_model::c
  143. estimated = a * size ^ b + c
  144. \var unsigned starpu_perfmodel_regression_model::nl_valid
  145. whether the non-linear regression model is valid (i.e. enough measures)
  146. \var unsigned starpu_perfmodel_regression_model::nsample
  147. number of sample values for non-linear regression
  148. \struct starpu_perfmodel_per_arch
  149. contains information about the performance model of a given
  150. arch.
  151. \ingroup API_Performance_Model
  152. \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
  153. Used by ::STARPU_PER_ARCH, must point to functions which take a task,
  154. the target arch and implementation number (as a mere convenience, since
  155. the array is already indexed by these), and must return a task
  156. duration estimation in micro-seconds.
  157. \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
  158. Same as in structure starpu_perfmodel, but per-arch, in case it
  159. depends on the architecture-specific implementation.
  160. \var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
  161. \private
  162. The history of performance measurements.
  163. \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
  164. \private
  165. Used by ::STARPU_HISTORY_BASED and ::STARPU_NL_REGRESSION_BASED,
  166. records all execution history measures.
  167. \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
  168. \private
  169. Used by ::STARPU_REGRESSION_BASED and
  170. ::STARPU_NL_REGRESSION_BASED, contains the estimated factors of the
  171. regression.
  172. \struct starpu_perfmodel_history_list
  173. todo
  174. \ingroup API_Performance_Model
  175. \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
  176. todo
  177. \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
  178. todo
  179. \struct starpu_perfmodel_history_entry
  180. todo
  181. \ingroup API_Performance_Model
  182. \var double starpu_perfmodel_history_entry::mean
  183. mean_n = 1/n sum
  184. \var double starpu_perfmodel_history_entry::deviation
  185. n dev_n = sum2 - 1/n (sum)^2
  186. \var double starpu_perfmodel_history_entry::sum
  187. sum of samples (in µs)
  188. \var double starpu_perfmodel_history_entry::sum2
  189. sum of samples^2
  190. \var unsigned starpu_perfmodel_history_entry::nsample
  191. number of samples
  192. \var uint32_t starpu_perfmodel_history_entry::footprint
  193. data footprint
  194. \var size_t starpu_perfmodel_history_entry::size
  195. in bytes
  196. \var double starpu_perfmodel_history_entry::flops
  197. Provided by the application
  198. \fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
  199. \ingroup API_Performance_Model
  200. todo
  201. \fn void starpu_perfmodel_free_sampling_directories(void)
  202. \ingroup API_Performance_Model
  203. Frees the internal memory used for sampling directory
  204. management. It should only be called by an application which does not
  205. call starpu_shutdown(), since starpu_shutdown() already calls it. See for
  206. example <c>tools/starpu_perfmodel_display.c</c>.
  207. \fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
  208. \ingroup API_Performance_Model
  209. loads the performance model found in the given file. The model structure has to be
  210. completely zero, and will be filled with the information stored in the given file.
  211. \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  212. \ingroup API_Performance_Model
  213. loads a given performance model. The model structure has to be
  214. completely zero, and will be filled with the information saved in
  215. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  216. external tools that should read the performance model files.
  217. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  218. \ingroup API_Performance_Model
  219. unloads the given model which has been previously loaded
  220. through the function starpu_perfmodel_load_symbol()
  221. \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
  222. \ingroup API_Performance_Model
  223. returns the path to the debugging information for the performance model.
  224. \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
  225. \ingroup API_Performance_Model
  226. todo
  227. \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
  228. \ingroup API_Performance_Model
  229. returns the architecture name for \p arch
  230. \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
  231. \ingroup API_Performance_Model
  232. returns the architecture type of a given worker.
  233. \fn int starpu_perfmodel_list(FILE *output)
  234. \ingroup API_Performance_Model
  235. prints a list of all performance models on \p output
  236. \fn void starpu_perfmodel_directory(FILE *output)
  237. \ingroup API_Performance_Model
  238. prints the directory name storing performance models on \p output
  239. \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  240. \ingroup API_Performance_Model
  241. todo
  242. \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  243. \ingroup API_Performance_Model
  244. todo
  245. \fn void starpu_bus_print_bandwidth(FILE *f)
  246. \ingroup API_Performance_Model
  247. prints a matrix of bus bandwidths on \p f.
  248. \fn void starpu_bus_print_affinity(FILE *f)
  249. \ingroup API_Performance_Model
  250. prints the affinity devices on \p f.
  251. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured)
  252. \ingroup API_Performance_Model
  253. This feeds the performance model \p model with an explicit
  254. measurement \p measured (in µs), in addition to measurements done by StarPU
  255. itself. This can be useful when the application already has an
  256. existing set of measurements done in good conditions, that StarPU
  257. could benefit from instead of doing on-line measurements. An example
  258. of use can be seen in \ref PerformanceModelExample.
  259. \fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
  260. \ingroup API_Performance_Model
  261. Return the bandwidth of data transfer between two memory nodes
  262. \fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
  263. \ingroup API_Performance_Model
  264. Return the latency of data transfer between two memory nodes
  265. \fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
  266. \ingroup API_Performance_Model
  267. Return the estimated time to transfer a given size between two memory nodes.
  268. \fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
  269. \ingroup API_Performance_Model
  270. Return the estimated time of a task whose model is \p model and whose footprint is \p footprint.
  271. */