performance_model.doxy 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. /*
  2. * This file is part of the StarPU Handbook.
  3. * Copyright (C) 2009--2011 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
  6. * See the file version.doxy for copying conditions.
  7. */
  8. /*! \defgroup API_Performance_Model Performance Model
  9. \enum starpu_perfmodel_archtype
  10. \ingroup API_Performance_Model
  11. Enumerates the various types of architectures.
  12. It is possible that we have multiple versions of the same kind of
  13. workers, for instance multiple GPUs or even different CPUs within
  14. the same machine, so we do not use the archtype enum type directly
  15. for performance models.
  16. <ul>
  17. <li> CPU types range within ::STARPU_CPU_DEFAULT (1 CPU),
  18. ::STARPU_CPU_DEFAULT + 1 (2 CPUs), ..., ::STARPU_CPU_DEFAULT +
  19. STARPU_MAXCPUS - 1 (STARPU_MAXCPUS CPUs).
  20. </li>
  21. <li> CUDA types range within ::STARPU_CUDA_DEFAULT (GPU number 0),
  22. ::STARPU_CUDA_DEFAULT + 1 (GPU number 1), ..., ::STARPU_CUDA_DEFAULT +
  23. STARPU_MAXCUDADEVS - 1 (GPU number STARPU_MAXCUDADEVS - 1).
  24. </li>
  25. <li> OpenCL types range within ::STARPU_OPENCL_DEFAULT (GPU number
  26. 0), ::STARPU_OPENCL_DEFAULT + 1 (GPU number 1), ...,
  27. ::STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS - 1 (GPU number
  28. STARPU_MAXOPENCLDEVS - 1). </li>
  29. </ul>
  30. \var starpu_perfmodel_archtype::STARPU_CPU_DEFAULT
  31. \ingroup API_Performance_Model
  32. CPU combined workers between 0 and STARPU_MAXCPUS-1
  33. \var starpu_perfmodel_archtype::STARPU_CUDA_DEFAULT
  34. \ingroup API_Performance_Model
  35. CUDA workers
  36. \var starpu_perfmodel_archtype::STARPU_OPENCL_DEFAULT
  37. \ingroup API_Performance_Model
  38. OpenCL workers
  39. \var starpu_perfmodel_archtype::STARPU_MIC_DEFAULT
  40. \ingroup API_Performance_Model
  41. MIC workers
  42. \var starpu_perfmodel_archtype::STARPU_SCC_DEFAULT
  43. \ingroup API_Performance_Model
  44. SCC workers
  45. \enum starpu_perfmodel_type
  46. \ingroup API_Performance_Model
  47. TODO
  48. \var starpu_perfmodel_type::STARPU_PER_ARCH
  49. \ingroup API_Performance_Model
  50. Application-provided per-arch cost model function
  51. \var starpu_perfmodel_type::STARPU_COMMON
  52. \ingroup API_Performance_Model
  53. Application-provided common cost model function, with per-arch factor
  54. \var starpu_perfmodel_type::STARPU_HISTORY_BASED
  55. \ingroup API_Performance_Model
  56. Automatic history-based cost model
  57. \var starpu_perfmodel_type::STARPU_REGRESSION_BASED
  58. \ingroup API_Performance_Model
  59. Automatic linear regression-based cost model (alpha * size ^ beta)
  60. \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
  61. \ingroup API_Performance_Model
  62. Automatic non-linear regression-based cost model (a * size ^ b + c)
  63. \struct starpu_perfmodel
  64. Contains all information about a performance model. At least the
  65. type and symbol fields have to be filled when defining a performance
  66. model for a codelet. For compatibility, make sure to initialize the
  67. whole structure to zero, either by using explicit memset, or by
  68. letting the compiler implicitly do it, e.g. in the static storage case. If
  69. not provided, other fields have to be zero.
  70. \ingroup API_Performance_Model
  71. \var starpu_perfmodel::type
  72. is the type of performance model
  73. <ul>
  74. <li>::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
  75. ::STARPU_NL_REGRESSION_BASED: No other field needs to be provided,
  76. this is purely history-based.
  77. </li>
  78. <li> ::STARPU_PER_ARCH: field starpu_perfmodel::per_arch has to be
  79. filled with functions which return the cost in micro-seconds.
  80. </li>
  81. <li> ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
  82. filled with a function that returns the cost in micro-seconds on a
  83. CPU, timing on other archs will be determined by multiplying by an
  84. arch-specific factor.
  85. </li>
  86. </ul>
  87. \var starpu_perfmodel::symbol
  88. is the symbol name for the performance model, which will be used as
  89. file name to store the model. It must be set otherwise the model will
  90. be ignored.
  91. \var starpu_perfmodel::cost_model
  92. \deprecated
  93. This field is deprecated. Use the field starpu_perfmodel::cost_function instead.
  94. \var starpu_perfmodel::cost_function
  95. Used by ::STARPU_COMMON: takes a task and implementation number, and
  96. must return a task duration estimation in micro-seconds.
  97. \var starpu_perfmodel::size_base
  98. Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
  99. ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and
  100. implementation number, and returns the size to be used as index for
  101. history and regression.
  102. \var starpu_perfmodel::per_arch
  103. Used by ::STARPU_PER_ARCH: array of structures starpu_perfmodel_per_arch
  104. \var starpu_perfmodel::is_loaded
  105. \private
  106. Whether the performance model is already loaded from the disk.
  107. \var starpu_perfmodel::benchmarking
  108. \private
  109. Whether the performance model is still being calibrated.
  110. \var starpu_perfmodel::model_rwlock
  111. \private
  112. Lock to protect concurrency between loading from disk (W), updating
  113. the values (W), and making a performance estimation (R).
  114. \struct starpu_perfmodel_regression_model
  115. ...
  116. \ingroup API_Performance_Model
  117. \var starpu_perfmodel_regression_model::sumlny
  118. sum of ln(measured)
  119. \var starpu_perfmodel_regression_model::sumlnx
  120. sum of ln(size)
  121. \var starpu_perfmodel_regression_model::sumlnx2
  122. sum of ln(size)^2
  123. \var starpu_perfmodel_regression_model::minx
  124. minimum size
  125. \var starpu_perfmodel_regression_model::maxx
  126. maximum size
  127. \var starpu_perfmodel_regression_model::sumlnxlny
  128. sum of ln(size)*ln(measured)
  129. \var starpu_perfmodel_regression_model::alpha
  130. estimated = alpha * size ^ beta
  131. \var starpu_perfmodel_regression_model::beta
  132. estimated = alpha * size ^ beta
  133. \var starpu_perfmodel_regression_model::valid
  134. whether the linear regression model is valid (i.e. enough measures)
  135. \var starpu_perfmodel_regression_model::a
  136. estimated = a * size ^ b + c
  137. \var starpu_perfmodel_regression_model::b
  138. estimated = a * size ^ b + c
  139. \var starpu_perfmodel_regression_model::c
  140. estimated = a * size ^ b + c
  141. \var starpu_perfmodel_regression_model::nl_valid
  142. whether the non-linear regression model is valid (i.e. enough measures)
  143. \var starpu_perfmodel_regression_model::nsample
  144. number of sample values for non-linear regression
  145. \struct starpu_perfmodel_per_arch
  146. contains information about the performance model of a given
  147. arch.
  148. \ingroup API_Performance_Model
  149. \var starpu_perfmodel_per_arch::cost_model
  150. \deprecated
  151. This field is deprecated. Use instead the field
  152. starpu_perfmodel_per_arch::cost_function.
  153. \var starpu_perfmodel_per_arch::cost_function
  154. Used by ::STARPU_PER_ARCH, must point to functions which take a task,
  155. the target arch and implementation number (as a mere convenience, since
  156. the array is already indexed by these), and must return a task
  157. duration estimation in micro-seconds.
  158. \var starpu_perfmodel_per_arch::size_base
  159. Same as in structure starpu_perfmodel, but per-arch, in case it
  160. depends on the architecture-specific implementation.
  161. \var starpu_perfmodel_per_arch::history
  162. \private
  163. The history of performance measurements.
  164. \var starpu_perfmodel_per_arch::list
  165. \private
  166. Used by ::STARPU_HISTORY_BASED and ::STARPU_NL_REGRESSION_BASED,
  167. records all execution history measures.
  168. \var starpu_perfmodel_per_arch::regression
  169. \private
  170. Used by ::STARPU_REGRESSION_BASED and
  171. ::STARPU_NL_REGRESSION_BASED, contains the estimated factors of the
  172. regression.
  173. \struct starpu_perfmodel_history_list
  174. todo
  175. \ingroup API_Performance_Model
  176. \var starpu_perfmodel_history_list::next
  177. todo
  178. \var starpu_perfmodel_history_list::entry
  179. todo
  180. \struct starpu_perfmodel_history_entry
  181. todo
  182. \ingroup API_Performance_Model
  183. \var starpu_perfmodel_history_entry::mean
  184. mean_n = 1/n sum
  185. \var starpu_perfmodel_history_entry::deviation
  186. n dev_n = sum2 - 1/n (sum)^2
  187. \var starpu_perfmodel_history_entry::sum
  188. sum of samples
  189. \var starpu_perfmodel_history_entry::sum2
  190. sum of samples^2
  191. \var starpu_perfmodel_history_entry::nsample
  192. number of samples
  193. \var starpu_perfmodel_history_entry::footprint
  194. todo
  195. \var starpu_perfmodel_history_entry::size
  196. in bytes
  197. \var starpu_perfmodel_history_entry::flops
  198. Provided by the application
  199. \fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
  200. \ingroup API_Performance_Model
  201. loads a given performance model. The model structure has to be
  202. completely zero, and will be filled with the information saved in
  203. <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
  204. external tools that should read the performance model files.
  205. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
  206. \ingroup API_Performance_Model
  207. unloads the given model which has been previously loaded
  208. through the function starpu_perfmodel_load_symbol()
  209. \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, char *path, size_t maxlen, unsigned nimpl)
  210. \ingroup API_Performance_Model
  211. returns the path to the debugging information for the performance model.
  212. \fn void starpu_perfmodel_get_arch_name(enum starpu_perfmodel_archtype arch, char *archname, size_t maxlen, unsigned nimpl)
  213. \ingroup API_Performance_Model
  214. returns the architecture name for \p arch
  215. \fn enum starpu_perfmodel_archtype starpu_worker_get_perf_archtype(int workerid)
  216. \ingroup API_Performance_Model
  217. returns the architecture type of a given worker.
  218. \fn int starpu_perfmodel_list(FILE *output)
  219. \ingroup API_Performance_Model
  220. prints a list of all performance models on \p output
  221. \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
  222. \ingroup API_Performance_Model
  223. todo
  224. \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
  225. \ingroup API_Performance_Model
  226. todo
  227. \fn void starpu_bus_print_bandwidth(FILE *f)
  228. \ingroup API_Performance_Model
  229. prints a matrix of bus bandwidths on \p f.
  230. \fn void starpu_bus_print_affinity(FILE *f)
  231. \ingroup API_Performance_Model
  232. prints the affinity devices on \p f.
  233. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, enum starpu_perfmodel_archtype arch, unsigned cpuid, unsigned nimpl, double measured)
  234. \ingroup API_Performance_Model
  235. This feeds the performance model \p model with an explicit
  236. measurement \p measured, in addition to measurements done by StarPU
  237. itself. This can be useful when the application already has an
  238. existing set of measurements done in good conditions, that StarPU
  239. could benefit from instead of doing on-line measurements. An example
  240. of use can be seen in \ref PerformanceModelExample.
  241. \fn double starpu_get_bandwidth_RAM_CUDA(unsigned cudadev)
  242. \ingroup API_Performance_Model
  243. Used to compute the velocity of resources
  244. \fn double starpu_get_latency_RAM_CUDA(unsigned cudadev)
  245. \ingroup API_Performance_Model
  246. Used to compute the velocity of resources
  247. */