starpu_sgemm_gemm.idgraf 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. ##################
  2. # Performance Model Version
  3. 45
  4. ####################
  5. # COMBs
  6. # number of combinations
  7. 9
  8. ####################
  9. # COMB_2
  10. # number of types devices
  11. 1
  12. ####################
  13. # DEV_0
  14. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  15. 1
  16. ####################
  17. # DEV_0
  18. # device id
  19. 0
  20. ####################
  21. # DEV_0
  22. # number of cores
  23. 1
  24. ##########
  25. # number of implementations
  26. 1
  27. #####
  28. # Model for cuda0_impl0 (Comb2)
  29. # number of entries
  30. 4
  31. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  32. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  33. # a b c
  34. nan nan nan
  35. # not multiple-regression-base
  36. 0
  37. # hash size flops mean (us) dev (us) sum sum2 n
  38. 0b0b0ce8 3686400 2.621440e+08 6.801013e+02 7.013561e+01 4.760709e+04 3.272198e+07 70
  39. 4220e23d 14745600 2.097152e+09 5.623635e+03 5.419920e+02 4.442672e+05 2.521603e+09 79
  40. 492beed5 33177600 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  41. 24c84a50 11059200 1.769472e+09 2.875903e+03 1.471204e+02 2.502035e+05 7.214438e+08 87
  42. ####################
  43. # COMB_4
  44. # number of types devices
  45. 1
  46. ####################
  47. # DEV_0
  48. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  49. 1
  50. ####################
  51. # DEV_0
  52. # device id
  53. 1
  54. ####################
  55. # DEV_0
  56. # number of cores
  57. 1
  58. ##########
  59. # number of implementations
  60. 1
  61. #####
  62. # Model for cuda1_impl0 (Comb4)
  63. # number of entries
  64. 4
  65. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  66. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  67. # a b c
  68. nan nan nan
  69. # not multiple-regression-base
  70. 0
  71. # hash size flops mean (us) dev (us) sum sum2 n
  72. 0b0b0ce8 3686400 2.621440e+08 6.717051e+02 6.137607e+01 4.500424e+04 3.048197e+07 67
  73. 4220e23d 14745600 2.097152e+09 5.648275e+03 4.677390e+02 4.575103e+05 2.601865e+09 81
  74. 492beed5 33177600 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  75. 24c84a50 11059200 1.769472e+09 2.892550e+03 1.630257e+02 2.545445e+05 7.386219e+08 88
  76. ####################
  77. # COMB_6
  78. # number of types devices
  79. 1
  80. ####################
  81. # DEV_0
  82. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  83. 1
  84. ####################
  85. # DEV_0
  86. # device id
  87. 2
  88. ####################
  89. # DEV_0
  90. # number of cores
  91. 1
  92. ##########
  93. # number of implementations
  94. 1
  95. #####
  96. # Model for cuda2_impl0 (Comb6)
  97. # number of entries
  98. 4
  99. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  100. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  101. # a b c
  102. nan nan nan
  103. # not multiple-regression-base
  104. 0
  105. # hash size flops mean (us) dev (us) sum sum2 n
  106. 0b0b0ce8 3686400 2.621440e+08 6.265559e+02 5.536840e+01 4.824481e+04 3.046412e+07 77
  107. 4220e23d 14745600 2.097152e+09 5.631203e+03 4.767455e+02 4.561275e+05 2.586957e+09 81
  108. 492beed5 33177600 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  109. 24c84a50 11059200 1.769472e+09 2.907065e+03 1.689325e+02 2.558215e+05 7.462012e+08 88
  110. ####################
  111. # COMB_7
  112. # number of types devices
  113. 1
  114. ####################
  115. # DEV_0
  116. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  117. 1
  118. ####################
  119. # DEV_0
  120. # device id
  121. 3
  122. ####################
  123. # DEV_0
  124. # number of cores
  125. 1
  126. ##########
  127. # number of implementations
  128. 1
  129. #####
  130. # Model for cuda3_impl0 (Comb7)
  131. # number of entries
  132. 4
  133. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  134. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  135. # a b c
  136. nan nan nan
  137. # not multiple-regression-base
  138. 0
  139. # hash size flops mean (us) dev (us) sum sum2 n
  140. 0b0b0ce8 3686400 2.621440e+08 6.780899e+02 4.241206e+01 4.543202e+04 3.092751e+07 67
  141. 4220e23d 14745600 2.097152e+09 5.857201e+03 8.346836e+02 4.744333e+05 2.835284e+09 81
  142. 492beed5 33177600 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  143. 24c84a50 11059200 1.769472e+09 2.876245e+03 1.063523e+02 2.473571e+05 7.124325e+08 86
  144. ####################
  145. # COMB_0
  146. # number of types devices
  147. 1
  148. ####################
  149. # DEV_0
  150. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  151. 1
  152. ####################
  153. # DEV_0
  154. # device id
  155. 4
  156. ####################
  157. # DEV_0
  158. # number of cores
  159. 1
  160. ##########
  161. # number of implementations
  162. 1
  163. #####
  164. # Model for cuda4_impl0 (Comb0)
  165. # number of entries
  166. 4
  167. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  168. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  169. # a b c
  170. nan nan nan
  171. # not multiple-regression-base
  172. 0
  173. # hash size flops mean (us) dev (us) sum sum2 n
  174. 0b0b0ce8 3686400 2.621440e+08 6.759139e+02 4.092799e+01 4.190666e+04 2.842915e+07 62
  175. 4220e23d 14745600 2.097152e+09 5.527477e+03 2.733928e+02 4.421982e+05 2.450220e+09 80
  176. 492beed5 33177600 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  177. 24c84a50 11059200 1.769472e+09 2.866925e+03 4.422272e+01 2.752248e+05 7.892362e+08 96
  178. ####################
  179. # COMB_1
  180. # number of types devices
  181. 1
  182. ####################
  183. # DEV_0
  184. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  185. 1
  186. ####################
  187. # DEV_0
  188. # device id
  189. 5
  190. ####################
  191. # DEV_0
  192. # number of cores
  193. 1
  194. ##########
  195. # number of implementations
  196. 1
  197. #####
  198. # Model for cuda5_impl0 (Comb1)
  199. # number of entries
  200. 4
  201. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  202. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  203. # a b c
  204. nan nan nan
  205. # not multiple-regression-base
  206. 0
  207. # hash size flops mean (us) dev (us) sum sum2 n
  208. 0b0b0ce8 3686400 2.621440e+08 6.339465e+02 7.125158e+01 4.184047e+04 2.685969e+07 66
  209. 4220e23d 14745600 2.097152e+09 5.624130e+03 4.755864e+02 4.668028e+05 2.644133e+09 83
  210. 492beed5 33177600 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  211. 24c84a50 11059200 1.769472e+09 2.872755e+03 1.343797e+02 2.786572e+05 8.022656e+08 97
  212. ####################
  213. # COMB_3
  214. # number of types devices
  215. 1
  216. ####################
  217. # DEV_0
  218. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  219. 1
  220. ####################
  221. # DEV_0
  222. # device id
  223. 6
  224. ####################
  225. # DEV_0
  226. # number of cores
  227. 1
  228. ##########
  229. # number of implementations
  230. 1
  231. #####
  232. # Model for cuda6_impl0 (Comb3)
  233. # number of entries
  234. 4
  235. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  236. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  237. # a b c
  238. nan nan nan
  239. # not multiple-regression-base
  240. 0
  241. # hash size flops mean (us) dev (us) sum sum2 n
  242. 0b0b0ce8 3686400 2.621440e+08 6.389750e+02 8.615382e+01 4.728415e+04 3.076266e+07 74
  243. 4220e23d 14745600 2.097152e+09 5.648331e+03 5.220897e+02 4.631632e+05 2.638450e+09 82
  244. 492beed5 33177600 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  245. 24c84a50 11059200 1.769472e+09 2.887673e+03 1.415212e+02 2.772165e+05 8.024331e+08 96
  246. ####################
  247. # COMB_5
  248. # number of types devices
  249. 1
  250. ####################
  251. # DEV_0
  252. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  253. 1
  254. ####################
  255. # DEV_0
  256. # device id
  257. 7
  258. ####################
  259. # DEV_0
  260. # number of cores
  261. 1
  262. ##########
  263. # number of implementations
  264. 1
  265. #####
  266. # Model for cuda7_impl0 (Comb5)
  267. # number of entries
  268. 4
  269. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  270. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  271. # a b c
  272. nan nan nan
  273. # not multiple-regression-base
  274. 0
  275. # hash size flops mean (us) dev (us) sum sum2 n
  276. 0b0b0ce8 3686400 2.621440e+08 6.386625e+02 8.094896e+01 4.342905e+04 2.818209e+07 68
  277. 4220e23d 14745600 2.097152e+09 5.638657e+03 3.709019e+02 4.454539e+05 2.522630e+09 79
  278. 492beed5 33177600 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  279. 24c84a50 11059200 1.769472e+09 2.860030e+03 6.327770e+01 2.774228e+05 7.938262e+08 97
  280. ####################
  281. # COMB_8
  282. # number of types devices
  283. 1
  284. ####################
  285. # DEV_0
  286. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  287. 0
  288. ####################
  289. # DEV_0
  290. # device id
  291. 0
  292. ####################
  293. # DEV_0
  294. # number of cores
  295. 1
  296. ##########
  297. # number of implementations
  298. 1
  299. #####
  300. # Model for cpu0_impl0 (Comb8)
  301. # number of entries
  302. 4
  303. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  304. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  305. # a b c
  306. nan nan nan
  307. # not multiple-regression-base
  308. 0
  309. # hash size flops mean (us) dev (us) sum sum2 n
  310. 0b0b0ce8 3686400 2.621440e+08 1.414338e+04 6.441210e+02 3.535844e+05 5.011251e+09 25
  311. 4220e23d 14745600 2.097152e+09 1.091117e+05 2.701159e+03 3.382462e+06 3.692924e+11 31
  312. 492beed5 33177600 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  313. 24c84a50 11059200 1.769472e+09 9.053390e+04 1.941152e+03 2.082280e+06 1.886036e+11 23