starpu_sgemm_gemm.idgraf 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. ##################
  2. # Performance Model Version
  3. 45
  4. ####################
  5. # COMBs
  6. # number of combinations
  7. 9
  8. ####################
  9. # COMB_2
  10. # number of types devices
  11. 1
  12. ####################
  13. # DEV_0
  14. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  15. 1
  16. ####################
  17. # DEV_0
  18. # device id
  19. 0
  20. ####################
  21. # DEV_0
  22. # number of cores
  23. 1
  24. ##########
  25. # number of implementations
  26. 1
  27. #####
  28. # Model for cuda0_impl0 (Comb2)
  29. # number of entries
  30. 8
  31. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  32. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  33. # a b c
  34. nan nan nan
  35. # not multiple-regression-base
  36. 0
  37. # hash size flops mean (us) dev (us) sum sum2 n
  38. 0b0b0ce8 3686400 2.621440e+08 6.801013e+02 7.013561e+01 4.760709e+04 3.272198e+07 70
  39. 4220e23d 14745600 2.097152e+09 5.623635e+03 5.419920e+02 4.442672e+05 2.521603e+09 79
  40. 492beed5 33177600 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  41. 9c6670ef 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  42. c00cf6b7 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  43. 78a2cc08 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  44. a7cdf15b 44236800 1.415578e+10 2.300722e+04 1.176963e+03 2.001628e+06 4.617240e+10 87
  45. 24c84a50 11059200 1.769472e+09 2.875903e+03 1.471204e+02 2.502035e+05 7.214438e+08 87
  46. ####################
  47. # COMB_4
  48. # number of types devices
  49. 1
  50. ####################
  51. # DEV_0
  52. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  53. 1
  54. ####################
  55. # DEV_0
  56. # device id
  57. 1
  58. ####################
  59. # DEV_0
  60. # number of cores
  61. 1
  62. ##########
  63. # number of implementations
  64. 1
  65. #####
  66. # Model for cuda1_impl0 (Comb4)
  67. # number of entries
  68. 8
  69. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  70. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  71. # a b c
  72. nan nan nan
  73. # not multiple-regression-base
  74. 0
  75. # hash size flops mean (us) dev (us) sum sum2 n
  76. 0b0b0ce8 3686400 2.621440e+08 6.717051e+02 6.137607e+01 4.500424e+04 3.048197e+07 67
  77. 4220e23d 14745600 2.097152e+09 5.648275e+03 4.677390e+02 4.575103e+05 2.601865e+09 81
  78. 492beed5 33177600 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  79. 9c6670ef 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  80. c00cf6b7 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  81. 78a2cc08 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  82. a7cdf15b 44236800 1.415578e+10 2.314040e+04 1.304205e+03 2.036356e+06 4.727180e+10 88
  83. 24c84a50 11059200 1.769472e+09 2.892550e+03 1.630257e+02 2.545445e+05 7.386219e+08 88
  84. ####################
  85. # COMB_6
  86. # number of types devices
  87. 1
  88. ####################
  89. # DEV_0
  90. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  91. 1
  92. ####################
  93. # DEV_0
  94. # device id
  95. 2
  96. ####################
  97. # DEV_0
  98. # number of cores
  99. 1
  100. ##########
  101. # number of implementations
  102. 1
  103. #####
  104. # Model for cuda2_impl0 (Comb6)
  105. # number of entries
  106. 8
  107. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  108. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  109. # a b c
  110. nan nan nan
  111. # not multiple-regression-base
  112. 0
  113. # hash size flops mean (us) dev (us) sum sum2 n
  114. 0b0b0ce8 3686400 2.621440e+08 6.265559e+02 5.536840e+01 4.824481e+04 3.046412e+07 77
  115. 4220e23d 14745600 2.097152e+09 5.631203e+03 4.767455e+02 4.561275e+05 2.586957e+09 81
  116. 492beed5 33177600 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  117. 9c6670ef 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  118. c00cf6b7 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  119. 78a2cc08 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  120. a7cdf15b 44236800 1.415578e+10 2.325652e+04 1.351460e+03 2.046572e+06 4.775688e+10 88
  121. 24c84a50 11059200 1.769472e+09 2.907065e+03 1.689325e+02 2.558215e+05 7.462012e+08 88
  122. ####################
  123. # COMB_7
  124. # number of types devices
  125. 1
  126. ####################
  127. # DEV_0
  128. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  129. 1
  130. ####################
  131. # DEV_0
  132. # device id
  133. 3
  134. ####################
  135. # DEV_0
  136. # number of cores
  137. 1
  138. ##########
  139. # number of implementations
  140. 1
  141. #####
  142. # Model for cuda3_impl0 (Comb7)
  143. # number of entries
  144. 8
  145. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  146. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  147. # a b c
  148. nan nan nan
  149. # not multiple-regression-base
  150. 0
  151. # hash size flops mean (us) dev (us) sum sum2 n
  152. 0b0b0ce8 3686400 2.621440e+08 6.780899e+02 4.241206e+01 4.543202e+04 3.092751e+07 67
  153. 4220e23d 14745600 2.097152e+09 5.857201e+03 8.346836e+02 4.744333e+05 2.835284e+09 81
  154. 492beed5 33177600 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  155. 9c6670ef 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  156. c00cf6b7 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  157. 78a2cc08 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  158. a7cdf15b 44236800 1.415578e+10 2.300996e+04 8.508186e+02 1.978857e+06 4.559568e+10 86
  159. 24c84a50 11059200 1.769472e+09 2.876245e+03 1.063523e+02 2.473571e+05 7.124325e+08 86
  160. ####################
  161. # COMB_0
  162. # number of types devices
  163. 1
  164. ####################
  165. # DEV_0
  166. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  167. 1
  168. ####################
  169. # DEV_0
  170. # device id
  171. 4
  172. ####################
  173. # DEV_0
  174. # number of cores
  175. 1
  176. ##########
  177. # number of implementations
  178. 1
  179. #####
  180. # Model for cuda4_impl0 (Comb0)
  181. # number of entries
  182. 8
  183. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  184. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  185. # a b c
  186. nan nan nan
  187. # not multiple-regression-base
  188. 0
  189. # hash size flops mean (us) dev (us) sum sum2 n
  190. 0b0b0ce8 3686400 2.621440e+08 6.759139e+02 4.092799e+01 4.190666e+04 2.842915e+07 62
  191. 4220e23d 14745600 2.097152e+09 5.527477e+03 2.733928e+02 4.421982e+05 2.450220e+09 80
  192. 492beed5 33177600 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  193. 9c6670ef 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  194. c00cf6b7 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  195. 78a2cc08 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  196. a7cdf15b 44236800 1.415578e+10 2.293540e+04 3.537818e+02 2.201798e+06 5.051112e+10 96
  197. 24c84a50 11059200 1.769472e+09 2.866925e+03 4.422272e+01 2.752248e+05 7.892362e+08 96
  198. ####################
  199. # COMB_1
  200. # number of types devices
  201. 1
  202. ####################
  203. # DEV_0
  204. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  205. 1
  206. ####################
  207. # DEV_0
  208. # device id
  209. 5
  210. ####################
  211. # DEV_0
  212. # number of cores
  213. 1
  214. ##########
  215. # number of implementations
  216. 1
  217. #####
  218. # Model for cuda5_impl0 (Comb1)
  219. # number of entries
  220. 8
  221. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  222. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  223. # a b c
  224. nan nan nan
  225. # not multiple-regression-base
  226. 0
  227. # hash size flops mean (us) dev (us) sum sum2 n
  228. 0b0b0ce8 3686400 2.621440e+08 6.339465e+02 7.125158e+01 4.184047e+04 2.685969e+07 66
  229. 4220e23d 14745600 2.097152e+09 5.624130e+03 4.755864e+02 4.668028e+05 2.644133e+09 83
  230. 492beed5 33177600 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  231. 9c6670ef 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  232. c00cf6b7 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  233. 78a2cc08 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  234. a7cdf15b 44236800 1.415578e+10 2.298204e+04 1.075038e+03 2.229258e+06 5.134500e+10 97
  235. 24c84a50 11059200 1.769472e+09 2.872755e+03 1.343797e+02 2.786572e+05 8.022656e+08 97
  236. ####################
  237. # COMB_3
  238. # number of types devices
  239. 1
  240. ####################
  241. # DEV_0
  242. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  243. 1
  244. ####################
  245. # DEV_0
  246. # device id
  247. 6
  248. ####################
  249. # DEV_0
  250. # number of cores
  251. 1
  252. ##########
  253. # number of implementations
  254. 1
  255. #####
  256. # Model for cuda6_impl0 (Comb3)
  257. # number of entries
  258. 8
  259. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  260. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  261. # a b c
  262. nan nan nan
  263. # not multiple-regression-base
  264. 0
  265. # hash size flops mean (us) dev (us) sum sum2 n
  266. 0b0b0ce8 3686400 2.621440e+08 6.389750e+02 8.615382e+01 4.728415e+04 3.076266e+07 74
  267. 4220e23d 14745600 2.097152e+09 5.648331e+03 5.220897e+02 4.631632e+05 2.638450e+09 82
  268. 492beed5 33177600 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  269. 9c6670ef 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  270. c00cf6b7 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  271. 78a2cc08 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  272. a7cdf15b 44236800 1.415578e+10 2.310138e+04 1.132169e+03 2.217732e+06 5.135572e+10 96
  273. 24c84a50 11059200 1.769472e+09 2.887673e+03 1.415212e+02 2.772165e+05 8.024331e+08 96
  274. ####################
  275. # COMB_5
  276. # number of types devices
  277. 1
  278. ####################
  279. # DEV_0
  280. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  281. 1
  282. ####################
  283. # DEV_0
  284. # device id
  285. 7
  286. ####################
  287. # DEV_0
  288. # number of cores
  289. 1
  290. ##########
  291. # number of implementations
  292. 1
  293. #####
  294. # Model for cuda7_impl0 (Comb5)
  295. # number of entries
  296. 8
  297. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  298. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  299. # a b c
  300. nan nan nan
  301. # not multiple-regression-base
  302. 0
  303. # hash size flops mean (us) dev (us) sum sum2 n
  304. 0b0b0ce8 3686400 2.621440e+08 6.386625e+02 8.094896e+01 4.342905e+04 2.818209e+07 68
  305. 4220e23d 14745600 2.097152e+09 5.638657e+03 3.709019e+02 4.454539e+05 2.522630e+09 79
  306. 492beed5 33177600 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  307. 9c6670ef 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  308. c00cf6b7 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  309. 78a2cc08 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  310. a7cdf15b 44236800 1.415578e+10 2.288024e+04 5.062216e+02 2.219382e+06 5.080488e+10 97
  311. 24c84a50 11059200 1.769472e+09 2.860030e+03 6.327770e+01 2.774228e+05 7.938262e+08 97
  312. ####################
  313. # COMB_8
  314. # number of types devices
  315. 1
  316. ####################
  317. # DEV_0
  318. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  319. 0
  320. ####################
  321. # DEV_0
  322. # device id
  323. 0
  324. ####################
  325. # DEV_0
  326. # number of cores
  327. 1
  328. ##########
  329. # number of implementations
  330. 1
  331. #####
  332. # Model for cpu0_impl0 (Comb8)
  333. # number of entries
  334. 8
  335. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  336. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  337. # a b c
  338. nan nan nan
  339. # not multiple-regression-base
  340. 0
  341. # hash size flops mean (us) dev (us) sum sum2 n
  342. 0b0b0ce8 3686400 2.621440e+08 1.414338e+04 6.441210e+02 3.535844e+05 5.011251e+09 25
  343. 4220e23d 14745600 2.097152e+09 1.091117e+05 2.701159e+03 3.382462e+06 3.692924e+11 31
  344. 492beed5 33177600 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  345. 9c6670ef 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  346. c00cf6b7 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  347. 78a2cc08 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  348. a7cdf15b 44236800 1.415578e+10 7.242712e+05 1.552922e+04 1.665824e+07 1.207063e+13 23
  349. 24c84a50 11059200 1.769472e+09 9.053390e+04 1.941152e+03 2.082280e+06 1.886036e+11 23