starpu_sgemm_gemm.idgraf 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. ##################
  2. # Performance Model Version
  3. 45
  4. ####################
  5. # COMBs
  6. # number of combinations
  7. 9
  8. ####################
  9. # COMB_2
  10. # number of types devices
  11. 1
  12. ####################
  13. # DEV_0
  14. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  15. 1
  16. ####################
  17. # DEV_0
  18. # device id
  19. 0
  20. ####################
  21. # DEV_0
  22. # number of cores
  23. 1
  24. ##########
  25. # number of implementations
  26. 1
  27. #####
  28. # Model for cuda0_impl0 (Comb2)
  29. # number of entries
  30. 7
  31. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  32. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  33. # a b c
  34. nan nan nan
  35. # not multiple-regression-base
  36. 0
  37. # hash size flops mean (us) dev (us) sum sum2 n
  38. 0b0b0ce8 3686400 2.621440e+08 6.801013e+02 7.013561e+01 4.760709e+04 3.272198e+07 70
  39. 4220e23d 14745600 2.097152e+09 5.623635e+03 5.419920e+02 4.442672e+05 2.521603e+09 79
  40. 492beed5 33177600 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  41. 9c6670ef 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  42. c00cf6b7 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  43. 78a2cc08 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87
  44. 24c84a50 11059200 1.769472e+09 2.875903e+03 1.471204e+02 2.502035e+05 7.214438e+08 87
  45. ####################
  46. # COMB_4
  47. # number of types devices
  48. 1
  49. ####################
  50. # DEV_0
  51. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  52. 1
  53. ####################
  54. # DEV_0
  55. # device id
  56. 1
  57. ####################
  58. # DEV_0
  59. # number of cores
  60. 1
  61. ##########
  62. # number of implementations
  63. 1
  64. #####
  65. # Model for cuda1_impl0 (Comb4)
  66. # number of entries
  67. 7
  68. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  69. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  70. # a b c
  71. nan nan nan
  72. # not multiple-regression-base
  73. 0
  74. # hash size flops mean (us) dev (us) sum sum2 n
  75. 0b0b0ce8 3686400 2.621440e+08 6.717051e+02 6.137607e+01 4.500424e+04 3.048197e+07 67
  76. 4220e23d 14745600 2.097152e+09 5.648275e+03 4.677390e+02 4.575103e+05 2.601865e+09 81
  77. 492beed5 33177600 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  78. 9c6670ef 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  79. c00cf6b7 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  80. 78a2cc08 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88
  81. 24c84a50 11059200 1.769472e+09 2.892550e+03 1.630257e+02 2.545445e+05 7.386219e+08 88
  82. ####################
  83. # COMB_6
  84. # number of types devices
  85. 1
  86. ####################
  87. # DEV_0
  88. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  89. 1
  90. ####################
  91. # DEV_0
  92. # device id
  93. 2
  94. ####################
  95. # DEV_0
  96. # number of cores
  97. 1
  98. ##########
  99. # number of implementations
  100. 1
  101. #####
  102. # Model for cuda2_impl0 (Comb6)
  103. # number of entries
  104. 7
  105. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  106. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  107. # a b c
  108. nan nan nan
  109. # not multiple-regression-base
  110. 0
  111. # hash size flops mean (us) dev (us) sum sum2 n
  112. 0b0b0ce8 3686400 2.621440e+08 6.265559e+02 5.536840e+01 4.824481e+04 3.046412e+07 77
  113. 4220e23d 14745600 2.097152e+09 5.631203e+03 4.767455e+02 4.561275e+05 2.586957e+09 81
  114. 492beed5 33177600 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  115. 9c6670ef 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  116. c00cf6b7 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  117. 78a2cc08 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88
  118. 24c84a50 11059200 1.769472e+09 2.907065e+03 1.689325e+02 2.558215e+05 7.462012e+08 88
  119. ####################
  120. # COMB_7
  121. # number of types devices
  122. 1
  123. ####################
  124. # DEV_0
  125. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  126. 1
  127. ####################
  128. # DEV_0
  129. # device id
  130. 3
  131. ####################
  132. # DEV_0
  133. # number of cores
  134. 1
  135. ##########
  136. # number of implementations
  137. 1
  138. #####
  139. # Model for cuda3_impl0 (Comb7)
  140. # number of entries
  141. 7
  142. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  143. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  144. # a b c
  145. nan nan nan
  146. # not multiple-regression-base
  147. 0
  148. # hash size flops mean (us) dev (us) sum sum2 n
  149. 0b0b0ce8 3686400 2.621440e+08 6.780899e+02 4.241206e+01 4.543202e+04 3.092751e+07 67
  150. 4220e23d 14745600 2.097152e+09 5.857201e+03 8.346836e+02 4.744333e+05 2.835284e+09 81
  151. 492beed5 33177600 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  152. 9c6670ef 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  153. c00cf6b7 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  154. 78a2cc08 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86
  155. 24c84a50 11059200 1.769472e+09 2.876245e+03 1.063523e+02 2.473571e+05 7.124325e+08 86
  156. ####################
  157. # COMB_0
  158. # number of types devices
  159. 1
  160. ####################
  161. # DEV_0
  162. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  163. 1
  164. ####################
  165. # DEV_0
  166. # device id
  167. 4
  168. ####################
  169. # DEV_0
  170. # number of cores
  171. 1
  172. ##########
  173. # number of implementations
  174. 1
  175. #####
  176. # Model for cuda4_impl0 (Comb0)
  177. # number of entries
  178. 7
  179. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  180. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  181. # a b c
  182. nan nan nan
  183. # not multiple-regression-base
  184. 0
  185. # hash size flops mean (us) dev (us) sum sum2 n
  186. 0b0b0ce8 3686400 2.621440e+08 6.759139e+02 4.092799e+01 4.190666e+04 2.842915e+07 62
  187. 4220e23d 14745600 2.097152e+09 5.527477e+03 2.733928e+02 4.421982e+05 2.450220e+09 80
  188. 492beed5 33177600 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  189. 9c6670ef 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  190. c00cf6b7 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  191. 78a2cc08 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96
  192. 24c84a50 11059200 1.769472e+09 2.866925e+03 4.422272e+01 2.752248e+05 7.892362e+08 96
  193. ####################
  194. # COMB_1
  195. # number of types devices
  196. 1
  197. ####################
  198. # DEV_0
  199. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  200. 1
  201. ####################
  202. # DEV_0
  203. # device id
  204. 5
  205. ####################
  206. # DEV_0
  207. # number of cores
  208. 1
  209. ##########
  210. # number of implementations
  211. 1
  212. #####
  213. # Model for cuda5_impl0 (Comb1)
  214. # number of entries
  215. 7
  216. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  217. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  218. # a b c
  219. nan nan nan
  220. # not multiple-regression-base
  221. 0
  222. # hash size flops mean (us) dev (us) sum sum2 n
  223. 0b0b0ce8 3686400 2.621440e+08 6.339465e+02 7.125158e+01 4.184047e+04 2.685969e+07 66
  224. 4220e23d 14745600 2.097152e+09 5.624130e+03 4.755864e+02 4.668028e+05 2.644133e+09 83
  225. 492beed5 33177600 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  226. 9c6670ef 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  227. c00cf6b7 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  228. 78a2cc08 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97
  229. 24c84a50 11059200 1.769472e+09 2.872755e+03 1.343797e+02 2.786572e+05 8.022656e+08 97
  230. ####################
  231. # COMB_3
  232. # number of types devices
  233. 1
  234. ####################
  235. # DEV_0
  236. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  237. 1
  238. ####################
  239. # DEV_0
  240. # device id
  241. 6
  242. ####################
  243. # DEV_0
  244. # number of cores
  245. 1
  246. ##########
  247. # number of implementations
  248. 1
  249. #####
  250. # Model for cuda6_impl0 (Comb3)
  251. # number of entries
  252. 7
  253. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  254. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  255. # a b c
  256. nan nan nan
  257. # not multiple-regression-base
  258. 0
  259. # hash size flops mean (us) dev (us) sum sum2 n
  260. 0b0b0ce8 3686400 2.621440e+08 6.389750e+02 8.615382e+01 4.728415e+04 3.076266e+07 74
  261. 4220e23d 14745600 2.097152e+09 5.648331e+03 5.220897e+02 4.631632e+05 2.638450e+09 82
  262. 492beed5 33177600 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  263. 9c6670ef 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  264. c00cf6b7 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  265. 78a2cc08 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96
  266. 24c84a50 11059200 1.769472e+09 2.887673e+03 1.415212e+02 2.772165e+05 8.024331e+08 96
  267. ####################
  268. # COMB_5
  269. # number of types devices
  270. 1
  271. ####################
  272. # DEV_0
  273. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  274. 1
  275. ####################
  276. # DEV_0
  277. # device id
  278. 7
  279. ####################
  280. # DEV_0
  281. # number of cores
  282. 1
  283. ##########
  284. # number of implementations
  285. 1
  286. #####
  287. # Model for cuda7_impl0 (Comb5)
  288. # number of entries
  289. 7
  290. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  291. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  292. # a b c
  293. nan nan nan
  294. # not multiple-regression-base
  295. 0
  296. # hash size flops mean (us) dev (us) sum sum2 n
  297. 0b0b0ce8 3686400 2.621440e+08 6.386625e+02 8.094896e+01 4.342905e+04 2.818209e+07 68
  298. 4220e23d 14745600 2.097152e+09 5.638657e+03 3.709019e+02 4.454539e+05 2.522630e+09 79
  299. 492beed5 33177600 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  300. 9c6670ef 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  301. c00cf6b7 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  302. 78a2cc08 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97
  303. 24c84a50 11059200 1.769472e+09 2.860030e+03 6.327770e+01 2.774228e+05 7.938262e+08 97
  304. ####################
  305. # COMB_8
  306. # number of types devices
  307. 1
  308. ####################
  309. # DEV_0
  310. # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
  311. 0
  312. ####################
  313. # DEV_0
  314. # device id
  315. 0
  316. ####################
  317. # DEV_0
  318. # number of cores
  319. 1
  320. ##########
  321. # number of implementations
  322. 1
  323. #####
  324. # Model for cpu0_impl0 (Comb8)
  325. # number of entries
  326. 7
  327. # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx
  328. 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0
  329. # a b c
  330. nan nan nan
  331. # not multiple-regression-base
  332. 0
  333. # hash size flops mean (us) dev (us) sum sum2 n
  334. 0b0b0ce8 3686400 2.621440e+08 1.414338e+04 6.441210e+02 3.535844e+05 5.011251e+09 25
  335. 4220e23d 14745600 2.097152e+09 1.091117e+05 2.701159e+03 3.382462e+06 3.692924e+11 31
  336. 492beed5 33177600 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  337. 9c6670ef 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  338. c00cf6b7 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  339. 78a2cc08 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23
  340. 24c84a50 11059200 1.769472e+09 9.053390e+04 1.941152e+03 2.082280e+06 1.886036e+11 23