Explorar o código

Add 1440 blocksize perfmodels for sirocco

Samuel Thibault %!s(int64=9) %!d(string=hai) anos
pai
achega
bae07be5f1

+ 1 - 1
doc/doxygen/chapters/01building.doxy

@@ -363,7 +363,7 @@ system. It will be interesting to try with different matrix sizes and
 schedulers.
 
 Performance models are available for cholesky_*, lu_*, *gemm, with block sizes
-320, 640, or 960, and for stencil with block size 128x128x128, 192x192x192, and
+320, 640, or 960 (plus 1440 for sirocco), and for stencil with block size 128x128x128, 192x192x192, and
 256x256x256.
 
 */

+ 16 - 11
tools/perfmodels/sampling/codelets/44/chol_model_11.sirocco

@@ -28,18 +28,19 @@
 #####
 # Model for cpu0_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	8294400        	9.963650e+08   	1.842284e+04   	2.612920e+03   	1.271176e+06   	2.388975e+10   	69
 617e5fe6	3686400        	2.953730e+08   	7.379027e+03   	1.089221e+03   	5.903221e+05   	4.450915e+09   	80
 afdd228b	1638400        	8.758624e+07   	2.799281e+03   	5.546704e+02   	2.323403e+05   	6.759213e+08   	83
 cea37d6d	409600         	1.097392e+07   	4.391691e+02   	4.300491e+01   	7.597626e+04   	3.368638e+07   	173
 
 ####################
-# COMB_2
+# COMB_3
 # number of types devices
 1
 ####################
@@ -58,20 +59,21 @@ cea37d6d	409600         	1.097392e+07   	4.391691e+02   	4.300491e+01   	7.59762
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb2)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	8294400        	9.963650e+08   	4.852293e+04   	1.266847e+04   	4.852293e+05   	2.514965e+10   	10
 617e5fe6	3686400        	2.953730e+08   	7.699799e+03   	1.513211e+03   	9.239759e+04   	7.389205e+08   	12
 afdd228b	1638400        	8.758624e+07   	5.010966e+03   	7.544427e+02   	5.010965e+04   	2.567896e+08   	10
 cea37d6d	409600         	1.097392e+07   	3.343709e+03   	3.943178e+02   	3.343709e+04   	1.133588e+08   	10
 
 ####################
-# COMB_4
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,14 +92,15 @@ cea37d6d	409600         	1.097392e+07   	3.343709e+03   	3.943178e+02   	3.34370
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb4)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	8294400        	9.963650e+08   	3.188534e+04   	1.123169e+04   	3.188534e+05   	1.142826e+10   	10
 617e5fe6	3686400        	2.953730e+08   	8.514691e+03   	1.568765e+03   	9.366160e+04   	8.245709e+08   	11
 afdd228b	1638400        	8.758624e+07   	5.436465e+03   	1.325711e+03   	5.436465e+04   	3.131266e+08   	10
 cea37d6d	409600         	1.097392e+07   	3.336739e+03   	3.113015e+02   	3.336739e+04   	1.123074e+08   	10
@@ -124,18 +127,19 @@ cea37d6d	409600         	1.097392e+07   	3.336739e+03   	3.113015e+02   	3.33673
 #####
 # Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	8294400        	9.963650e+08   	3.051372e+04   	1.107281e+04   	3.051372e+05   	1.053694e+10   	10
 617e5fe6	3686400        	2.953730e+08   	8.456328e+03   	1.411439e+03   	1.014759e+05   	8.820196e+08   	12
 afdd228b	1638400        	8.758624e+07   	4.996835e+03   	9.396038e+02   	4.996835e+04   	2.585122e+08   	10
 cea37d6d	409600         	1.097392e+07   	3.060839e+03   	4.968177e+01   	3.060839e+04   	9.371202e+07   	10
 
 ####################
-# COMB_3
+# COMB_4
 # number of types devices
 1
 ####################
@@ -154,14 +158,15 @@ cea37d6d	409600         	1.097392e+07   	3.060839e+03   	4.968177e+01   	3.06083
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+25ebb669	8294400        	9.963650e+08   	1.332935e+04   	3.063746e+03   	2.132696e+05   	2.992929e+09   	16
 617e5fe6	3686400        	2.953730e+08   	8.333388e+03   	1.108400e+03   	1.000007e+05   	8.480868e+08   	12
 afdd228b	1638400        	8.758624e+07   	5.517925e+03   	1.047059e+03   	5.517925e+04   	3.154382e+08   	10
 cea37d6d	409600         	1.097392e+07   	3.435367e+03   	2.405829e+02   	3.435367e+04   	1.185962e+08   	10

+ 16 - 14
tools/perfmodels/sampling/codelets/44/chol_model_21.sirocco

@@ -28,18 +28,19 @@
 #####
 # Model for cpu0_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0e8bce2b	16588800       	2.988058e+09   	6.085177e+04   	1.761936e+04   	4.551712e+07   	3.002008e+12   	748
 ff82dda0	7372800        	8.856576e+08   	1.775772e+04   	3.736007e+03   	2.386637e+07   	4.425714e+11   	1344
 d39bff17	3276800        	2.625536e+08   	5.276862e+03   	9.789431e+02   	7.070995e+06   	3.859682e+10   	1340
 2c1922b7	819200         	3.287040e+07   	7.675336e+02   	1.464194e+02   	2.842177e+06   	2.260854e+09   	3703
 
 ####################
-# COMB_2
+# COMB_3
 # number of types devices
 1
 ####################
@@ -58,14 +59,15 @@ d39bff17	3276800        	2.625536e+08   	5.276862e+03   	9.789431e+02   	7.07099
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb2)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0e8bce2b	16588800       	2.988058e+09   	5.422549e+03   	1.109859e+03   	2.917331e+06   	1.648207e+10   	538
 ff82dda0	7372800        	8.856576e+08   	2.018325e+03   	2.870643e+02   	1.687320e+06   	3.474450e+09   	836
 d39bff17	3276800        	2.625536e+08   	1.179394e+03   	1.705358e+02   	3.538181e+05   	4.260157e+08   	300
 2c1922b7	819200         	3.287040e+07   	4.644748e+02   	7.687001e+01   	3.297771e+04   	1.573685e+07   	71
@@ -92,18 +94,16 @@ d39bff17	3276800        	2.625536e+08   	1.179394e+03   	1.705358e+02   	3.53818
 #####
 # Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+1
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	8.856576e+08   	1.972468e+03   	2.888901e+02   	1.510910e+06   	3.044151e+09   	766
-d39bff17	3276800        	2.625536e+08   	1.215766e+03   	1.649819e+02   	2.869207e+05   	3.552521e+08   	236
-2c1922b7	819200         	3.287040e+07   	4.764697e+02   	7.471348e+01   	4.621756e+04   	2.256273e+07   	97
+0e8bce2b	16588800       	2.988058e+09   	5.480822e+03   	1.130650e+03   	2.899355e+06   	1.656711e+10   	529
 
 ####################
-# COMB_4
+# COMB_2
 # number of types devices
 1
 ####################
@@ -122,20 +122,21 @@ d39bff17	3276800        	2.625536e+08   	1.215766e+03   	1.649819e+02   	2.86920
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb4)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0e8bce2b	16588800       	2.988058e+09   	5.510320e+03   	1.138149e+03   	2.992104e+06   	1.719084e+10   	543
 ff82dda0	7372800        	8.856576e+08   	2.005118e+03   	2.787124e+02   	1.836689e+06   	3.753933e+09   	916
 d39bff17	3276800        	2.625536e+08   	1.227664e+03   	1.874122e+02   	2.970946e+05   	3.732321e+08   	242
 2c1922b7	819200         	3.287040e+07   	4.209987e+02   	9.547071e+01   	6.441281e+04   	2.851225e+07   	153
 
 ####################
-# COMB_3
+# COMB_4
 # number of types devices
 1
 ####################
@@ -154,14 +155,15 @@ d39bff17	3276800        	2.625536e+08   	1.227664e+03   	1.874122e+02   	2.97094
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0e8bce2b	16588800       	2.988058e+09   	5.534879e+03   	1.226333e+03   	3.210230e+06   	1.864049e+10   	580
 ff82dda0	7372800        	8.856576e+08   	2.051755e+03   	2.742098e+02   	1.811700e+06   	3.783559e+09   	883
 d39bff17	3276800        	2.625536e+08   	1.153240e+03   	1.913332e+02   	3.194475e+05   	3.785401e+08   	277
 2c1922b7	819200         	3.287040e+07   	4.950127e+02   	6.747714e+01   	5.445140e+04   	2.745498e+07   	110

+ 16 - 11
tools/perfmodels/sampling/codelets/44/chol_model_22.sirocco

@@ -7,7 +7,7 @@
 # number of combinations
 5
 ####################
-# COMB_4
+# COMB_2
 # number of types devices
 1
 ####################
@@ -26,14 +26,15 @@
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb4)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8cfc3ba0	24883200       	5.971968e+09   	2.632193e+03   	3.963412e+02   	2.414774e+07   	6.500262e+10   	9174
 24c84a50	11059200       	1.769472e+09   	7.545142e+02   	4.622075e+01   	1.174100e+07   	8.891991e+09   	15561
 f0ac7beb	4915200        	5.242880e+08   	2.651541e+02   	2.896639e+01   	2.197862e+06   	5.897272e+08   	8289
 d46431bb	1228800        	6.553600e+07   	5.633559e+01   	1.027680e+01   	7.345034e+05   	4.275566e+07   	13038
@@ -60,18 +61,19 @@ d46431bb	1228800        	6.553600e+07   	5.633559e+01   	1.027680e+01   	7.34503
 #####
 # Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8cfc3ba0	24883200       	5.971968e+09   	2.577659e+03   	3.627579e+02   	2.443621e+07   	6.423574e+10   	9480
 24c84a50	11059200       	1.769472e+09   	7.434516e+02   	4.620554e+01   	1.184541e+07   	8.840509e+09   	15933
 f0ac7beb	4915200        	5.242880e+08   	2.633265e+02   	2.997768e+01   	2.185610e+06   	5.829880e+08   	8300
 d46431bb	1228800        	6.553600e+07   	5.716015e+01   	1.157773e+01   	6.223597e+05   	3.703364e+07   	10888
 
 ####################
-# COMB_2
+# COMB_3
 # number of types devices
 1
 ####################
@@ -90,20 +92,21 @@ d46431bb	1228800        	6.553600e+07   	5.716015e+01   	1.157773e+01   	6.22359
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb2)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8cfc3ba0	24883200       	5.971968e+09   	2.575702e+03   	3.709810e+02   	2.479886e+07   	6.519954e+10   	9628
 24c84a50	11059200       	1.769472e+09   	7.437036e+02   	4.816754e+01   	1.192752e+07   	8.907749e+09   	16038
 f0ac7beb	4915200        	5.242880e+08   	2.593699e+02   	2.791728e+01   	2.302427e+06   	6.040986e+08   	8877
 d46431bb	1228800        	6.553600e+07   	5.656092e+01   	1.160148e+01   	5.523739e+05   	3.255722e+07   	9766
 
 ####################
-# COMB_3
+# COMB_4
 # number of types devices
 1
 ####################
@@ -122,14 +125,15 @@ d46431bb	1228800        	6.553600e+07   	5.656092e+01   	1.160148e+01   	5.52373
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8cfc3ba0	24883200       	5.971968e+09   	2.620269e+03   	3.685780e+02   	2.431872e+07   	6.498239e+10   	9281
 24c84a50	11059200       	1.769472e+09   	7.558763e+02   	4.610795e+01   	1.204791e+07   	9.140616e+09   	15939
 f0ac7beb	4915200        	5.242880e+08   	2.625144e+02   	2.860172e+01   	2.207221e+06   	5.863054e+08   	8408
 d46431bb	1228800        	6.553600e+07   	5.829194e+01   	1.220705e+01   	7.805874e+05   	4.749737e+07   	13391
@@ -156,12 +160,13 @@ d46431bb	1228800        	6.553600e+07   	5.829194e+01   	1.220705e+01   	7.80587
 #####
 # Model for cpu0_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8cfc3ba0	24883200       	5.971968e+09   	1.120418e+05   	2.694960e+04   	3.755640e+08   	4.451334e+13   	3352
 24c84a50	11059200       	1.769472e+09   	3.494026e+04   	6.587010e+03   	1.697048e+08   	6.140270e+12   	4857
 f0ac7beb	4915200        	5.242880e+08   	1.147661e+04   	2.242393e+03   	3.799907e+07   	4.527495e+11   	3311
 d46431bb	1228800        	6.553600e+07   	1.593513e+03   	3.073908e+02   	2.396962e+07   	3.961722e+10   	15042

+ 18 - 13
tools/perfmodels/sampling/codelets/44/starpu_dgemm_gemm.sirocco

@@ -7,7 +7,7 @@
 # number of combinations
 5
 ####################
-# COMB_0
+# COMB_2
 # number of types devices
 1
 ####################
@@ -26,9 +26,9 @@
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb0)
+# Model for cuda0_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -37,9 +37,10 @@ nan            	nan            	nan
 492beed5	66355200       	7.077888e+09   	6.467396e+03   	5.820387e+02   	1.403425e+06   	9.150018e+09   	217
 0b0b0ce8	7372800        	2.621440e+08   	2.828637e+02   	4.132770e+01   	2.376055e+04   	6.864469e+06   	84
 4220e23d	29491200       	2.097152e+09   	2.091138e+03   	2.430963e+02   	3.764048e+05   	7.977516e+08   	180
+87a7dc42	149299200      	2.388787e+10   	2.171545e+04   	9.979353e+02   	3.431041e+06   	7.466394e+10   	158
 
 ####################
-# COMB_2
+# COMB_0
 # number of types devices
 1
 ####################
@@ -58,9 +59,9 @@ nan            	nan            	nan
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb2)
+# Model for cuda3_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -69,9 +70,10 @@ nan            	nan            	nan
 492beed5	66355200       	7.077888e+09   	6.530201e+03   	6.982602e+02   	1.214617e+06   	8.022384e+09   	186
 0b0b0ce8	7372800        	2.621440e+08   	2.596160e+02   	3.720670e+01   	2.907699e+04   	7.703898e+06   	112
 4220e23d	29491200       	2.097152e+09   	2.068075e+03   	2.561461e+02   	4.156832e+05   	8.728519e+08   	201
+87a7dc42	149299200      	2.388787e+10   	2.178854e+04   	1.485331e+03   	3.355435e+06   	7.344977e+10   	154
 
 ####################
-# COMB_3
+# COMB_1
 # number of types devices
 1
 ####################
@@ -90,9 +92,9 @@ nan            	nan            	nan
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb3)
+# Model for cuda1_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -101,9 +103,10 @@ nan            	nan            	nan
 492beed5	66355200       	7.077888e+09   	6.594324e+03   	6.341124e+02   	1.384808e+06   	9.216313e+09   	210
 0b0b0ce8	7372800        	2.621440e+08   	2.592059e+02   	3.728165e+01   	2.773503e+04   	7.337807e+06   	107
 4220e23d	29491200       	2.097152e+09   	2.149687e+03   	2.853500e+02   	3.847940e+05   	8.417616e+08   	179
+87a7dc42	149299200      	2.388787e+10   	2.210351e+04   	9.525598e+02   	3.426044e+06   	7.586825e+10   	155
 
 ####################
-# COMB_1
+# COMB_3
 # number of types devices
 1
 ####################
@@ -122,9 +125,9 @@ nan            	nan            	nan
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb1)
+# Model for cuda2_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -133,6 +136,7 @@ nan            	nan            	nan
 492beed5	66355200       	7.077888e+09   	6.615698e+03   	6.959563e+02   	1.210673e+06   	8.098082e+09   	183
 0b0b0ce8	7372800        	2.621440e+08   	2.665077e+02   	3.721734e+01   	3.278045e+04   	8.906615e+06   	123
 4220e23d	29491200       	2.097152e+09   	2.090283e+03   	2.730830e+02   	4.285080e+05   	9.109906e+08   	205
+87a7dc42	149299200      	2.388787e+10   	2.206407e+04   	1.175820e+03   	3.397867e+06   	7.518367e+10   	154
 
 ####################
 # COMB_4
@@ -156,7 +160,7 @@ nan            	nan            	nan
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -165,4 +169,5 @@ nan            	nan            	nan
 492beed5	66355200       	7.077888e+09   	2.793361e+05   	4.545353e+04   	2.039154e+07   	5.846913e+12   	73
 0b0b0ce8	7372800        	2.621440e+08   	1.003329e+04   	9.763114e+02   	3.471519e+06   	3.516056e+10   	346
 4220e23d	29491200       	2.097152e+09   	8.266143e+04   	1.577004e+04   	6.860899e+06   	5.877733e+11   	83
+87a7dc42	149299200      	2.388787e+10   	9.422627e+05   	1.729617e+05   	1.517043e+08   	1.477617e+14   	161
 

+ 18 - 13
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_11.sirocco

@@ -28,7 +28,7 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -37,9 +37,10 @@ nan            	nan            	nan
 617e5fe6	7372800        	0.000000e+00   	1.778570e+05   	1.735127e+04   	1.778570e+06   	3.193419e+11   	10
 cea37d6d	819200         	0.000000e+00   	5.904224e+03   	6.575598e+02   	5.668055e+05   	3.388055e+09   	96
 afdd228b	3276800        	0.000000e+00   	4.953149e+04   	6.709149e+03   	6.439093e+05   	3.247895e+10   	13
+25ebb669	16588800       	0.000000e+00   	7.801727e+05   	1.214440e+05   	7.801727e+06   	6.234180e+12   	10
 
 ####################
-# COMB_3
+# COMB_0
 # number of types devices
 1
 ####################
@@ -58,9 +59,9 @@ afdd228b	3276800        	0.000000e+00   	4.953149e+04   	6.709149e+03   	6.43909
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -69,9 +70,10 @@ nan            	nan            	nan
 617e5fe6	7372800        	0.000000e+00   	5.552387e+04   	5.714037e+03   	1.054954e+06   	5.919546e+10   	19
 cea37d6d	819200         	0.000000e+00   	9.707597e+03   	9.439210e+02   	9.707597e+04   	9.512842e+08   	10
 afdd228b	3276800        	0.000000e+00   	2.633937e+04   	3.608518e+03   	3.950905e+05   	1.060175e+10   	15
+25ebb669	16588800       	0.000000e+00   	1.397955e+05   	9.676594e+03   	1.537750e+06   	2.160006e+11   	11
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,9 +92,9 @@ afdd228b	3276800        	0.000000e+00   	2.633937e+04   	3.608518e+03   	3.95090
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb1)
+# Model for cuda0_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -101,9 +103,10 @@ nan            	nan            	nan
 617e5fe6	7372800        	0.000000e+00   	5.675882e+04   	6.232185e+03   	1.248694e+06   	7.172888e+10   	22
 cea37d6d	819200         	0.000000e+00   	9.541018e+03   	9.285702e+02   	9.541018e+04   	9.189326e+08   	10
 afdd228b	3276800        	0.000000e+00   	2.651477e+04   	2.554649e+03   	3.181772e+05   	8.514711e+09   	12
+25ebb669	16588800       	0.000000e+00   	1.382255e+05   	7.304438e+03   	1.382255e+06   	1.915965e+11   	10
 
 ####################
-# COMB_2
+# COMB_1
 # number of types devices
 1
 ####################
@@ -122,9 +125,9 @@ afdd228b	3276800        	0.000000e+00   	2.651477e+04   	2.554649e+03   	3.18177
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb2)
+# Model for cuda1_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -133,9 +136,10 @@ nan            	nan            	nan
 617e5fe6	7372800        	0.000000e+00   	5.624387e+04   	5.549003e+03   	8.436581e+05   	4.791247e+10   	15
 cea37d6d	819200         	0.000000e+00   	9.661577e+03   	7.114114e+02   	9.661577e+04   	9.385217e+08   	10
 afdd228b	3276800        	0.000000e+00   	2.574090e+04   	2.071791e+03   	5.148179e+05   	1.333772e+10   	20
+25ebb669	16588800       	0.000000e+00   	1.361676e+05   	1.958095e+03   	1.770178e+06   	2.410907e+11   	13
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -154,9 +158,9 @@ afdd228b	3276800        	0.000000e+00   	2.574090e+04   	2.071791e+03   	5.14817
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -165,4 +169,5 @@ nan            	nan            	nan
 617e5fe6	7372800        	0.000000e+00   	5.895007e+04   	8.369498e+03   	7.663509e+05   	4.608707e+10   	13
 cea37d6d	819200         	0.000000e+00   	9.910778e+03   	1.200981e+03   	9.910778e+04   	9.966588e+08   	10
 afdd228b	3276800        	0.000000e+00   	2.572979e+04   	2.095041e+03   	5.917851e+05   	1.532746e+10   	23
+25ebb669	16588800       	0.000000e+00   	1.422314e+05   	1.728252e+04   	1.422314e+06   	2.052844e+11   	10
 

+ 18 - 13
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_12.sirocco

@@ -28,7 +28,7 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -37,9 +37,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	3.402547e+04   	6.005726e+03   	7.111323e+06   	2.495045e+11   	209
 2c1922b7	1638400        	0.000000e+00   	6.443940e+03   	1.476966e+03   	1.610985e+05   	1.092645e+09   	25
 d39bff17	6553600        	0.000000e+00   	1.041247e+04   	1.992240e+03   	3.092503e+06   	3.337940e+10   	297
+0e8bce2b	33177600       	0.000000e+00   	1.103734e+05   	1.699353e+04   	1.037510e+07   	1.172281e+12   	94
 
 ####################
-# COMB_2
+# COMB_1
 # number of types devices
 1
 ####################
@@ -58,9 +59,9 @@ d39bff17	6553600        	0.000000e+00   	1.041247e+04   	1.992240e+03   	3.09250
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb2)
+# Model for cuda1_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -69,9 +70,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	3.238292e+03   	4.902889e+02   	6.768030e+05   	2.241926e+09   	209
 2c1922b7	1638400        	0.000000e+00   	5.889641e+02   	1.063542e+02   	1.272162e+05   	7.736903e+07   	216
 d39bff17	6553600        	0.000000e+00   	1.349909e+03   	1.936514e+02   	2.942801e+05   	4.054266e+08   	218
+0e8bce2b	33177600       	0.000000e+00   	7.038455e+03   	8.353918e+02   	1.182460e+06   	8.439938e+09   	168
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,9 +92,9 @@ d39bff17	6553600        	0.000000e+00   	1.349909e+03   	1.936514e+02   	2.94280
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb1)
+# Model for cuda0_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -101,9 +103,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	3.179744e+03   	4.016259e+02   	6.804652e+05   	2.198224e+09   	214
 2c1922b7	1638400        	0.000000e+00   	5.796961e+02   	1.048897e+02   	1.199971e+05   	7.183924e+07   	207
 d39bff17	6553600        	0.000000e+00   	1.343917e+03   	2.039127e+02   	2.244341e+05   	3.085646e+08   	167
+0e8bce2b	33177600       	0.000000e+00   	6.913467e+03   	8.366528e+02   	1.244424e+06   	8.729283e+09   	180
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -122,9 +125,9 @@ d39bff17	6553600        	0.000000e+00   	1.343917e+03   	2.039127e+02   	2.24434
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -133,9 +136,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	3.362936e+03   	5.457359e+02   	6.524096e+05   	2.251791e+09   	194
 2c1922b7	1638400        	0.000000e+00   	5.405600e+02   	9.344101e+01   	1.513568e+05   	8.426217e+07   	280
 d39bff17	6553600        	0.000000e+00   	1.275634e+03   	1.830051e+02   	2.270629e+05   	2.956105e+08   	178
+0e8bce2b	33177600       	0.000000e+00   	6.852169e+03   	8.897789e+02   	8.291125e+05   	5.777016e+09   	121
 
 ####################
-# COMB_3
+# COMB_0
 # number of types devices
 1
 ####################
@@ -154,9 +158,9 @@ d39bff17	6553600        	0.000000e+00   	1.275634e+03   	1.830051e+02   	2.27062
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -165,4 +169,5 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	3.306190e+03   	4.921154e+02   	7.009122e+05   	2.368690e+09   	212
 2c1922b7	1638400        	0.000000e+00   	5.641572e+02   	1.012475e+02   	1.376544e+05   	8.015997e+07   	244
 d39bff17	6553600        	0.000000e+00   	1.355727e+03   	1.656730e+02   	2.331851e+05   	3.208564e+08   	172
+0e8bce2b	33177600       	0.000000e+00   	6.732998e+03   	6.928655e+02   	1.144610e+06   	7.788266e+09   	170
 

+ 18 - 13
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_21.sirocco

@@ -28,7 +28,7 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -37,9 +37,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	4.935132e+04   	1.056942e+04   	1.875350e+06   	9.679609e+10   	38
 2c1922b7	1638400        	0.000000e+00   	1.386830e+03   	2.636504e+02   	1.256468e+06   	1.805486e+09   	906
 d39bff17	6553600        	0.000000e+00   	1.762919e+04   	3.396463e+03   	6.170218e+05   	1.128135e+10   	35
+0e8bce2b	33177600       	0.000000e+00   	2.090375e+05   	4.666688e+04   	3.762675e+06   	8.257406e+11   	18
 
 ####################
-# COMB_3
+# COMB_0
 # number of types devices
 1
 ####################
@@ -58,9 +59,9 @@ d39bff17	6553600        	0.000000e+00   	1.762919e+04   	3.396463e+03   	6.17021
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -69,9 +70,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	2.580226e+03   	3.710017e+02   	5.495882e+05   	1.447380e+09   	213
 2c1922b7	1638400        	0.000000e+00   	3.441326e+02   	6.695097e+01   	4.267244e+04   	1.524080e+07   	124
 d39bff17	6553600        	0.000000e+00   	9.089165e+02   	1.570596e+02   	1.590604e+05   	1.488895e+08   	175
+0e8bce2b	33177600       	0.000000e+00   	5.659456e+03   	7.119452e+02   	1.058318e+06   	6.084289e+09   	187
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,9 +92,9 @@ d39bff17	6553600        	0.000000e+00   	9.089165e+02   	1.570596e+02   	1.59060
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb1)
+# Model for cuda0_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -101,9 +103,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	2.485190e+03   	3.288259e+02   	6.188122e+05   	1.564789e+09   	249
 2c1922b7	1638400        	0.000000e+00   	3.565977e+02   	7.541526e+01   	3.672956e+04   	1.368348e+07   	103
 d39bff17	6553600        	0.000000e+00   	9.441529e+02   	1.527244e+02   	2.398148e+05   	2.323464e+08   	254
+0e8bce2b	33177600       	0.000000e+00   	5.713813e+03   	7.827526e+02   	8.456444e+05   	4.922534e+09   	148
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -122,9 +125,9 @@ d39bff17	6553600        	0.000000e+00   	9.441529e+02   	1.527244e+02   	2.39814
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -133,9 +136,10 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	2.615067e+03   	4.428563e+02   	5.334737e+05   	1.435078e+09   	204
 2c1922b7	1638400        	0.000000e+00   	3.667814e+02   	6.561130e+01   	3.227676e+04   	1.221734e+07   	88
 d39bff17	6553600        	0.000000e+00   	9.018562e+02   	1.587421e+02   	1.470026e+05   	1.366826e+08   	163
+0e8bce2b	33177600       	0.000000e+00   	5.604694e+03   	6.527594e+02   	1.132148e+06   	6.431415e+09   	202
 
 ####################
-# COMB_2
+# COMB_1
 # number of types devices
 1
 ####################
@@ -154,9 +158,9 @@ d39bff17	6553600        	0.000000e+00   	9.018562e+02   	1.587421e+02   	1.47002
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb2)
+# Model for cuda1_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -165,4 +169,5 @@ nan            	nan            	nan
 ff82dda0	14745600       	0.000000e+00   	2.480709e+03   	3.749281e+02   	4.514890e+05   	1.145597e+09   	182
 2c1922b7	1638400        	0.000000e+00   	3.718262e+02   	7.249781e+01   	2.342505e+04   	9.041172e+06   	63
 d39bff17	6553600        	0.000000e+00   	9.130900e+02   	1.739240e+02   	2.182285e+05   	2.064919e+08   	239
+0e8bce2b	33177600       	0.000000e+00   	5.804941e+03   	6.806052e+02   	9.113758e+05   	5.363209e+09   	157
 

+ 18 - 13
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_22.sirocco

@@ -7,7 +7,7 @@
 # number of combinations
 5
 ####################
-# COMB_3
+# COMB_0
 # number of types devices
 1
 ####################
@@ -26,9 +26,9 @@
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb3)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -37,9 +37,10 @@ nan            	nan            	nan
 24c84a50	22118400       	0.000000e+00   	1.754881e+03   	1.567907e+02   	8.516439e+06   	1.506464e+10   	4853
 d46431bb	2457600        	0.000000e+00   	9.227862e+01   	1.339393e+01   	6.585925e+05   	6.205436e+07   	7137
 f0ac7beb	9830400        	0.000000e+00   	5.560171e+02   	4.481480e+01   	2.452035e+06   	1.372230e+09   	4410
+8cfc3ba0	49766400       	0.000000e+00   	5.688840e+03   	4.278238e+02   	1.962081e+07   	1.122509e+11   	3449
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -58,9 +59,9 @@ f0ac7beb	9830400        	0.000000e+00   	5.560171e+02   	4.481480e+01   	2.45203
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -69,9 +70,10 @@ nan            	nan            	nan
 24c84a50	22118400       	0.000000e+00   	1.725012e+03   	1.556789e+02   	8.775134e+06   	1.526049e+10   	5087
 d46431bb	2457600        	0.000000e+00   	9.099306e+01   	1.290433e+01   	7.117477e+05   	6.606663e+07   	7822
 f0ac7beb	9830400        	0.000000e+00   	5.497124e+02   	4.364744e+01   	2.308242e+06   	1.276869e+09   	4199
+8cfc3ba0	49766400       	0.000000e+00   	5.591076e+03   	4.188165e+02   	1.997692e+07   	1.123192e+11   	3573
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,9 +92,9 @@ f0ac7beb	9830400        	0.000000e+00   	5.497124e+02   	4.364744e+01   	2.30824
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb1)
+# Model for cuda0_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -101,6 +103,7 @@ nan            	nan            	nan
 24c84a50	22118400       	0.000000e+00   	1.724207e+03   	1.559700e+02   	8.529651e+06   	1.482723e+10   	4947
 d46431bb	2457600        	0.000000e+00   	9.395983e+01   	1.410875e+01   	5.884704e+05   	5.653928e+07   	6263
 f0ac7beb	9830400        	0.000000e+00   	5.531811e+02   	3.935565e+01   	3.264875e+06   	1.815209e+09   	5902
+8cfc3ba0	49766400       	0.000000e+00   	5.682607e+03   	4.627422e+02   	2.006529e+07   	1.147792e+11   	3531
 
 ####################
 # COMB_4
@@ -124,7 +127,7 @@ f0ac7beb	9830400        	0.000000e+00   	5.531811e+02   	3.935565e+01   	3.26487
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -133,9 +136,10 @@ nan            	nan            	nan
 24c84a50	22118400       	0.000000e+00   	6.659236e+04   	1.142300e+04   	1.507651e+08   	1.033522e+13   	2264
 d46431bb	2457600        	0.000000e+00   	3.623237e+03   	8.721045e+02   	1.668138e+07   	6.394225e+10   	4604
 f0ac7beb	9830400        	0.000000e+00   	2.355764e+04   	4.984182e+03   	4.405279e+07   	1.084235e+12   	1870
+8cfc3ba0	49766400       	0.000000e+00   	2.164742e+05   	3.673582e+04   	3.325044e+08   	7.405148e+13   	1536
 
 ####################
-# COMB_2
+# COMB_1
 # number of types devices
 1
 ####################
@@ -154,9 +158,9 @@ f0ac7beb	9830400        	0.000000e+00   	2.355764e+04   	4.984182e+03   	4.40527
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb2)
+# Model for cuda1_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -165,4 +169,5 @@ nan            	nan            	nan
 24c84a50	22118400       	0.000000e+00   	1.746747e+03   	1.539679e+02   	8.628932e+06   	1.518967e+10   	4940
 d46431bb	2457600        	0.000000e+00   	9.539483e+01   	1.447066e+01   	7.032507e+05   	6.863017e+07   	7372
 f0ac7beb	9830400        	0.000000e+00   	5.601014e+02   	3.783630e+01   	3.218342e+06   	1.810824e+09   	5746
+8cfc3ba0	49766400       	0.000000e+00   	5.710157e+03   	4.302245e+02   	2.027106e+07   	1.164080e+11   	3550
 

+ 18 - 13
tools/perfmodels/sampling/codelets/44/starpu_sgemm_gemm.sirocco

@@ -7,7 +7,7 @@
 # number of combinations
 5
 ####################
-# COMB_0
+# COMB_2
 # number of types devices
 1
 ####################
@@ -26,9 +26,9 @@
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb0)
+# Model for cuda0_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -37,9 +37,10 @@ nan            	nan            	nan
 492beed5	33177600       	7.077888e+09   	2.745578e+03   	3.064191e+02   	6.616844e+05   	1.839335e+09   	241
 0b0b0ce8	3686400        	2.621440e+08   	1.582927e+02   	3.333442e+01   	3.434951e+04   	5.678402e+06   	217
 4220e23d	14745600       	2.097152e+09   	8.206871e+02   	1.017181e+02   	1.148962e+05   	9.574235e+07   	140
+87a7dc42	74649600       	2.388787e+10   	9.813897e+03   	7.998509e+02   	1.570224e+06   	1.551237e+10   	160
 
 ####################
-# COMB_2
+# COMB_1
 # number of types devices
 1
 ####################
@@ -58,9 +59,9 @@ nan            	nan            	nan
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb2)
+# Model for cuda3_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -69,9 +70,10 @@ nan            	nan            	nan
 492beed5	33177600       	7.077888e+09   	2.686428e+03   	2.002215e+02   	6.716071e+05   	1.814247e+09   	250
 0b0b0ce8	3686400        	2.621440e+08   	1.630480e+02   	3.438768e+01   	3.097912e+04   	5.275762e+06   	190
 4220e23d	14745600       	2.097152e+09   	8.448030e+02   	7.773742e+01   	2.433033e+05   	2.072837e+08   	288
+87a7dc42	74649600       	2.388787e+10   	9.873153e+03   	8.026227e+02   	1.579704e+06   	1.569974e+10   	160
 
 ####################
-# COMB_3
+# COMB_0
 # number of types devices
 1
 ####################
@@ -90,9 +92,9 @@ nan            	nan            	nan
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb3)
+# Model for cuda1_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -101,9 +103,10 @@ nan            	nan            	nan
 492beed5	33177600       	7.077888e+09   	2.791098e+03   	3.147711e+02   	6.503258e+05   	1.838209e+09   	233
 0b0b0ce8	3686400        	2.621440e+08   	1.624855e+02   	3.298013e+01   	2.940987e+04   	4.975550e+06   	181
 4220e23d	14745600       	2.097152e+09   	8.152506e+02   	1.017614e+02   	1.173961e+05   	9.719839e+07   	144
+87a7dc42	74649600       	2.388787e+10   	1.001360e+04   	7.827579e+02   	1.582149e+06   	1.593981e+10   	158
 
 ####################
-# COMB_1
+# COMB_3
 # number of types devices
 1
 ####################
@@ -122,9 +125,9 @@ nan            	nan            	nan
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb1)
+# Model for cuda2_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -133,6 +136,7 @@ nan            	nan            	nan
 492beed5	33177600       	7.077888e+09   	2.754203e+03   	2.682327e+02   	6.830422e+05   	1.899080e+09   	248
 0b0b0ce8	3686400        	2.621440e+08   	1.622246e+02   	3.553894e+01   	3.714942e+04   	6.315779e+06   	229
 4220e23d	14745600       	2.097152e+09   	8.611626e+02   	9.290485e+01   	2.411255e+05   	2.100651e+08   	280
+87a7dc42	74649600       	2.388787e+10   	9.935915e+03   	7.366769e+02   	1.569875e+06   	1.568389e+10   	158
 
 ####################
 # COMB_4
@@ -156,7 +160,7 @@ nan            	nan            	nan
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
@@ -165,4 +169,5 @@ nan            	nan            	nan
 492beed5	33177600       	7.077888e+09   	1.712078e+05   	4.163047e+04   	2.773567e+07   	5.029326e+12   	162
 0b0b0ce8	3686400        	2.621440e+08   	6.441655e+03   	1.152866e+03   	3.220827e+05   	2.141201e+09   	50
 4220e23d	14745600       	2.097152e+09   	4.927734e+04   	1.166029e+04   	5.913281e+06   	3.077063e+11   	120
+87a7dc42	74649600       	2.388787e+10   	5.091210e+05   	1.022002e+05   	6.974957e+07   	3.694192e+13   	137
 

+ 28 - 23
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_11.sirocco

@@ -28,18 +28,19 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-617e5fe6	3686400        	0.000000e+00   	1.029624e+05   	6.177928e+03   	1.029624e+06   	1.063943e+11   	10
-cea37d6d	409600         	0.000000e+00   	4.037068e+03   	3.335771e+02   	2.906689e+05   	1.181462e+09   	72
+25ebb669	8294400        	0.000000e+00   	4.111343e+05   	7.639666e+04   	4.111343e+06   	1.748679e+12   	10
 afdd228b	1638400        	0.000000e+00   	2.923093e+04   	1.278718e+03   	5.553877e+05   	1.626557e+10   	19
+cea37d6d	409600         	0.000000e+00   	4.037068e+03   	3.335771e+02   	2.906689e+05   	1.181462e+09   	72
+617e5fe6	3686400        	0.000000e+00   	1.029624e+05   	6.177928e+03   	1.029624e+06   	1.063943e+11   	10
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -58,20 +59,21 @@ afdd228b	1638400        	0.000000e+00   	2.923093e+04   	1.278718e+03   	5.55387
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-617e5fe6	3686400        	0.000000e+00   	4.153583e+04   	9.473225e+02   	9.968599e+05   	4.142694e+10   	24
-afdd228b	1638400        	0.000000e+00   	2.088164e+04   	1.502169e+03   	4.176328e+05   	8.765989e+09   	20
 cea37d6d	409600         	0.000000e+00   	9.866251e+03   	7.665217e+02   	9.866251e+04   	9.793047e+08   	10
+afdd228b	1638400        	0.000000e+00   	2.088164e+04   	1.502169e+03   	4.176328e+05   	8.765989e+09   	20
+617e5fe6	3686400        	0.000000e+00   	4.153583e+04   	9.473225e+02   	9.968599e+05   	4.142694e+10   	24
+25ebb669	8294400        	0.000000e+00   	9.378398e+04   	2.901838e+03   	1.594328e+06   	1.496655e+11   	17
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,20 +92,21 @@ cea37d6d	409600         	0.000000e+00   	9.866251e+03   	7.665217e+02   	9.86625
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb1)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-617e5fe6	3686400        	0.000000e+00   	4.357190e+04   	5.271768e+03   	7.842942e+05   	3.467343e+10   	18
-cea37d6d	409600         	0.000000e+00   	9.238189e+03   	1.713378e+02   	9.238189e+04   	8.537349e+08   	10
+25ebb669	8294400        	0.000000e+00   	9.434448e+04   	6.197321e+03   	2.075578e+06   	1.966643e+11   	22
 afdd228b	1638400        	0.000000e+00   	2.242688e+04   	2.707726e+03   	3.139763e+05   	7.144153e+09   	14
+cea37d6d	409600         	0.000000e+00   	9.238189e+03   	1.713378e+02   	9.238189e+04   	8.537349e+08   	10
+617e5fe6	3686400        	0.000000e+00   	4.357190e+04   	5.271768e+03   	7.842942e+05   	3.467343e+10   	18
 
 ####################
-# COMB_3
+# COMB_1
 # number of types devices
 1
 ####################
@@ -122,20 +125,21 @@ afdd228b	1638400        	0.000000e+00   	2.242688e+04   	2.707726e+03   	3.13976
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb3)
+# Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-617e5fe6	3686400        	0.000000e+00   	4.647825e+04   	9.283373e+03   	5.577390e+05   	2.695691e+10   	12
-cea37d6d	409600         	0.000000e+00   	9.471831e+03   	5.475075e+02   	9.471831e+04   	9.001535e+08   	10
+25ebb669	8294400        	0.000000e+00   	9.395404e+04   	4.337001e+03   	1.973035e+06   	1.857696e+11   	21
 afdd228b	1638400        	0.000000e+00   	2.096495e+04   	7.732458e+02   	3.773690e+05   	7.922284e+09   	18
+cea37d6d	409600         	0.000000e+00   	9.471831e+03   	5.475075e+02   	9.471831e+04   	9.001535e+08   	10
+617e5fe6	3686400        	0.000000e+00   	4.647825e+04   	9.283373e+03   	5.577390e+05   	2.695691e+10   	12
 
 ####################
-# COMB_2
+# COMB_0
 # number of types devices
 1
 ####################
@@ -154,15 +158,16 @@ afdd228b	1638400        	0.000000e+00   	2.096495e+04   	7.732458e+02   	3.77369
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb2)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-617e5fe6	3686400        	0.000000e+00   	4.258012e+04   	2.921691e+03   	8.090223e+05   	3.461046e+10   	19
-cea37d6d	409600         	0.000000e+00   	9.338877e+03   	3.249828e+02   	9.338877e+04   	8.732025e+08   	10
+25ebb669	8294400        	0.000000e+00   	9.896522e+04   	1.438963e+04   	1.187583e+06   	1.200141e+11   	12
 afdd228b	1638400        	0.000000e+00   	2.172039e+04   	1.567348e+03   	2.823650e+05   	6.165013e+09   	13
+cea37d6d	409600         	0.000000e+00   	9.338877e+03   	3.249828e+02   	9.338877e+04   	8.732025e+08   	10
+617e5fe6	3686400        	0.000000e+00   	4.258012e+04   	2.921691e+03   	8.090223e+05   	3.461046e+10   	19
 

+ 28 - 23
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_12.sirocco

@@ -28,18 +28,19 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	1.726784e+04   	3.264426e+03   	3.021872e+06   	5.404608e+10   	175
-2c1922b7	819200         	0.000000e+00   	4.245334e+03   	7.020174e+02   	6.368000e+04   	2.777353e+08   	15
+0e8bce2b	16588800       	0.000000e+00   	5.637990e+04   	1.036285e+04   	8.118706e+06   	4.731958e+11   	144
 d39bff17	3276800        	0.000000e+00   	5.106660e+03   	6.848530e+02   	2.134584e+06   	1.109665e+10   	418
+2c1922b7	819200         	0.000000e+00   	4.245334e+03   	7.020174e+02   	6.368000e+04   	2.777353e+08   	15
+ff82dda0	7372800        	0.000000e+00   	1.726784e+04   	3.264426e+03   	3.021872e+06   	5.404608e+10   	175
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -58,20 +59,21 @@ d39bff17	3276800        	0.000000e+00   	5.106660e+03   	6.848530e+02   	2.13458
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	2.106719e+03   	2.638200e+02   	5.646006e+05   	1.208108e+09   	268
-2c1922b7	819200         	0.000000e+00   	4.901281e+02   	6.729653e+01   	6.616730e+04   	3.304185e+07   	135
+0e8bce2b	16588800       	0.000000e+00   	5.668892e+03   	6.964909e+02   	1.394547e+06   	8.024874e+09   	246
 d39bff17	3276800        	0.000000e+00   	1.216432e+03   	1.410794e+02   	1.934127e+05   	2.384382e+08   	159
+2c1922b7	819200         	0.000000e+00   	4.901281e+02   	6.729653e+01   	6.616730e+04   	3.304185e+07   	135
+ff82dda0	7372800        	0.000000e+00   	2.106719e+03   	2.638200e+02   	5.646006e+05   	1.208108e+09   	268
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -90,20 +92,21 @@ d39bff17	3276800        	0.000000e+00   	1.216432e+03   	1.410794e+02   	1.93412
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb1)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	2.138085e+03   	2.696288e+02   	6.371492e+05   	1.383944e+09   	298
-2c1922b7	819200         	0.000000e+00   	4.968224e+02   	7.860110e+01   	5.415364e+04   	2.757816e+07   	109
+0e8bce2b	16588800       	0.000000e+00   	5.677828e+03   	6.218985e+02   	1.311578e+06   	7.536257e+09   	231
 d39bff17	3276800        	0.000000e+00   	1.199302e+03   	1.658297e+02   	1.774966e+05   	2.169419e+08   	148
+2c1922b7	819200         	0.000000e+00   	4.968224e+02   	7.860110e+01   	5.415364e+04   	2.757816e+07   	109
+ff82dda0	7372800        	0.000000e+00   	2.138085e+03   	2.696288e+02   	6.371492e+05   	1.383944e+09   	298
 
 ####################
-# COMB_2
+# COMB_0
 # number of types devices
 1
 ####################
@@ -122,20 +125,21 @@ d39bff17	3276800        	0.000000e+00   	1.199302e+03   	1.658297e+02   	1.77496
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb2)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	2.196957e+03   	3.265420e+02   	4.349975e+05   	9.767837e+08   	198
-2c1922b7	819200         	0.000000e+00   	8.901347e+01   	1.918734e+01   	2.412265e+04   	2.247011e+06   	271
+0e8bce2b	16588800       	0.000000e+00   	5.593766e+03   	7.653530e+02   	1.510317e+06   	8.606516e+09   	270
 d39bff17	3276800        	0.000000e+00   	1.148300e+03   	2.163448e+02   	2.021009e+05   	2.403102e+08   	176
+2c1922b7	819200         	0.000000e+00   	8.901347e+01   	1.918734e+01   	2.412265e+04   	2.247011e+06   	271
+ff82dda0	7372800        	0.000000e+00   	2.196957e+03   	3.265420e+02   	4.349975e+05   	9.767837e+08   	198
 
 ####################
-# COMB_3
+# COMB_1
 # number of types devices
 1
 ####################
@@ -154,15 +158,16 @@ d39bff17	3276800        	0.000000e+00   	1.148300e+03   	2.163448e+02   	2.02100
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb3)
+# Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	2.164310e+03   	2.607466e+02   	4.869698e+05   	1.069251e+09   	225
-2c1922b7	819200         	0.000000e+00   	4.930666e+02   	7.623523e+01   	7.642532e+04   	3.858360e+07   	155
+0e8bce2b	16588800       	0.000000e+00   	5.652338e+03   	6.245997e+02   	1.520479e+06   	8.699205e+09   	269
 d39bff17	3276800        	0.000000e+00   	1.203544e+03   	1.679024e+02   	2.286733e+05   	2.805746e+08   	190
+2c1922b7	819200         	0.000000e+00   	4.930666e+02   	7.623523e+01   	7.642532e+04   	3.858360e+07   	155
+ff82dda0	7372800        	0.000000e+00   	2.164310e+03   	2.607466e+02   	4.869698e+05   	1.069251e+09   	225
 

+ 28 - 23
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_21.sirocco

@@ -28,18 +28,19 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	1.583302e+04   	2.624137e+03   	3.974089e+06   	6.465024e+10   	251
-2c1922b7	819200         	0.000000e+00   	3.523655e+03   	5.077738e+02   	5.990214e+04   	2.154576e+08   	17
+0e8bce2b	16588800       	0.000000e+00   	8.483517e+04   	1.709999e+04   	1.781539e+06   	1.572777e+11   	21
 d39bff17	3276800        	0.000000e+00   	8.986208e+03   	1.629610e+03   	1.797242e+05   	1.668151e+09   	20
+2c1922b7	819200         	0.000000e+00   	3.523655e+03   	5.077738e+02   	5.990214e+04   	2.154576e+08   	17
+ff82dda0	7372800        	0.000000e+00   	1.583302e+04   	2.624137e+03   	3.974089e+06   	6.465024e+10   	251
 
 ####################
-# COMB_3
+# COMB_1
 # number of types devices
 1
 ####################
@@ -58,20 +59,21 @@ d39bff17	3276800        	0.000000e+00   	8.986208e+03   	1.629610e+03   	1.79724
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb3)
+# Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	1.570696e+03   	2.281691e+02   	4.115224e+05   	6.600167e+08   	262
-2c1922b7	819200         	0.000000e+00   	2.882912e+02   	5.271451e+01   	7.409085e+04   	2.207390e+07   	257
+0e8bce2b	16588800       	0.000000e+00   	4.641113e+03   	5.516013e+02   	1.257742e+06   	5.919777e+09   	271
 d39bff17	3276800        	0.000000e+00   	8.365056e+02   	1.344660e+02   	1.396964e+05   	1.198764e+08   	167
+2c1922b7	819200         	0.000000e+00   	2.882912e+02   	5.271451e+01   	7.409085e+04   	2.207390e+07   	257
+ff82dda0	7372800        	0.000000e+00   	1.570696e+03   	2.281691e+02   	4.115224e+05   	6.600167e+08   	262
 
 ####################
-# COMB_2
+# COMB_0
 # number of types devices
 1
 ####################
@@ -90,20 +92,21 @@ d39bff17	3276800        	0.000000e+00   	8.365056e+02   	1.344660e+02   	1.39696
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb2)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	1.571709e+03   	2.150516e+02   	4.007858e+05   	6.417117e+08   	255
-2c1922b7	819200         	0.000000e+00   	9.967334e+01   	2.197557e+01   	2.372225e+04   	2.479413e+06   	238
+0e8bce2b	16588800       	0.000000e+00   	4.556926e+03   	5.099622e+02   	1.048093e+06   	4.835897e+09   	230
 d39bff17	3276800        	0.000000e+00   	7.019049e+02   	1.632697e+02   	1.109010e+05   	8.205375e+07   	158
+2c1922b7	819200         	0.000000e+00   	9.967334e+01   	2.197557e+01   	2.372225e+04   	2.479413e+06   	238
+ff82dda0	7372800        	0.000000e+00   	1.571709e+03   	2.150516e+02   	4.007858e+05   	6.417117e+08   	255
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -122,20 +125,21 @@ d39bff17	3276800        	0.000000e+00   	7.019049e+02   	1.632697e+02   	1.10901
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	1.569547e+03   	2.419662e+02   	2.589752e+05   	4.161341e+08   	165
-2c1922b7	819200         	0.000000e+00   	2.858293e+02   	5.241353e+01   	7.460146e+04   	2.204030e+07   	261
+0e8bce2b	16588800       	0.000000e+00   	4.650733e+03   	7.073225e+02   	1.232444e+06   	5.864350e+09   	265
 d39bff17	3276800        	0.000000e+00   	8.352707e+02   	1.515223e+02   	1.587014e+05   	1.369209e+08   	190
+2c1922b7	819200         	0.000000e+00   	2.858293e+02   	5.241353e+01   	7.460146e+04   	2.204030e+07   	261
+ff82dda0	7372800        	0.000000e+00   	1.569547e+03   	2.419662e+02   	2.589752e+05   	4.161341e+08   	165
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -154,15 +158,16 @@ d39bff17	3276800        	0.000000e+00   	8.352707e+02   	1.515223e+02   	1.58701
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb1)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-ff82dda0	7372800        	0.000000e+00   	1.591448e+03   	2.256700e+02   	2.387172e+05   	3.875451e+08   	150
-2c1922b7	819200         	0.000000e+00   	2.930233e+02   	5.590601e+01   	5.362326e+04   	1.628483e+07   	183
+0e8bce2b	16588800       	0.000000e+00   	4.671203e+03   	5.859459e+02   	1.331293e+06   	6.316588e+09   	285
 d39bff17	3276800        	0.000000e+00   	8.453596e+02   	1.395049e+02   	1.420204e+05   	1.233279e+08   	168
+2c1922b7	819200         	0.000000e+00   	2.930233e+02   	5.590601e+01   	5.362326e+04   	1.628483e+07   	183
+ff82dda0	7372800        	0.000000e+00   	1.591448e+03   	2.256700e+02   	2.387172e+05   	3.875451e+08   	150
 

+ 28 - 23
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_22.sirocco

@@ -28,18 +28,19 @@
 #####
 # Model for cpu0_impl0 (Comb4)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-24c84a50	11059200       	0.000000e+00   	3.517390e+04   	7.045528e+03   	6.925741e+07   	2.533794e+12   	1969
-d46431bb	1228800        	0.000000e+00   	1.613402e+03   	3.115535e+02   	8.438094e+06   	1.412169e+10   	5230
+8cfc3ba0	24883200       	0.000000e+00   	1.164877e+05   	2.576301e+04   	2.054842e+08   	2.510721e+13   	1764
 f0ac7beb	4915200        	0.000000e+00   	1.087142e+04   	2.109400e+03   	2.505863e+07   	2.826792e+11   	2305
+d46431bb	1228800        	0.000000e+00   	1.613402e+03   	3.115535e+02   	8.438094e+06   	1.412169e+10   	5230
+24c84a50	11059200       	0.000000e+00   	3.517390e+04   	7.045528e+03   	6.925741e+07   	2.533794e+12   	1969
 
 ####################
-# COMB_3
+# COMB_1
 # number of types devices
 1
 ####################
@@ -58,20 +59,21 @@ f0ac7beb	4915200        	0.000000e+00   	1.087142e+04   	2.109400e+03   	2.50586
 # number of implementations
 1
 #####
-# Model for cuda0_impl0 (Comb3)
+# Model for cuda0_impl0 (Comb1)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-24c84a50	11059200       	0.000000e+00   	7.851775e+02   	4.684799e+01   	4.315336e+06   	3.400367e+09   	5496
-d46431bb	1228800        	0.000000e+00   	6.142508e+01   	1.012391e+01   	4.393736e+05   	2.772170e+07   	7153
+8cfc3ba0	24883200       	0.000000e+00   	2.688252e+03   	2.597845e+02   	1.459721e+07   	3.960743e+10   	5430
 f0ac7beb	4915200        	0.000000e+00   	2.657700e+02   	2.996380e+01   	1.356225e+06   	3.650255e+08   	5103
+d46431bb	1228800        	0.000000e+00   	6.142508e+01   	1.012391e+01   	4.393736e+05   	2.772170e+07   	7153
+24c84a50	11059200       	0.000000e+00   	7.851775e+02   	4.684799e+01   	4.315336e+06   	3.400367e+09   	5496
 
 ####################
-# COMB_2
+# COMB_0
 # number of types devices
 1
 ####################
@@ -90,20 +92,21 @@ f0ac7beb	4915200        	0.000000e+00   	2.657700e+02   	2.996380e+01   	1.35622
 # number of implementations
 1
 #####
-# Model for cuda2_impl0 (Comb2)
+# Model for cuda2_impl0 (Comb0)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-24c84a50	11059200       	0.000000e+00   	7.926860e+02   	4.760061e+01   	4.363736e+06   	3.471546e+09   	5505
-d46431bb	1228800        	0.000000e+00   	6.592485e+01   	1.426453e+01   	1.071279e+05   	7.393038e+06   	1625
+8cfc3ba0	24883200       	0.000000e+00   	2.707789e+03   	2.773178e+02   	1.421860e+07   	3.890480e+10   	5251
 f0ac7beb	4915200        	0.000000e+00   	2.693001e+02   	2.710216e+01   	1.308798e+06   	3.560293e+08   	4860
+d46431bb	1228800        	0.000000e+00   	6.592485e+01   	1.426453e+01   	1.071279e+05   	7.393038e+06   	1625
+24c84a50	11059200       	0.000000e+00   	7.926860e+02   	4.760061e+01   	4.363736e+06   	3.471546e+09   	5505
 
 ####################
-# COMB_1
+# COMB_2
 # number of types devices
 1
 ####################
@@ -122,20 +125,21 @@ f0ac7beb	4915200        	0.000000e+00   	2.693001e+02   	2.710216e+01   	1.30879
 # number of implementations
 1
 #####
-# Model for cuda1_impl0 (Comb1)
+# Model for cuda1_impl0 (Comb2)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-24c84a50	11059200       	0.000000e+00   	7.922324e+02   	5.091772e+01   	4.156844e+06   	3.306790e+09   	5247
-d46431bb	1228800        	0.000000e+00   	6.317490e+01   	1.087216e+01   	2.866877e+05   	1.864788e+07   	4538
+8cfc3ba0	24883200       	0.000000e+00   	2.706383e+03   	2.631153e+02   	1.444938e+07   	3.947516e+10   	5339
 f0ac7beb	4915200        	0.000000e+00   	2.686331e+02   	2.912062e+01   	1.401996e+06   	3.810483e+08   	5219
+d46431bb	1228800        	0.000000e+00   	6.317490e+01   	1.087216e+01   	2.866877e+05   	1.864788e+07   	4538
+24c84a50	11059200       	0.000000e+00   	7.922324e+02   	5.091772e+01   	4.156844e+06   	3.306790e+09   	5247
 
 ####################
-# COMB_0
+# COMB_3
 # number of types devices
 1
 ####################
@@ -154,15 +158,16 @@ f0ac7beb	4915200        	0.000000e+00   	2.686331e+02   	2.912062e+01   	1.40199
 # number of implementations
 1
 #####
-# Model for cuda3_impl0 (Comb0)
+# Model for cuda3_impl0 (Comb3)
 # number of entries
-3
+4
 # sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
 0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
 # a		b		c
 nan            	nan            	nan            
 # hash		size		flops		mean (us)	dev (us)	sum		sum2		n
-24c84a50	11059200       	0.000000e+00   	7.867204e+02   	4.699968e+01   	4.148377e+06   	3.275261e+09   	5273
-d46431bb	1228800        	0.000000e+00   	5.975719e+01   	9.345113e+00   	4.033610e+05   	2.469321e+07   	6750
+8cfc3ba0	24883200       	0.000000e+00   	2.681149e+03   	2.665822e+02   	1.451306e+07   	3.929636e+10   	5413
 f0ac7beb	4915200        	0.000000e+00   	2.642224e+02   	2.666799e+01   	1.450317e+06   	3.871098e+08   	5489
+d46431bb	1228800        	0.000000e+00   	5.975719e+01   	9.345113e+00   	4.033610e+05   	2.469321e+07   	6750
+24c84a50	11059200       	0.000000e+00   	7.867204e+02   	4.699968e+01   	4.148377e+06   	3.275261e+09   	5273