浏览代码

ajout modèles pour attila

Samuel Thibault 10 年之前
父节点
当前提交
ab7af769cf

+ 9 - 0
tools/perfmodels/README

@@ -8,10 +8,19 @@ The architecture 'mirage' is composed of:
   - CUDA 6.0
   - Magma 1.6.0
 
+The architecture 'attila' is composed of:
+- 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores
+  - OpenBlas 0.2.12-1
+- 3 NVidia GF100 Tesla C2050 / C2070, thus 3 GPUs
+  - CUDA 6.0
+
 To use performance models stored in this directory, one needs to set
 the environment variable 'STARPU_PERF_MODEL_DIR' to the location of
 the directory, e.g.:
 
 export STARPU_PERF_MODEL_DIR=.../tools/perfmodels/sampling
 
+and then select the desired architecture by setting the environment
+variable 'STARPU_HOSTNAME' to its name ('mirage' or 'attila'), e.g.:
+
+export STARPU_HOSTNAME=mirage
 

+ 136 - 0
tools/perfmodels/sampling/codelets/44/chol_model_11.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	7.593370e+04   	4.030251e+03   	2.353945e+06   	1.792473e+11   	31
+afdd228b	1638400        	8.758624e+07   	2.346245e+04   	1.988237e+03   	1.032348e+06   	2.439534e+10   	44
+cea37d6d	409600         	1.097392e+07   	3.401100e+03   	3.819888e+02   	1.564506e+05   	5.388163e+08   	46
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	4.425357e+04   	2.763302e+03   	7.523107e+05   	3.342224e+10   	17
+afdd228b	1638400        	8.758624e+07   	2.425311e+04   	2.094515e+03   	2.910373e+05   	7.111204e+09   	12
+cea37d6d	409600         	1.097392e+07   	1.130795e+04   	5.745206e+02   	1.130795e+05   	1.281997e+09   	10
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	4.555777e+04   	5.907293e+03   	7.744821e+05   	3.587692e+10   	17
+afdd228b	1638400        	8.758624e+07   	2.509024e+04   	3.469720e+03   	2.509024e+05   	6.415590e+09   	10
+cea37d6d	409600         	1.097392e+07   	1.082278e+04   	2.477308e+02   	1.082278e+05   	1.171939e+09   	10
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	4.828333e+04   	3.379151e+03   	5.311166e+05   	2.576968e+10   	11
+afdd228b	1638400        	8.758624e+07   	2.737149e+04   	3.217773e+03   	2.737149e+05   	7.595526e+09   	10
+cea37d6d	409600         	1.097392e+07   	1.143954e+04   	1.654563e+02   	1.143954e+05   	1.308904e+09   	10
+

+ 136 - 0
tools/perfmodels/sampling/codelets/44/chol_model_21.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	4.361594e+04   	1.992990e+03   	1.840593e+07   	8.044680e+11   	422
+d39bff17	3276800        	2.625536e+08   	1.391260e+04   	9.732436e+02   	6.552836e+06   	9.161314e+10   	471
+2c1922b7	819200         	3.287040e+07   	2.026126e+03   	2.243730e+02   	1.355478e+06   	2.780050e+09   	669
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.302421e+03   	2.284702e+02   	1.739468e+06   	1.097727e+10   	276
+d39bff17	3276800        	2.625536e+08   	2.553136e+03   	1.599096e+02   	6.714747e+05   	1.721091e+09   	263
+2c1922b7	819200         	3.287040e+07   	6.889531e+02   	1.162392e+02   	1.198778e+05   	8.494121e+07   	174
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.325409e+03   	1.859324e+02   	1.688884e+06   	1.069211e+10   	267
+d39bff17	3276800        	2.625536e+08   	2.539349e+03   	1.556256e+02   	6.297585e+05   	1.605183e+09   	248
+2c1922b7	819200         	3.287040e+07   	6.837878e+02   	1.012279e+02   	1.319710e+05   	9.221787e+07   	193
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.297128e+03   	2.218514e+02   	1.542796e+06   	9.727245e+09   	245
+d39bff17	3276800        	2.625536e+08   	2.528040e+03   	1.085312e+02   	7.255475e+05   	1.837594e+09   	287
+2c1922b7	819200         	3.287040e+07   	6.470080e+02   	5.924722e+01   	1.598110e+05   	1.042660e+08   	247

+ 136 - 0
tools/perfmodels/sampling/codelets/44/chol_model_22.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	8.407559e+04   	3.415249e+03   	1.399859e+08   	1.178881e+13   	1665
+f0ac7beb	4915200        	5.242880e+08   	2.610119e+04   	1.422415e+03   	4.251883e+07   	1.113088e+12   	1629
+d46431bb	1228800        	6.553600e+07   	3.432588e+03   	1.640071e+02   	9.130685e+06   	3.141343e+10   	2660
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.795670e+03   	5.624760e+01   	1.818024e+07   	5.084653e+10   	6503
+f0ac7beb	4915200        	5.242880e+08   	8.880682e+02   	3.243424e+01   	5.760010e+06   	5.122105e+09   	6486
+d46431bb	1228800        	6.553600e+07   	2.022322e+02   	1.071833e+01   	1.116119e+06   	2.263493e+08   	5519
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.815870e+03   	4.694553e+01   	1.827781e+07   	5.148226e+10   	6491
+f0ac7beb	4915200        	5.242880e+08   	8.961392e+02   	3.565427e+01   	5.741564e+06   	5.153386e+09   	6407
+d46431bb	1228800        	6.553600e+07   	2.020566e+02   	9.551669e+00   	1.107876e+06   	2.243540e+08   	5483
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.810209e+03   	3.946230e+01   	1.806121e+07   	5.076578e+10   	6427
+f0ac7beb	4915200        	5.242880e+08   	8.833768e+02   	3.092949e+01   	5.707497e+06   	5.048051e+09   	6461
+d46431bb	1228800        	6.553600e+07   	1.637484e+02   	6.969807e+00   	1.084015e+06   	1.778273e+08   	6620

+ 136 - 0
tools/perfmodels/sampling/codelets/44/cl_update.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+49ec0825	34613280       	0.000000e+00   	7.774706e+04   	9.102018e+02   	4.664824e+06   	3.627260e+11   	60
+6d78e48f	4461600        	0.000000e+00   	9.929947e+03   	1.596124e+02   	1.797320e+06   	1.785191e+10   	181
+8ec75d42	14753312       	0.000000e+00   	3.310870e+04   	5.189822e+02   	1.920304e+06   	6.359440e+10   	58
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+49ec0825	34613280       	0.000000e+00   	5.051327e+03   	6.437605e+02   	2.692357e+06   	1.382086e+10   	533
+6d78e48f	4461600        	0.000000e+00   	1.009230e+03   	9.724548e+01   	3.835076e+05   	3.906410e+08   	380
+8ec75d42	14753312       	0.000000e+00   	1.883088e+03   	3.340290e+02   	1.069594e+06   	2.077513e+09   	568
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+49ec0825	34613280       	0.000000e+00   	5.105276e+03   	6.922431e+02   	2.807902e+06   	1.459867e+10   	550
+6d78e48f	4461600        	0.000000e+00   	1.012651e+03   	9.766669e+01   	3.686049e+05   	3.767403e+08   	364
+8ec75d42	14753312       	0.000000e+00   	2.097710e+03   	2.383227e+02   	9.880215e+05   	2.099334e+09   	471
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+49ec0825	34613280       	0.000000e+00   	4.938766e+03   	7.348710e+02   	2.780525e+06   	1.403640e+10   	563
+6d78e48f	4461600        	0.000000e+00   	1.022015e+03   	1.088844e+02   	3.740577e+05   	3.866319e+08   	366
+8ec75d42	14753312       	0.000000e+00   	1.829845e+03   	3.604651e+02   	8.893047e+05   	1.690438e+09   	486
+

+ 136 - 0
tools/perfmodels/sampling/codelets/44/save_cl_bottom.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	4.419323e+01   	9.394629e+00   	1.825180e+04   	8.430572e+05   	413
+fb4b8624	4427800        	0.000000e+00   	1.267467e+01   	2.411186e+00   	5.754301e+03   	7.557335e+04   	454
+4af260f6	14678040       	0.000000e+00   	2.442142e+01   	5.135780e+00   	1.394463e+04   	3.556084e+05   	571
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	3.910144e+01   	6.371108e+00   	1.329449e+03   	5.336347e+04   	34
+fb4b8624	4427800        	0.000000e+00   	3.998483e+01   	8.150933e+00   	2.519044e+03   	1.049091e+05   	63
+4af260f6	14678040       	0.000000e+00   	3.398450e+01   	5.156207e+00   	8.156280e+02   	2.835679e+04   	24
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	3.173112e+01   	3.816078e+00   	7.615470e+02   	2.451424e+04   	24
+fb4b8624	4427800        	0.000000e+00   	2.860497e+01   	5.248990e+00   	1.029779e+03   	3.044867e+04   	36
+4af260f6	14678040       	0.000000e+00   	3.652883e+01   	8.229435e+00   	1.716855e+03   	6.589771e+04   	47
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	3.719045e+01   	7.851419e+00   	2.566141e+03   	9.968943e+04   	69
+fb4b8624	4427800        	0.000000e+00   	4.509905e+01   	6.110617e+00   	9.470800e+02   	4.349654e+04   	21
+4af260f6	14678040       	0.000000e+00   	2.634116e+01   	3.746211e+00   	6.479926e+03   	1.741412e+05   	246
+

+ 136 - 0
tools/perfmodels/sampling/codelets/44/save_cl_top.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	4.346555e+01   	9.370422e+00   	2.103733e+04   	9.568966e+05   	484
+fb4b8624	4427800        	0.000000e+00   	1.080055e+01   	2.408554e+00   	6.631537e+03   	7.518614e+04   	614
+4af260f6	14678040       	0.000000e+00   	2.045608e+01   	4.186697e+00   	1.294870e+04   	2.759751e+05   	633
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	3.184284e+01   	5.707419e+00   	4.840111e+03   	1.590742e+05   	152
+fb4b8624	4427800        	0.000000e+00   	3.194475e+01   	5.964283e+00   	1.150011e+03   	3.801743e+04   	36
+4af260f6	14678040       	0.000000e+00   	3.430576e+01   	6.297323e+00   	5.523228e+03   	1.958632e+05   	161
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	2.735681e+01   	5.823931e+00   	3.063963e+03   	8.761910e+04   	112
+fb4b8624	4427800        	0.000000e+00   	3.161427e+01   	5.733859e+00   	2.212999e+03   	7.226375e+04   	70
+4af260f6	14678040       	0.000000e+00   	3.666193e+01   	6.692591e+00   	6.819119e+03   	2.583331e+05   	186
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f2ff9ae5	34480152       	0.000000e+00   	3.900993e+01   	8.465923e+00   	7.489907e+03   	3.059418e+05   	192
+fb4b8624	4427800        	0.000000e+00   	3.364966e+01   	7.354940e+00   	1.278687e+03   	4.508300e+04   	38
+4af260f6	14678040       	0.000000e+00   	2.853135e+01   	5.469952e+00   	8.730594e+03   	2.582513e+05   	306
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_dgemm_gemm.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	33587200       	1.310720e+09   	1.325410e+05   	6.551456e+03   	1.020565e+07   	1.355972e+12   	77
+19d944cc	302284800      	3.538944e+10   	3.351379e+06   	5.140990e+04   	2.681103e+08   	8.987506e+14   	80
+9e87ad36	134348800      	1.048576e+10   	1.018200e+06   	1.999744e+04   	8.145602e+07   	8.297054e+13   	80
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	33587200       	1.310720e+09   	5.299792e+03   	3.590016e+02   	5.511784e+05   	2.934534e+09   	104
+19d944cc	302284800      	3.538944e+10   	1.430442e+05   	1.747241e+04   	7.867431e+06   	1.142181e+12   	55
+9e87ad36	134348800      	1.048576e+10   	3.517587e+04   	1.024947e+02   	3.623115e+06   	1.274473e+11   	103
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	33587200       	1.310720e+09   	5.301676e+03   	3.783139e+02   	5.460726e+05   	2.909842e+09   	103
+19d944cc	302284800      	3.538944e+10   	1.433132e+05   	1.648772e+04   	7.738910e+06   	1.123767e+12   	54
+9e87ad36	134348800      	1.048576e+10   	3.516032e+04   	6.444220e+01   	3.551193e+06   	1.248615e+11   	101
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	33587200       	1.310720e+09   	5.174822e+03   	5.540579e+01   	5.433563e+05   	2.812095e+09   	105
+19d944cc	302284800      	3.538944e+10   	1.362048e+05   	1.589418e+04   	6.401628e+06   	8.838060e+11   	47
+9e87ad36	134348800      	1.048576e+10   	3.562387e+04   	2.173490e+03   	3.598011e+06   	1.286522e+11   	101
+

+ 136 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_11.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	2.515766e+05   	2.096151e+04   	2.515766e+06   	6.373017e+11   	10
+afdd228b	3276800        	0.000000e+00   	7.350482e+04   	4.292777e+03   	9.555626e+05   	7.047802e+10   	13
+cea37d6d	819200         	0.000000e+00   	9.586125e+03   	1.023620e+03   	2.108948e+05   	2.044715e+09   	22
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	6.990473e+04   	4.071360e+03   	1.118476e+06   	7.845196e+10   	16
+afdd228b	3276800        	0.000000e+00   	2.992444e+04   	7.760944e+02   	4.787910e+05   	1.433719e+10   	16
+cea37d6d	819200         	0.000000e+00   	9.620220e+03   	2.335102e+02   	1.058224e+05   	1.018635e+09   	11
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	6.793522e+04   	8.600858e+02   	6.793522e+05   	4.615934e+10   	10
+afdd228b	3276800        	0.000000e+00   	2.989699e+04   	1.490344e+03   	3.587638e+05   	1.075261e+10   	12
+cea37d6d	819200         	0.000000e+00   	9.974140e+03   	1.055336e+03   	1.097155e+05   	1.106569e+09   	11
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	6.962168e+04   	1.952172e+02   	1.322812e+06   	9.209711e+10   	19
+afdd228b	3276800        	0.000000e+00   	3.047853e+04   	4.777511e+01   	4.571780e+05   	1.393415e+10   	15
+cea37d6d	819200         	0.000000e+00   	1.119488e+04   	2.171263e+03   	1.231437e+05   	1.430437e+09   	11
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_12.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	8.869540e+04   	4.010843e+03   	1.765039e+07   	1.568709e+12   	199
+d39bff17	6553600        	0.000000e+00   	2.736718e+04   	1.452565e+03   	3.886139e+06   	1.066523e+11   	142
+2c1922b7	1638400        	0.000000e+00   	4.006489e+03   	3.502972e+02   	8.493756e+05   	3.429028e+09   	212
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.250005e+03   	1.530886e+03   	8.555006e+05   	6.478930e+09   	118
+d39bff17	6553600        	0.000000e+00   	2.060505e+03   	3.149423e+02   	4.265246e+05   	8.993882e+08   	207
+2c1922b7	1638400        	0.000000e+00   	5.794447e+02   	1.035504e+02   	9.155226e+04   	5.474365e+07   	158
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.906255e+03   	1.105050e+03   	1.042844e+06   	7.386541e+09   	151
+d39bff17	6553600        	0.000000e+00   	2.044032e+03   	3.248232e+02   	3.863220e+05   	8.095958e+08   	189
+2c1922b7	1638400        	0.000000e+00   	6.103626e+02   	1.085471e+02   	1.062031e+05   	6.687255e+07   	174
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.859160e+03   	1.144150e+03   	1.008296e+06   	7.108501e+09   	147
+d39bff17	6553600        	0.000000e+00   	2.022724e+03   	3.006626e+02   	4.308402e+05   	8.907256e+08   	213
+2c1922b7	1638400        	0.000000e+00   	5.771721e+02   	9.999833e+01   	9.638774e+04   	5.730226e+07   	167
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_21.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	8.795690e+04   	4.598673e+03   	1.715160e+07   	1.512725e+12   	195
+d39bff17	6553600        	0.000000e+00   	2.744119e+04   	1.740624e+03   	4.390591e+06   	1.209678e+11   	160
+2c1922b7	1638400        	0.000000e+00   	4.091631e+03   	5.062810e+02   	7.283104e+05   	3.025603e+09   	178
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.847320e+03   	1.168001e+03   	8.285257e+05   	5.838253e+09   	121
+d39bff17	6553600        	0.000000e+00   	2.123746e+03   	3.153004e+02   	4.226254e+05   	9.173322e+08   	199
+2c1922b7	1638400        	0.000000e+00   	5.446913e+02   	9.530021e+01   	1.040360e+05   	5.840221e+07   	191
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.816836e+03   	1.179433e+03   	1.172496e+06   	8.231973e+09   	172
+d39bff17	6553600        	0.000000e+00   	2.055421e+03   	3.154533e+02   	3.864192e+05   	8.129623e+08   	188
+2c1922b7	1638400        	0.000000e+00   	6.121868e+02   	1.314710e+02   	9.550115e+04   	6.116094e+07   	156
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.940618e+03   	1.147691e+03   	1.006390e+06   	7.175959e+09   	145
+d39bff17	6553600        	0.000000e+00   	2.093041e+03   	3.377347e+02   	3.851195e+05   	8.270585e+08   	184
+2c1922b7	1638400        	0.000000e+00   	6.098259e+02   	1.286153e+02   	1.091588e+05   	6.952888e+07   	179
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_22.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	1.687897e+05   	8.023245e+03   	3.110794e+08   	5.262564e+13   	1843
+f0ac7beb	9830400        	0.000000e+00   	5.125521e+04   	2.656019e+03   	7.375625e+07   	3.790543e+12   	1439
+d46431bb	2457600        	0.000000e+00   	6.821106e+03   	3.878220e+02   	1.100926e+07   	7.533811e+10   	1614
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.852261e+03   	2.067645e+02   	2.783335e+07   	1.630914e+11   	4756
+f0ac7beb	9830400        	0.000000e+00   	1.831142e+03   	6.447275e+01   	9.274735e+06   	1.700441e+10   	5065
+d46431bb	2457600        	0.000000e+00   	2.556618e+02   	1.838263e+01   	1.252232e+06   	3.218030e+08   	4898
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.846675e+03   	2.141855e+02   	2.732736e+07   	1.599886e+11   	4674
+f0ac7beb	9830400        	0.000000e+00   	1.834114e+03   	5.380375e+01   	9.566740e+06   	1.756159e+10   	5216
+d46431bb	2457600        	0.000000e+00   	2.610266e+02   	2.020042e+01   	1.246402e+06   	3.272925e+08   	4775
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.840318e+03   	1.756302e+02   	2.921911e+07   	1.708032e+11   	5003
+f0ac7beb	9830400        	0.000000e+00   	1.840833e+03   	4.879997e+01   	9.542881e+06   	1.757920e+10   	5184
+d46431bb	2457600        	0.000000e+00   	2.617920e+02   	1.853601e+01   	1.281472e+06   	3.371609e+08   	4895
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_sgemm_gemm.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	16793600       	1.310720e+09   	6.437897e+04   	1.392917e+03   	4.699665e+06   	3.027012e+11   	73
+19d944cc	151142400      	3.538944e+10   	1.651467e+06   	2.533878e+04   	1.321174e+08   	2.182388e+14   	80
+9e87ad36	67174400       	1.048576e+10   	4.988315e+05   	1.083517e+04   	3.990652e+07   	1.991602e+13   	80
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	16793600       	1.310720e+09   	3.518696e+03   	2.456600e+02   	3.659444e+05   	1.293924e+09   	104
+19d944cc	151142400      	3.538944e+10   	5.651725e+04   	3.211294e+03   	4.973518e+06   	2.819970e+11   	88
+9e87ad36	67174400       	1.048576e+10   	2.754677e+04   	3.854974e+02   	2.919957e+06   	8.045113e+10   	106
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	16793600       	1.310720e+09   	3.519726e+03   	2.643171e+02   	3.660515e+05   	1.295666e+09   	104
+19d944cc	151142400      	3.538944e+10   	5.631482e+04   	2.073911e+03   	4.955704e+06   	2.794581e+11   	88
+9e87ad36	67174400       	1.048576e+10   	2.756588e+04   	3.850160e+02   	2.894417e+06   	7.980271e+10   	105
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+a904baa5	16793600       	1.310720e+09   	3.490259e+03   	2.149136e+02   	3.699675e+05   	1.296178e+09   	106
+19d944cc	151142400      	3.538944e+10   	5.827658e+04   	5.717882e+03   	5.477999e+06   	3.223123e+11   	94
+9e87ad36	67174400       	1.048576e+10   	2.758960e+04   	5.859871e+01   	2.896907e+06   	7.992487e+10   	105
+

+ 136 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_11.attila

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	4.182946e+04   	4.195402e+03   	1.171225e+06   	4.948453e+10   	28
+617e5fe6	3686400        	0.000000e+00   	1.431791e+05   	1.961610e+04   	1.431791e+06   	2.088506e+11   	10
+cea37d6d	409600         	0.000000e+00   	4.839229e+03   	3.061560e+02   	1.258200e+05   	6.113086e+08   	26
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	2.565619e+04   	2.729977e+03   	4.618114e+05   	1.198247e+10   	18
+617e5fe6	3686400        	0.000000e+00   	5.517976e+04   	5.023576e+03   	8.828762e+05   	4.912068e+10   	16
+cea37d6d	409600         	0.000000e+00   	9.325377e+03   	4.741281e+02   	9.325377e+04   	8.718745e+08   	10
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	2.512124e+04   	2.223761e+03   	4.773036e+05   	1.208442e+10   	19
+617e5fe6	3686400        	0.000000e+00   	5.116041e+04   	1.272422e+03   	7.674062e+05   	3.928511e+10   	15
+cea37d6d	409600         	0.000000e+00   	9.353760e+03   	7.152342e+02   	9.353760e+04   	8.800438e+08   	10
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	2.814234e+04   	3.880171e+03   	5.065622e+05   	1.452685e+10   	18
+617e5fe6	3686400        	0.000000e+00   	5.467956e+04   	6.741916e+03   	8.201934e+05   	4.552961e+10   	15
+cea37d6d	409600         	0.000000e+00   	1.004502e+04   	9.839619e+02   	1.004502e+05   	1.018706e+09   	10
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_12.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	1.416946e+04   	8.998511e+02   	5.341885e+06   	7.599687e+10   	377
+ff82dda0	7372800        	0.000000e+00   	4.394377e+04   	1.700468e+03   	1.138144e+07   	5.008920e+11   	259
+2c1922b7	819200         	0.000000e+00   	1.978198e+03   	1.079993e+02   	7.517154e+05   	1.491475e+09   	380
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	2.676312e+03   	2.039650e+02   	4.549731e+05   	1.224722e+09   	170
+ff82dda0	7372800        	0.000000e+00   	6.450199e+03   	3.193507e+02   	5.482669e+05   	3.545099e+09   	85
+2c1922b7	819200         	0.000000e+00   	7.090855e+02   	1.344985e+02   	5.247233e+04   	3.854602e+07   	74
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	2.648361e+03   	2.330106e+02   	2.913197e+05   	7.774920e+08   	110
+ff82dda0	7372800        	0.000000e+00   	3.907893e+03   	1.767346e+02   	3.790657e+05   	1.484378e+09   	97
+2c1922b7	819200         	0.000000e+00   	5.977702e+02   	1.137267e+02   	6.695026e+04   	4.146945e+07   	112
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	2.649815e+03   	2.112061e+02   	4.054218e+05   	1.081118e+09   	153
+ff82dda0	7372800        	0.000000e+00   	6.517136e+03   	3.918474e+02   	3.454082e+05   	2.259210e+09   	53
+2c1922b7	819200         	0.000000e+00   	6.507707e+02   	8.750699e+01   	4.750626e+04   	3.147468e+07   	73
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_21.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	1.377909e+04   	1.008911e+03   	6.145473e+06   	8.513300e+10   	446
+ff82dda0	7372800        	0.000000e+00   	4.298380e+04   	1.919778e+03   	1.177756e+07   	5.072542e+11   	274
+2c1922b7	819200         	0.000000e+00   	1.936516e+03   	1.503574e+02   	4.725100e+05   	9.205395e+08   	244
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	2.656425e+03   	2.270595e+02   	3.320531e+05   	8.885184e+08   	125
+ff82dda0	7372800        	0.000000e+00   	6.358340e+03   	3.816293e+02   	5.023088e+05   	3.205356e+09   	79
+2c1922b7	819200         	0.000000e+00   	3.867923e+02   	4.867053e+01   	4.564149e+04   	1.793330e+07   	118
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	1.902887e+03   	4.574719e+02   	4.585957e+05   	9.230924e+08   	241
+ff82dda0	7372800        	0.000000e+00   	3.810456e+03   	1.334249e+02   	3.353201e+05   	1.279289e+09   	88
+2c1922b7	819200         	0.000000e+00   	3.835296e+02   	4.543249e+01   	2.262825e+04   	8.800385e+06   	59
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d39bff17	3276800        	0.000000e+00   	2.657310e+03   	2.918518e+02   	3.162199e+05   	8.504305e+08   	119
+ff82dda0	7372800        	0.000000e+00   	3.819809e+03   	1.073068e+02   	3.055848e+05   	1.168197e+09   	80
+2c1922b7	819200         	0.000000e+00   	4.020211e+02   	5.372009e+01   	3.256371e+04   	1.332505e+07   	81
+

+ 137 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_22.attila

@@ -0,0 +1,137 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f0ac7beb	4915200        	0.000000e+00   	2.587052e+04   	1.487038e+03   	5.386241e+07   	1.398052e+12   	2082
+24c84a50	11059200       	0.000000e+00   	8.218890e+04   	3.347888e+03   	1.244340e+08   	1.024406e+13   	1514
+d46431bb	1228800        	0.000000e+00   	3.265838e+03   	1.561177e+02   	8.347482e+06   	2.732382e+10   	2556
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f0ac7beb	4915200        	0.000000e+00   	9.047163e+02   	4.943457e+01   	7.022408e+06   	6.372255e+09   	7762
+24c84a50	11059200       	0.000000e+00   	2.963966e+03   	7.453353e+01   	1.530888e+07   	4.540369e+10   	5165
+d46431bb	1228800        	0.000000e+00   	1.924610e+02   	1.043827e+01   	8.556817e+05   	1.651698e+08   	4446
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f0ac7beb	4915200        	0.000000e+00   	8.810829e+02   	4.167975e+01   	6.874209e+06   	6.070301e+09   	7802
+24c84a50	11059200       	0.000000e+00   	2.960803e+03   	8.260112e+01   	1.519780e+07   	4.503271e+10   	5133
+d46431bb	1228800        	0.000000e+00   	1.894698e+02   	9.561378e+00   	8.340462e+05   	1.584290e+08   	4402
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+f0ac7beb	4915200        	0.000000e+00   	8.953024e+02   	5.096374e+01   	6.835634e+06   	6.139790e+09   	7635
+24c84a50	11059200       	0.000000e+00   	2.963787e+03   	5.048433e+01   	1.524275e+07   	4.518938e+10   	5143
+d46431bb	1228800        	0.000000e+00   	1.803248e+02   	8.617192e+00   	8.859357e+05   	1.601210e+08   	4913
+