Browse Source

Add idgraf performance models

Samuel Thibault 10 years ago
parent
commit
04b2a7d937
24 changed files with 6308 additions and 3 deletions
  1. 2 2
      doc/doxygen/chapters/01building.doxy
  2. 7 1
      tools/perfmodels/README
  3. 9 0
      tools/perfmodels/sampling/bus/idgraf.affinity
  4. 17 0
      tools/perfmodels/sampling/bus/idgraf.bandwidth
  5. 5 0
      tools/perfmodels/sampling/bus/idgraf.config
  6. 17 0
      tools/perfmodels/sampling/bus/idgraf.latency
  7. 1507 0
      tools/perfmodels/sampling/bus/idgraf.platform.xml
  8. 296 0
      tools/perfmodels/sampling/codelets/44/chol_model_11.idgraf
  9. 296 0
      tools/perfmodels/sampling/codelets/44/chol_model_21.idgraf
  10. 296 0
      tools/perfmodels/sampling/codelets/44/chol_model_22.idgraf
  11. 296 0
      tools/perfmodels/sampling/codelets/44/cl_update.idgraf
  12. 8 0
      tools/perfmodels/sampling/codelets/44/null.idgraf
  13. 296 0
      tools/perfmodels/sampling/codelets/44/save_cl_bottom.idgraf
  14. 296 0
      tools/perfmodels/sampling/codelets/44/save_cl_top.idgraf
  15. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_dgemm_gemm.idgraf
  16. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_11.idgraf
  17. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_12.idgraf
  18. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_21.idgraf
  19. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_22.idgraf
  20. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_sgemm_gemm.idgraf
  21. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_11.idgraf
  22. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_12.idgraf
  23. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_21.idgraf
  24. 296 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_22.idgraf

+ 2 - 2
doc/doxygen/chapters/01building.doxy

@@ -348,14 +348,14 @@ at CPU-GPU scheduling without actually having a GPU at hand. This can be done by
 using the simgrid version of StarPU: first install the simgrid simulator from
 http://simgrid.gforge.inria.fr/ , then configure StarPU with \ref enable-simgrid "--enable-simgrid"
 and rebuild and install it, and then you can simulate the performance for a
-couple of virtualized system shipped along StarPU: attila and mirage.
+few virtualized systems shipped along StarPU: attila, mirage and idgraf.
 
 For instance:
 
 \verbatim
 $ export STARPU_PERF_MODEL_DIR=$STARPU_PATH/share/starpu/perfmodels/sampling
 $ export STARPU_HOSTNAME=attila
-$ $STARPU_PATH/lib/starpu/examples/cholesky_implicit
+$ $STARPU_PATH/lib/starpu/examples/cholesky_implicit -size $((960*20)) -nblocks 20
 \endverbatim
 
 Will show the performance of the cholesky factorization with the attila

+ 7 - 1
tools/perfmodels/README

@@ -11,7 +11,13 @@ The architecture 'mirage' is composed of:
 The architecture 'attila' is composed of:
 - 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores
   - OpenBlas 0.2.12-1
-- 3 NVidia GF100 Tesla C2050 / C2070, thus 3 GPUs
+- 3 NVidia GF100 Tesla C2050, thus 3 GPUs
+  - CUDA 6.0
+
+The architecture 'idgraf' is composed of:
+- 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores
+  - MKL 11.1.0
+- 8 NVIDIA GF100 Tesla C2050, thus 8 GPUs
   - CUDA 6.0
 
 To use performance models stored in this directory, one needs to set

+ 9 - 0
tools/perfmodels/sampling/bus/idgraf.affinity

@@ -0,0 +1,9 @@
+# GPU	CPU0	CPU1	CPU2	CPU3	CPU4	CPU5	CPU6	CPU7	CPU8	CPU9	CPU10	CPU11	
+0	0	1	2	3	4	5	6	7	8	9	10	11	
+1	0	1	2	3	4	5	6	7	8	9	10	11	
+2	0	1	2	3	4	5	6	7	8	9	10	11	
+3	0	1	2	3	4	5	6	7	8	9	10	11	
+4	6	7	8	9	10	11	0	1	2	3	4	5	
+5	6	7	8	9	10	11	0	1	2	3	4	5	
+6	6	7	8	9	10	11	0	1	2	3	4	5	
+7	6	7	8	9	10	11	0	1	2	3	4	5	

+ 17 - 0
tools/perfmodels/sampling/bus/idgraf.bandwidth

@@ -0,0 +1,17 @@
+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
+0.000000	5985.150529	5982.867814	6004.601450	6002.864228	6017.068835	6017.279500	6014.519783	6025.792854	nan	nan	nan	nan	nan	nan	nan
+6518.853316	0.000000	6634.627873	5290.661415	5290.656130	3070.759552	3077.363038	3081.101044	3071.316544	nan	nan	nan	nan	nan	nan	nan
+6507.263283	6634.402756	0.000000	5290.497370	5290.566592	3070.780185	3077.514743	3081.028777	3071.425106	nan	nan	nan	nan	nan	nan	nan
+6468.888505	5290.537572	5290.537715	0.000000	6634.508569	3071.001714	3077.753708	3081.299378	3071.319606	nan	nan	nan	nan	nan	nan	nan
+6379.975977	5290.504747	5290.535878	6634.456149	0.000000	3070.823130	3077.536005	3081.267119	3071.334077	nan	nan	nan	nan	nan	nan	nan
+6520.767791	3803.989690	3804.097536	3799.684659	3800.145340	0.000000	6635.277188	5293.782380	5293.769441	nan	nan	nan	nan	nan	nan	nan
+6520.979807	3803.774735	3804.546566	3799.981880	3800.522991	6635.252627	0.000000	5293.483381	5293.507488	nan	nan	nan	nan	nan	nan	nan
+6520.981045	3803.433709	3804.330189	3799.708364	3800.220748	5293.757566	5293.607121	0.000000	6635.079661	nan	nan	nan	nan	nan	nan	nan
+6518.969813	3803.670471	3803.908300	3799.582824	3800.130361	5293.416171	5293.465355	6635.049331	0.000000	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan

+ 5 - 0
tools/perfmodels/sampling/bus/idgraf.config

@@ -0,0 +1,5 @@
+# Current configuration
+12 # Number of CPUs
+8 # Number of CUDA devices
+0 # Number of OpenCL devices
+0 # Number of MIC devices

+ 17 - 0
tools/perfmodels/sampling/bus/idgraf.latency

@@ -0,0 +1,17 @@
+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
+0.000000	11.018609	10.962211	10.954469	11.007844	11.005922	11.098250	11.109961	11.063805	nan	nan	nan	nan	nan	nan	nan
+11.927148	0.000000	18.151891	18.879328	18.799250	28.088742	27.684023	27.682789	27.425391	nan	nan	nan	nan	nan	nan	nan
+11.955969	18.258547	0.000000	18.841516	18.742984	27.801375	27.632695	27.804492	27.925742	nan	nan	nan	nan	nan	nan	nan
+11.869312	18.796500	18.733070	0.000000	18.166664	27.588664	27.297711	27.356961	27.379555	nan	nan	nan	nan	nan	nan	nan
+11.895391	18.823414	18.725125	18.160563	0.000000	27.561664	27.485000	27.345914	27.047594	nan	nan	nan	nan	nan	nan	nan
+12.104258	27.886172	28.017508	27.861555	27.938109	0.000000	17.586773	18.179898	18.207852	nan	nan	nan	nan	nan	nan	nan
+12.097562	28.033187	27.980211	27.902445	28.026523	17.499156	0.000000	18.177344	18.203297	nan	nan	nan	nan	nan	nan	nan
+12.132922	27.677352	27.910406	27.592461	27.664289	18.172375	18.238891	0.000000	17.600648	nan	nan	nan	nan	nan	nan	nan
+12.117094	23.135703	23.079305	23.071563	23.124938	23.123016	23.215344	23.227055	0.000000	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan

File diff suppressed because it is too large
+ 1507 - 0
tools/perfmodels/sampling/bus/idgraf.platform.xml


+ 296 - 0
tools/perfmodels/sampling/codelets/44/chol_model_11.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	2.069576e+04   	7.440388e+01   	4.346110e+05   	8.994720e+09   	21
+cea37d6d	409600         	1.097392e+07   	1.068290e+03   	2.074934e+01   	3.098041e+04   	3.310855e+07   	29
+afdd228b	1638400        	8.758624e+07   	6.632886e+03   	6.634864e+01   	1.392906e+05   	9.239911e+08   	21
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.998499e+04   	6.600211e+03   	5.998499e+05   	3.641761e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.500406e+04   	5.740284e+02   	1.500406e+05   	2.254514e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.368249e+04   	5.947857e+03   	3.368249e+05   	1.169887e+10   	10
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.745646e+04   	7.363450e+03   	5.745646e+05   	3.355466e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.559370e+04   	1.137871e+03   	1.559370e+05   	2.444583e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.216379e+04   	4.954206e+03   	3.216379e+05   	1.059054e+10   	10
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.761859e+04   	8.603827e+03   	5.761859e+05   	3.393928e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.498399e+04   	7.885417e+02   	1.498399e+05   	2.251416e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.033086e+04   	2.968298e+03   	3.033086e+05   	9.287718e+09   	10
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.207197e+04   	2.780602e+03   	5.207197e+05   	2.719222e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.609271e+04   	2.681035e+03   	1.609271e+05   	2.661633e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.107603e+04   	1.620445e+03   	3.107603e+05   	9.683455e+09   	10
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.062446e+04   	2.429137e+03   	5.062446e+05   	2.568736e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.506158e+04   	8.561331e+02   	1.506158e+05   	2.275840e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.034398e+04   	4.027845e+03   	3.034398e+05   	9.369809e+09   	10
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.174953e+04   	6.183695e+03   	5.174953e+05   	2.716252e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.518996e+04   	9.403764e+02   	1.518996e+05   	2.316193e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.100983e+04   	5.124047e+03   	3.100983e+05   	9.878653e+09   	10
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	5.475211e+04   	6.512121e+03   	5.475211e+05   	3.040202e+10   	10
+cea37d6d	409600         	1.097392e+07   	1.504708e+04   	5.064339e+02   	1.504708e+05   	2.266711e+09   	10
+afdd228b	1638400        	8.758624e+07   	2.918927e+04   	4.035680e+03   	2.918927e+05   	8.683004e+09   	10
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+cea37d6d	409600         	1.097392e+07   	1.467875e+04   	2.659893e+02   	1.467875e+05   	2.155366e+09   	10
+afdd228b	1638400        	8.758624e+07   	3.186232e+04   	5.396938e+03   	3.186232e+05   	1.044334e+10   	10
+617e5fe6	3686400        	2.953730e+08   	5.896762e+04   	1.233845e+04   	5.896762e+05   	3.629418e+10   	10
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/chol_model_21.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	4.711469e+04   	4.337925e+02   	3.203799e+06   	1.509588e+11   	68
+2c1922b7	819200         	3.287040e+07   	1.979166e+03   	8.798869e+01   	6.828124e+05   	1.354070e+09   	345
+d39bff17	3276800        	2.625536e+08   	1.482664e+04   	2.506296e+02   	2.298130e+06   	3.408328e+10   	155
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.573848e+03   	6.169449e+02   	1.360787e+06   	9.024393e+09   	207
+2c1922b7	819200         	3.287040e+07   	6.955196e+02   	8.976154e+01   	1.286711e+05   	9.098386e+07   	185
+d39bff17	3276800        	2.625536e+08   	2.647434e+03   	2.520462e+02   	4.685958e+05   	1.251821e+09   	177
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.555664e+03   	6.950469e+02   	1.252132e+06   	8.300825e+09   	191
+2c1922b7	819200         	3.287040e+07   	6.812499e+02   	8.342802e+01   	1.273937e+05   	8.808853e+07   	187
+d39bff17	3276800        	2.625536e+08   	2.596800e+03   	1.668067e+02   	5.894736e+05   	1.537061e+09   	227
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.446442e+03   	5.413553e+02   	1.276395e+06   	8.286236e+09   	198
+2c1922b7	819200         	3.287040e+07   	6.941204e+02   	8.002896e+01   	1.277182e+05   	8.983023e+07   	184
+d39bff17	3276800        	2.625536e+08   	2.630763e+03   	2.300111e+02   	4.603835e+05   	1.220418e+09   	175
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.554622e+03   	7.028631e+02   	1.238824e+06   	8.213390e+09   	189
+2c1922b7	819200         	3.287040e+07   	6.905951e+02   	7.284704e+01   	1.353566e+05   	9.451674e+07   	196
+d39bff17	3276800        	2.625536e+08   	2.623425e+03   	2.211699e+02   	4.905805e+05   	1.296149e+09   	187
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.504271e+03   	6.367049e+02   	9.951534e+05   	6.534773e+09   	153
+2c1922b7	819200         	3.287040e+07   	7.029111e+02   	9.289767e+01   	7.169693e+04   	5.127683e+07   	102
+d39bff17	3276800        	2.625536e+08   	2.684586e+03   	3.481310e+02   	4.080571e+05   	1.113886e+09   	152
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.618862e+03   	8.843940e+02   	8.405955e+05   	5.663119e+09   	127
+2c1922b7	819200         	3.287040e+07   	7.079333e+02   	9.356613e+01   	6.796160e+04   	4.895273e+07   	96
+d39bff17	3276800        	2.625536e+08   	2.800887e+03   	4.371231e+02   	3.221020e+05   	9.241450e+08   	115
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.576395e+03   	7.489644e+02   	8.878133e+05   	5.914339e+09   	135
+2c1922b7	819200         	3.287040e+07   	7.050156e+02   	1.025857e+02   	8.037177e+04   	5.786307e+07   	114
+d39bff17	3276800        	2.625536e+08   	2.645162e+03   	2.750078e+02   	4.205807e+05   	1.124529e+09   	159
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	6.544427e+03   	6.576164e+02   	9.358531e+05   	6.186464e+09   	143
+2c1922b7	819200         	3.287040e+07   	7.150712e+02   	1.054194e+02   	8.223319e+04   	6.008061e+07   	115
+d39bff17	3276800        	2.625536e+08   	2.613530e+03   	2.505172e+02   	3.972565e+05   	1.047781e+09   	152
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/chol_model_22.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	8.987520e+04   	9.682708e+02   	2.085105e+07   	1.874210e+12   	232
+d46431bb	1228800        	6.553600e+07   	3.465410e+03   	7.427679e+01   	2.737674e+06   	9.491521e+09   	790
+f0ac7beb	4915200        	5.242880e+08   	2.744657e+04   	5.713498e+02   	7.575252e+06   	2.080048e+11   	276
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.825219e+03   	1.227364e+02   	1.106356e+07   	3.131597e+10   	3916
+d46431bb	1228800        	6.553600e+07   	2.060677e+02   	2.909125e+01   	5.497887e+05   	1.155516e+08   	2668
+f0ac7beb	4915200        	5.242880e+08   	9.076961e+02   	5.977819e+01   	3.441076e+06   	3.136998e+09   	3791
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.823818e+03   	1.560928e+02   	1.063450e+07   	3.012164e+10   	3766
+d46431bb	1228800        	6.553600e+07   	1.632767e+02   	2.137437e+01   	5.224853e+05   	8.677162e+07   	3200
+f0ac7beb	4915200        	5.242880e+08   	9.226606e+02   	6.080500e+01   	3.410153e+06   	3.160079e+09   	3696
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.821988e+03   	1.631366e+02   	1.046111e+07   	2.961977e+10   	3707
+d46431bb	1228800        	6.553600e+07   	1.661504e+02   	2.199315e+01   	5.172263e+05   	8.744312e+07   	3113
+f0ac7beb	4915200        	5.242880e+08   	9.172785e+02   	5.967474e+01   	3.492079e+06   	3.216766e+09   	3807
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.817019e+03   	1.506473e+02   	1.119202e+07   	3.161828e+10   	3973
+d46431bb	1228800        	6.553600e+07   	2.042642e+02   	2.707555e+01   	5.498791e+05   	1.142941e+08   	2692
+f0ac7beb	4915200        	5.242880e+08   	9.044446e+02   	5.780357e+01   	3.477590e+06   	3.158134e+09   	3845
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.816710e+03   	1.414989e+02   	1.032888e+07   	2.916687e+10   	3667
+d46431bb	1228800        	6.553600e+07   	1.639497e+02   	2.257894e+01   	4.474187e+05   	7.474542e+07   	2729
+f0ac7beb	4915200        	5.242880e+08   	9.331501e+02   	5.611510e+01   	3.235231e+06   	3.029874e+09   	3467
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.815225e+03   	1.445443e+02   	1.009821e+07   	2.850368e+10   	3587
+d46431bb	1228800        	6.553600e+07   	1.659035e+02   	2.475202e+01   	4.006569e+05   	6.794997e+07   	2415
+f0ac7beb	4915200        	5.242880e+08   	9.137585e+02   	6.301297e+01   	3.125968e+06   	2.869963e+09   	3421
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.807699e+03   	1.292512e+02   	1.006279e+07   	2.831317e+10   	3584
+d46431bb	1228800        	6.553600e+07   	1.680450e+02   	2.634123e+01   	3.922170e+05   	6.752957e+07   	2334
+f0ac7beb	4915200        	5.242880e+08   	8.912551e+02   	5.629783e+01   	3.090873e+06   	2.765747e+09   	3468
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	2.827622e+03   	1.304764e+02   	1.027841e+07   	2.912533e+10   	3635
+d46431bb	1228800        	6.553600e+07   	1.666216e+02   	2.357918e+01   	4.083895e+05   	6.940921e+07   	2451
+f0ac7beb	4915200        	5.242880e+08   	9.077285e+02   	5.688987e+01   	3.089908e+06   	2.815814e+09   	3404
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/cl_update.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.774670e+03   	3.622348e+02   	1.348749e+06   	2.493306e+09   	760
+6d78e48f	4461600        	0.000000e+00   	1.036351e+03   	9.390524e+01   	2.839601e+05   	2.966985e+08   	274
+49ec0825	34613280       	0.000000e+00   	4.962997e+03   	6.650844e+02   	5.096998e+06   	2.575067e+10   	1027
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.813689e+03   	3.729019e+02   	1.331248e+06   	2.516537e+09   	734
+6d78e48f	4461600        	0.000000e+00   	1.023951e+03   	1.005326e+02   	3.553110e+05   	3.673281e+08   	347
+49ec0825	34613280       	0.000000e+00   	5.017264e+03   	7.095917e+02   	4.365019e+06   	2.233852e+10   	870
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	4.692078e+04   	5.010795e+02   	1.501465e+06   	7.045793e+10   	32
+6d78e48f	4461600        	0.000000e+00   	1.405585e+04   	1.896523e+02   	7.590156e+05   	1.067055e+10   	54
+49ec0825	34613280       	0.000000e+00   	1.108029e+05   	1.348959e+03   	3.545692e+06   	3.929311e+11   	32
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.859413e+03   	3.366203e+02   	1.309027e+06   	2.513795e+09   	704
+6d78e48f	4461600        	0.000000e+00   	1.027564e+03   	1.046018e+02   	3.483442e+05   	3.616551e+08   	339
+49ec0825	34613280       	0.000000e+00   	5.060000e+03   	7.405627e+02   	4.164380e+06   	2.152312e+10   	823
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.862800e+03   	3.783314e+02   	8.438483e+05   	1.636760e+09   	453
+6d78e48f	4461600        	0.000000e+00   	9.567271e+02   	3.148502e+01   	5.606421e+05   	5.369624e+08   	586
+49ec0825	34613280       	0.000000e+00   	4.965851e+03   	6.509733e+02   	5.810046e+06   	2.934763e+10   	1170
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.889366e+03   	3.958521e+02   	1.186522e+06   	2.340180e+09   	628
+6d78e48f	4461600        	0.000000e+00   	1.028680e+03   	8.044529e+01   	2.880303e+05   	2.981029e+08   	280
+49ec0825	34613280       	0.000000e+00   	5.035634e+03   	7.113130e+02   	4.899672e+06   	2.516526e+10   	973
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.814024e+03   	3.173708e+02   	1.186372e+06   	2.217980e+09   	654
+6d78e48f	4461600        	0.000000e+00   	1.025445e+03   	7.185494e+01   	3.466003e+05   	3.571646e+08   	338
+49ec0825	34613280       	0.000000e+00   	5.092715e+03   	7.051028e+02   	3.513973e+06   	1.823871e+10   	690
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.793350e+03   	3.531620e+02   	1.388053e+06   	2.585801e+09   	774
+6d78e48f	4461600        	0.000000e+00   	1.033622e+03   	1.055186e+02   	3.783058e+05   	3.951004e+08   	366
+49ec0825	34613280       	0.000000e+00   	4.986601e+03   	7.025210e+02   	5.345636e+06   	2.718562e+10   	1072
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.781570e+03   	3.261441e+02   	1.501864e+06   	2.765346e+09   	843
+6d78e48f	4461600        	0.000000e+00   	1.022184e+03   	1.018115e+02   	3.751415e+05   	3.872679e+08   	367
+49ec0825	34613280       	0.000000e+00   	5.102994e+03   	7.050225e+02   	4.327339e+06   	2.250389e+10   	848
+

+ 8 - 0
tools/perfmodels/sampling/codelets/44/null.idgraf

@@ -0,0 +1,8 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+0

+ 296 - 0
tools/perfmodels/sampling/codelets/44/save_cl_bottom.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.985465e+01   	6.158621e+00   	3.552703e+03   	1.105782e+05   	119
+fb4b8624	4427800        	0.000000e+00   	1.132689e+01   	2.249702e+00   	2.423954e+03   	2.853894e+04   	214
+f2ff9ae5	34480152       	0.000000e+00   	5.622304e+01   	1.121739e+01   	9.276802e+03   	5.423319e+05   	165
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.632587e+01   	4.244468e+00   	1.174134e+04   	3.171360e+05   	446
+fb4b8624	4427800        	0.000000e+00   	2.560067e+01   	2.946464e+00   	6.809779e+03   	1.766442e+05   	266
+f2ff9ae5	34480152       	0.000000e+00   	2.687395e+01   	3.041318e+00   	1.378634e+04   	3.752385e+05   	513
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.682825e+01   	3.688517e+00   	9.309402e+03   	2.544759e+05   	347
+fb4b8624	4427800        	0.000000e+00   	2.601287e+01   	3.034296e+00   	6.711320e+03   	1.769561e+05   	258
+f2ff9ae5	34480152       	0.000000e+00   	2.650277e+01   	3.250317e+00   	1.327789e+04   	3.571937e+05   	501
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.844841e+01   	4.987346e+00   	1.118023e+04   	3.278350e+05   	393
+fb4b8624	4427800        	0.000000e+00   	2.545228e+01   	3.038424e+00   	7.533874e+03   	1.944869e+05   	296
+f2ff9ae5	34480152       	0.000000e+00   	2.671593e+01   	2.977811e+00   	9.510870e+03   	2.572485e+05   	356
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.683154e+01   	3.643920e+00   	1.145707e+04   	3.130806e+05   	427
+fb4b8624	4427800        	0.000000e+00   	2.439165e+01   	2.519213e+00   	6.951620e+03   	1.713702e+05   	285
+f2ff9ae5	34480152       	0.000000e+00   	2.686670e+01   	3.337051e+00   	1.332588e+04   	3.635460e+05   	496
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.743422e+01   	5.313897e+00   	8.504608e+03   	2.420709e+05   	310
+fb4b8624	4427800        	0.000000e+00   	2.591823e+01   	3.009457e+00   	7.879143e+03   	2.069667e+05   	304
+f2ff9ae5	34480152       	0.000000e+00   	2.671837e+01   	2.963498e+00   	1.282482e+04   	3.468736e+05   	480
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.572873e+01   	2.716127e+00   	1.232406e+04   	3.206163e+05   	479
+fb4b8624	4427800        	0.000000e+00   	2.555023e+01   	2.989409e+00   	6.362008e+03   	1.647760e+05   	249
+f2ff9ae5	34480152       	0.000000e+00   	2.648407e+01   	2.593556e+00   	1.504295e+04   	4.022192e+05   	568
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.535931e+01   	2.801001e+00   	7.303480e+03   	1.874707e+05   	288
+fb4b8624	4427800        	0.000000e+00   	2.706983e+01   	4.764143e+00   	1.228970e+04   	3.429847e+05   	454
+f2ff9ae5	34480152       	0.000000e+00   	2.645378e+01   	3.704623e+00   	1.251264e+04   	3.374982e+05   	473
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.565071e+01   	2.881053e+00   	1.064505e+04   	2.764977e+05   	415
+fb4b8624	4427800        	0.000000e+00   	2.622930e+01   	4.388391e+00   	8.262230e+03   	2.227788e+05   	315
+f2ff9ae5	34480152       	0.000000e+00   	2.628917e+01   	2.974884e+00   	1.204044e+04   	3.205863e+05   	458
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/save_cl_top.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.246622e+01   	6.891153e+00   	3.928413e+03   	1.332868e+05   	121
+fb4b8624	4427800        	0.000000e+00   	1.139753e+01   	2.243693e+00   	2.644226e+03   	3.130556e+04   	232
+f2ff9ae5	34480152       	0.000000e+00   	5.591168e+01   	1.328211e+01   	1.241239e+04   	7.331618e+05   	222
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.604823e+01   	3.286196e+00   	1.130493e+04   	2.991603e+05   	434
+fb4b8624	4427800        	0.000000e+00   	2.652276e+01   	4.354433e+00   	7.850738e+03   	2.138358e+05   	296
+f2ff9ae5	34480152       	0.000000e+00   	2.714414e+01   	3.836601e+00   	1.555359e+04   	4.306232e+05   	573
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.599288e+01   	3.808778e+00   	1.115095e+04   	2.960687e+05   	429
+fb4b8624	4427800        	0.000000e+00   	2.539365e+01   	2.861737e+00   	6.678529e+03   	1.717461e+05   	263
+f2ff9ae5	34480152       	0.000000e+00   	2.629746e+01   	2.517281e+00   	1.159718e+04   	3.077710e+05   	441
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.561750e+01   	2.633232e+00   	1.155349e+04   	2.990988e+05   	451
+fb4b8624	4427800        	0.000000e+00   	2.673210e+01   	4.378492e+00   	1.031859e+04   	2.832378e+05   	386
+f2ff9ae5	34480152       	0.000000e+00   	2.631930e+01   	2.903449e+00   	1.339652e+04   	3.568781e+05   	509
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.607685e+01   	3.121182e+00   	7.979517e+03   	2.110617e+05   	306
+fb4b8624	4427800        	0.000000e+00   	2.621449e+01   	3.800716e+00   	7.654632e+03   	2.048804e+05   	292
+f2ff9ae5	34480152       	0.000000e+00   	2.661811e+01   	2.706929e+00   	1.810031e+04   	4.867788e+05   	680
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.759081e+01   	5.463486e+00   	5.435390e+03   	1.558472e+05   	197
+fb4b8624	4427800        	0.000000e+00   	2.575898e+01   	3.723342e+00   	7.779212e+03   	2.045713e+05   	302
+f2ff9ae5	34480152       	0.000000e+00   	2.684177e+01   	3.098778e+00   	1.181038e+04   	3.212366e+05   	440
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.613306e+01   	2.901462e+00   	1.100202e+04   	2.910606e+05   	421
+fb4b8624	4427800        	0.000000e+00   	2.615768e+01   	3.461177e+00   	7.010257e+03   	1.865826e+05   	268
+f2ff9ae5	34480152       	0.000000e+00   	2.749333e+01   	3.923485e+00   	1.492888e+04   	4.188033e+05   	543
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.628755e+01   	3.829892e+00   	1.025215e+04   	2.752243e+05   	390
+fb4b8624	4427800        	0.000000e+00   	2.540957e+01   	3.333356e+00   	8.258109e+03   	2.134461e+05   	325
+f2ff9ae5	34480152       	0.000000e+00   	2.728087e+01   	3.903560e+00   	1.404965e+04   	3.911340e+05   	515
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	2.705248e+01   	4.049710e+00   	1.163257e+04   	3.217418e+05   	430
+fb4b8624	4427800        	0.000000e+00   	2.626990e+01   	3.774104e+00   	6.908983e+03   	1.852444e+05   	263
+f2ff9ae5	34480152       	0.000000e+00   	2.670502e+01   	3.597311e+00   	1.303205e+04   	3.543362e+05   	488
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_dgemm_gemm.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.052061e+03   	3.198115e+01   	6.838395e+04   	7.201055e+07   	65
+4220e23d	29491200       	2.097152e+09   	7.092203e+03   	4.667104e+02   	6.028372e+05   	4.293959e+09   	85
+492beed5	66355200       	7.077888e+09   	2.348390e+04   	1.879558e+03   	2.230970e+06   	5.272750e+10   	95
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.052063e+03   	4.974434e+01   	7.680058e+04   	8.097966e+07   	73
+4220e23d	29491200       	2.097152e+09   	7.169429e+03   	6.510141e+02   	6.165709e+05   	4.456910e+09   	86
+492beed5	66355200       	7.077888e+09   	2.369721e+04   	2.666656e+03   	2.203840e+06   	5.288620e+10   	93
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.073351e+03   	1.039589e+02   	5.796097e+04   	6.279609e+07   	54
+4220e23d	29491200       	2.097152e+09   	7.178253e+03   	6.674450e+02   	6.245080e+05   	4.521634e+09   	87
+492beed5	66355200       	7.077888e+09   	2.322028e+04   	3.606800e+02   	2.252367e+06   	5.231319e+10   	97
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.047275e+03   	4.810046e+01   	6.074194e+04   	6.374769e+07   	58
+4220e23d	29491200       	2.097152e+09   	7.215871e+03   	7.571281e+02   	6.277808e+05   	4.579858e+09   	87
+492beed5	66355200       	7.077888e+09   	2.323291e+04   	1.169036e+03   	2.230359e+06   	5.194892e+10   	96
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.045464e+03   	2.548321e+01   	6.168239e+04   	6.452506e+07   	59
+4220e23d	29491200       	2.097152e+09   	7.130284e+03   	4.158059e+02   	5.632924e+05   	4.030093e+09   	79
+492beed5	66355200       	7.077888e+09   	2.322391e+04   	7.530407e+02   	2.090152e+06   	4.859253e+10   	90
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.058842e+03   	8.984549e+01   	6.353054e+04   	6.775316e+07   	60
+4220e23d	29491200       	2.097152e+09   	7.197321e+03   	6.902584e+02   	6.549562e+05   	4.757287e+09   	91
+492beed5	66355200       	7.077888e+09   	2.322727e+04   	1.128695e+03   	2.253045e+06   	5.245566e+10   	97
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.063382e+03   	9.562944e+01   	5.529587e+04   	5.927619e+07   	52
+4220e23d	29491200       	2.097152e+09   	7.227464e+03   	8.541890e+02   	6.287894e+05   	4.608031e+09   	87
+492beed5	66355200       	7.077888e+09   	2.322877e+04   	9.079114e+02   	2.253191e+06   	5.241882e+10   	97
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	1.057961e+03   	6.059722e+01   	5.289807e+04   	5.614771e+07   	50
+4220e23d	29491200       	2.097152e+09   	7.169935e+03   	6.166650e+02   	5.592549e+05   	4.039483e+09   	78
+492beed5	66355200       	7.077888e+09   	2.322622e+04   	8.447450e+02   	2.090360e+06   	4.861539e+10   	90
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	7372800        	2.621440e+08   	3.132122e+04   	3.995607e+03   	8.456730e+05   	2.691857e+10   	27
+4220e23d	29491200       	2.097152e+09   	2.241875e+05   	7.780157e+03   	6.053063e+06   	1.358656e+12   	27
+492beed5	66355200       	7.077888e+09   	7.222063e+05   	7.344712e+03   	1.661074e+07   	1.199762e+13   	23
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_11.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	2.127055e+05   	1.216918e+04   	3.190582e+06   	6.808756e+11   	15
+afdd228b	3276800        	0.000000e+00   	6.346686e+04   	7.329654e+02   	6.346686e+05   	4.028580e+10   	10
+cea37d6d	819200         	0.000000e+00   	7.969263e+03   	1.770463e+02   	1.354775e+05   	1.080188e+09   	17
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	8.656100e+04   	6.943816e+03   	1.471537e+06   	1.281974e+11   	17
+afdd228b	3276800        	0.000000e+00   	3.567215e+04   	3.302464e+03   	3.567215e+05   	1.283409e+10   	10
+cea37d6d	819200         	0.000000e+00   	1.101988e+04   	5.146633e+02   	1.101988e+05   	1.217027e+09   	10
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	3276800        	0.000000e+00   	3.935885e+04   	6.351673e+03   	3.935885e+05   	1.589463e+10   	10
+cea37d6d	819200         	0.000000e+00   	1.194615e+04   	1.359754e+03   	1.194615e+05   	1.445595e+09   	10
+617e5fe6	7372800        	0.000000e+00   	8.781176e+04   	9.198610e+03   	1.317176e+06   	1.169328e+11   	15
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	8.754335e+04   	8.654029e+03   	1.575780e+06   	1.392972e+11   	18
+afdd228b	3276800        	0.000000e+00   	3.542725e+04   	1.501284e+03   	3.542725e+05   	1.257344e+10   	10
+cea37d6d	819200         	0.000000e+00   	1.193774e+04   	1.685032e+03   	1.193774e+05   	1.453490e+09   	10
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	8.763521e+04   	5.876858e+03   	9.639873e+05   	8.485914e+10   	11
+afdd228b	3276800        	0.000000e+00   	3.909159e+04   	6.650440e+03   	4.300075e+05   	1.729619e+10   	11
+cea37d6d	819200         	0.000000e+00   	1.211577e+04   	1.649480e+03   	1.211577e+05   	1.495126e+09   	10
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	8.616388e+04   	4.981316e+03   	1.550950e+06   	1.340825e+11   	18
+afdd228b	3276800        	0.000000e+00   	3.647899e+04   	2.965394e+03   	4.377479e+05   	1.607412e+10   	12
+cea37d6d	819200         	0.000000e+00   	1.073272e+04   	1.010096e+02   	1.073272e+05   	1.152015e+09   	10
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	8.786078e+04   	7.200822e+03   	1.317912e+06   	1.165705e+11   	15
+afdd228b	3276800        	0.000000e+00   	3.795195e+04   	3.399141e+03   	3.795195e+05   	1.451905e+10   	10
+cea37d6d	819200         	0.000000e+00   	1.163527e+04   	1.023060e+03   	1.163527e+05   	1.364262e+09   	10
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	8.814631e+04   	6.725805e+03   	1.498487e+06   	1.328551e+11   	17
+cea37d6d	819200         	0.000000e+00   	1.170806e+04   	1.094676e+03   	1.170806e+05   	1.382770e+09   	10
+afdd228b	3276800        	0.000000e+00   	4.283079e+04   	7.621190e+03   	4.283079e+05   	1.892559e+10   	10
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	9.172766e+04   	1.075608e+04   	1.375915e+06   	1.279449e+11   	15
+cea37d6d	819200         	0.000000e+00   	1.117240e+04   	8.447401e+02   	1.117240e+05   	1.255362e+09   	10
+afdd228b	3276800        	0.000000e+00   	3.472448e+04   	1.278416e+03   	3.819693e+05   	1.328166e+10   	11
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_12.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.925318e+03   	8.376976e+02   	5.748014e+05   	4.038926e+09   	83
+d39bff17	6553600        	0.000000e+00   	2.271937e+03   	3.454949e+02   	2.340095e+05   	5.439496e+08   	103
+2c1922b7	1638400        	0.000000e+00   	7.049814e+02   	1.197767e+02   	1.254867e+05   	9.101946e+07   	178
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.291615e+03   	1.041939e+03   	4.593717e+05   	3.417957e+09   	63
+d39bff17	6553600        	0.000000e+00   	2.282720e+03   	4.096195e+02   	3.903452e+05   	9.197407e+08   	171
+2c1922b7	1638400        	0.000000e+00   	6.999720e+02   	1.145665e+02   	1.343946e+05   	9.659256e+07   	192
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.177388e+03   	9.455873e+02   	3.947563e+05   	2.882497e+09   	55
+d39bff17	6553600        	0.000000e+00   	2.335362e+03   	3.317057e+02   	2.825788e+05   	6.732374e+08   	121
+2c1922b7	1638400        	0.000000e+00   	7.266144e+02   	9.381637e+01   	4.432348e+04   	3.274297e+07   	61
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	9.210227e+04   	5.563000e+02   	1.252591e+07   	1.153707e+12   	136
+d39bff17	6553600        	0.000000e+00   	2.809162e+04   	4.267578e+02   	1.573131e+06   	4.420199e+10   	56
+2c1922b7	1638400        	0.000000e+00   	3.732094e+03   	1.582101e+02   	3.993341e+05   	1.493031e+09   	107
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.047943e+03   	9.923280e+02   	4.017327e+05   	2.887518e+09   	57
+d39bff17	6553600        	0.000000e+00   	2.358363e+03   	2.904964e+02   	2.381946e+05   	5.702726e+08   	101
+2c1922b7	1638400        	0.000000e+00   	7.376273e+02   	1.192099e+02   	4.425764e+04   	3.349831e+07   	60
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.125894e+03   	1.170430e+03   	6.769599e+05   	4.954085e+09   	95
+d39bff17	6553600        	0.000000e+00   	2.913435e+03   	7.837592e+02   	2.651226e+05   	8.283167e+08   	91
+2c1922b7	1638400        	0.000000e+00   	7.396845e+02   	1.557697e+02   	7.692719e+04   	5.942533e+07   	104
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.906666e+03   	1.069281e+03   	3.177066e+05   	2.246888e+09   	46
+d39bff17	6553600        	0.000000e+00   	2.331985e+03   	3.108312e+02   	2.914982e+05   	6.918465e+08   	125
+2c1922b7	1638400        	0.000000e+00   	7.036069e+02   	1.117682e+02   	5.277052e+04   	3.806661e+07   	75
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.634969e+03   	1.278868e+03   	4.122883e+05   	3.236126e+09   	54
+d39bff17	6553600        	0.000000e+00   	2.361692e+03   	2.763159e+02   	1.747652e+05   	4.183915e+08   	74
+2c1922b7	1638400        	0.000000e+00   	7.215132e+02   	1.060983e+02   	7.287283e+04   	5.371565e+07   	101
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	7.011366e+03   	8.280915e+02   	6.871138e+05   	4.884809e+09   	98
+d39bff17	6553600        	0.000000e+00   	2.294721e+03   	3.366230e+02   	4.451759e+05   	1.043537e+09   	194
+2c1922b7	1638400        	0.000000e+00   	6.840134e+02   	1.166270e+02   	1.114942e+05   	7.848061e+07   	163
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_21.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.700359e+03   	1.036459e+03   	3.886208e+05   	2.666205e+09   	58
+d39bff17	6553600        	0.000000e+00   	2.067623e+03   	3.658691e+02   	3.825102e+05   	8.156510e+08   	185
+2c1922b7	1638400        	0.000000e+00   	6.344928e+02   	1.313164e+02   	1.091328e+05   	7.220992e+07   	172
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.634729e+03   	1.380283e+03   	4.777005e+05   	3.306586e+09   	72
+d39bff17	6553600        	0.000000e+00   	2.102108e+03   	3.770829e+02   	2.690698e+05   	5.838144e+08   	128
+2c1922b7	1638400        	0.000000e+00   	6.251127e+02   	1.334964e+02   	1.168961e+05   	7.640580e+07   	187
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	5.973111e+03   	7.873858e+02   	4.420102e+05   	2.686054e+09   	74
+d39bff17	6553600        	0.000000e+00   	2.088129e+03   	3.411148e+02   	2.129891e+05   	4.566174e+08   	102
+2c1922b7	1638400        	0.000000e+00   	5.816119e+02   	1.098601e+02   	6.165086e+04   	3.713622e+07   	106
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	5.813439e+03   	5.835403e+02   	2.441645e+05   	1.433737e+09   	42
+d39bff17	6553600        	0.000000e+00   	2.170079e+03   	5.032568e+02   	7.161259e+04   	1.637628e+08   	33
+2c1922b7	1638400        	0.000000e+00   	6.080488e+02   	1.225789e+02   	3.101049e+04   	1.962219e+07   	51
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	9.133611e+04   	7.141260e+02   	1.032098e+07   	9.427358e+11   	113
+d39bff17	6553600        	0.000000e+00   	2.797330e+04   	6.068477e+02   	1.482585e+06   	4.149232e+10   	53
+2c1922b7	1638400        	0.000000e+00   	3.803279e+03   	2.345034e+02   	3.308852e+05   	1.263233e+09   	87
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.609495e+03   	1.035460e+03   	4.296172e+05   	2.909244e+09   	65
+d39bff17	6553600        	0.000000e+00   	2.129873e+03   	3.868465e+02   	3.407797e+05   	7.497615e+08   	160
+2c1922b7	1638400        	0.000000e+00   	6.443548e+02   	1.239934e+02   	8.054435e+04   	5.382094e+07   	125
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	5.938773e+03   	5.045720e+02   	2.078570e+05   	1.243326e+09   	35
+d39bff17	6553600        	0.000000e+00   	2.180034e+03   	4.239424e+02   	1.286220e+05   	2.910041e+08   	59
+2c1922b7	1638400        	0.000000e+00   	5.996256e+02   	1.220514e+02   	5.816368e+04   	3.632139e+07   	97
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.467618e+03   	9.651621e+02   	2.910428e+05   	1.924273e+09   	45
+d39bff17	6553600        	0.000000e+00   	2.057931e+03   	3.333471e+02   	1.872717e+05   	3.955042e+08   	91
+2c1922b7	1638400        	0.000000e+00   	6.141799e+02   	1.365857e+02   	5.159111e+04   	3.325329e+07   	84
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	6.429538e+03   	9.929716e+02   	5.015040e+05   	3.301346e+09   	78
+d39bff17	6553600        	0.000000e+00   	2.056349e+03   	3.356881e+02   	4.565094e+05   	9.637588e+08   	222
+2c1922b7	1638400        	0.000000e+00   	6.374873e+02   	1.360140e+02   	9.498561e+04   	6.330859e+07   	149
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_22.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.901996e+03   	2.140574e+02   	2.574451e+07   	1.521439e+11   	4362
+f0ac7beb	9830400        	0.000000e+00   	1.855425e+03   	1.035707e+02   	7.464374e+06   	1.389274e+10   	4023
+d46431bb	2457600        	0.000000e+00   	2.667843e+02   	3.133790e+01   	9.321442e+05   	2.521128e+08   	3494
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.924812e+03   	2.030546e+02   	2.715934e+07   	1.611030e+11   	4584
+f0ac7beb	9830400        	0.000000e+00   	1.850857e+03   	1.138114e+02   	6.774137e+06   	1.258537e+10   	3660
+d46431bb	2457600        	0.000000e+00   	2.743267e+02   	3.237528e+01   	7.903352e+05   	2.198298e+08   	2881
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.905959e+03   	2.324106e+02   	2.885061e+07   	1.706544e+11   	4885
+f0ac7beb	9830400        	0.000000e+00   	1.844033e+03   	9.904039e+01   	7.516278e+06   	1.390024e+10   	4076
+d46431bb	2457600        	0.000000e+00   	2.662813e+02   	2.651200e+01   	1.098144e+06   	2.953140e+08   	4124
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.905860e+03   	2.088868e+02   	2.619840e+07   	1.549176e+11   	4436
+f0ac7beb	9830400        	0.000000e+00   	1.843182e+03   	9.714398e+01   	7.671323e+06   	1.417892e+10   	4162
+d46431bb	2457600        	0.000000e+00   	2.666213e+02   	3.154593e+01   	1.003829e+06   	2.713890e+08   	3765
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.916743e+03   	2.291447e+02   	2.686793e+07   	1.592091e+11   	4541
+f0ac7beb	9830400        	0.000000e+00   	1.837574e+03   	9.255327e+01   	7.197777e+06   	1.326000e+10   	3917
+d46431bb	2457600        	0.000000e+00   	2.645367e+02   	2.904285e+01   	1.078252e+06   	2.886751e+08   	4076
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	1.776444e+05   	1.603881e+03   	1.085407e+08   	1.928322e+13   	611
+f0ac7beb	9830400        	0.000000e+00   	5.438487e+04   	1.553469e+03   	1.598915e+07   	8.702776e+11   	294
+d46431bb	2457600        	0.000000e+00   	6.892168e+03   	1.879454e+02   	4.218007e+06   	2.909283e+10   	612
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.908798e+03   	2.191022e+02   	2.432652e+07   	1.439381e+11   	4117
+f0ac7beb	9830400        	0.000000e+00   	1.870298e+03   	1.158137e+02   	6.306645e+06   	1.184053e+10   	3372
+d46431bb	2457600        	0.000000e+00   	2.622005e+02   	3.221213e+01   	7.630036e+05   	2.030794e+08   	2910
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.908890e+03   	2.133901e+02   	2.731089e+07   	1.615875e+11   	4622
+f0ac7beb	9830400        	0.000000e+00   	1.853662e+03   	1.234628e+02   	6.493379e+06   	1.208993e+10   	3503
+d46431bb	2457600        	0.000000e+00   	2.672063e+02   	3.249622e+01   	7.262666e+05   	1.969332e+08   	2718
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	5.908429e+03   	2.149126e+02   	2.371053e+07   	1.402773e+11   	4013
+f0ac7beb	9830400        	0.000000e+00   	1.855601e+03   	1.161756e+02   	6.509447e+06   	1.212628e+10   	3508
+d46431bb	2457600        	0.000000e+00   	2.697690e+02   	3.186509e+01   	7.013994e+05   	1.918558e+08   	2600
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_sgemm_gemm.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.801013e+02   	7.013561e+01   	4.760709e+04   	3.272198e+07   	70
+4220e23d	14745600       	2.097152e+09   	5.623635e+03   	5.419920e+02   	4.442672e+05   	2.521603e+09   	79
+492beed5	33177600       	7.077888e+09   	1.150361e+04   	5.884814e+02   	1.000814e+06   	1.154310e+10   	87
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.717051e+02   	6.137607e+01   	4.500424e+04   	3.048197e+07   	67
+4220e23d	14745600       	2.097152e+09   	5.648275e+03   	4.677390e+02   	4.575103e+05   	2.601865e+09   	81
+492beed5	33177600       	7.077888e+09   	1.157020e+04   	6.521027e+02   	1.018178e+06   	1.181795e+10   	88
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.265559e+02   	5.536840e+01   	4.824481e+04   	3.046412e+07   	77
+4220e23d	14745600       	2.097152e+09   	5.631203e+03   	4.767455e+02   	4.561275e+05   	2.586957e+09   	81
+492beed5	33177600       	7.077888e+09   	1.162826e+04   	6.757302e+02   	1.023286e+06   	1.193922e+10   	88
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.780899e+02   	4.241206e+01   	4.543202e+04   	3.092751e+07   	67
+4220e23d	14745600       	2.097152e+09   	5.857201e+03   	8.346836e+02   	4.744333e+05   	2.835284e+09   	81
+492beed5	33177600       	7.077888e+09   	1.150498e+04   	4.254093e+02   	9.894285e+05   	1.139892e+10   	86
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.759139e+02   	4.092799e+01   	4.190666e+04   	2.842915e+07   	62
+4220e23d	14745600       	2.097152e+09   	5.527477e+03   	2.733928e+02   	4.421982e+05   	2.450220e+09   	80
+492beed5	33177600       	7.077888e+09   	1.146770e+04   	1.768909e+02   	1.100899e+06   	1.262778e+10   	96
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.339465e+02   	7.125158e+01   	4.184047e+04   	2.685969e+07   	66
+4220e23d	14745600       	2.097152e+09   	5.624130e+03   	4.755864e+02   	4.668028e+05   	2.644133e+09   	83
+492beed5	33177600       	7.077888e+09   	1.149102e+04   	5.375188e+02   	1.114629e+06   	1.283625e+10   	97
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.389750e+02   	8.615382e+01   	4.728415e+04   	3.076266e+07   	74
+4220e23d	14745600       	2.097152e+09   	5.648331e+03   	5.220897e+02   	4.631632e+05   	2.638450e+09   	82
+492beed5	33177600       	7.077888e+09   	1.155069e+04   	5.660846e+02   	1.108866e+06   	1.283893e+10   	96
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	6.386625e+02   	8.094896e+01   	4.342905e+04   	2.818209e+07   	68
+4220e23d	14745600       	2.097152e+09   	5.638657e+03   	3.709019e+02   	4.454539e+05   	2.522630e+09   	79
+492beed5	33177600       	7.077888e+09   	1.144012e+04   	2.531108e+02   	1.109691e+06   	1.270122e+10   	97
+
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+0b0b0ce8	3686400        	2.621440e+08   	1.414338e+04   	6.441210e+02   	3.535844e+05   	5.011251e+09   	25
+4220e23d	14745600       	2.097152e+09   	1.091117e+05   	2.701159e+03   	3.382462e+06   	3.692924e+11   	31
+492beed5	33177600       	7.077888e+09   	3.621356e+05   	7.764608e+03   	8.329119e+06   	3.017657e+12   	23
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_11.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+cea37d6d	409600         	0.000000e+00   	4.307978e+03   	6.474305e+01   	1.249314e+05   	5.383232e+08   	29
+afdd228b	1638400        	0.000000e+00   	3.550524e+04   	4.451382e+02   	3.550524e+05   	1.260821e+10   	10
+617e5fe6	3686400        	0.000000e+00   	1.169735e+05   	9.368471e+02   	1.169735e+06   	1.368368e+11   	10
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+cea37d6d	409600         	0.000000e+00   	1.140547e+04   	1.799023e+03   	1.140547e+05   	1.333212e+09   	10
+afdd228b	1638400        	0.000000e+00   	2.728447e+04   	8.307498e+02   	2.728447e+05   	7.451326e+09   	10
+617e5fe6	3686400        	0.000000e+00   	6.234962e+04   	7.670296e+03   	6.858458e+05   	4.340939e+10   	11
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	3.084154e+04   	4.741973e+03   	3.084154e+05   	9.736872e+09   	10
+cea37d6d	409600         	0.000000e+00   	1.194801e+04   	1.916839e+03   	1.194801e+05   	1.464291e+09   	10
+617e5fe6	3686400        	0.000000e+00   	6.590141e+04   	1.170188e+04   	6.590141e+05   	4.479930e+10   	10
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	7.169178e+04   	1.134864e+04   	7.886096e+05   	5.795353e+10   	11
+cea37d6d	409600         	0.000000e+00   	1.144166e+04   	1.161786e+03   	1.144166e+05   	1.322613e+09   	10
+afdd228b	1638400        	0.000000e+00   	2.872444e+04   	2.010264e+03   	3.159688e+05   	9.120481e+09   	11
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+cea37d6d	409600         	0.000000e+00   	1.150326e+04   	1.434617e+03   	1.150326e+05   	1.343832e+09   	10
+afdd228b	1638400        	0.000000e+00   	3.088151e+04   	4.858348e+03   	3.088151e+05   	9.772711e+09   	10
+617e5fe6	3686400        	0.000000e+00   	6.102500e+04   	7.308309e+03   	6.102500e+05   	3.777463e+10   	10
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	5.751510e+04   	2.006299e+03   	6.901812e+05   	3.974415e+10   	12
+cea37d6d	409600         	0.000000e+00   	1.125363e+04   	1.219431e+03   	1.125363e+05   	1.281312e+09   	10
+afdd228b	1638400        	0.000000e+00   	3.238968e+04   	5.459084e+03   	3.238968e+05   	1.078893e+10   	10
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	2.926764e+04   	3.325362e+03   	3.219440e+05   	9.544181e+09   	11
+cea37d6d	409600         	0.000000e+00   	1.088648e+04   	1.129883e+03   	1.088648e+05   	1.197920e+09   	10
+617e5fe6	3686400        	0.000000e+00   	6.506731e+04   	1.183046e+04   	8.458750e+05   	5.685829e+10   	13
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+afdd228b	1638400        	0.000000e+00   	2.775893e+04   	1.476662e+03   	3.331071e+05   	9.272862e+09   	12
+cea37d6d	409600         	0.000000e+00   	1.026126e+04   	8.160679e+01   	1.026126e+05   	1.053001e+09   	10
+617e5fe6	3686400        	0.000000e+00   	6.215917e+04   	1.023772e+04   	6.215917e+05   	3.968573e+10   	10
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+cea37d6d	409600         	0.000000e+00   	1.022286e+04   	3.601879e+01   	1.022286e+05   	1.045081e+09   	10
+afdd228b	1638400        	0.000000e+00   	2.891317e+04   	4.592264e+03   	2.891317e+05   	8.570604e+09   	10
+617e5fe6	3686400        	0.000000e+00   	5.724831e+04   	3.045025e+03   	7.442280e+05   	4.272633e+10   	13
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_12.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	2.469013e+03   	5.595193e+01   	2.765294e+05   	6.831054e+08   	112
+d39bff17	3276800        	0.000000e+00   	1.667528e+04   	1.964808e+02   	1.300672e+06   	2.169208e+10   	78
+ff82dda0	7372800        	0.000000e+00   	5.216745e+04   	4.664151e+02   	3.443052e+06   	1.796296e+11   	66
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.490410e+02   	1.344248e+02   	7.415506e+04   	5.733412e+07   	99
+d39bff17	3276800        	0.000000e+00   	2.737524e+03   	2.974057e+02   	3.942034e+05   	1.091878e+09   	144
+ff82dda0	7372800        	0.000000e+00   	7.212728e+03   	1.319942e+03   	6.924219e+05   	5.161506e+09   	96
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.688939e+02   	1.457751e+02   	6.843156e+04   	5.450789e+07   	89
+d39bff17	3276800        	0.000000e+00   	2.735563e+03   	2.889694e+02   	2.899697e+05   	8.020820e+08   	106
+ff82dda0	7372800        	0.000000e+00   	6.820126e+03   	9.314994e+02   	7.638542e+05   	5.306763e+09   	112
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.150281e+02   	1.235393e+02   	6.363750e+04   	4.686092e+07   	89
+d39bff17	3276800        	0.000000e+00   	2.835249e+03   	4.125186e+02   	1.899617e+05   	5.499903e+08   	67
+ff82dda0	7372800        	0.000000e+00   	6.720945e+03   	7.632032e+02   	6.989783e+05   	4.758372e+09   	104
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.190609e+02   	1.144317e+02   	7.406327e+04   	5.460474e+07   	103
+d39bff17	3276800        	0.000000e+00   	2.867186e+03   	4.168496e+02   	2.838514e+05   	8.310575e+08   	99
+ff82dda0	7372800        	0.000000e+00   	6.809425e+03   	9.031920e+02   	6.400859e+05   	4.435298e+09   	94
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.136273e+02   	1.258701e+02   	7.350362e+04   	5.408605e+07   	103
+d39bff17	3276800        	0.000000e+00   	2.942246e+03   	4.585544e+02   	1.706502e+05   	5.142907e+08   	58
+ff82dda0	7372800        	0.000000e+00   	6.744194e+03   	8.416374e+02   	5.597681e+05   	3.833978e+09   	83
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.204798e+02   	9.746533e+01   	1.080720e+05   	7.928859e+07   	150
+d39bff17	3276800        	0.000000e+00   	2.539831e+03   	4.296517e+02   	3.885942e+05   	1.015208e+09   	153
+ff82dda0	7372800        	0.000000e+00   	7.293979e+03   	1.385713e+03   	6.929280e+05   	5.236621e+09   	95
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.460951e+02   	1.203288e+02   	7.386342e+04   	5.654256e+07   	99
+d39bff17	3276800        	0.000000e+00   	2.972783e+03   	5.066224e+02   	2.259315e+05   	6.911522e+08   	76
+ff82dda0	7372800        	0.000000e+00   	6.643349e+03   	8.230064e+02   	6.510482e+05   	4.391520e+09   	98
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	7.518059e+02   	1.406096e+02   	8.495406e+04   	6.610309e+07   	113
+d39bff17	3276800        	0.000000e+00   	2.794983e+03   	3.357608e+02   	4.164524e+05   	1.180775e+09   	149
+ff82dda0	7372800        	0.000000e+00   	6.735838e+03   	7.525487e+02   	6.803197e+05   	4.639723e+09   	101
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_21.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	1.946809e+03   	8.216247e+01   	4.049363e+05   	7.897378e+08   	208
+d39bff17	3276800        	0.000000e+00   	1.423970e+04   	2.281585e+02   	1.395491e+06   	1.987647e+10   	98
+ff82dda0	7372800        	0.000000e+00   	4.640991e+04   	5.437505e+02   	4.919451e+06   	2.283426e+11   	106
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	4.484181e+02   	9.807341e+01   	3.004401e+04   	1.411671e+07   	67
+d39bff17	3276800        	0.000000e+00   	1.658665e+03   	2.005859e+02   	2.388477e+05   	4.019622e+08   	144
+ff82dda0	7372800        	0.000000e+00   	3.922083e+03   	2.271290e+02   	2.588575e+05   	1.018665e+09   	66
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	5.042679e+02   	1.229686e+02   	7.362312e+04   	3.933348e+07   	146
+d39bff17	3276800        	0.000000e+00   	2.167031e+03   	5.827483e+02   	2.773800e+05   	6.445595e+08   	128
+ff82dda0	7372800        	0.000000e+00   	4.035358e+03   	4.245106e+02   	4.035358e+05   	1.646433e+09   	100
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	4.873044e+02   	1.174149e+02   	2.631443e+04   	1.356760e+07   	54
+d39bff17	3276800        	0.000000e+00   	1.705876e+03   	1.721886e+02   	1.808228e+05   	3.116041e+08   	106
+ff82dda0	7372800        	0.000000e+00   	3.936492e+03   	2.608005e+02   	2.440625e+05   	9.649671e+08   	62
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	5.272057e+02   	1.307030e+02   	5.535660e+04   	3.097805e+07   	105
+d39bff17	3276800        	0.000000e+00   	1.638590e+03   	9.390080e+01   	1.163399e+05   	1.912593e+08   	71
+ff82dda0	7372800        	0.000000e+00   	4.055643e+03   	3.711103e+02   	4.177313e+05   	1.708355e+09   	103
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	4.303558e+02   	7.490536e+01   	2.840348e+04   	1.259392e+07   	66
+d39bff17	3276800        	0.000000e+00   	1.669452e+03   	1.444951e+02   	1.419035e+05   	2.386758e+08   	85
+ff82dda0	7372800        	0.000000e+00   	4.288060e+03   	7.671104e+02   	2.744359e+05   	1.214459e+09   	64
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	4.394264e+02   	8.387153e+01   	6.283798e+04   	2.861859e+07   	143
+d39bff17	3276800        	0.000000e+00   	2.098818e+03   	5.403136e+02   	2.140795e+05   	4.790917e+08   	102
+ff82dda0	7372800        	0.000000e+00   	4.766912e+03   	1.123433e+03   	7.579390e+05   	3.813703e+09   	159
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	4.660051e+02   	1.021627e+02   	6.477470e+04   	3.163611e+07   	139
+d39bff17	3276800        	0.000000e+00   	2.103985e+03   	5.293854e+02   	2.377503e+05   	5.318912e+08   	113
+ff82dda0	7372800        	0.000000e+00   	3.972257e+03   	3.858968e+02   	2.899747e+05   	1.162725e+09   	73
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+2c1922b7	819200         	0.000000e+00   	5.222345e+02   	1.241013e+02   	2.715620e+04   	1.498276e+07   	52
+d39bff17	3276800        	0.000000e+00   	1.941135e+03   	4.386059e+02   	1.824667e+05   	3.722759e+08   	94
+ff82dda0	7372800        	0.000000e+00   	4.892155e+03   	1.147723e+03   	2.397156e+05   	1.237272e+09   	49
+

+ 296 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_22.idgraf

@@ -0,0 +1,296 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+9
+####################
+# COMB_8
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb8)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	3.393927e+03   	8.566524e+01   	3.533078e+06   	1.199865e+10   	1041
+f0ac7beb	4915200        	0.000000e+00   	2.682238e+04   	4.332821e+02   	9.951104e+06   	2.669820e+11   	371
+24c84a50	11059200       	0.000000e+00   	8.930213e+04   	1.450773e+03   	2.679064e+07   	2.393092e+12   	300
+
+####################
+# COMB_5
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb5)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.946363e+02   	2.537099e+01   	6.294539e+05   	1.245963e+08   	3234
+f0ac7beb	4915200        	0.000000e+00   	9.257288e+02   	6.590058e+01   	3.791785e+06   	3.527953e+09   	4096
+24c84a50	11059200       	0.000000e+00   	2.991139e+03   	1.645886e+02   	1.221282e+07   	3.664085e+10   	4083
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+4
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda4_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.954243e+02   	2.831383e+01   	5.661443e+05   	1.129608e+08   	2897
+f0ac7beb	4915200        	0.000000e+00   	9.376794e+02   	7.341921e+01   	3.415966e+06   	3.222718e+09   	3643
+24c84a50	11059200       	0.000000e+00   	2.995872e+03   	1.614697e+02   	1.133938e+07   	3.407000e+10   	3785
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+6
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda6_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.867261e+02   	2.556099e+01   	5.342234e+05   	1.016227e+08   	2861
+f0ac7beb	4915200        	0.000000e+00   	8.996740e+02   	6.639270e+01   	3.427758e+06   	3.100659e+09   	3810
+24c84a50	11059200       	0.000000e+00   	2.987519e+03   	1.530428e+02   	1.113747e+07   	3.336072e+10   	3728
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.927028e+02   	2.478568e+01   	6.783137e+05   	1.328754e+08   	3520
+f0ac7beb	4915200        	0.000000e+00   	9.234475e+02   	6.432680e+01   	3.846159e+06   	3.568960e+09   	4165
+24c84a50	11059200       	0.000000e+00   	2.982449e+03   	1.542480e+02   	1.210278e+07   	3.619247e+10   	4058
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+5
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda5_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.868734e+02   	2.558187e+01   	5.049318e+05   	9.612659e+07   	2702
+f0ac7beb	4915200        	0.000000e+00   	9.407115e+02   	6.874274e+01   	3.317889e+06   	3.137844e+09   	3527
+24c84a50	11059200       	0.000000e+00   	2.972987e+03   	1.569773e+02   	1.177600e+07   	3.510750e+10   	3961
+
+####################
+# COMB_6
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb6)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.924732e+02   	2.459364e+01   	6.245755e+05   	1.221768e+08   	3245
+f0ac7beb	4915200        	0.000000e+00   	9.173887e+02   	7.039530e+01   	3.781476e+06   	3.489510e+09   	4122
+24c84a50	11059200       	0.000000e+00   	3.001859e+03   	1.612679e+02   	1.156916e+07   	3.482922e+10   	3854
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+7
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda7_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.877972e+02   	2.764994e+01   	5.324050e+05   	1.021516e+08   	2835
+f0ac7beb	4915200        	0.000000e+00   	9.245688e+02   	6.946750e+01   	3.363581e+06   	3.127419e+09   	3638
+24c84a50	11059200       	0.000000e+00   	3.005524e+03   	1.690713e+02   	1.154422e+07   	3.480621e+10   	3841
+
+####################
+# COMB_7
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb7)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+d46431bb	1228800        	0.000000e+00   	1.865351e+02   	2.381101e+01   	6.651841e+05   	1.261020e+08   	3566
+f0ac7beb	4915200        	0.000000e+00   	9.257403e+02   	6.896157e+01   	3.669635e+06   	3.415980e+09   	3964
+24c84a50	11059200       	0.000000e+00   	3.007743e+03   	1.477912e+02   	1.238889e+07   	3.735258e+10   	4119
+