Przeglądaj źródła

Add sirocco perfmodels

Samuel Thibault 9 lat temu
rodzic
commit
69358e31be
22 zmienionych plików z 2872 dodań i 0 usunięć
  1. 9 0
      tools/perfmodels/sampling/bus/sirocco.affinity
  2. 17 0
      tools/perfmodels/sampling/bus/sirocco.bandwidth
  3. 5 0
      tools/perfmodels/sampling/bus/sirocco.config
  4. 17 0
      tools/perfmodels/sampling/bus/sirocco.latency
  5. 128 0
      tools/perfmodels/sampling/bus/sirocco.platform.xml
  6. 168 0
      tools/perfmodels/sampling/codelets/44/chol_model_11.sirocco
  7. 168 0
      tools/perfmodels/sampling/codelets/44/chol_model_21.sirocco
  8. 168 0
      tools/perfmodels/sampling/codelets/44/chol_model_22.sirocco
  9. 168 0
      tools/perfmodels/sampling/codelets/44/cl_update.sirocco
  10. 8 0
      tools/perfmodels/sampling/codelets/44/null.sirocco
  11. 168 0
      tools/perfmodels/sampling/codelets/44/save_cl_bottom.sirocco
  12. 168 0
      tools/perfmodels/sampling/codelets/44/save_cl_top.sirocco
  13. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_dgemm_gemm.sirocco
  14. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_11.sirocco
  15. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_12.sirocco
  16. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_21.sirocco
  17. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_22.sirocco
  18. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_sgemm_gemm.sirocco
  19. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_11.sirocco
  20. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_12.sirocco
  21. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_21.sirocco
  22. 168 0
      tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_22.sirocco

+ 9 - 0
tools/perfmodels/sampling/bus/sirocco.affinity

@@ -0,0 +1,9 @@
+# GPU	CPU0	CPU1	CPU2	CPU3	CPU4	CPU5	CPU6	CPU7	CPU8	CPU9	CPU10	CPU11	CPU12	CPU13	CPU14	CPU15	CPU16	CPU17	CPU18	CPU19	CPU20	CPU21	CPU22	CPU23	
+0	6	7	8	9	10	11	0	1	2	3	4	5	18	19	20	21	22	23	12	13	14	15	16	17	
+1	6	7	8	9	10	11	0	1	2	3	4	5	18	19	20	21	22	23	12	13	14	15	16	17	
+2	18	19	20	21	22	23	12	13	14	15	16	17	0	1	2	3	4	5	6	7	8	9	10	11	
+3	18	19	20	21	22	23	12	13	14	15	16	17	0	1	2	3	4	5	6	7	8	9	10	11	
+0	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	
+1	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	
+2	12	13	14	15	16	17	18	19	20	21	22	23	0	1	2	3	4	5	6	7	8	9	10	11	
+3	12	13	14	15	16	17	18	19	20	21	22	23	0	1	2	3	4	5	6	7	8	9	10	11	

+ 17 - 0
tools/perfmodels/sampling/bus/sirocco.bandwidth

@@ -0,0 +1,17 @@
+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
+0.000000e+00	1.051768e+04	1.051743e+04	1.051732e+04	1.051718e+04	7.997534e+03	7.978223e+03	8.025122e+03	8.002101e+03	nan	nan	nan	nan	nan	nan	nan
+1.052170e+04	0.000000e+00	1.024409e+04	7.662719e+03	8.527736e+03	4.543798e+03	4.537558e+03	4.552690e+03	4.545272e+03	nan	nan	nan	nan	nan	nan	nan
+1.052123e+04	1.024068e+04	0.000000e+00	7.630370e+03	8.542254e+03	4.543711e+03	4.537471e+03	4.552602e+03	4.545185e+03	nan	nan	nan	nan	nan	nan	nan
+1.052183e+04	8.504225e+03	8.517476e+03	0.000000e+00	1.023200e+04	4.543822e+03	4.537582e+03	4.552715e+03	4.545296e+03	nan	nan	nan	nan	nan	nan	nan
+1.052172e+04	8.496221e+03	8.514240e+03	1.024287e+04	0.000000e+00	4.543801e+03	4.537561e+03	4.552693e+03	4.545275e+03	nan	nan	nan	nan	nan	nan	nan
+7.434276e+03	4.355589e+03	4.355546e+03	4.355527e+03	4.355503e+03	0.000000e+00	3.848326e+03	3.859204e+03	3.853873e+03	nan	nan	nan	nan	nan	nan	nan
+7.232140e+03	4.285414e+03	4.285373e+03	4.285355e+03	4.285331e+03	3.797802e+03	0.000000e+00	3.804012e+03	3.798832e+03	nan	nan	nan	nan	nan	nan	nan
+7.300126e+03	4.309194e+03	4.309152e+03	4.309134e+03	4.309110e+03	3.816466e+03	3.812063e+03	0.000000e+00	3.817506e+03	nan	nan	nan	nan	nan	nan	nan
+7.333166e+03	4.320685e+03	4.320643e+03	4.320625e+03	4.320601e+03	3.825477e+03	3.821053e+03	3.831778e+03	0.000000e+00	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan

+ 5 - 0
tools/perfmodels/sampling/bus/sirocco.config

@@ -0,0 +1,5 @@
+# Current configuration
+24 # Number of CPUs
+4 # Number of CUDA devices
+4 # Number of OpenCL devices
+0 # Number of MIC devices

+ 17 - 0
tools/perfmodels/sampling/bus/sirocco.latency

@@ -0,0 +1,17 @@
+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
+0.000000e+00	1.029027e+01	1.031898e+01	9.529422e+00	1.039846e+01	9.643953e+00	1.113670e+01	1.055939e+01	1.004796e+01	nan	nan	nan	nan	nan	nan	nan
+1.085040e+01	0.000000e+00	1.152573e+01	2.350899e+01	2.337711e+01	2.049435e+01	2.198709e+01	2.140979e+01	2.089836e+01	nan	nan	nan	nan	nan	nan	nan
+9.920578e+00	1.167180e+01	0.000000e+00	2.304539e+01	2.330630e+01	1.956453e+01	2.105727e+01	2.047997e+01	1.996854e+01	nan	nan	nan	nan	nan	nan	nan
+1.093016e+01	2.380006e+01	2.358666e+01	0.000000e+00	1.101548e+01	2.057412e+01	2.206686e+01	2.148955e+01	2.097812e+01	nan	nan	nan	nan	nan	nan	nan
+1.097311e+01	2.126338e+01	2.129209e+01	2.050253e+01	0.000000e+00	2.061706e+01	2.210980e+01	2.153250e+01	2.102107e+01	nan	nan	nan	nan	nan	nan	nan
+1.162996e+01	2.192023e+01	2.194894e+01	2.115938e+01	2.202842e+01	0.000000e+00	2.276666e+01	2.218935e+01	2.167792e+01	nan	nan	nan	nan	nan	nan	nan
+1.359506e+01	2.388534e+01	2.391404e+01	2.312448e+01	2.399352e+01	2.323902e+01	0.000000e+00	2.415445e+01	2.364302e+01	nan	nan	nan	nan	nan	nan	nan
+1.245815e+01	2.274842e+01	2.277712e+01	2.198757e+01	2.285661e+01	2.210210e+01	2.359484e+01	0.000000e+00	2.250611e+01	nan	nan	nan	nan	nan	nan	nan
+1.236026e+01	2.265053e+01	2.267923e+01	2.188968e+01	2.275872e+01	2.200421e+01	2.349695e+01	2.291965e+01	0.000000e+00	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan

+ 128 - 0
tools/perfmodels/sampling/bus/sirocco.platform.xml

@@ -0,0 +1,128 @@
+<?xml version='1.0'?>
+ <!DOCTYPE platform SYSTEM 'http://simgrid.gforge.inria.fr/simgrid.dtd'>
+ <platform version='3'>
+ <config id='General'>
+   <prop id='network/TCP_gamma' value='-1'></prop>
+   <prop id='network/latency_factor' value='1'></prop>
+   <prop id='network/bandwidth_factor' value='1'></prop>
+ </config>
+ <AS  id='AS0'  routing='Full'>
+   <host id='MAIN' power='1'/>
+   <host id='CPU0' power='2000000000'/>
+   <host id='CPU1' power='2000000000'/>
+   <host id='CPU2' power='2000000000'/>
+   <host id='CPU3' power='2000000000'/>
+   <host id='CPU4' power='2000000000'/>
+   <host id='CPU5' power='2000000000'/>
+   <host id='CPU6' power='2000000000'/>
+   <host id='CPU7' power='2000000000'/>
+   <host id='CPU8' power='2000000000'/>
+   <host id='CPU9' power='2000000000'/>
+   <host id='CPU10' power='2000000000'/>
+   <host id='CPU11' power='2000000000'/>
+   <host id='CPU12' power='2000000000'/>
+   <host id='CPU13' power='2000000000'/>
+   <host id='CPU14' power='2000000000'/>
+   <host id='CPU15' power='2000000000'/>
+   <host id='CPU16' power='2000000000'/>
+   <host id='CPU17' power='2000000000'/>
+   <host id='CPU18' power='2000000000'/>
+   <host id='CPU19' power='2000000000'/>
+   <host id='CPU20' power='2000000000'/>
+   <host id='CPU21' power='2000000000'/>
+   <host id='CPU22' power='2000000000'/>
+   <host id='CPU23' power='2000000000'/>
+   <host id='CUDA0' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+     <prop id='memcpy_peer' value='1'/>
+   </host>
+   <host id='CUDA1' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+     <prop id='memcpy_peer' value='1'/>
+   </host>
+   <host id='CUDA2' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+     <prop id='memcpy_peer' value='1'/>
+   </host>
+   <host id='CUDA3' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+     <prop id='memcpy_peer' value='1'/>
+   </host>
+   <host id='OpenCL0' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+   </host>
+   <host id='OpenCL1' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+   </host>
+   <host id='OpenCL2' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+   </host>
+   <host id='OpenCL3' power='2000000000'>
+     <prop id='memsize' value='12079136768'/>
+   </host>
+
+   <host id='RAM' power='1'/>
+
+   <link id='Host' bandwidth='10521832623.517040' latency='0.000000'/>
+
+   <link id='RAM-OpenCL0' bandwidth='7997534022.141151' latency='0.000010'/>
+   <link id='OpenCL0-RAM' bandwidth='7434276438.572320' latency='0.000012'/>
+   <link id='RAM-OpenCL1' bandwidth='7978223026.445667' latency='0.000011'/>
+   <link id='OpenCL1-RAM' bandwidth='7232140009.638909' latency='0.000014'/>
+   <link id='RAM-OpenCL2' bandwidth='8025122400.678086' latency='0.000011'/>
+   <link id='OpenCL2-RAM' bandwidth='7300126055.185305' latency='0.000012'/>
+   <link id='RAM-OpenCL3' bandwidth='8002101228.048121' latency='0.000010'/>
+   <link id='OpenCL3-RAM' bandwidth='7333165510.983491' latency='0.000012'/>
+
+   <link id='RAM-CUDA0' bandwidth='10517678844.278971' latency='0.000010'/>
+   <link id='CUDA0-RAM' bandwidth='10521701010.666672' latency='0.000011'/>
+   <link id='RAM-CUDA1' bandwidth='10517427805.652538' latency='0.000010'/>
+   <link id='CUDA1-RAM' bandwidth='10521233123.485935' latency='0.000010'/>
+   <link id='RAM-CUDA2' bandwidth='10517320202.942270' latency='0.000010'/>
+   <link id='CUDA2-RAM' bandwidth='10521832623.517040' latency='0.000011'/>
+   <link id='RAM-CUDA3' bandwidth='10517178916.561483' latency='0.000010'/>
+   <link id='CUDA3-RAM' bandwidth='10521716373.062309' latency='0.000011'/>
+
+   <link id='CUDA0-CUDA1' bandwidth='10244090134.034805' latency='0.000012'/>
+   <link id='CUDA0-CUDA2' bandwidth='7662719221.740685' latency='0.000024'/>
+   <link id='CUDA0-CUDA3' bandwidth='8527735591.087247' latency='0.000023'/>
+   <link id='CUDA1-CUDA0' bandwidth='10240684078.899693' latency='0.000012'/>
+   <link id='CUDA1-CUDA2' bandwidth='7630369996.384952' latency='0.000023'/>
+   <link id='CUDA1-CUDA3' bandwidth='8542253951.429195' latency='0.000023'/>
+   <link id='CUDA2-CUDA0' bandwidth='8504224628.018895' latency='0.000024'/>
+   <link id='CUDA2-CUDA1' bandwidth='8517475744.443908' latency='0.000024'/>
+   <link id='CUDA2-CUDA3' bandwidth='10232000931.164429' latency='0.000011'/>
+   <link id='CUDA3-CUDA0' bandwidth='8496220968.849647' latency='0.000023'/>
+   <link id='CUDA3-CUDA1' bandwidth='8514239613.171523' latency='0.000023'/>
+   <link id='CUDA3-CUDA2' bandwidth='10242870726.441437' latency='0.000011'/>
+   <route src='RAM' dst='CUDA0' symmetrical='NO'><link_ctn id='RAM-CUDA0'/><link_ctn id='Host'/></route>
+   <route src='CUDA0' dst='RAM' symmetrical='NO'><link_ctn id='CUDA0-RAM'/><link_ctn id='Host'/></route>
+   <route src='RAM' dst='CUDA1' symmetrical='NO'><link_ctn id='RAM-CUDA1'/><link_ctn id='Host'/></route>
+   <route src='CUDA1' dst='RAM' symmetrical='NO'><link_ctn id='CUDA1-RAM'/><link_ctn id='Host'/></route>
+   <route src='RAM' dst='CUDA2' symmetrical='NO'><link_ctn id='RAM-CUDA2'/><link_ctn id='Host'/></route>
+   <route src='CUDA2' dst='RAM' symmetrical='NO'><link_ctn id='CUDA2-RAM'/><link_ctn id='Host'/></route>
+   <route src='RAM' dst='CUDA3' symmetrical='NO'><link_ctn id='RAM-CUDA3'/><link_ctn id='Host'/></route>
+   <route src='CUDA3' dst='RAM' symmetrical='NO'><link_ctn id='CUDA3-RAM'/><link_ctn id='Host'/></route>
+   <route src='CUDA0' dst='CUDA1' symmetrical='NO'><link_ctn id='CUDA0-CUDA1'/><link_ctn id='Host'/></route>
+   <route src='CUDA0' dst='CUDA2' symmetrical='NO'><link_ctn id='CUDA0-CUDA2'/><link_ctn id='Host'/></route>
+   <route src='CUDA0' dst='CUDA3' symmetrical='NO'><link_ctn id='CUDA0-CUDA3'/><link_ctn id='Host'/></route>
+   <route src='CUDA1' dst='CUDA0' symmetrical='NO'><link_ctn id='CUDA1-CUDA0'/><link_ctn id='Host'/></route>
+   <route src='CUDA1' dst='CUDA2' symmetrical='NO'><link_ctn id='CUDA1-CUDA2'/><link_ctn id='Host'/></route>
+   <route src='CUDA1' dst='CUDA3' symmetrical='NO'><link_ctn id='CUDA1-CUDA3'/><link_ctn id='Host'/></route>
+   <route src='CUDA2' dst='CUDA0' symmetrical='NO'><link_ctn id='CUDA2-CUDA0'/><link_ctn id='Host'/></route>
+   <route src='CUDA2' dst='CUDA1' symmetrical='NO'><link_ctn id='CUDA2-CUDA1'/><link_ctn id='Host'/></route>
+   <route src='CUDA2' dst='CUDA3' symmetrical='NO'><link_ctn id='CUDA2-CUDA3'/><link_ctn id='Host'/></route>
+   <route src='CUDA3' dst='CUDA0' symmetrical='NO'><link_ctn id='CUDA3-CUDA0'/><link_ctn id='Host'/></route>
+   <route src='CUDA3' dst='CUDA1' symmetrical='NO'><link_ctn id='CUDA3-CUDA1'/><link_ctn id='Host'/></route>
+   <route src='CUDA3' dst='CUDA2' symmetrical='NO'><link_ctn id='CUDA3-CUDA2'/><link_ctn id='Host'/></route>
+
+   <route src='RAM' dst='OpenCL0' symmetrical='NO'><link_ctn id='RAM-OpenCL0'/><link_ctn id='Host'/></route>
+   <route src='OpenCL0' dst='RAM' symmetrical='NO'><link_ctn id='OpenCL0-RAM'/><link_ctn id='Host'/></route>
+   <route src='RAM' dst='OpenCL1' symmetrical='NO'><link_ctn id='RAM-OpenCL1'/><link_ctn id='Host'/></route>
+   <route src='OpenCL1' dst='RAM' symmetrical='NO'><link_ctn id='OpenCL1-RAM'/><link_ctn id='Host'/></route>
+   <route src='RAM' dst='OpenCL2' symmetrical='NO'><link_ctn id='RAM-OpenCL2'/><link_ctn id='Host'/></route>
+   <route src='OpenCL2' dst='RAM' symmetrical='NO'><link_ctn id='OpenCL2-RAM'/><link_ctn id='Host'/></route>
+   <route src='RAM' dst='OpenCL3' symmetrical='NO'><link_ctn id='RAM-OpenCL3'/><link_ctn id='Host'/></route>
+   <route src='OpenCL3' dst='RAM' symmetrical='NO'><link_ctn id='OpenCL3-RAM'/><link_ctn id='Host'/></route>
+ </AS>
+ </platform>

+ 168 - 0
tools/perfmodels/sampling/codelets/44/chol_model_11.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	7.379027e+03   	1.089221e+03   	5.903221e+05   	4.450915e+09   	80
+afdd228b	1638400        	8.758624e+07   	2.799281e+03   	5.546704e+02   	2.323403e+05   	6.759213e+08   	83
+cea37d6d	409600         	1.097392e+07   	4.391691e+02   	4.300491e+01   	7.597626e+04   	3.368638e+07   	173
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	7.699799e+03   	1.513211e+03   	9.239759e+04   	7.389205e+08   	12
+afdd228b	1638400        	8.758624e+07   	5.010966e+03   	7.544427e+02   	5.010965e+04   	2.567896e+08   	10
+cea37d6d	409600         	1.097392e+07   	3.343709e+03   	3.943178e+02   	3.343709e+04   	1.133588e+08   	10
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	8.514691e+03   	1.568765e+03   	9.366160e+04   	8.245709e+08   	11
+afdd228b	1638400        	8.758624e+07   	5.436465e+03   	1.325711e+03   	5.436465e+04   	3.131266e+08   	10
+cea37d6d	409600         	1.097392e+07   	3.336739e+03   	3.113015e+02   	3.336739e+04   	1.123074e+08   	10
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	8.456328e+03   	1.411439e+03   	1.014759e+05   	8.820196e+08   	12
+afdd228b	1638400        	8.758624e+07   	4.996835e+03   	9.396038e+02   	4.996835e+04   	2.585122e+08   	10
+cea37d6d	409600         	1.097392e+07   	3.060839e+03   	4.968177e+01   	3.060839e+04   	9.371202e+07   	10
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	2.953730e+08   	8.333388e+03   	1.108400e+03   	1.000007e+05   	8.480868e+08   	12
+afdd228b	1638400        	8.758624e+07   	5.517925e+03   	1.047059e+03   	5.517925e+04   	3.154382e+08   	10
+cea37d6d	409600         	1.097392e+07   	3.435367e+03   	2.405829e+02   	3.435367e+04   	1.185962e+08   	10
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/chol_model_21.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	1.775772e+04   	3.736007e+03   	2.386637e+07   	4.425714e+11   	1344
+d39bff17	3276800        	2.625536e+08   	5.276862e+03   	9.789431e+02   	7.070995e+06   	3.859682e+10   	1340
+2c1922b7	819200         	3.287040e+07   	7.675336e+02   	1.464194e+02   	2.842177e+06   	2.260854e+09   	3703
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	2.018325e+03   	2.870643e+02   	1.687320e+06   	3.474450e+09   	836
+d39bff17	3276800        	2.625536e+08   	1.179394e+03   	1.705358e+02   	3.538181e+05   	4.260157e+08   	300
+2c1922b7	819200         	3.287040e+07   	4.644748e+02   	7.687001e+01   	3.297771e+04   	1.573685e+07   	71
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	1.972468e+03   	2.888901e+02   	1.510910e+06   	3.044151e+09   	766
+d39bff17	3276800        	2.625536e+08   	1.215766e+03   	1.649819e+02   	2.869207e+05   	3.552521e+08   	236
+2c1922b7	819200         	3.287040e+07   	4.764697e+02   	7.471348e+01   	4.621756e+04   	2.256273e+07   	97
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	2.005118e+03   	2.787124e+02   	1.836689e+06   	3.753933e+09   	916
+d39bff17	3276800        	2.625536e+08   	1.227664e+03   	1.874122e+02   	2.970946e+05   	3.732321e+08   	242
+2c1922b7	819200         	3.287040e+07   	4.209987e+02   	9.547071e+01   	6.441281e+04   	2.851225e+07   	153
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	8.856576e+08   	2.051755e+03   	2.742098e+02   	1.811700e+06   	3.783559e+09   	883
+d39bff17	3276800        	2.625536e+08   	1.153240e+03   	1.913332e+02   	3.194475e+05   	3.785401e+08   	277
+2c1922b7	819200         	3.287040e+07   	4.950127e+02   	6.747714e+01   	5.445140e+04   	2.745498e+07   	110
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/chol_model_22.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	7.545142e+02   	4.622075e+01   	1.174100e+07   	8.891991e+09   	15561
+f0ac7beb	4915200        	5.242880e+08   	2.651541e+02   	2.896639e+01   	2.197862e+06   	5.897272e+08   	8289
+d46431bb	1228800        	6.553600e+07   	5.633559e+01   	1.027680e+01   	7.345034e+05   	4.275566e+07   	13038
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	7.434516e+02   	4.620554e+01   	1.184541e+07   	8.840509e+09   	15933
+f0ac7beb	4915200        	5.242880e+08   	2.633265e+02   	2.997768e+01   	2.185610e+06   	5.829880e+08   	8300
+d46431bb	1228800        	6.553600e+07   	5.716015e+01   	1.157773e+01   	6.223597e+05   	3.703364e+07   	10888
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	7.437036e+02   	4.816754e+01   	1.192752e+07   	8.907749e+09   	16038
+f0ac7beb	4915200        	5.242880e+08   	2.593699e+02   	2.791728e+01   	2.302427e+06   	6.040986e+08   	8877
+d46431bb	1228800        	6.553600e+07   	5.656092e+01   	1.160148e+01   	5.523739e+05   	3.255722e+07   	9766
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	7.558763e+02   	4.610795e+01   	1.204791e+07   	9.140616e+09   	15939
+f0ac7beb	4915200        	5.242880e+08   	2.625144e+02   	2.860172e+01   	2.207221e+06   	5.863054e+08   	8408
+d46431bb	1228800        	6.553600e+07   	5.829194e+01   	1.220705e+01   	7.805874e+05   	4.749737e+07   	13391
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	1.769472e+09   	3.494026e+04   	6.587010e+03   	1.697048e+08   	6.140270e+12   	4857
+f0ac7beb	4915200        	5.242880e+08   	1.147661e+04   	2.242393e+03   	3.799907e+07   	4.527495e+11   	3311
+d46431bb	1228800        	6.553600e+07   	1.593513e+03   	3.073908e+02   	2.396962e+07   	3.961722e+10   	15042
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/cl_update.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.292737e+03   	8.111826e+01   	1.783977e+06   	2.315293e+09   	1380
+6d78e48f	4461600        	0.000000e+00   	7.254397e+02   	8.693801e+01   	2.717497e+06   	1.999693e+09   	3746
+49ec0825	34613280       	0.000000e+00   	2.847204e+03   	1.159244e+02   	5.255939e+06   	1.498954e+10   	1846
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	2.010356e+04   	1.739800e+03   	4.201644e+06   	8.510064e+10   	209
+6d78e48f	4461600        	0.000000e+00   	6.471465e+03   	9.708551e+02   	2.344612e+07   	1.551456e+11   	3623
+49ec0825	34613280       	0.000000e+00   	4.705100e+04   	5.067137e+03   	1.383299e+07   	6.584049e+11   	294
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.333639e+03   	8.095890e+01   	1.871095e+06   	2.504561e+09   	1403
+6d78e48f	4461600        	0.000000e+00   	7.466797e+02   	9.599725e+01   	2.594712e+06   	1.969443e+09   	3475
+49ec0825	34613280       	0.000000e+00   	2.914989e+03   	1.085303e+02   	5.037101e+06   	1.470345e+10   	1728
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.319437e+03   	8.470023e+01   	1.921100e+06   	2.545216e+09   	1456
+6d78e48f	4461600        	0.000000e+00   	7.342135e+02   	9.435309e+01   	2.608661e+06   	1.946944e+09   	3553
+49ec0825	34613280       	0.000000e+00   	2.843027e+03   	1.019747e+02   	5.080489e+06   	1.446255e+10   	1787
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+8ec75d42	14753312       	0.000000e+00   	1.324805e+03   	7.460278e+01   	1.748742e+06   	2.324088e+09   	1320
+6d78e48f	4461600        	0.000000e+00   	7.321696e+02   	8.668478e+01   	2.571380e+06   	1.909076e+09   	3512
+49ec0825	34613280       	0.000000e+00   	2.873920e+03   	1.173279e+02   	5.566783e+06   	1.602515e+10   	1937
+

+ 8 - 0
tools/perfmodels/sampling/codelets/44/null.sirocco

@@ -0,0 +1,8 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+0

+ 168 - 0
tools/perfmodels/sampling/codelets/44/save_cl_bottom.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.447447e+01   	7.398265e+00   	3.237153e+04   	1.167387e+06   	939
+fb4b8624	4427800        	0.000000e+00   	5.439097e+01   	1.253425e+01   	3.094846e+05   	1.772711e+07   	5690
+f2ff9ae5	34480152       	0.000000e+00   	5.041329e+01   	1.085485e+01   	6.226042e+04   	3.284270e+06   	1235
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.176283e+01   	6.812714e+00   	1.673901e+04   	5.561382e+05   	527
+fb4b8624	4427800        	0.000000e+00   	5.311651e+01   	1.346481e+01   	1.290731e+04   	7.296474e+05   	243
+f2ff9ae5	34480152       	0.000000e+00   	4.192896e+01   	9.759572e+00   	3.207566e+04   	1.417765e+06   	765
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.786531e+01   	9.328071e+00   	1.991715e+04   	7.999380e+05   	526
+fb4b8624	4427800        	0.000000e+00   	5.555598e+01   	1.303330e+01   	9.444517e+03   	5.535768e+05   	170
+f2ff9ae5	34480152       	0.000000e+00   	4.359390e+01   	1.022197e+01   	2.218929e+04   	1.020503e+06   	509
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	4.035980e+01   	9.947105e+00   	1.989738e+04   	8.518341e+05   	493
+fb4b8624	4427800        	0.000000e+00   	8.863692e+01   	1.909792e+01   	1.161144e+04   	1.076982e+06   	131
+f2ff9ae5	34480152       	0.000000e+00   	3.838146e+01   	9.359960e+00   	2.890124e+04   	1.175241e+06   	753
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.505264e+01   	8.845541e+00   	1.945422e+04   	7.253469e+05   	555
+fb4b8624	4427800        	0.000000e+00   	4.717545e+01   	1.027132e+01   	6.227160e+03   	3.076951e+05   	132
+f2ff9ae5	34480152       	0.000000e+00   	3.110432e+01   	6.170515e+00   	2.370149e+04   	7.662320e+05   	762
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/save_cl_top.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.501046e+01   	7.466097e+00   	2.835847e+04   	1.037995e+06   	810
+fb4b8624	4427800        	0.000000e+00   	2.773216e+01   	6.482940e+00   	1.680846e+05   	4.916085e+06   	6061
+f2ff9ae5	34480152       	0.000000e+00   	5.337509e+01   	1.160081e+01   	6.591824e+04   	3.684596e+06   	1235
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.362211e+01   	7.569501e+00   	1.795421e+04   	6.342550e+05   	534
+fb4b8624	4427800        	0.000000e+00   	3.231969e+01   	5.862640e+00   	1.877774e+04   	6.268602e+05   	581
+f2ff9ae5	34480152       	0.000000e+00   	3.995777e+01   	9.699452e+00   	3.180638e+04   	1.345799e+06   	796
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.654429e+01   	8.110997e+00   	1.710273e+04   	6.557958e+05   	468
+fb4b8624	4427800        	0.000000e+00   	3.606370e+01   	8.402269e+00   	2.171034e+04   	8.254553e+05   	602
+f2ff9ae5	34480152       	0.000000e+00   	3.192218e+01   	6.956874e+00   	1.695068e+04   	5.668019e+05   	531
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.551068e+01   	7.784366e+00   	1.278384e+04   	4.757777e+05   	360
+fb4b8624	4427800        	0.000000e+00   	3.460669e+01   	7.284566e+00   	1.996806e+04   	7.216470e+05   	577
+f2ff9ae5	34480152       	0.000000e+00   	3.547098e+01   	8.430109e+00   	2.805755e+04   	1.051443e+06   	791
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+4af260f6	14678040       	0.000000e+00   	3.712899e+01   	8.969310e+00   	2.030956e+04   	7.980787e+05   	547
+fb4b8624	4427800        	0.000000e+00   	3.619911e+01   	8.162351e+00   	2.287784e+04   	8.702638e+05   	632
+f2ff9ae5	34480152       	0.000000e+00   	3.132201e+01   	6.179930e+00   	2.584066e+04   	8.408892e+05   	825
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_dgemm_gemm.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	66355200       	7.077888e+09   	6.467396e+03   	5.820387e+02   	1.403425e+06   	9.150018e+09   	217
+0b0b0ce8	7372800        	2.621440e+08   	2.828637e+02   	4.132770e+01   	2.376055e+04   	6.864469e+06   	84
+4220e23d	29491200       	2.097152e+09   	2.091138e+03   	2.430963e+02   	3.764048e+05   	7.977516e+08   	180
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	66355200       	7.077888e+09   	6.530201e+03   	6.982602e+02   	1.214617e+06   	8.022384e+09   	186
+0b0b0ce8	7372800        	2.621440e+08   	2.596160e+02   	3.720670e+01   	2.907699e+04   	7.703898e+06   	112
+4220e23d	29491200       	2.097152e+09   	2.068075e+03   	2.561461e+02   	4.156832e+05   	8.728519e+08   	201
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	66355200       	7.077888e+09   	6.594324e+03   	6.341124e+02   	1.384808e+06   	9.216313e+09   	210
+0b0b0ce8	7372800        	2.621440e+08   	2.592059e+02   	3.728165e+01   	2.773503e+04   	7.337807e+06   	107
+4220e23d	29491200       	2.097152e+09   	2.149687e+03   	2.853500e+02   	3.847940e+05   	8.417616e+08   	179
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	66355200       	7.077888e+09   	6.615698e+03   	6.959563e+02   	1.210673e+06   	8.098082e+09   	183
+0b0b0ce8	7372800        	2.621440e+08   	2.665077e+02   	3.721734e+01   	3.278045e+04   	8.906615e+06   	123
+4220e23d	29491200       	2.097152e+09   	2.090283e+03   	2.730830e+02   	4.285080e+05   	9.109906e+08   	205
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	66355200       	7.077888e+09   	2.793361e+05   	4.545353e+04   	2.039154e+07   	5.846913e+12   	73
+0b0b0ce8	7372800        	2.621440e+08   	1.003329e+04   	9.763114e+02   	3.471519e+06   	3.516056e+10   	346
+4220e23d	29491200       	2.097152e+09   	8.266143e+04   	1.577004e+04   	6.860899e+06   	5.877733e+11   	83
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_11.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	1.778570e+05   	1.735127e+04   	1.778570e+06   	3.193419e+11   	10
+cea37d6d	819200         	0.000000e+00   	5.904224e+03   	6.575598e+02   	5.668055e+05   	3.388055e+09   	96
+afdd228b	3276800        	0.000000e+00   	4.953149e+04   	6.709149e+03   	6.439093e+05   	3.247895e+10   	13
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	5.552387e+04   	5.714037e+03   	1.054954e+06   	5.919546e+10   	19
+cea37d6d	819200         	0.000000e+00   	9.707597e+03   	9.439210e+02   	9.707597e+04   	9.512842e+08   	10
+afdd228b	3276800        	0.000000e+00   	2.633937e+04   	3.608518e+03   	3.950905e+05   	1.060175e+10   	15
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	5.675882e+04   	6.232185e+03   	1.248694e+06   	7.172888e+10   	22
+cea37d6d	819200         	0.000000e+00   	9.541018e+03   	9.285702e+02   	9.541018e+04   	9.189326e+08   	10
+afdd228b	3276800        	0.000000e+00   	2.651477e+04   	2.554649e+03   	3.181772e+05   	8.514711e+09   	12
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	5.624387e+04   	5.549003e+03   	8.436581e+05   	4.791247e+10   	15
+cea37d6d	819200         	0.000000e+00   	9.661577e+03   	7.114114e+02   	9.661577e+04   	9.385217e+08   	10
+afdd228b	3276800        	0.000000e+00   	2.574090e+04   	2.071791e+03   	5.148179e+05   	1.333772e+10   	20
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	7372800        	0.000000e+00   	5.895007e+04   	8.369498e+03   	7.663509e+05   	4.608707e+10   	13
+cea37d6d	819200         	0.000000e+00   	9.910778e+03   	1.200981e+03   	9.910778e+04   	9.966588e+08   	10
+afdd228b	3276800        	0.000000e+00   	2.572979e+04   	2.095041e+03   	5.917851e+05   	1.532746e+10   	23
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_12.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	3.402547e+04   	6.005726e+03   	7.111323e+06   	2.495045e+11   	209
+2c1922b7	1638400        	0.000000e+00   	6.443940e+03   	1.476966e+03   	1.610985e+05   	1.092645e+09   	25
+d39bff17	6553600        	0.000000e+00   	1.041247e+04   	1.992240e+03   	3.092503e+06   	3.337940e+10   	297
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	3.238292e+03   	4.902889e+02   	6.768030e+05   	2.241926e+09   	209
+2c1922b7	1638400        	0.000000e+00   	5.889641e+02   	1.063542e+02   	1.272162e+05   	7.736903e+07   	216
+d39bff17	6553600        	0.000000e+00   	1.349909e+03   	1.936514e+02   	2.942801e+05   	4.054266e+08   	218
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	3.179744e+03   	4.016259e+02   	6.804652e+05   	2.198224e+09   	214
+2c1922b7	1638400        	0.000000e+00   	5.796961e+02   	1.048897e+02   	1.199971e+05   	7.183924e+07   	207
+d39bff17	6553600        	0.000000e+00   	1.343917e+03   	2.039127e+02   	2.244341e+05   	3.085646e+08   	167
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	3.362936e+03   	5.457359e+02   	6.524096e+05   	2.251791e+09   	194
+2c1922b7	1638400        	0.000000e+00   	5.405600e+02   	9.344101e+01   	1.513568e+05   	8.426217e+07   	280
+d39bff17	6553600        	0.000000e+00   	1.275634e+03   	1.830051e+02   	2.270629e+05   	2.956105e+08   	178
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	3.306190e+03   	4.921154e+02   	7.009122e+05   	2.368690e+09   	212
+2c1922b7	1638400        	0.000000e+00   	5.641572e+02   	1.012475e+02   	1.376544e+05   	8.015997e+07   	244
+d39bff17	6553600        	0.000000e+00   	1.355727e+03   	1.656730e+02   	2.331851e+05   	3.208564e+08   	172
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_21.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	4.935132e+04   	1.056942e+04   	1.875350e+06   	9.679609e+10   	38
+2c1922b7	1638400        	0.000000e+00   	1.386830e+03   	2.636504e+02   	1.256468e+06   	1.805486e+09   	906
+d39bff17	6553600        	0.000000e+00   	1.762919e+04   	3.396463e+03   	6.170218e+05   	1.128135e+10   	35
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	2.580226e+03   	3.710017e+02   	5.495882e+05   	1.447380e+09   	213
+2c1922b7	1638400        	0.000000e+00   	3.441326e+02   	6.695097e+01   	4.267244e+04   	1.524080e+07   	124
+d39bff17	6553600        	0.000000e+00   	9.089165e+02   	1.570596e+02   	1.590604e+05   	1.488895e+08   	175
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	2.485190e+03   	3.288259e+02   	6.188122e+05   	1.564789e+09   	249
+2c1922b7	1638400        	0.000000e+00   	3.565977e+02   	7.541526e+01   	3.672956e+04   	1.368348e+07   	103
+d39bff17	6553600        	0.000000e+00   	9.441529e+02   	1.527244e+02   	2.398148e+05   	2.323464e+08   	254
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	2.615067e+03   	4.428563e+02   	5.334737e+05   	1.435078e+09   	204
+2c1922b7	1638400        	0.000000e+00   	3.667814e+02   	6.561130e+01   	3.227676e+04   	1.221734e+07   	88
+d39bff17	6553600        	0.000000e+00   	9.018562e+02   	1.587421e+02   	1.470026e+05   	1.366826e+08   	163
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	14745600       	0.000000e+00   	2.480709e+03   	3.749281e+02   	4.514890e+05   	1.145597e+09   	182
+2c1922b7	1638400        	0.000000e+00   	3.718262e+02   	7.249781e+01   	2.342505e+04   	9.041172e+06   	63
+d39bff17	6553600        	0.000000e+00   	9.130900e+02   	1.739240e+02   	2.182285e+05   	2.064919e+08   	239
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_dlu_lu_model_22.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	1.754881e+03   	1.567907e+02   	8.516439e+06   	1.506464e+10   	4853
+d46431bb	2457600        	0.000000e+00   	9.227862e+01   	1.339393e+01   	6.585925e+05   	6.205436e+07   	7137
+f0ac7beb	9830400        	0.000000e+00   	5.560171e+02   	4.481480e+01   	2.452035e+06   	1.372230e+09   	4410
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	1.725012e+03   	1.556789e+02   	8.775134e+06   	1.526049e+10   	5087
+d46431bb	2457600        	0.000000e+00   	9.099306e+01   	1.290433e+01   	7.117477e+05   	6.606663e+07   	7822
+f0ac7beb	9830400        	0.000000e+00   	5.497124e+02   	4.364744e+01   	2.308242e+06   	1.276869e+09   	4199
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	1.724207e+03   	1.559700e+02   	8.529651e+06   	1.482723e+10   	4947
+d46431bb	2457600        	0.000000e+00   	9.395983e+01   	1.410875e+01   	5.884704e+05   	5.653928e+07   	6263
+f0ac7beb	9830400        	0.000000e+00   	5.531811e+02   	3.935565e+01   	3.264875e+06   	1.815209e+09   	5902
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	6.659236e+04   	1.142300e+04   	1.507651e+08   	1.033522e+13   	2264
+d46431bb	2457600        	0.000000e+00   	3.623237e+03   	8.721045e+02   	1.668138e+07   	6.394225e+10   	4604
+f0ac7beb	9830400        	0.000000e+00   	2.355764e+04   	4.984182e+03   	4.405279e+07   	1.084235e+12   	1870
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	22118400       	0.000000e+00   	1.746747e+03   	1.539679e+02   	8.628932e+06   	1.518967e+10   	4940
+d46431bb	2457600        	0.000000e+00   	9.539483e+01   	1.447066e+01   	7.032507e+05   	6.863017e+07   	7372
+f0ac7beb	9830400        	0.000000e+00   	5.601014e+02   	3.783630e+01   	3.218342e+06   	1.810824e+09   	5746
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_sgemm_gemm.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	33177600       	7.077888e+09   	2.745578e+03   	3.064191e+02   	6.616844e+05   	1.839335e+09   	241
+0b0b0ce8	3686400        	2.621440e+08   	1.582927e+02   	3.333442e+01   	3.434951e+04   	5.678402e+06   	217
+4220e23d	14745600       	2.097152e+09   	8.206871e+02   	1.017181e+02   	1.148962e+05   	9.574235e+07   	140
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	33177600       	7.077888e+09   	2.686428e+03   	2.002215e+02   	6.716071e+05   	1.814247e+09   	250
+0b0b0ce8	3686400        	2.621440e+08   	1.630480e+02   	3.438768e+01   	3.097912e+04   	5.275762e+06   	190
+4220e23d	14745600       	2.097152e+09   	8.448030e+02   	7.773742e+01   	2.433033e+05   	2.072837e+08   	288
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	33177600       	7.077888e+09   	2.791098e+03   	3.147711e+02   	6.503258e+05   	1.838209e+09   	233
+0b0b0ce8	3686400        	2.621440e+08   	1.624855e+02   	3.298013e+01   	2.940987e+04   	4.975550e+06   	181
+4220e23d	14745600       	2.097152e+09   	8.152506e+02   	1.017614e+02   	1.173961e+05   	9.719839e+07   	144
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	33177600       	7.077888e+09   	2.754203e+03   	2.682327e+02   	6.830422e+05   	1.899080e+09   	248
+0b0b0ce8	3686400        	2.621440e+08   	1.622246e+02   	3.553894e+01   	3.714942e+04   	6.315779e+06   	229
+4220e23d	14745600       	2.097152e+09   	8.611626e+02   	9.290485e+01   	2.411255e+05   	2.100651e+08   	280
+
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+492beed5	33177600       	7.077888e+09   	1.712078e+05   	4.163047e+04   	2.773567e+07   	5.029326e+12   	162
+0b0b0ce8	3686400        	2.621440e+08   	6.441655e+03   	1.152866e+03   	3.220827e+05   	2.141201e+09   	50
+4220e23d	14745600       	2.097152e+09   	4.927734e+04   	1.166029e+04   	5.913281e+06   	3.077063e+11   	120
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_11.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	1.029624e+05   	6.177928e+03   	1.029624e+06   	1.063943e+11   	10
+cea37d6d	409600         	0.000000e+00   	4.037068e+03   	3.335771e+02   	2.906689e+05   	1.181462e+09   	72
+afdd228b	1638400        	0.000000e+00   	2.923093e+04   	1.278718e+03   	5.553877e+05   	1.626557e+10   	19
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	4.153583e+04   	9.473225e+02   	9.968599e+05   	4.142694e+10   	24
+afdd228b	1638400        	0.000000e+00   	2.088164e+04   	1.502169e+03   	4.176328e+05   	8.765989e+09   	20
+cea37d6d	409600         	0.000000e+00   	9.866251e+03   	7.665217e+02   	9.866251e+04   	9.793047e+08   	10
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	4.357190e+04   	5.271768e+03   	7.842942e+05   	3.467343e+10   	18
+cea37d6d	409600         	0.000000e+00   	9.238189e+03   	1.713378e+02   	9.238189e+04   	8.537349e+08   	10
+afdd228b	1638400        	0.000000e+00   	2.242688e+04   	2.707726e+03   	3.139763e+05   	7.144153e+09   	14
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	4.647825e+04   	9.283373e+03   	5.577390e+05   	2.695691e+10   	12
+cea37d6d	409600         	0.000000e+00   	9.471831e+03   	5.475075e+02   	9.471831e+04   	9.001535e+08   	10
+afdd228b	1638400        	0.000000e+00   	2.096495e+04   	7.732458e+02   	3.773690e+05   	7.922284e+09   	18
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+617e5fe6	3686400        	0.000000e+00   	4.258012e+04   	2.921691e+03   	8.090223e+05   	3.461046e+10   	19
+cea37d6d	409600         	0.000000e+00   	9.338877e+03   	3.249828e+02   	9.338877e+04   	8.732025e+08   	10
+afdd228b	1638400        	0.000000e+00   	2.172039e+04   	1.567348e+03   	2.823650e+05   	6.165013e+09   	13
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_12.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	1.726784e+04   	3.264426e+03   	3.021872e+06   	5.404608e+10   	175
+2c1922b7	819200         	0.000000e+00   	4.245334e+03   	7.020174e+02   	6.368000e+04   	2.777353e+08   	15
+d39bff17	3276800        	0.000000e+00   	5.106660e+03   	6.848530e+02   	2.134584e+06   	1.109665e+10   	418
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	2.106719e+03   	2.638200e+02   	5.646006e+05   	1.208108e+09   	268
+2c1922b7	819200         	0.000000e+00   	4.901281e+02   	6.729653e+01   	6.616730e+04   	3.304185e+07   	135
+d39bff17	3276800        	0.000000e+00   	1.216432e+03   	1.410794e+02   	1.934127e+05   	2.384382e+08   	159
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	2.138085e+03   	2.696288e+02   	6.371492e+05   	1.383944e+09   	298
+2c1922b7	819200         	0.000000e+00   	4.968224e+02   	7.860110e+01   	5.415364e+04   	2.757816e+07   	109
+d39bff17	3276800        	0.000000e+00   	1.199302e+03   	1.658297e+02   	1.774966e+05   	2.169419e+08   	148
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	2.196957e+03   	3.265420e+02   	4.349975e+05   	9.767837e+08   	198
+2c1922b7	819200         	0.000000e+00   	8.901347e+01   	1.918734e+01   	2.412265e+04   	2.247011e+06   	271
+d39bff17	3276800        	0.000000e+00   	1.148300e+03   	2.163448e+02   	2.021009e+05   	2.403102e+08   	176
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	2.164310e+03   	2.607466e+02   	4.869698e+05   	1.069251e+09   	225
+2c1922b7	819200         	0.000000e+00   	4.930666e+02   	7.623523e+01   	7.642532e+04   	3.858360e+07   	155
+d39bff17	3276800        	0.000000e+00   	1.203544e+03   	1.679024e+02   	2.286733e+05   	2.805746e+08   	190
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_21.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	1.583302e+04   	2.624137e+03   	3.974089e+06   	6.465024e+10   	251
+2c1922b7	819200         	0.000000e+00   	3.523655e+03   	5.077738e+02   	5.990214e+04   	2.154576e+08   	17
+d39bff17	3276800        	0.000000e+00   	8.986208e+03   	1.629610e+03   	1.797242e+05   	1.668151e+09   	20
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	1.570696e+03   	2.281691e+02   	4.115224e+05   	6.600167e+08   	262
+2c1922b7	819200         	0.000000e+00   	2.882912e+02   	5.271451e+01   	7.409085e+04   	2.207390e+07   	257
+d39bff17	3276800        	0.000000e+00   	8.365056e+02   	1.344660e+02   	1.396964e+05   	1.198764e+08   	167
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	1.571709e+03   	2.150516e+02   	4.007858e+05   	6.417117e+08   	255
+2c1922b7	819200         	0.000000e+00   	9.967334e+01   	2.197557e+01   	2.372225e+04   	2.479413e+06   	238
+d39bff17	3276800        	0.000000e+00   	7.019049e+02   	1.632697e+02   	1.109010e+05   	8.205375e+07   	158
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	1.569547e+03   	2.419662e+02   	2.589752e+05   	4.161341e+08   	165
+2c1922b7	819200         	0.000000e+00   	2.858293e+02   	5.241353e+01   	7.460146e+04   	2.204030e+07   	261
+d39bff17	3276800        	0.000000e+00   	8.352707e+02   	1.515223e+02   	1.587014e+05   	1.369209e+08   	190
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+ff82dda0	7372800        	0.000000e+00   	1.591448e+03   	2.256700e+02   	2.387172e+05   	3.875451e+08   	150
+2c1922b7	819200         	0.000000e+00   	2.930233e+02   	5.590601e+01   	5.362326e+04   	1.628483e+07   	183
+d39bff17	3276800        	0.000000e+00   	8.453596e+02   	1.395049e+02   	1.420204e+05   	1.233279e+08   	168
+

+ 168 - 0
tools/perfmodels/sampling/codelets/44/starpu_slu_lu_model_22.sirocco

@@ -0,0 +1,168 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+5
+####################
+# COMB_4
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb4)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	0.000000e+00   	3.517390e+04   	7.045528e+03   	6.925741e+07   	2.533794e+12   	1969
+d46431bb	1228800        	0.000000e+00   	1.613402e+03   	3.115535e+02   	8.438094e+06   	1.412169e+10   	5230
+f0ac7beb	4915200        	0.000000e+00   	1.087142e+04   	2.109400e+03   	2.505863e+07   	2.826792e+11   	2305
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	0.000000e+00   	7.851775e+02   	4.684799e+01   	4.315336e+06   	3.400367e+09   	5496
+d46431bb	1228800        	0.000000e+00   	6.142508e+01   	1.012391e+01   	4.393736e+05   	2.772170e+07   	7153
+f0ac7beb	4915200        	0.000000e+00   	2.657700e+02   	2.996380e+01   	1.356225e+06   	3.650255e+08   	5103
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	0.000000e+00   	7.926860e+02   	4.760061e+01   	4.363736e+06   	3.471546e+09   	5505
+d46431bb	1228800        	0.000000e+00   	6.592485e+01   	1.426453e+01   	1.071279e+05   	7.393038e+06   	1625
+f0ac7beb	4915200        	0.000000e+00   	2.693001e+02   	2.710216e+01   	1.308798e+06   	3.560293e+08   	4860
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	0.000000e+00   	7.922324e+02   	5.091772e+01   	4.156844e+06   	3.306790e+09   	5247
+d46431bb	1228800        	0.000000e+00   	6.317490e+01   	1.087216e+01   	2.866877e+05   	1.864788e+07   	4538
+f0ac7beb	4915200        	0.000000e+00   	2.686331e+02   	2.912062e+01   	1.401996e+06   	3.810483e+08   	5219
+
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+3
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda3_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+24c84a50	11059200       	0.000000e+00   	7.867204e+02   	4.699968e+01   	4.148377e+06   	3.275261e+09   	5273
+d46431bb	1228800        	0.000000e+00   	5.975719e+01   	9.345113e+00   	4.033610e+05   	2.469321e+07   	6750
+f0ac7beb	4915200        	0.000000e+00   	2.642224e+02   	2.666799e+01   	1.450317e+06   	3.871098e+08   	5489
+