Pārlūkot izejas kodu

Add hannibal perfmodels from starpu-simgrid paper

Samuel Thibault 6 gadi atpakaļ
vecāks
revīzija
0e0896e46b
26 mainītis faili ar 1064 papildinājumiem un 0 dzēšanām
  1. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.affinity
  2. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth
  3. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.config
  4. 1 0
      tools/perfmodels/sampling/bus/hannibal-pitch.latency
  5. 70 0
      tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml
  6. 70 0
      tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml
  7. 7 0
      tools/perfmodels/sampling/bus/hannibal.affinity
  8. 17 0
      tools/perfmodels/sampling/bus/hannibal.bandwidth
  9. 4 0
      tools/perfmodels/sampling/bus/hannibal.config
  10. 17 0
      tools/perfmodels/sampling/bus/hannibal.latency
  11. 70 0
      tools/perfmodels/sampling/bus/hannibal.platform.v4.xml
  12. 70 0
      tools/perfmodels/sampling/bus/hannibal.platform.xml
  13. 104 0
      tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal
  14. 1 0
      tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal-pitch
  15. 104 0
      tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal
  16. 1 0
      tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal-pitch
  17. 104 0
      tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal
  18. 1 0
      tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal-pitch
  19. 104 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal
  20. 1 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal-pitch
  21. 104 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal
  22. 1 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal-pitch
  23. 104 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal
  24. 1 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal-pitch
  25. 104 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal
  26. 1 0
      tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal-pitch

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.affinity

@@ -0,0 +1 @@
+hannibal.affinity

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth

@@ -0,0 +1 @@
+hannibal.bandwidth

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.config

@@ -0,0 +1 @@
+hannibal.config

+ 1 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.latency

@@ -0,0 +1 @@
+hannibal.latency

+ 70 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml

@@ -0,0 +1,70 @@
+<?xml version="1.0"?>
+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
+ <platform version="4">
+ <config id="General">
+   <prop id="network/TCP-gamma" value="-1"></prop>
+   <prop id="network/latency-factor" value="1"></prop>
+   <prop id="network/bandwidth-factor" value="1"></prop>
+ </config>
+ <AS  id="AS0"  routing="Full">
+   <host id="MAIN" speed="1f"/>
+   <host id="CPU0" speed="2000000000f"/>
+   <host id="CPU1" speed="2000000000f"/>
+   <host id="CPU2" speed="2000000000f"/>
+   <host id="CPU3" speed="2000000000f"/>
+   <host id="CPU4" speed="2000000000f"/>
+   <host id="CPU5" speed="2000000000f"/>
+   <host id="CPU6" speed="2000000000f"/>
+   <host id="CPU7" speed="2000000000f"/>
+   <host id="CUDA0" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA1" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA2" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="OpenCL0" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL1" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL2" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+
+   <host id="RAM" speed="1f"/>
+
+   <link id="Share" bandwidth="5988779905.433726Bps" latency="0.000000s"/>
+
+   <link id="RAM-CUDA0" bandwidth="1653658596.433726Bps" latency="0.000012s"/>
+   <link id="CUDA0-RAM" bandwidth="993981963.299022Bps" latency="0.000012s"/>
+   <link id="RAM-CUDA1" bandwidth="869707794.319062Bps" latency="0.000013s"/>
+   <link id="CUDA1-RAM" bandwidth="925610046.160954Bps" latency="0.000013s"/>
+   <link id="RAM-CUDA2" bandwidth="1653711631.023217Bps" latency="0.000012s"/>
+   <link id="CUDA2-RAM" bandwidth="981498659.805904Bps" latency="0.000013s"/>
+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796Bps" latency="0.000020s"/>
+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681Bps" latency="0.000064s"/>
+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693Bps" latency="0.000020s"/>
+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437Bps" latency="0.000036s"/>
+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525Bps" latency="0.000020s"/>
+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727Bps" latency="0.000037s"/>
+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
+ </AS>
+ </platform>

+ 70 - 0
tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml

@@ -0,0 +1,70 @@
+<?xml version="1.0"?>
+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
+ <platform version="3">
+ <config id="General">
+   <prop id="network/TCP_gamma" value="-1"></prop>
+   <prop id="network/latency_factor" value="1"></prop>
+   <prop id="network/bandwidth_factor" value="1"></prop>
+ </config>
+ <AS  id="AS0"  routing="Full">
+   <host id="MAIN" power="1"/>
+   <host id="CPU0" power="2000000000"/>
+   <host id="CPU1" power="2000000000"/>
+   <host id="CPU2" power="2000000000"/>
+   <host id="CPU3" power="2000000000"/>
+   <host id="CPU4" power="2000000000"/>
+   <host id="CPU5" power="2000000000"/>
+   <host id="CPU6" power="2000000000"/>
+   <host id="CPU7" power="2000000000"/>
+   <host id="CUDA0" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA1" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA2" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="OpenCL0" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL1" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL2" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+
+   <host id="RAM" power="1"/>
+
+   <link id="Share" bandwidth="5988779905.433726" latency="0.000000"/>
+   
+   <link id="RAM-CUDA0" bandwidth="1653658596.433726" latency="0.000012"/>
+   <link id="CUDA0-RAM" bandwidth="993981963.299022" latency="0.000012"/>
+   <link id="RAM-CUDA1" bandwidth="869707794.319062" latency="0.000013"/>
+   <link id="CUDA1-RAM" bandwidth="925610046.160954" latency="0.000013"/>
+   <link id="RAM-CUDA2" bandwidth="1653711631.023217" latency="0.000012"/>
+   <link id="CUDA2-RAM" bandwidth="981498659.805904" latency="0.000013"/>
+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796" latency="0.000020"/>
+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681" latency="0.000064"/>
+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693" latency="0.000020"/>
+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437" latency="0.000036"/>
+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525" latency="0.000020"/>
+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727" latency="0.000037"/>
+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
+ </AS>
+ </platform>

+ 7 - 0
tools/perfmodels/sampling/bus/hannibal.affinity

@@ -0,0 +1,7 @@
+# GPU	CPU0	CPU1	CPU2	CPU3	CPU4	CPU5	CPU6	CPU7	
+0	0	1	2	3	4	5	6	7	
+1	4	5	6	7	0	1	2	3	
+2	4	5	6	7	0	1	2	3	
+0	0	1	2	3	4	5	6	7	
+1	4	5	6	7	0	1	2	3	
+2	4	5	6	7	0	1	2	3	

+ 17 - 0
tools/perfmodels/sampling/bus/hannibal.bandwidth

@@ -0,0 +1,17 @@
+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
+0.000000	5988.779905	3149.675860	5988.971975	3975.378655	2636.838726	3992.447567	nan	nan	nan	nan	nan	nan	nan	nan	nan
+3599.738919	0.000000	1679.850942	2248.345554	1889.122528	1521.977521	1892.968372	nan	nan	nan	nan	nan	nan	nan	nan	nan
+3352.127736	2149.165370	0.000000	2149.190105	1818.623736	1475.884075	1822.187624	nan	nan	nan	nan	nan	nan	nan	nan	nan
+3554.530216	2230.599117	1669.939421	0.000000	1876.596887	1513.836926	1880.391850	nan	nan	nan	nan	nan	nan	nan	nan	nan
+2937.163572	1970.662958	1519.854976	1970.683755	0.000000	1389.455231	1692.226493	nan	nan	nan	nan	nan	nan	nan	nan	nan
+2610.203571	1817.881699	1427.338068	1817.899396	1575.646193	0.000000	1578.320689	nan	nan	nan	nan	nan	nan	nan	nan	nan
+2812.550617	1913.772761	1485.791058	1913.792375	1647.181820	1360.930908	0.000000	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan

+ 4 - 0
tools/perfmodels/sampling/bus/hannibal.config

@@ -0,0 +1,4 @@
+# Current configuration
+8 # Number of CPUs
+3 # Number of CUDA devices
+3 # Number of OpenCL devices

+ 17 - 0
tools/perfmodels/sampling/bus/hannibal.latency

@@ -0,0 +1,17 @@
+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
+0.000000	12.460938	12.570312	12.468750	20.000000	20.328125	19.593750	nan	nan	nan	nan	nan	nan	nan	nan	nan
+12.476562	0.000000	25.046875	24.945312	32.476562	32.804688	32.070312	nan	nan	nan	nan	nan	nan	nan	nan	nan
+12.593750	25.054688	0.000000	25.062500	32.593750	32.921875	32.187500	nan	nan	nan	nan	nan	nan	nan	nan	nan
+12.539062	25.000000	25.109375	0.000000	32.539062	32.867188	32.132812	nan	nan	nan	nan	nan	nan	nan	nan	nan
+63.601562	76.062500	76.171875	76.070312	0.000000	83.929688	83.195312	nan	nan	nan	nan	nan	nan	nan	nan	nan
+35.992188	48.453125	48.562500	48.460938	55.992188	0.000000	55.585938	nan	nan	nan	nan	nan	nan	nan	nan	nan
+36.765625	49.226562	49.335938	49.234375	56.765625	57.093750	0.000000	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan

+ 70 - 0
tools/perfmodels/sampling/bus/hannibal.platform.v4.xml

@@ -0,0 +1,70 @@
+<?xml version="1.0"?>
+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
+ <platform version="4">
+ <config id="General">
+   <prop id="network/TCP-gamma" value="-1"></prop>
+   <prop id="network/latency-factor" value="1"></prop>
+   <prop id="network/bandwidth-factor" value="1"></prop>
+ </config>
+ <AS  id="AS0"  routing="Full">
+   <host id="MAIN" speed="1f"/>
+   <host id="CPU0" speed="2000000000f"/>
+   <host id="CPU1" speed="2000000000f"/>
+   <host id="CPU2" speed="2000000000f"/>
+   <host id="CPU3" speed="2000000000f"/>
+   <host id="CPU4" speed="2000000000f"/>
+   <host id="CPU5" speed="2000000000f"/>
+   <host id="CPU6" speed="2000000000f"/>
+   <host id="CPU7" speed="2000000000f"/>
+   <host id="CUDA0" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA1" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA2" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="OpenCL0" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL1" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL2" speed="2000000000f">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+
+   <host id="RAM" speed="1f"/>
+
+   <link id="Share" bandwidth="5988971975.023217Bps" latency="0.000000s"/>
+
+   <link id="RAM-CUDA0" bandwidth="5988779905.433726Bps" latency="0.000012s"/>
+   <link id="CUDA0-RAM" bandwidth="3599738919.299022Bps" latency="0.000012s"/>
+   <link id="RAM-CUDA1" bandwidth="3149675860.319062Bps" latency="0.000013s"/>
+   <link id="CUDA1-RAM" bandwidth="3352127736.160954Bps" latency="0.000013s"/>
+   <link id="RAM-CUDA2" bandwidth="5988971975.023217Bps" latency="0.000012s"/>
+   <link id="CUDA2-RAM" bandwidth="3554530215.805904Bps" latency="0.000013s"/>
+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796Bps" latency="0.000020s"/>
+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681Bps" latency="0.000064s"/>
+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693Bps" latency="0.000020s"/>
+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437Bps" latency="0.000036s"/>
+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525Bps" latency="0.000020s"/>
+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727Bps" latency="0.000037s"/>
+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
+ </AS>
+ </platform>

+ 70 - 0
tools/perfmodels/sampling/bus/hannibal.platform.xml

@@ -0,0 +1,70 @@
+<?xml version="1.0"?>
+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
+ <platform version="3">
+ <config id="General">
+   <prop id="network/TCP_gamma" value="-1"></prop>
+   <prop id="network/latency_factor" value="1"></prop>
+   <prop id="network/bandwidth_factor" value="1"></prop>
+ </config>
+ <AS  id="AS0"  routing="Full">
+   <host id="MAIN" power="1"/>
+   <host id="CPU0" power="2000000000"/>
+   <host id="CPU1" power="2000000000"/>
+   <host id="CPU2" power="2000000000"/>
+   <host id="CPU3" power="2000000000"/>
+   <host id="CPU4" power="2000000000"/>
+   <host id="CPU5" power="2000000000"/>
+   <host id="CPU6" power="2000000000"/>
+   <host id="CPU7" power="2000000000"/>
+   <host id="CUDA0" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA1" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="CUDA2" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+     <prop id="memcpy_peer" value="0"/>
+   </host>
+   <host id="OpenCL0" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL1" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+   <host id="OpenCL2" power="2000000000">
+     <prop id="memsize" value="3145728000"/>
+   </host>
+
+   <host id="RAM" power="1"/>
+
+   <link id="Share" bandwidth="5988971975.023217" latency="0.000000"/>
+
+   <link id="RAM-CUDA0" bandwidth="5988779905.433726" latency="0.000012"/>
+   <link id="CUDA0-RAM" bandwidth="3599738919.299022" latency="0.000012"/>
+   <link id="RAM-CUDA1" bandwidth="3149675860.319062" latency="0.000013"/>
+   <link id="CUDA1-RAM" bandwidth="3352127736.160954" latency="0.000013"/>
+   <link id="RAM-CUDA2" bandwidth="5988971975.023217" latency="0.000012"/>
+   <link id="CUDA2-RAM" bandwidth="3554530215.805904" latency="0.000013"/>
+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796" latency="0.000020"/>
+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681" latency="0.000064"/>
+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693" latency="0.000020"/>
+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437" latency="0.000036"/>
+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525" latency="0.000020"/>
+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727" latency="0.000037"/>
+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
+ </AS>
+ </platform>

+ 104 - 0
tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+617e5fe6	3686400	0	1.701016e+05	7.229737e+03	4.082438e+06	6.956835e+11	24
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+617e5fe6	3686400	0	1.188776e+05	9.331204e+02	2.113643e+08	2.512803e+13	1778
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+617e5fe6	3686400	0	1.205438e+05	2.044578e+03	2.189075e+08	2.639552e+13	1816
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal-pitch

@@ -0,0 +1 @@
+chol_model_11.hannibal

+ 104 - 0
tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	8.856576e+08	1.551780e+04	9.258624e+03	5.415867e+08	1.139602e+13	34901
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	8.856576e+08	1.787309e+04	1.121893e+04	5.782658e+08	1.440761e+13	32354
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	8.856576e+08	1.675795e+04	1.012077e+04	5.931309e+08	1.356507e+13	35394
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal-pitch

@@ -0,0 +1 @@
+chol_model_21.hannibal

+ 104 - 0
tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+24c84a50	11059200	1.769472e+09	5.763709e+03	3.768350e+03	4.501024e+09	3.703209e+13	780925
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+24c84a50	11059200	1.769472e+09	5.889910e+03	4.485232e+03	4.352661e+09	4.050353e+13	739003
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+24c84a50	11059200	1.769472e+09	5.782569e+03	3.939612e+03	4.412291e+09	3.735706e+13	763033
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal-pitch

@@ -0,0 +1 @@
+chol_model_22.hannibal

+ 104 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+617e5fe6	3686400	0.000000e+00	1.250229e+05	4.416720e+03	1.500275e+06	1.878028e+11	12
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+617e5fe6	3686400	0.000000e+00	8.424585e+04	1.140908e+03	4.802014e+07	4.046239e+12	570
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+617e5fe6	3686400	0.000000e+00	8.331807e+04	6.460292e+02	5.782274e+07	4.817969e+12	694
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal-pitch

@@ -0,0 +1 @@
+starpu_slu_lu_model_11.hannibal

+ 104 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	0.000000e+00	1.072902e+04	3.731292e+03	7.780684e+07	9.357572e+11	7252
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	0.000000e+00	1.250147e+04	5.489974e+03	7.944684e+07	1.184741e+12	6355
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	0.000000e+00	1.131230e+04	4.120480e+03	8.165221e+07	1.046224e+12	7218
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal-pitch

@@ -0,0 +1 @@
+starpu_slu_lu_model_12.hannibal

+ 104 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	0.000000e+00	1.103789e+04	3.664518e+03	7.889881e+07	9.668643e+11	7148
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	0.000000e+00	1.284524e+04	5.462619e+03	8.441889e+07	1.280490e+12	6572
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+ff82dda0	7372800	0.000000e+00	1.171798e+04	4.121992e+03	8.325626e+07	1.096315e+12	7105
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal-pitch

@@ -0,0 +1 @@
+starpu_slu_lu_model_21.hannibal

+ 104 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal

@@ -0,0 +1,104 @@
+##################
+#	Performance	Model	Version
+45
+
+####################
+# COMBs
+# number of combinations
+3
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+24c84a50	11059200	0.000000e+00	5.116253e+03	1.361494e+03	9.170526e+08	5.024130e+12	179243
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+24c84a50	11059200	0.000000e+00	5.228920e+03	1.967478e+03	8.761527e+08	5.229949e+12	167559
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+#	number	of	entries
+1
+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
+#	a	b	c
+nan	nan	nan
+# not multiple-regression-base
+0
+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
+24c84a50	11059200	0.000000e+00	5.131691e+03	1.494139e+03	8.920059e+08	4.965550e+12	173823
+

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal-pitch

@@ -0,0 +1 @@
+starpu_slu_lu_model_22.hannibal