ソースを参照

Add stencil perfmodel

Samuel Thibault 10 年 前
コミット
d045a58e37

+ 136 - 0
tools/perfmodels/.starpu/sampling/codelets/44/cl_update.mirage

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+6d78e48f	4461600        	0.000000e+00   	6.670318e+03   	3.279077e+02   	6.103341e+06   	4.080961e+10   	915
+8ec75d42	14753312       	0.000000e+00   	2.178007e+04   	1.559694e+03   	1.008417e+07   	2.207603e+11   	463
+49ec0825	34613280       	0.000000e+00   	5.101465e+04   	2.613713e+03   	2.443602e+07   	1.249867e+12   	479
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+6d78e48f	4461600        	0.000000e+00   	1.028619e+03   	1.201323e+02   	5.626547e+05   	5.866515e+08   	547
+8ec75d42	14753312       	0.000000e+00   	1.871093e+03   	3.437894e+02   	1.981488e+06   	3.832713e+09   	1059
+49ec0825	34613280       	0.000000e+00   	5.018828e+03   	7.664203e+02   	4.672528e+06   	2.399748e+10   	931
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+6d78e48f	4461600        	0.000000e+00   	1.024201e+03   	1.096599e+02   	6.452464e+05   	6.684377e+08   	630
+8ec75d42	14753312       	0.000000e+00   	1.877457e+03   	3.608958e+02   	1.907496e+06   	3.713572e+09   	1016
+49ec0825	34613280       	0.000000e+00   	5.018101e+03   	7.255196e+02   	5.314169e+06   	2.722447e+10   	1059
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+6d78e48f	4461600        	0.000000e+00   	1.010004e+03   	1.090743e+02   	5.383321e+05   	5.500588e+08   	533
+8ec75d42	14753312       	0.000000e+00   	1.986058e+03   	3.264552e+02   	1.288952e+06   	2.629100e+09   	649
+49ec0825	34613280       	0.000000e+00   	5.064765e+03   	7.492118e+02   	4.948276e+06   	2.561026e+10   	977
+

+ 136 - 0
tools/perfmodels/.starpu/sampling/codelets/44/save_cl_bottom.mirage

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	1.291871e+01   	2.707056e+00   	1.452063e+04   	1.958246e+05   	1124
+4af260f6	14678040       	0.000000e+00   	2.737328e+01   	5.740626e+00   	2.241872e+04   	6.406639e+05   	819
+f2ff9ae5	34480152       	0.000000e+00   	4.727593e+01   	1.006422e+01   	4.179193e+04   	2.065291e+06   	884
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	4.529760e+01   	1.106616e+01   	3.940891e+03   	1.891669e+05   	87
+4af260f6	14678040       	0.000000e+00   	5.186998e+01   	9.163546e+00   	2.386019e+03   	1.276254e+05   	46
+f2ff9ae5	34480152       	0.000000e+00   	5.555996e+01   	1.123960e+01   	2.777998e+03   	1.606619e+05   	50
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	4.174505e+01   	7.717897e+00   	1.711547e+03   	7.389082e+04   	41
+4af260f6	14678040       	0.000000e+00   	4.338701e+01   	9.950636e+00   	8.677401e+03   	3.962895e+05   	200
+f2ff9ae5	34480152       	0.000000e+00   	5.523574e+01   	1.243105e+01   	2.154194e+03   	1.250152e+05   	39
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	5.145250e+01   	1.035300e+01   	1.955195e+03   	1.046727e+05   	38
+4af260f6	14678040       	0.000000e+00   	4.717138e+01   	8.365012e+00   	2.217055e+03   	1.078703e+05   	47
+f2ff9ae5	34480152       	0.000000e+00   	4.374296e+01   	8.852326e+00   	3.455694e+03   	1.573530e+05   	79
+

+ 136 - 0
tools/perfmodels/.starpu/sampling/codelets/44/save_cl_top.mirage

@@ -0,0 +1,136 @@
+##################
+# Performance Model Version
+44
+
+####################
+# COMBs
+# number of combinations
+4
+####################
+# COMB_0
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+0
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cpu0_impl0 (Comb0)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	1.161593e+01   	2.312881e+00   	1.684310e+04   	2.034051e+05   	1450
+4af260f6	14678040       	0.000000e+00   	2.793439e+01   	6.208645e+00   	2.807406e+04   	8.229715e+05   	1005
+f2ff9ae5	34480152       	0.000000e+00   	5.388292e+01   	1.191766e+01   	4.930288e+04   	2.786541e+06   	915
+
+####################
+# COMB_1
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+0
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda0_impl0 (Comb1)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	3.349897e+01   	6.495369e+00   	7.939257e+03   	2.759560e+05   	237
+4af260f6	14678040       	0.000000e+00   	3.814493e+01   	8.460348e+00   	1.609716e+04   	6.442306e+05   	422
+f2ff9ae5	34480152       	0.000000e+00   	3.894616e+01   	8.022125e+00   	1.339748e+04   	5.439182e+05   	344
+
+####################
+# COMB_2
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+1
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda1_impl0 (Comb2)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	4.505725e+01   	1.044174e+01   	7.209160e+02   	3.422697e+04   	16
+4af260f6	14678040       	0.000000e+00   	3.820932e+01   	8.787776e+00   	1.138638e+04   	4.580788e+05   	298
+f2ff9ae5	34480152       	0.000000e+00   	4.714002e+01   	1.060923e+01   	1.343491e+04   	6.654002e+05   	285
+
+####################
+# COMB_3
+# number of types devices
+1
+####################
+# DEV_0
+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)
+1
+####################
+# DEV_0
+# device id 
+2
+####################
+# DEV_0
+# number of cores 
+1
+##########
+# number of implementations
+1
+#####
+# Model for cuda2_impl0 (Comb3)
+# number of entries
+3
+# sumlnx	sumlnx2		sumlny		sumlnxlny	alpha		beta		n	minx		maxx
+0.000000e+00   	0.000000e+00   	0.000000e+00   	0.000000e+00   	nan            	nan            	0	0              	0              
+# a		b		c
+nan            	nan            	nan            
+# hash		size		flops		mean (us)	dev (us)	sum		sum2		n
+fb4b8624	4427800        	0.000000e+00   	2.397521e+01   	2.109607e+00   	5.754050e+02   	1.390227e+04   	24
+4af260f6	14678040       	0.000000e+00   	3.827520e+01   	8.943097e+00   	9.453975e+03   	3.816076e+05   	247
+f2ff9ae5	34480152       	0.000000e+00   	5.567087e+01   	1.159966e+01   	8.127947e+03   	4.721345e+05   	146
+