|
@@ -0,0 +1,939 @@
|
|
|
+#include "apps.h"
|
|
|
+#include "scc_signals.h"
|
|
|
+#include "libfunctions.h"
|
|
|
+#include "my_rtrm.h"
|
|
|
+
|
|
|
/*
 * SWAP(a, b) - exchange the values of two float lvalues.
 *
 * The temporary uses a name callers are unlikely to pick, so an argument
 * that happens to be called `tmp` is no longer shadowed inside the block,
 * and both arguments are parenthesized so compound lvalue expressions
 * expand as written. Each argument is evaluated twice: do not pass
 * expressions with side effects (e.g. SWAP(v[i++], w)).
 *
 * The expansion is a plain brace block (not do { } while (0)), so it can be
 * written with or without a trailing semicolon; avoid using it as the
 * unbraced body of an `if` that has an `else`.
 */
#define SWAP(a,b) {float swap_tmp_; swap_tmp_=(a); (a)=(b); (b)=swap_tmp_;}
/* Upper bound value that execute_workload_fft compares against to detect a
 * request covering the whole FFT range. */
#define FFT_MAX 136192
/* Extra bytes added to the large FFT buffer allocations in app_init. */
#define PAGE_SIZE 4096
|
|
|
+
|
|
|
+/*
|
|
|
+static float input_vector[25][10] = {
|
|
|
+ {-0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119},
|
|
|
+ {-0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467},
|
|
|
+ {-0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565},
|
|
|
+ {-0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533},
|
|
|
+ {-0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941},
|
|
|
+ {-1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784},
|
|
|
+ {0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575},
|
|
|
+ {0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371},
|
|
|
+ {1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241},
|
|
|
+ {1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752},
|
|
|
+ {-0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115},
|
|
|
+ {-0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876},
|
|
|
+ {-0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559},
|
|
|
+ {-0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523},
|
|
|
+ {1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364},
|
|
|
+ {-0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320},
|
|
|
+ {3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579},
|
|
|
+ {-0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210},
|
|
|
+ {1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734},
|
|
|
+ {-0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990},
|
|
|
+ {-0.632187, -0.629597, -0.560808, -0.439143, -0.550906, -3.344673, -1.389866, 0.268649, 0.664319, 1.150327},
|
|
|
+ {-0.205756, -0.210472, -0.151426, -0.121347, 0.196067, -3.136218, -2.621049, -0.026517, 0.358534, 0.714117},
|
|
|
+ {-0.418011, -0.424854, -0.461205, -0.428858, -0.801747, 1.933860, -0.129047, -0.674498, -0.880092, -0.752953},
|
|
|
+ {-0.625461, -0.633598, -0.651167, -0.621632, -0.312866, -3.908468, -2.380095, -0.118114, 0.233478, 0.722539},
|
|
|
+ {-0.525633, -0.521436, -0.552314, -0.527505, -0.233392, -3.763046, -2.487090, -0.133160, 0.156544, 0.642195},
|
|
|
+};
|
|
|
+*/
|
|
|
+/*
|
|
|
+static float input_vector[2][D_sv] = {
|
|
|
+ {-0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119,
|
|
|
+ -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467,
|
|
|
+ -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565,
|
|
|
+ -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533,
|
|
|
+ -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941,
|
|
|
+ -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784,
|
|
|
+ 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575,
|
|
|
+ 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371,
|
|
|
+ 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241,
|
|
|
+ 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752},
|
|
|
+ {-0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115,
|
|
|
+ -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876,
|
|
|
+ -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559,
|
|
|
+ -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523,
|
|
|
+ 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364,
|
|
|
+ -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320,
|
|
|
+ 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579,
|
|
|
+ -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210,
|
|
|
+ 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734,
|
|
|
+ -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990},
|
|
|
+};
|
|
|
+*/
|
|
|
+static float input_vector[1][D_sv] = {
|
|
|
+ {-0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119,
|
|
|
+ -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467,
|
|
|
+ -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565,
|
|
|
+ -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533,
|
|
|
+ -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941,
|
|
|
+ -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784,
|
|
|
+ 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575,
|
|
|
+ 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371,
|
|
|
+ 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241,
|
|
|
+ 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752,
|
|
|
+ -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115,
|
|
|
+ -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876,
|
|
|
+ -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559,
|
|
|
+ -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523,
|
|
|
+ 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364,
|
|
|
+ -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320,
|
|
|
+ 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579,
|
|
|
+ -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210,
|
|
|
+ 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734,
|
|
|
+ -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990,
|
|
|
+ -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119,
|
|
|
+ -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467,
|
|
|
+ -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565,
|
|
|
+ -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533,
|
|
|
+ -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941,
|
|
|
+ -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784,
|
|
|
+ 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575,
|
|
|
+ 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371,
|
|
|
+ 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241,
|
|
|
+ 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752,
|
|
|
+ -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115,
|
|
|
+ -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876,
|
|
|
+ -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559,
|
|
|
+ -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523,
|
|
|
+ 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364,
|
|
|
+ -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320,
|
|
|
+ 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579,
|
|
|
+ -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210,
|
|
|
+ 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734,
|
|
|
+ -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990,
|
|
|
+ -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119,
|
|
|
+ -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467,
|
|
|
+ -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565,
|
|
|
+ -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533,
|
|
|
+ -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941,
|
|
|
+ -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784,
|
|
|
+ 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575,
|
|
|
+ 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371,
|
|
|
+ 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241,
|
|
|
+ 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752,
|
|
|
+ -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115,
|
|
|
+ -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876,
|
|
|
+ -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559,
|
|
|
+ -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523,
|
|
|
+ 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364,
|
|
|
+ -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320,
|
|
|
+ 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579,
|
|
|
+ -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210,
|
|
|
+ 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734,
|
|
|
+ -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990,
|
|
|
+ -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119,
|
|
|
+ -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467,
|
|
|
+ -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565,
|
|
|
+ -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533,
|
|
|
+ -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941,
|
|
|
+ -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784,
|
|
|
+ 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575,
|
|
|
+ 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371,
|
|
|
+ 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241,
|
|
|
+ 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752,
|
|
|
+ -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115,
|
|
|
+ -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876,
|
|
|
+ -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559,
|
|
|
+ -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523,
|
|
|
+ 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364,
|
|
|
+ -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320,
|
|
|
+ 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579,
|
|
|
+ -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210,
|
|
|
+ 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734,
|
|
|
+ -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990,
|
|
|
+ -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119,
|
|
|
+ -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467,
|
|
|
+ -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565,
|
|
|
+ -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533,
|
|
|
+ -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941,
|
|
|
+ -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784,
|
|
|
+ 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575,
|
|
|
+ 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371,
|
|
|
+ 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241,
|
|
|
+ 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752,
|
|
|
+ -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115,
|
|
|
+ -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876,
|
|
|
+ -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559,
|
|
|
+ -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523,
|
|
|
+ 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364,
|
|
|
+ -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320,
|
|
|
+ 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579,
|
|
|
+ -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210,
|
|
|
+ 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734,
|
|
|
+ -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990
|
|
|
+ },
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
+static float **svm_vectors, *svm_coef;
|
|
|
+static int *vector, **matrix;
|
|
|
+//static float matr_speedup[NUM_OF_MATRICES][MAX_WORKERS_COUNT];
|
|
|
+//static int matr_times[NUM_OF_MATRICES][MAX_WORKERS_COUNT];
|
|
|
+
|
|
|
+static float Exec_Speedup[MAX_WORKERS_COUNT];
|
|
|
+static int Exec_Latencies[MAX_WORKERS_COUNT];
|
|
|
+//static float **vectors, *coef;
|
|
|
+
|
|
|
+//2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
|
|
|
+static int P = 1; /* DEFAULT_P = 1 */
|
|
|
+static int M = 16; /* DEFAULT_M = 10 */
|
|
|
+static int N = 65536; /* N = 2^M */
|
|
|
+static int rootN = 256; /* rootN = sqrt(N) */
|
|
|
+static int num_cache_lines = 65536;
|
|
|
+#define PADLENGTH 2
|
|
|
+
|
|
|
+
|
|
|
+static float *x_local; /* x is the original time-domain data */
|
|
|
+static float *trans; /* trans is used as scratch space */
|
|
|
+static float *umain; /* umain is roots of unity for 1D FFTs */
|
|
|
+static float *umain2; /* umain2 is entire roots of unity matrix*/
|
|
|
+static float *upriv;
|
|
|
+
|
|
|
/* Workload kernels dispatched by execute_workload; both bounds are inclusive. */
void execute_workload_svm (int lower_bound, int upper_bound);
void execute_workload_matrix (int lower_bound, int upper_bound);

/*
 * FFT routines. FFT1D is the entry point called by execute_workload_fft;
 * the remaining helpers are presumably internal steps of that transform --
 * confirm against their definitions.
 */
void matrix_transpose(int n1, float *src, float *dest, int node_id,
                      int myFirst, int myLast, int pad_length);
void FFT1D(int direction, int M, int N, float *x, float *scratch,
           float *upriv, float *umain2, int node_id,
           int myFirst, int myLast, int pad_length, int P);
void copyColumn(int n1, float *src, float *dest);
void single_FFT1D(int direction, int M, int N, float *u, float *x);
void twiddle_Col(int direction, int n1, int N, int j, float *u, float *x,
                 int pad_length);
void reverse(int N, int M, float *x);
int reverse_bit(int M, int k);
|
|
|
+
|
|
|
+void execute_workload_svm (int lower_bound, int upper_bound) {
|
|
|
+ int i = 0, j = 0;
|
|
|
+ float diff = 0, norma = 0, local_sum[N_sv];
|
|
|
+ int vector_id = 0;
|
|
|
+
|
|
|
+ if (base_offset == -1) {
|
|
|
+ base_offset = cur_agent.my_agent * N_sv;
|
|
|
+ //fprintf(log_file, "My agent is %d. Calculated base_offset is %d\n",cur_agent.my_agent,base_offset);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (i = lower_bound; i <= upper_bound; i++) {
|
|
|
+ local_sum[i] = 0;
|
|
|
+ scc_signals_check();
|
|
|
+
|
|
|
+ for (j = 0; j < D_sv; j++){
|
|
|
+ diff = input_vector[vector_id][j] - svm_vectors[j][i];
|
|
|
+ norma += diff*diff;
|
|
|
+ }
|
|
|
+ local_sum[i] += (float) (exp((double) (-gamma*norma))*svm_coef[i]);
|
|
|
+ norma = 0;
|
|
|
+ }
|
|
|
+ /*
|
|
|
+ for (i=lower_bound; i<=upper_bound; i++)
|
|
|
+ manager_result_out[base_offset+i] = (int) local_sum[i];
|
|
|
+ */
|
|
|
+}
|
|
|
+
|
|
|
+void execute_workload_matrix (int lower_bound, int upper_bound) {
|
|
|
+ int i, j, local_sum[MAX_ARRAY_SIZE];
|
|
|
+
|
|
|
+ if (base_offset == -1) {
|
|
|
+ //matrix_out = (int*) shmat (cur_agent.segment_id, NULL, 0);
|
|
|
+ base_offset = cur_agent.my_agent * MAX_ARRAY_SIZE;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (i=lower_bound; i<=upper_bound; i++) {
|
|
|
+ local_sum[i] = 0;
|
|
|
+ scc_signals_check();
|
|
|
+ //signals_enable();
|
|
|
+ for (j=0; j<cur_agent.array_size; j++)
|
|
|
+ local_sum[i] += matrix[i][j] * vector[j];
|
|
|
+ //signals_disable();
|
|
|
+ }
|
|
|
+
|
|
|
+ for (i=lower_bound; i<=upper_bound; i++)
|
|
|
+ manager_result_out[base_offset+i] = local_sum[i];
|
|
|
+}
|
|
|
+
|
|
|
+void execute_workload_fft (int lower_bound, int upper_bound) {
|
|
|
+ int work_id = 0, pad_length = PADLENGTH;
|
|
|
+
|
|
|
+ if ((lower_bound == 0) && (upper_bound == FFT_MAX)) {
|
|
|
+ P = 1;
|
|
|
+ } else {
|
|
|
+ P = 2;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* FIXME works only because fft is restricted to two workers */
|
|
|
+ if (lower_bound > 0) {
|
|
|
+ work_id = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ FFT1D(1, M, N, x_local, trans, upriv, umain2, work_id, lower_bound, upper_bound, pad_length, P); //HACK node_id - 1 important!!
|
|
|
+}
|
|
|
+
|
|
|
+void execute_workload (int lower_bound, int upper_bound) {
|
|
|
+
|
|
|
+ if (executed_app == MATRIX_MUL) {
|
|
|
+ execute_workload_matrix (lower_bound, upper_bound);
|
|
|
+ } else if (executed_app == SVM) {
|
|
|
+ execute_workload_svm (lower_bound, upper_bound);
|
|
|
+ } else if (executed_app == FFT) {
|
|
|
+ execute_workload_fft (lower_bound, upper_bound);
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+void init_speedup_structs (void) {
|
|
|
+
|
|
|
+ if (executed_app == MATRIX_MUL) {
|
|
|
+ if (MATRIX_ARRAY_SIZE == 1024) {
|
|
|
+#ifdef PLAT_SCC
|
|
|
+ Exec_Speedup[0] = 1.0;
|
|
|
+ Exec_Speedup[1] = 1.188;
|
|
|
+ Exec_Speedup[2] = 2.264;
|
|
|
+ Exec_Speedup[3] = 3.0;
|
|
|
+ Exec_Speedup[4] = 3.429;
|
|
|
+ Exec_Speedup[5] = 4.0;
|
|
|
+ Exec_Speedup[6] = 8.0;
|
|
|
+ Exec_Speedup[7] = 0.0;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 120;//29352;
|
|
|
+ Exec_Latencies[1] = 101;//15112;
|
|
|
+ Exec_Latencies[2] = 53;//11194;
|
|
|
+ Exec_Latencies[3] = 40;//10313;
|
|
|
+ Exec_Latencies[4] = 35;//8645;
|
|
|
+ Exec_Latencies[5] = 30;//7871;
|
|
|
+ Exec_Latencies[6] = 15;//6715;
|
|
|
+#else
|
|
|
+ Exec_Speedup[0] = 1.0;
|
|
|
+ Exec_Speedup[1] = 1.065;
|
|
|
+ Exec_Speedup[2] = 1.270;
|
|
|
+ Exec_Speedup[3] = 0.0;
|
|
|
+ Exec_Speedup[4] = 0.0;
|
|
|
+ Exec_Speedup[5] = 0.0;
|
|
|
+ Exec_Speedup[6] = 0.0;
|
|
|
+ Exec_Speedup[7] = 0.0;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 100000000;//29352;
|
|
|
+ Exec_Latencies[1] = 31;//15112;
|
|
|
+ Exec_Latencies[2] = 29;//11194;
|
|
|
+ Exec_Latencies[3] = 24;//10313;
|
|
|
+ Exec_Latencies[4] = 0;//8645;
|
|
|
+ Exec_Latencies[5] = 0;//7871;
|
|
|
+ Exec_Latencies[6] = 0;//6715;
|
|
|
+ Exec_Latencies[7] = 0;//7014;
|
|
|
+#endif
|
|
|
+ } else if (MATRIX_ARRAY_SIZE == 2048) {
|
|
|
+#ifdef PLAT_SCC
|
|
|
+ Exec_Speedup[0] = 1.0;
|
|
|
+ Exec_Speedup[1] = 1.091;
|
|
|
+ Exec_Speedup[2] = 1.2;
|
|
|
+ Exec_Speedup[3] = 1.491;
|
|
|
+ Exec_Speedup[4] = 1.791;
|
|
|
+ Exec_Speedup[5] = 2.824;
|
|
|
+ Exec_Speedup[6] = 3.0;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 240;//112276;
|
|
|
+ Exec_Latencies[1] = 220;//58880;
|
|
|
+ Exec_Latencies[2] = 200;//40305;
|
|
|
+ Exec_Latencies[3] = 161;//31705;
|
|
|
+ Exec_Latencies[4] = 134;//28309;
|
|
|
+ Exec_Latencies[5] = 85;//24512;
|
|
|
+ Exec_Latencies[6] = 80;//22239;
|
|
|
+ //matr_times[1][7] = 23;//20332;
|
|
|
+
|
|
|
+#else
|
|
|
+ Exec_Speedup[0] = 1.0;
|
|
|
+ Exec_Speedup[1] = 1.331;
|
|
|
+ Exec_Speedup[2] = 2.009;
|
|
|
+ Exec_Speedup[3] = 2.315;
|
|
|
+ Exec_Speedup[4] = 2.572;
|
|
|
+ Exec_Speedup[5] = 0.0;
|
|
|
+ Exec_Speedup[6] = 0.0;
|
|
|
+ Exec_Speedup[7] = 0.0;//5.522;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 100000000;//112276;
|
|
|
+ Exec_Latencies[1] = 116;//58880;
|
|
|
+ Exec_Latencies[2] = 87;//40305;
|
|
|
+ Exec_Latencies[3] = 58;//31705;
|
|
|
+ Exec_Latencies[4] = 50;//28309;
|
|
|
+ Exec_Latencies[5] = 45;//24512;
|
|
|
+ Exec_Latencies[6] = 0;//22239;
|
|
|
+ Exec_Latencies[7] = 0;//20332;
|
|
|
+#endif
|
|
|
+ } else if (MATRIX_ARRAY_SIZE == 4096) {
|
|
|
+#ifdef PLAT_SCC
|
|
|
+ Exec_Speedup[0] = 1.0;
|
|
|
+ Exec_Speedup[1] = 2.001;
|
|
|
+ Exec_Speedup[2] = 2.976;
|
|
|
+ Exec_Speedup[3] = 4.032;
|
|
|
+ Exec_Speedup[4] = 5.034;
|
|
|
+ Exec_Speedup[5] = 6.25;
|
|
|
+ Exec_Speedup[6] = 6.678;
|
|
|
+ Exec_Speedup[7] = 6.819;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 750;//384005;
|
|
|
+ Exec_Latencies[1] = 374;//231583;
|
|
|
+ Exec_Latencies[2] = 252;//157966;
|
|
|
+ Exec_Latencies[3] = 186;//121222;
|
|
|
+ Exec_Latencies[4] = 149;//101208;
|
|
|
+ Exec_Latencies[5] = 120;//87852;
|
|
|
+ Exec_Latencies[6] = 110;//78093;
|
|
|
+#else
|
|
|
+ Exec_Speedup[0] = 1.0;
|
|
|
+ Exec_Speedup[1] = 1.517;
|
|
|
+ Exec_Speedup[2] = 1.958;
|
|
|
+ Exec_Speedup[3] = 2.112;
|
|
|
+ Exec_Speedup[4] = 2.878;
|
|
|
+ Exec_Speedup[5] = 3.338;
|
|
|
+ Exec_Speedup[6] = 4.241;
|
|
|
+ Exec_Speedup[7] = 0.0;//5.073;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 100000000;//384005;
|
|
|
+ Exec_Latencies[1] = 431;//231583;
|
|
|
+ Exec_Latencies[2] = 284;//157966;
|
|
|
+ Exec_Latencies[3] = 220;//121222;
|
|
|
+ Exec_Latencies[4] = 204;//101208;
|
|
|
+ Exec_Latencies[5] = 150;//87852;
|
|
|
+ Exec_Latencies[6] = 129;//78093;
|
|
|
+ Exec_Latencies[7] = 102;//75690;
|
|
|
+#endif
|
|
|
+ } else {
|
|
|
+ printf("Unknown array size\n");
|
|
|
+ exit(0);
|
|
|
+ }
|
|
|
+ } else if (executed_app == SVM) {
|
|
|
+ Exec_Speedup[0] = 1.0; /* 1 worker */
|
|
|
+ Exec_Speedup[1] = 2.006;
|
|
|
+ Exec_Speedup[2] = 2.814;
|
|
|
+ Exec_Speedup[3] = 3.469;
|
|
|
+ Exec_Speedup[4] = 4.029;
|
|
|
+ Exec_Speedup[5] = 4.285;
|
|
|
+ Exec_Speedup[6] = 4.646;
|
|
|
+ Exec_Speedup[7] = 0.0;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 28;
|
|
|
+ Exec_Latencies[1] = 14;
|
|
|
+ Exec_Latencies[2] = 10;
|
|
|
+ Exec_Latencies[3] = 8;
|
|
|
+ Exec_Latencies[4] = 7;
|
|
|
+ Exec_Latencies[5] = 7;
|
|
|
+ Exec_Latencies[6] = 6;
|
|
|
+ Exec_Latencies[7] = 6;
|
|
|
+ } else if (executed_app == FFT) {
|
|
|
+ Exec_Speedup[0] = 1.0; /* 1 worker */
|
|
|
+ Exec_Speedup[1] = 1.55;
|
|
|
+ Exec_Speedup[2] = 0;
|
|
|
+ Exec_Speedup[3] = 0;
|
|
|
+ Exec_Speedup[4] = 0;
|
|
|
+ Exec_Speedup[5] = 0;
|
|
|
+ Exec_Speedup[6] = 0;
|
|
|
+ Exec_Speedup[7] = 0;
|
|
|
+
|
|
|
+ Exec_Latencies[0] = 772;
|
|
|
+ Exec_Latencies[1] = 498;
|
|
|
+ Exec_Latencies[2] = 0;
|
|
|
+ Exec_Latencies[3] = 0;
|
|
|
+ Exec_Latencies[4] = 0;
|
|
|
+ Exec_Latencies[5] = 0;
|
|
|
+ Exec_Latencies[6] = 0;
|
|
|
+ Exec_Latencies[7] = 0;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void app_init (char scen_directory[SCEN_DIR_SIZE], char scen_num[SCEN_NUM_SIZE]) {
|
|
|
+ int i, j, pad_length = PADLENGTH;
|
|
|
+ char buf[MAX_STR_NAME_SIZE], *buffer;
|
|
|
+ FILE *matrix_input, *support_vectors_file, *coef_file, *umain_file, *umain2_file, *x_local_file;
|
|
|
+ size_t bufsize = 32;
|
|
|
+
|
|
|
+ if (executed_app == MATRIX_MUL) {
|
|
|
+ cur_agent.array_size = MATRIX_ARRAY_SIZE;
|
|
|
+ matrix = (int **) malloc(cur_agent.array_size * sizeof(int *));
|
|
|
+ #ifdef PLAT_SCC
|
|
|
+ strcpy(buf, "/shared/herc/");
|
|
|
+ #else
|
|
|
+ strcpy(buf, "../");
|
|
|
+ #endif
|
|
|
+ strcat(buf, scen_directory);
|
|
|
+ strcat(buf, "/MATRIX-inputs/");
|
|
|
+ strcat(buf, itoa(cur_agent.array_size));
|
|
|
+ fprintf(log_file,"matrix file path = %s\n",buf);
|
|
|
+
|
|
|
+ if ((matrix_input = fopen(buf, "r")) == NULL){
|
|
|
+ printf("Cannot open input file with file path = %s ",buf);
|
|
|
+ perror("open matrix_input");
|
|
|
+ }
|
|
|
+
|
|
|
+ for (i=0; i<cur_agent.array_size; i++) {
|
|
|
+ matrix[i] = (int *) malloc(cur_agent.array_size * sizeof(int));
|
|
|
+ for (j=0; j<cur_agent.array_size; j++)
|
|
|
+ fscanf(matrix_input,"%d",&matrix[i][j]);
|
|
|
+ }
|
|
|
+
|
|
|
+ vector = (int *) malloc(cur_agent.array_size * sizeof(int));
|
|
|
+ for (j=0; j<cur_agent.array_size; j++)
|
|
|
+ fscanf(matrix_input,"%d",&vector[j]);
|
|
|
+
|
|
|
+ fclose(matrix_input);
|
|
|
+ } else if (executed_app == SVM) {
|
|
|
+ #ifdef PLAT_SCC
|
|
|
+ strcpy(buf, "/shared/herc/");
|
|
|
+ #else
|
|
|
+ strcpy(buf, "../");
|
|
|
+ #endif
|
|
|
+ strcat(buf,scen_directory);
|
|
|
+ //strcat(buf,"/");
|
|
|
+ //strcat(buf,scen_num);
|
|
|
+ strcat(buf,"/SVM-inputs/support_vectors_N_sv_");
|
|
|
+ strcat(buf,itoa(N_sv));
|
|
|
+ strcat(buf,"_D_sv_");
|
|
|
+ strcat(buf,itoa(D_sv));
|
|
|
+ strcat(buf,".dat");
|
|
|
+ fprintf(log_file,"svm file path = %s\n",buf);
|
|
|
+
|
|
|
+ if ((support_vectors_file = fopen(buf,"r")) == NULL){
|
|
|
+ printf("Cannot open input file with file path = %s ",buf);
|
|
|
+ perror("open svm_input");
|
|
|
+ }
|
|
|
+
|
|
|
+ #ifdef PLAT_SCC
|
|
|
+ strcpy(buf, "/shared/herc/");
|
|
|
+ #else
|
|
|
+ strcpy(buf, "../");
|
|
|
+ #endif
|
|
|
+ strcat(buf,scen_directory);
|
|
|
+ //strcat(buf,"/");
|
|
|
+ //strcat(buf,scen_num);
|
|
|
+ strcat(buf,"/SVM-inputs/sv_coef_N_sv_");
|
|
|
+ strcat(buf,itoa(N_sv));
|
|
|
+ strcat(buf,"_D_sv_");
|
|
|
+ strcat(buf,itoa(D_sv));
|
|
|
+ strcat(buf,".dat");
|
|
|
+ fprintf(log_file,"svm_coef file path = %s\n",buf);
|
|
|
+
|
|
|
+ if ((coef_file = fopen(buf,"r")) == NULL){
|
|
|
+ printf("Cannot open input file with file path = %s ",buf);
|
|
|
+ perror("open svm_input");
|
|
|
+ }
|
|
|
+
|
|
|
+ cur_agent.array_size = N_sv;
|
|
|
+ svm_vectors = (float **)malloc((D_sv)*sizeof(float *));
|
|
|
+ if (svm_vectors == NULL){
|
|
|
+ printf("--%d-- svm_vectors malloc fail!!\n", node_id);
|
|
|
+ perror("malloc error");
|
|
|
+ }
|
|
|
+ svm_coef = (float *)malloc((cur_agent.array_size)*sizeof(float));
|
|
|
+ if (svm_coef == NULL){
|
|
|
+ printf("--%d-- svm_coef malloc fail!!\n", node_id);
|
|
|
+ perror("malloc error");
|
|
|
+ }
|
|
|
+
|
|
|
+ buffer = (char *)malloc(bufsize * sizeof(char));
|
|
|
+ for (i = 0; i < D_sv; i++) {
|
|
|
+ svm_vectors[i] = (float *)malloc((cur_agent.array_size)*sizeof(float));
|
|
|
+ if (svm_vectors[i] == NULL) {
|
|
|
+ printf("--%d-- svm_vectors[%d] malloc fail!!\n", node_id, i);
|
|
|
+ perror("malloc error");
|
|
|
+ } else {
|
|
|
+ for (j = 0; j < N_sv; j++) {
|
|
|
+ /* Read support svm_vectors */
|
|
|
+ if (j < cur_agent.array_size){
|
|
|
+ fscanf(support_vectors_file,"%f",&svm_vectors[i][j]);
|
|
|
+ fgetc(support_vectors_file);
|
|
|
+ }else{
|
|
|
+ getline(&buffer,&bufsize,support_vectors_file);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for (j = 0; j < N_sv; j++) {
|
|
|
+ /* Read coefficients */
|
|
|
+ fscanf(coef_file,"%f",&svm_coef[j]);
|
|
|
+ fgetc(coef_file);
|
|
|
+ }
|
|
|
+
|
|
|
+ cur_agent.array_size = -1;
|
|
|
+ fclose(support_vectors_file);
|
|
|
+ fclose(coef_file);
|
|
|
+ free(buffer);
|
|
|
+ } else if (executed_app == FFT) {
|
|
|
+ fprintf(log_file,"Initializing FFT application\n");
|
|
|
+
|
|
|
+ x_local = (float *)malloc(2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
|
|
|
+ if (x_local == NULL){
|
|
|
+ printf("Malloc error for x_local\n");
|
|
|
+ perror("malloc error");
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ trans = (float *)malloc(2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
|
|
|
+ if (trans == NULL){
|
|
|
+ printf("Malloc error for trans\n");
|
|
|
+ perror("malloc error");
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ umain = (float *)malloc(2*rootN*sizeof(float));
|
|
|
+ if (umain == NULL){
|
|
|
+ printf("Malloc error for umain\n");
|
|
|
+ perror("malloc error");
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ umain2 = (float *)malloc(2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
|
|
|
+ if (umain2 == NULL){
|
|
|
+ printf("Malloc error for umain2\n");
|
|
|
+ perror("malloc error");
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ upriv = (float *)malloc(2*(rootN-1)*sizeof(float));
|
|
|
+ if (upriv == NULL){
|
|
|
+ printf("--%d-- Malloc error for upriv\n", node_id);
|
|
|
+ perror("malloc error");
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ #ifdef PLAT_SCC
|
|
|
+ strcpy(buf, "/shared/herc/");
|
|
|
+ #else
|
|
|
+ strcpy(buf, "../");
|
|
|
+ #endif
|
|
|
+ strcat(buf,scen_directory);
|
|
|
+ //strcat(buf,"/");
|
|
|
+ //strcat(buf,scen_num);
|
|
|
+ strcat(buf,"/FFT-inputs/umain_file");
|
|
|
+ fprintf(log_file,"umain_file file path = %s\n",buf);
|
|
|
+
|
|
|
+ if ((umain_file = fopen(buf,"r")) == NULL){
|
|
|
+ printf("Cannot open input file with file path = %s ",buf);
|
|
|
+ perror("open fft_input");
|
|
|
+ }
|
|
|
+
|
|
|
+ for (i=0; i<2*rootN; i++) {
|
|
|
+ fscanf(umain_file,"%f",&umain[i]);
|
|
|
+ }
|
|
|
+ fclose(umain_file);
|
|
|
+
|
|
|
+ #ifdef PLAT_SCC
|
|
|
+ strcpy(buf, "/shared/herc/");
|
|
|
+ #else
|
|
|
+ strcpy(buf, "../");
|
|
|
+ #endif
|
|
|
+ strcat(buf,scen_directory);
|
|
|
+ //strcat(buf,"/");
|
|
|
+ //strcat(buf,scen_num);
|
|
|
+ strcat(buf,"/FFT-inputs/umain2_file");
|
|
|
+ fprintf(log_file,"umain2_file file path = %s\n",buf);
|
|
|
+
|
|
|
+ if ((umain2_file = fopen(buf,"r")) == NULL){
|
|
|
+ printf("Cannot open input file with file path = %s ",buf);
|
|
|
+ perror("open umain_file");
|
|
|
+ }
|
|
|
+
|
|
|
+ //for (i=0; i<2*(N+rootN*pad_length)+PAGE_SIZE; i++) {
|
|
|
+ for (i=0; i<2*(N+rootN*pad_length); i++) {
|
|
|
+ fscanf(umain2_file,"%f",&umain2[i]);
|
|
|
+ }
|
|
|
+ fclose(umain2_file);
|
|
|
+
|
|
|
+ #ifdef PLAT_SCC
|
|
|
+ strcpy(buf, "/shared/herc/");
|
|
|
+ #else
|
|
|
+ strcpy(buf, "../");
|
|
|
+ #endif
|
|
|
+ strcat(buf,scen_directory);
|
|
|
+ //strcat(buf,"/");
|
|
|
+ //strcat(buf,scen_num);
|
|
|
+ strcat(buf,"/FFT-inputs/x_local_file");
|
|
|
+ fprintf(log_file,"x_local_file file path = %s\n",buf);
|
|
|
+
|
|
|
+ if ((x_local_file = fopen(buf,"r")) == NULL){
|
|
|
+ printf("Cannot open input file with file path = %s ",buf);
|
|
|
+ perror("open x_local_file");
|
|
|
+ }
|
|
|
+
|
|
|
+ //for (i=0;i<2*(N+rootN*pad_length)+PAGE_SIZE;i++) {
|
|
|
+ for (i=0; i<2*(N+rootN*pad_length); i++) {
|
|
|
+ fscanf(x_local_file,"%f",&x_local[i]);
|
|
|
+ }
|
|
|
+ fclose(x_local_file);
|
|
|
+
|
|
|
+ for (i = 0; i < 2*(rootN-1); i++){
|
|
|
+ upriv[i] = umain[i];
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+int get_max_cores_count(app cur_app){
|
|
|
+
|
|
|
+ /*if (cur_app.var < 1.0)
|
|
|
+ return (int) ceilf(2.0*cur_app.A - 1);
|
|
|
+ else
|
|
|
+ return (int) ceilf(cur_app.A + cur_app.A*cur_app.var - cur_app.var);*/
|
|
|
+#ifdef SINGLE_WORKER
|
|
|
+ return 2;
|
|
|
+#else
|
|
|
+ if (executed_app == FFT) {
|
|
|
+ return 3;
|
|
|
+ } else {
|
|
|
+ return MAX_WORKERS_COUNT;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
+float Speedup(app cur_app, int num_of_cores) {
|
|
|
+ /*
|
|
|
+ int type;
|
|
|
+
|
|
|
+ if (cur_app.array_size == 1024) type = 0;
|
|
|
+ else if (cur_app.array_size == 2048) type = 1;
|
|
|
+ else if (cur_app.array_size == 4096) type = 2;
|
|
|
+ else {
|
|
|
+ fprintf(log_file, "Unknown array size = %d\n",cur_app.array_size);
|
|
|
+ fflush(log_file);
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ return matr_speedup[type][num_of_cores-1];
|
|
|
+ */
|
|
|
+ if (num_of_cores > get_max_cores_count(cur_app)) {
|
|
|
+ return 0;
|
|
|
+ } else {
|
|
|
+ return Exec_Speedup[num_of_cores-2];
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+int get_times(app cur_app, int num_of_cores) {
|
|
|
+ /*
|
|
|
+ int type;
|
|
|
+
|
|
|
+ if (cur_app.array_size == 1024) type = 0;
|
|
|
+ else if (cur_app.array_size == 2048) type = 1;
|
|
|
+ else if (cur_app.array_size == 4096) type = 2;
|
|
|
+ else {
|
|
|
+ fprintf(log_file, "Unknown array size = %d\n",cur_app.array_size);
|
|
|
+ fflush(log_file);
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
+ return (cur_app.workld * matr_times[type][num_of_cores-2]);
|
|
|
+ */
|
|
|
+ return (cur_app.workld * Exec_Latencies[num_of_cores-2]);
|
|
|
+}
|
|
|
+
|
|
|
+void matrix_transpose(int n1, float *src, float *dest, int node_id, int myFirst, int myLast, int pad_length){
|
|
|
+ int i;
|
|
|
+ int j;
|
|
|
+ int k;
|
|
|
+ int l;
|
|
|
+ int m;
|
|
|
+ int blksize;
|
|
|
+ int numblks;
|
|
|
+ int firstfirst;
|
|
|
+ int h_off;
|
|
|
+ int v_off;
|
|
|
+ int v;
|
|
|
+ int h;
|
|
|
+ int n1p;
|
|
|
+ int row_count;
|
|
|
+
|
|
|
+ //fprintf(log_file,"I am inside matrix_transpose-0 node_id is %d n1 %d\n",node_id,n1);
|
|
|
+ blksize = myLast-myFirst;
|
|
|
+ numblks = (2*blksize)/num_cache_lines;
|
|
|
+ if (numblks * num_cache_lines != 2 * blksize) {
|
|
|
+ numblks ++;
|
|
|
+ }
|
|
|
+ blksize = blksize / numblks;
|
|
|
+ firstfirst = myFirst;
|
|
|
+ row_count = n1/P;
|
|
|
+ n1p = n1+pad_length;
|
|
|
+ for (l=node_id+1;l<P;l++) {
|
|
|
+ v_off = l*row_count;
|
|
|
+ for (k=0; k<numblks; k++) {
|
|
|
+ h_off = firstfirst;
|
|
|
+ for (m=0; m<numblks; m++) {
|
|
|
+ for (i=0; i<blksize; i++) {
|
|
|
+ v = v_off + i;
|
|
|
+ for (j=0; j<blksize; j++) {
|
|
|
+ h = h_off + j;
|
|
|
+ //fprintf(log_file,"Index dest is %d\n",2*(h*n1p+v));
|
|
|
+ //fprintf(log_file,"Index src is %d\n",2*(v*n1p+h));
|
|
|
+ //fprintf(log_file,"src = %f\n",src[2*(v*n1p+h)]);
|
|
|
+ //fprintf(log_file,"src + 1 = %f\n",src[2*(v*n1p+h)+1]);
|
|
|
+ //fprintf(log_file,"dest = %f\n",dest[2*(h*n1p+v)]);
|
|
|
+ //fprintf(log_file,"dest + 1 = %f\n",dest[2*(h*n1p+v)+1]);
|
|
|
+ //fflush(log_file);
|
|
|
+ dest[2*(h*n1p+v)] = src[2*(v*n1p+h)];
|
|
|
+ dest[2*(h*n1p+v)+1] = src[2*(v*n1p+h)+1];
|
|
|
+ //fprintf(log_file,"yolo\n");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ h_off += blksize;
|
|
|
+ }
|
|
|
+ v_off+=blksize;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //fprintf(log_file,"I am inside matrix_transpose-A\n");
|
|
|
+
|
|
|
+ for (l=0;l<node_id;l++) {
|
|
|
+ v_off = l*row_count;
|
|
|
+ for (k=0; k<numblks; k++) {
|
|
|
+ h_off = firstfirst;
|
|
|
+ for (m=0; m<numblks; m++) {
|
|
|
+ for (i=0; i<blksize; i++) {
|
|
|
+ v = v_off + i;
|
|
|
+ for (j=0; j<blksize; j++) {
|
|
|
+ h = h_off + j;
|
|
|
+ dest[2*(h*n1p+v)] = src[2*(v*n1p+h)];
|
|
|
+ dest[2*(h*n1p+v)+1] = src[2*(v*n1p+h)+1];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ h_off += blksize;
|
|
|
+ }
|
|
|
+ v_off+=blksize;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //fprintf(log_file,"I am inside matrix_transpose-B\n");
|
|
|
+
|
|
|
+ v_off = node_id*row_count;
|
|
|
+ for (k=0; k<numblks; k++) {
|
|
|
+ h_off = firstfirst;
|
|
|
+ for (m=0; m<numblks; m++) {
|
|
|
+ for (i=0; i<blksize; i++) {
|
|
|
+ v = v_off + i;
|
|
|
+ for (j=0; j<blksize; j++) {
|
|
|
+ h = h_off + j;
|
|
|
+ dest[2*(h*n1p+v)] = src[2*(v*n1p+h)];
|
|
|
+ dest[2*(h*n1p+v)+1] = src[2*(v*n1p+h)+1];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ h_off += blksize;
|
|
|
+ }
|
|
|
+ v_off+=blksize;
|
|
|
+ }
|
|
|
+ //fprintf(log_file,"I am inside matrix_transpose-C\n");
|
|
|
+}
|
|
|
+
|
|
|
+//FFT1D(1, M, N, x_local, trans, upriv, umain2, work_id, lower_bound, upper_bound, pad_length, P);
|
|
|
/*
 * Run this node's part of a 1D FFT that is organised as a square matrix of
 * (re, im) pairs with side 2^(M/2).
 *
 * direction  - passed through to single_FFT1D() and twiddle_Col()
 * M          - the matrix side is 1 << (M/2)
 * N          - forwarded to twiddle_Col()
 * x          - data matrix, transformed in place
 * scratch    - work matrix with the same layout as x
 * upriv      - coefficient table handed to single_FFT1D()
 * umain2     - coefficient table handed to twiddle_Col()
 * node_id    - index of the calling node
 * myFirst    - first column processed by this node; myLast is one past the last
 * pad_length - padding, in pairs, after every matrix row
 * P          - not used in this body (matrix_transpose() is not passed it)
 *
 * Steps: transpose x into scratch, FFT and twiddle each owned column of
 * scratch, transpose back into x, FFT each owned column of x, and transpose
 * x into scratch once more.
 */
void FFT1D(int direction, int M, int N, float *x, float *scratch, float *upriv, float *umain2, int node_id, int myFirst, int myLast, int pad_length, int P){
    const int half_bits = M/2;
    const int side = 1 << half_bits;
    const int stride = side + pad_length;   /* pairs per stored row */
    int col;

    printf("I am %d and I am inside FFT1D\n",node_id);

    matrix_transpose(side, x, scratch, node_id, myFirst, myLast, pad_length);

    /* First pass: FFT of every owned column followed by the twiddle step. */
    for (col = myFirst; col < myLast; col++){
        float *column = &scratch[2*col*stride];

        single_FFT1D(direction, half_bits, side, upriv, column);
        twiddle_Col(direction, side, N, col, umain2, column, pad_length);
    }

    matrix_transpose(side, scratch, x, node_id, myFirst, myLast, pad_length);

    /* Second pass: FFT of every owned column again, now on x. */
    for (col = myFirst; col < myLast; col++) {
        single_FFT1D(direction, half_bits, side, upriv, &x[2*col*stride]);
    }

    matrix_transpose(side, x, scratch, node_id, myFirst, myLast, pad_length);
}
|
|
|
+
|
|
|
/*
 * Copy n1 (re, im) float pairs, i.e. 2*n1 floats, from src to dest.
 * Nothing is copied when n1 is zero or negative.
 */
void copyColumn(int n1, float *src, float *dest){
    int pair;

    for (pair = 0; pair < n1; pair++) {
        const float *from = src + 2*pair;
        float *to = dest + 2*pair;

        to[0] = from[0];
        to[1] = from[1];
    }
}
|
|
|
+
|
|
|
/*
 * In-place FFT of one column of N (re, im) float pairs.
 *
 * direction - multiplies the imaginary part of every coefficient read from u
 * M         - number of butterfly stages; the code is written for N == 1 << M
 * N         - number of pairs in x
 * u         - coefficient table; stage q reads its pairs starting at pair
 *             index (1 << q)/2 - 1
 * x         - data, reordered with reverse() and then transformed in place
 */
void single_FFT1D(int direction, int M, int N, float *u, float *x){
    int stage;

    reverse(N, M, x);

    for (stage = 1; stage <= M; stage++) {
        const int span = 1 << stage;       /* pairs per butterfly group */
        const int groups = N/span;
        const int half = span/2;
        float *coeff = &u[2*(half-1)];
        int g;

        for (g = 0; g < groups; g++) {
            float *lo = &x[2*(g*span)];
            float *hi = &x[2*(g*span+half)];
            int t;

            for (t = 0; t < half; t++) {
                const float w_re = coeff[2*t];
                const float w_im = direction*coeff[2*t+1];
                const float hi_re = hi[2*t];
                const float hi_im = hi[2*t+1];
                /* Complex product of the coefficient and the upper element. */
                const float prod_re = w_re*hi_re - w_im*hi_im;
                const float prod_im = w_re*hi_im + w_im*hi_re;
                const float lo_re = lo[2*t];
                const float lo_im = lo[2*t+1];

                hi[2*t]   = lo_re - prod_re;
                hi[2*t+1] = lo_im - prod_im;
                lo[2*t]   = lo_re + prod_re;
                lo[2*t+1] = lo_im + prod_im;
            }
        }
    }
}
|
|
|
+
|
|
|
/*
 * Multiply each of the n1 (re, im) pairs in x by the matching complex
 * coefficient from row j of u, in place.
 *
 * direction  - multiplies the imaginary part of every coefficient
 * n1         - number of pairs in x
 * N          - not used in this body
 * j          - row of u to read; rows are n1 + pad_length pairs apart
 * u          - coefficient matrix (interleaved re/im floats)
 * x          - column that is scaled in place
 * pad_length - padding, in pairs, after every row of u
 */
void twiddle_Col(int direction, int n1, int N, int j, float *u, float *x, int pad_length){
    const int row_base = j*(n1+pad_length);
    int i;

    for (i = 0; i < n1; i++) {
        const int w = 2*(row_base+i);
        const float w_re = u[w];
        const float w_im = direction*u[w+1];
        const float re = x[2*i];
        const float im = x[2*i+1];

        x[2*i]   = w_re*re - w_im*im;
        x[2*i+1] = w_re*im + w_im*re;
    }
}
|
|
|
+
|
|
|
/*
 * Reorder N (re, im) float pairs in place so that the pair at index k trades
 * places with the pair at index reverse_bit(M, k).
 *
 * Each couple is exchanged once: the swap is done only from the smaller of
 * the two indices.
 */
void reverse(int N, int M, float *x){
    int k;

    for (k = 0; k < N; k++){
        const int mate = reverse_bit(M, k);
        float held;

        if (mate <= k){
            continue;
        }

        held = x[2*mate];
        x[2*mate] = x[2*k];
        x[2*k] = held;

        held = x[2*mate+1];
        x[2*mate+1] = x[2*k+1];
        x[2*k+1] = held;
    }
}
|
|
|
+
|
|
|
/*
 * Mirror the M least significant bits of k: bit 0 of k becomes bit M-1 of
 * the result, bit 1 becomes bit M-2, and so on. Higher bits of k are
 * ignored. Returns 0 when M is zero or negative.
 */
int reverse_bit(int M, int k){
    int mirrored = 0;
    int remaining = k;
    int left;

    for (left = M; left > 0; left--){
        /* Push the current lowest bit of k onto the low end of the result. */
        mirrored = (mirrored << 1) | (remaining & 1);
        remaining >>= 1;
    }

    return mirrored;
}
|