#include "apps.h" #include "scc_signals.h" #include "libfunctions.h" #include "my_rtrm.h" #define SWAP(a,b) {float tmp; tmp=a; a=b; b=tmp;} #define FFT_MAX 136192 #define PAGE_SIZE 4096 /* static float input_vector[25][10] = { {-0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119}, {-0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467}, {-0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565}, {-0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533}, {-0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941}, {-1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784}, {0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575}, {0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371}, {1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241}, {1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752}, {-0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115}, {-0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876}, {-0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559}, {-0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523}, {1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364}, {-0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320}, {3.177341, 3.204259, 2.749250, 
1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579}, {-0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210}, {1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734}, {-0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990}, {-0.632187, -0.629597, -0.560808, -0.439143, -0.550906, -3.344673, -1.389866, 0.268649, 0.664319, 1.150327}, {-0.205756, -0.210472, -0.151426, -0.121347, 0.196067, -3.136218, -2.621049, -0.026517, 0.358534, 0.714117}, {-0.418011, -0.424854, -0.461205, -0.428858, -0.801747, 1.933860, -0.129047, -0.674498, -0.880092, -0.752953}, {-0.625461, -0.633598, -0.651167, -0.621632, -0.312866, -3.908468, -2.380095, -0.118114, 0.233478, 0.722539}, {-0.525633, -0.521436, -0.552314, -0.527505, -0.233392, -3.763046, -2.487090, -0.133160, 0.156544, 0.642195}, }; */ /* static float input_vector[2][D_sv] = { {-0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119, -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467, -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565, -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533, -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941, -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784, 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575, 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371, 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241, 1.455314, 1.464866, 2.487861, 2.464788, 
1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752}, {-0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115, -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876, -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559, -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523, 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364, -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320, 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579, -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210, 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734, -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990}, }; */ static float input_vector[1][D_sv] = { {-0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119, -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467, -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565, -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533, -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941, -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784, 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575, 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, 
-2.704303, -0.664257, 0.460104, 1.295371, 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241, 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752, -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115, -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876, -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559, -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523, 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364, -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320, 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579, -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210, 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734, -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990, -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119, -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467, -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565, -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533, -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941, -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784, 0.034026, 0.038159, -0.018497, 
-0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575, 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371, 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241, 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752, -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115, -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876, -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559, -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523, 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364, -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320, 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579, -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210, 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734, -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990, -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119, -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467, -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565, -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533, -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941, -1.786535, 
-1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784, 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575, 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371, 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241, 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752, -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115, -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876, -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559, -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523, 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364, -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320, 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579, -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210, 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734, -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990, -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119, -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467, -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, -0.322458, -0.539978, -0.457565, -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, 
-0.379533, -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941, -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784, 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575, 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371, 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241, 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752, -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115, -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876, -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559, -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523, 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364, -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320, 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579, -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210, 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734, -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990, -0.390695, -0.381094, -0.341754, -0.272043, -0.588159, 2.297114, -0.116822, -0.507031, -0.563574, -0.534119, -0.250457, -0.240673, -0.210423, -0.217804, -0.453731, 1.784484, -0.142005, -0.230148, -0.339593, -0.298467, -0.177671, -0.183887, -0.191644, -0.205313, -0.441613, 1.599526, 0.023297, 
-0.322458, -0.539978, -0.457565, -0.149024, -0.140364, -0.142723, -0.066651, -0.344126, 1.830863, 0.154307, -0.171898, -0.343950, -0.379533, -0.155498, -0.154145, -0.181875, -0.189322, -0.122244, -3.037579, -1.476675, -0.089878, 0.004249, 0.169941, -1.786535, -1.809749, -1.445913, -1.050310, -0.095173, 2.964720, 4.706277, -0.649227, -1.362017, -1.447784, 0.034026, 0.038159, -0.018497, -0.067303, 0.059710, -2.908602, -1.484229, 0.108804, 0.292757, 0.566575, 0.527511, 0.518252, 0.762342, 1.148066, 0.772846, -2.113671, -2.704303, -0.664257, 0.460104, 1.295371, 1.370790, 1.383803, 1.340450, 0.634441, -0.596868, -2.427651, -2.352223, 0.454767, 1.405430, 1.838241, 1.455314, 1.464866, 2.487861, 2.464788, 1.963237, 0.324097, -1.869693, -2.554004, -2.022594, 1.114752, -0.664593, -0.687087, -0.075215, 0.583554, 0.927597, 3.671647, 3.835722, 0.694133, -2.007198, -2.345115, -0.343777, -0.344074, -0.221304, -0.153775, -0.494637, 1.743759, -0.590775, -0.413656, -0.389204, -0.221876, -0.159571, -0.163080, -0.513428, -0.775665, -0.891170, -3.118664, -1.247074, 0.913688, 1.009656, 1.170559, -0.761538, -0.755972, -0.773967, -0.674253, -0.978258, 1.514730, -0.145977, -0.620700, -0.857082, -0.765523, 1.344641, 1.329151, 1.633256, 2.020196, 1.777348, -1.744907, -5.928727, -4.032057, -0.585966, 1.072364, -0.468882, -0.486856, 0.935241, 2.032263, 2.219547, -0.225632, -3.438104, -4.412505, -0.550391, 2.234320, 3.177341, 3.204259, 2.749250, 1.741244, 1.197538, -2.985864, -6.084715, -1.447878, 0.904210, 1.495579, -0.998953, -1.005918, -0.988911, -0.863153, -1.003750, 1.256336, -0.471785, -0.860056, -0.872804, -0.629210, 1.957319, 1.966453, 1.840960, 1.405216, 1.313205, -0.956540, -3.535391, -2.280320, -1.179478, -0.458734, -0.326054, -0.331524, -0.134208, -0.218622, -0.158037, 3.128528, 4.020623, -1.129257, -1.524952, -1.377990 }, }; static float **svm_vectors, *svm_coef; static int *vector, **matrix; //static float matr_speedup[NUM_OF_MATRICES][MAX_WORKERS_COUNT]; //static int 
matr_times[NUM_OF_MATRICES][MAX_WORKERS_COUNT]; static float Exec_Speedup[MAX_WORKERS_COUNT]; static int Exec_Latencies[MAX_WORKERS_COUNT]; //static float **vectors, *coef; //2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE); static int P = 1; /* DEFAULT_P = 1 */ static int M = 16; /* DEFAULT_M = 10 */ static int N = 65536; /* N = 2^M */ static int rootN = 256; /* rootN = sqrt(N) */ static int num_cache_lines = 65536; #define PADLENGTH 2 static float *x_local; /* x is the original time-domain data */ static float *trans; /* trans is used as scratch space */ static float *umain; /* umain is roots of unity for 1D FFTs */ static float *umain2; /* umain2 is entire roots of unity matrix*/ static float *upriv; void execute_workload_svm (int lower_bound, int upper_bound); void execute_workload_matrix (int lower_bound, int upper_bound); void matrix_transpose(int n1, float *src, float *dest, int node_id, int myFirst, int myLast, int pad_length); void FFT1D(int direction, int M, int N, float *x, float *scratch, float *upriv, float *umain2, int node_id, int myFirst, int myLast, int pad_length, int P); void copyColumn(int n1, float *src, float *dest); void single_FFT1D(int direction, int M, int N, float *u, float *x); void twiddle_Col(int direction, int n1, int N, int j, float *u, float *x, int pad_length); void reverse(int N, int M, float *x); int reverse_bit(int M, int k); void execute_workload_svm (int lower_bound, int upper_bound) { int i = 0, j = 0; float diff = 0, norma = 0, local_sum[N_sv]; int vector_id = 0; if (base_offset == -1) { base_offset = cur_agent.my_agent * N_sv; //fprintf(log_file, "My agent is %d. 
Calculated base_offset is %d\n",cur_agent.my_agent,base_offset); } for (i = lower_bound; i <= upper_bound; i++) { local_sum[i] = 0; scc_signals_check(); for (j = 0; j < D_sv; j++){ diff = input_vector[vector_id][j] - svm_vectors[j][i]; norma += diff*diff; } local_sum[i] += (float) (exp((double) (-gamma*norma))*svm_coef[i]); norma = 0; } /* for (i=lower_bound; i<=upper_bound; i++) manager_result_out[base_offset+i] = (int) local_sum[i]; */ } void execute_workload_matrix (int lower_bound, int upper_bound) { int i, j, local_sum[MAX_ARRAY_SIZE]; if (base_offset == -1) { //matrix_out = (int*) shmat (cur_agent.segment_id, NULL, 0); base_offset = cur_agent.my_agent * MAX_ARRAY_SIZE; } for (i=lower_bound; i<=upper_bound; i++) { local_sum[i] = 0; scc_signals_check(); //signals_enable(); for (j=0; j 0) { work_id = 1; } FFT1D(1, M, N, x_local, trans, upriv, umain2, work_id, lower_bound, upper_bound, pad_length, P); //HACK node_id - 1 important!! } void execute_workload (int lower_bound, int upper_bound) { if (executed_app == MATRIX_MUL) { execute_workload_matrix (lower_bound, upper_bound); } else if (executed_app == SVM) { execute_workload_svm (lower_bound, upper_bound); } else if (executed_app == FFT) { execute_workload_fft (lower_bound, upper_bound); } } void init_speedup_structs (void) { if (executed_app == MATRIX_MUL) { if (MATRIX_ARRAY_SIZE == 1024) { #ifdef PLAT_SCC Exec_Speedup[0] = 1.0; Exec_Speedup[1] = 1.188; Exec_Speedup[2] = 2.264; Exec_Speedup[3] = 3.0; Exec_Speedup[4] = 3.429; Exec_Speedup[5] = 4.0; Exec_Speedup[6] = 8.0; Exec_Speedup[7] = 0.0; Exec_Latencies[0] = 120;//29352; Exec_Latencies[1] = 101;//15112; Exec_Latencies[2] = 53;//11194; Exec_Latencies[3] = 40;//10313; Exec_Latencies[4] = 35;//8645; Exec_Latencies[5] = 30;//7871; Exec_Latencies[6] = 15;//6715; #else Exec_Speedup[0] = 1.0; Exec_Speedup[1] = 1.065; Exec_Speedup[2] = 1.270; Exec_Speedup[3] = 0.0; Exec_Speedup[4] = 0.0; Exec_Speedup[5] = 0.0; Exec_Speedup[6] = 0.0; Exec_Speedup[7] = 0.0; 
Exec_Latencies[0] = 100000000;//29352; Exec_Latencies[1] = 31;//15112; Exec_Latencies[2] = 29;//11194; Exec_Latencies[3] = 24;//10313; Exec_Latencies[4] = 0;//8645; Exec_Latencies[5] = 0;//7871; Exec_Latencies[6] = 0;//6715; Exec_Latencies[7] = 0;//7014; #endif } else if (MATRIX_ARRAY_SIZE == 2048) { #ifdef PLAT_SCC Exec_Speedup[0] = 1.0; Exec_Speedup[1] = 1.091; Exec_Speedup[2] = 1.2; Exec_Speedup[3] = 1.491; Exec_Speedup[4] = 1.791; Exec_Speedup[5] = 2.824; Exec_Speedup[6] = 3.0; Exec_Latencies[0] = 240;//112276; Exec_Latencies[1] = 220;//58880; Exec_Latencies[2] = 200;//40305; Exec_Latencies[3] = 161;//31705; Exec_Latencies[4] = 134;//28309; Exec_Latencies[5] = 85;//24512; Exec_Latencies[6] = 80;//22239; //matr_times[1][7] = 23;//20332; #else Exec_Speedup[0] = 1.0; Exec_Speedup[1] = 1.331; Exec_Speedup[2] = 2.009; Exec_Speedup[3] = 2.315; Exec_Speedup[4] = 2.572; Exec_Speedup[5] = 0.0; Exec_Speedup[6] = 0.0; Exec_Speedup[7] = 0.0;//5.522; Exec_Latencies[0] = 100000000;//112276; Exec_Latencies[1] = 116;//58880; Exec_Latencies[2] = 87;//40305; Exec_Latencies[3] = 58;//31705; Exec_Latencies[4] = 50;//28309; Exec_Latencies[5] = 45;//24512; Exec_Latencies[6] = 0;//22239; Exec_Latencies[7] = 0;//20332; #endif } else if (MATRIX_ARRAY_SIZE == 4096) { #ifdef PLAT_SCC Exec_Speedup[0] = 1.0; Exec_Speedup[1] = 2.001; Exec_Speedup[2] = 2.976; Exec_Speedup[3] = 4.032; Exec_Speedup[4] = 5.034; Exec_Speedup[5] = 6.25; Exec_Speedup[6] = 6.678; Exec_Speedup[7] = 6.819; Exec_Latencies[0] = 750;//384005; Exec_Latencies[1] = 374;//231583; Exec_Latencies[2] = 252;//157966; Exec_Latencies[3] = 186;//121222; Exec_Latencies[4] = 149;//101208; Exec_Latencies[5] = 120;//87852; Exec_Latencies[6] = 110;//78093; #else Exec_Speedup[0] = 1.0; Exec_Speedup[1] = 1.517; Exec_Speedup[2] = 1.958; Exec_Speedup[3] = 2.112; Exec_Speedup[4] = 2.878; Exec_Speedup[5] = 3.338; Exec_Speedup[6] = 4.241; Exec_Speedup[7] = 0.0;//5.073; Exec_Latencies[0] = 100000000;//384005; Exec_Latencies[1] = 431;//231583; 
Exec_Latencies[2] = 284;//157966; Exec_Latencies[3] = 220;//121222; Exec_Latencies[4] = 204;//101208; Exec_Latencies[5] = 150;//87852; Exec_Latencies[6] = 129;//78093; Exec_Latencies[7] = 102;//75690; #endif } else { printf("Unknown array size\n"); exit(0); } } else if (executed_app == SVM) { Exec_Speedup[0] = 1.0; /* 1 worker */ Exec_Speedup[1] = 2.006; Exec_Speedup[2] = 2.814; Exec_Speedup[3] = 3.469; Exec_Speedup[4] = 4.029; Exec_Speedup[5] = 4.285; Exec_Speedup[6] = 4.646; Exec_Speedup[7] = 0.0; Exec_Latencies[0] = 28; Exec_Latencies[1] = 14; Exec_Latencies[2] = 10; Exec_Latencies[3] = 8; Exec_Latencies[4] = 7; Exec_Latencies[5] = 7; Exec_Latencies[6] = 6; Exec_Latencies[7] = 6; } else if (executed_app == FFT) { Exec_Speedup[0] = 1.0; /* 1 worker */ Exec_Speedup[1] = 1.55; Exec_Speedup[2] = 0; Exec_Speedup[3] = 0; Exec_Speedup[4] = 0; Exec_Speedup[5] = 0; Exec_Speedup[6] = 0; Exec_Speedup[7] = 0; Exec_Latencies[0] = 772; Exec_Latencies[1] = 498; Exec_Latencies[2] = 0; Exec_Latencies[3] = 0; Exec_Latencies[4] = 0; Exec_Latencies[5] = 0; Exec_Latencies[6] = 0; Exec_Latencies[7] = 0; } } void app_init (char scen_directory[SCEN_DIR_SIZE], char scen_num[SCEN_NUM_SIZE]) { int i, j, pad_length = PADLENGTH; char buf[MAX_STR_NAME_SIZE], *buffer; FILE *matrix_input, *support_vectors_file, *coef_file, *umain_file, *umain2_file, *x_local_file; size_t bufsize = 32; if (executed_app == MATRIX_MUL) { cur_agent.array_size = MATRIX_ARRAY_SIZE; matrix = (int **) malloc(cur_agent.array_size * sizeof(int *)); #ifdef PLAT_SCC strcpy(buf, "/shared/herc/"); #else strcpy(buf, "../"); #endif strcat(buf, scen_directory); strcat(buf, "/MATRIX-inputs/"); strcat(buf, itoa(cur_agent.array_size)); fprintf(log_file,"matrix file path = %s\n",buf); if ((matrix_input = fopen(buf, "r")) == NULL){ printf("Cannot open input file with file path = %s ",buf); perror("open matrix_input"); } for (i=0; i get_max_cores_count(cur_app)) { return 0; } else { return Exec_Speedup[num_of_cores-2]; } } int 
/*
 * Mirror the low M bits of k.
 *
 * Bit 0 of k becomes bit M-1 of the result, bit 1 becomes bit M-2, and so
 * on; bits of k at position M and above are ignored.  Returns 0 when M is
 * zero or negative.
 */
int reverse_bit(int M, int k)
{
    int mirrored = 0;
    int bits_left;

    /* Peel bits off the bottom of k and push them onto the bottom of the
     * result, so the first bit taken ends up highest. */
    for (bits_left = M; bits_left > 0; bits_left--) {
        mirrored = (mirrored * 2) + (k & 1);
        k >>= 1;
    }

    return mirrored;
}