12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853 |
- #include "apps.h"
- #include "scc_signals.h"
- #include "libfunctions.h"
- #include "my_rtrm.h"
- #include <time.h>
- #define SWAP(a,b) {float tmp; tmp=a; a=b; b=tmp;}
- #define FFT_MAX 136192
- #define PAGE_SIZE 4096
- #define ARTIFICIAL_ROUND_DURATION_SEC 1
- #define ARTIFICIAL_ROUND_DURATION_NSEC 500000000 /* 1 ms */
-
- static float **svm_vectors, *svm_coef;
- static int *vector, **matrix;
- static float input_vector[D_sv];
- //static float matr_speedup[NUM_OF_MATRICES][MAX_WORKERS_COUNT];
- //static int matr_times[NUM_OF_MATRICES][MAX_WORKERS_COUNT];
- static float Exec_Speedup[MAX_WORKERS_COUNT];
- static int Exec_Latencies[MAX_WORKERS_COUNT];
- //static float **vectors, *coef;
- //2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
- static int P = 1; /* DEFAULT_P = 1 */
- static int M = 16; /* DEFAULT_M = 10 */
- static int N = 65536; /* N = 2^M */
- static int rootN = 256; /* rootN = sqrt(N) */
- static int num_cache_lines = 65536;
- #define PADLENGTH 2
- static float *x_local; /* x is the original time-domain data */
- static float *trans; /* trans is used as scratch space */
- static float *umain; /* umain is roots of unity for 1D FFTs */
- static float *umain2; /* umain2 is entire roots of unity matrix*/
- static float *upriv;
- void execute_workload_svm (int lower_bound, int upper_bound);
- void execute_workload_matrix (int lower_bound, int upper_bound);
- void matrix_transpose(int n1, float *src, float *dest, int node_id, int myFirst, int myLast, int pad_length);
- void FFT1D(int direction, int M, int N, float *x, float *scratch, float *upriv, float *umain2, int node_id, int myFirst, int myLast, int pad_length, int P);
- void copyColumn(int n1, float *src, float *dest);
- void single_FFT1D(int direction, int M, int N, float *u, float *x);
- void twiddle_Col(int direction, int n1, int N, int j, float *u, float *x, int pad_length);
- void reverse(int N, int M, float *x);
- int reverse_bit(int M, int k);
- void execute_workload_svm (int lower_bound, int upper_bound) {
- int i = 0, j = 0;
- float diff = 0, norma = 0, local_sum[N_sv];
- /* int vector_id = 0; Removed 16.02. Only one test vector */
- if (base_offset == -1) {
- base_offset = cur_agent.my_agent * N_sv;
- //fprintf(log_file, "My agent is %d. Calculated base_offset is %d\n",cur_agent.my_agent,base_offset);
- }
-
- for (i = lower_bound; i <= upper_bound; i++) {
- local_sum[i] = 0;
- scc_signals_check();
-
- for (j = 0; j < D_sv; j++){
- diff = svm_vectors[i][j] - input_vector[j];
- norma += diff*diff;
- }
- local_sum[i] += (float) (exp((double) (-gamma*norma))*svm_coef[i]);
- norma = 0;
- }
-
- for (i=lower_bound; i<=upper_bound; i++)
- manager_result_out[base_offset+i] = (int) local_sum[i];
- }
- void execute_workload_matrix (int lower_bound, int upper_bound) {
- int i, j, local_sum[MAX_ARRAY_SIZE];
-
- if (base_offset == -1) {
- //matrix_out = (int*) shmat (cur_agent.segment_id, NULL, 0);
- base_offset = cur_agent.my_agent * MAX_ARRAY_SIZE;
- }
-
- for (i=lower_bound; i<=upper_bound; i++) {
- local_sum[i] = 0;
- scc_signals_check();
- //signals_enable();
- for (j=0; j<cur_agent.array_size; j++)
- local_sum[i] += matrix[i][j] * vector[j];
- //signals_disable();
- }
- for (i=lower_bound; i<=upper_bound; i++)
- manager_result_out[base_offset+i] = local_sum[i];
- }
- void execute_workload_fft (int lower_bound, int upper_bound) {
- int work_id = 0, pad_length = PADLENGTH;
-
- if ((lower_bound == 0) && (upper_bound == FFT_MAX)) {
- P = 1;
- } else {
- P = 2;
- }
-
- /* FIXME works only because fft is restricted to two workers */
- if (lower_bound > 0) {
- work_id = 1;
- }
-
- FFT1D(1, M, N, x_local, trans, upriv, umain2, work_id, lower_bound, upper_bound, pad_length, P); //HACK node_id - 1 important!!
- }
- void execute_workload_artificial (int lower_bound, int upper_bound) {
- int AppSpeedup = upper_bound - lower_bound;
- struct timespec ts;
- //if (base_offset == -1) {
- // base_offset = cur_agent.my_agent * MAX_ARRAY_SIZE; /* FIXME Why is it always MAX_ARRAY_SIZE */
- //}
-
- ts.tv_sec = 0;
- ts.tv_nsec = ARTIFICIAL_ROUND_DURATION_NSEC / AppSpeedup;
- nanosleep(&ts, NULL);
- /*
- for (i=lower_bound; i<=upper_bound; i++) {
- sleep(ARTIFICIAL_ROUND_DURATION_SEC);
- }
- */
- /*
- for (i=lower_bound; i<=upper_bound; i++)
- manager_result_out[base_offset+i] = -1;
- */
- }
- void execute_workload (int lower_bound, int upper_bound) {
-
- if (executed_app == MATRIX_MUL) {
- execute_workload_matrix (lower_bound, upper_bound);
- } else if (executed_app == SVM) {
- execute_workload_svm (lower_bound, upper_bound);
- } else if (executed_app == FFT) {
- execute_workload_fft (lower_bound, upper_bound);
- } else if (executed_app == ARTIFICIAL) {
- execute_workload_artificial (lower_bound, upper_bound);
- }
- }
- void init_speedup_structs (void) {
-
- if (executed_app == MATRIX_MUL) {
- if (MATRIX_ARRAY_SIZE == 1024) {
- #ifdef PLAT_SCC
- Exec_Speedup[0] = 1.0;
- Exec_Speedup[1] = 1.188;
- Exec_Speedup[2] = 2.264;
- Exec_Speedup[3] = 3.0;
- Exec_Speedup[4] = 3.429;
- Exec_Speedup[5] = 4.0;
- Exec_Speedup[6] = 8.0;
- Exec_Speedup[7] = 0.0;
-
- Exec_Latencies[0] = 120;//29352;
- Exec_Latencies[1] = 101;//15112;
- Exec_Latencies[2] = 53;//11194;
- Exec_Latencies[3] = 40;//10313;
- Exec_Latencies[4] = 35;//8645;
- Exec_Latencies[5] = 30;//7871;
- Exec_Latencies[6] = 15;//6715;
- #else
- Exec_Speedup[0] = 1.0;
- Exec_Speedup[1] = 1.065;
- Exec_Speedup[2] = 1.270;
- Exec_Speedup[3] = 0.0;
- Exec_Speedup[4] = 0.0;
- Exec_Speedup[5] = 0.0;
- Exec_Speedup[6] = 0.0;
- Exec_Speedup[7] = 0.0;
-
- Exec_Latencies[0] = 100000000;//29352;
- Exec_Latencies[1] = 31;//15112;
- Exec_Latencies[2] = 29;//11194;
- Exec_Latencies[3] = 24;//10313;
- Exec_Latencies[4] = 0;//8645;
- Exec_Latencies[5] = 0;//7871;
- Exec_Latencies[6] = 0;//6715;
- Exec_Latencies[7] = 0;//7014;
- #endif
- } else if (MATRIX_ARRAY_SIZE == 2048) {
- #ifdef PLAT_SCC
- Exec_Speedup[0] = 1.0;
- Exec_Speedup[1] = 1.091;
- Exec_Speedup[2] = 1.2;
- Exec_Speedup[3] = 1.491;
- Exec_Speedup[4] = 1.791;
- Exec_Speedup[5] = 2.824;
- Exec_Speedup[6] = 3.0;
- Exec_Latencies[0] = 240;//112276;
- Exec_Latencies[1] = 220;//58880;
- Exec_Latencies[2] = 200;//40305;
- Exec_Latencies[3] = 161;//31705;
- Exec_Latencies[4] = 134;//28309;
- Exec_Latencies[5] = 85;//24512;
- Exec_Latencies[6] = 80;//22239;
- //matr_times[1][7] = 23;//20332;
- #else
- Exec_Speedup[0] = 1.0;
- Exec_Speedup[1] = 1.331;
- Exec_Speedup[2] = 2.009;
- Exec_Speedup[3] = 2.315;
- Exec_Speedup[4] = 2.572;
- Exec_Speedup[5] = 0.0;
- Exec_Speedup[6] = 0.0;
- Exec_Speedup[7] = 0.0;//5.522;
- Exec_Latencies[0] = 100000000;//112276;
- Exec_Latencies[1] = 116;//58880;
- Exec_Latencies[2] = 87;//40305;
- Exec_Latencies[3] = 58;//31705;
- Exec_Latencies[4] = 50;//28309;
- Exec_Latencies[5] = 45;//24512;
- Exec_Latencies[6] = 0;//22239;
- Exec_Latencies[7] = 0;//20332;
- #endif
- } else if (MATRIX_ARRAY_SIZE == 4096) {
- #ifdef PLAT_SCC
- Exec_Speedup[0] = 1.0;
- Exec_Speedup[1] = 2.001;
- Exec_Speedup[2] = 2.976;
- Exec_Speedup[3] = 4.032;
- Exec_Speedup[4] = 5.034;
- Exec_Speedup[5] = 6.25;
- Exec_Speedup[6] = 6.678;
- Exec_Speedup[7] = 6.819;
- Exec_Latencies[0] = 750;//384005;
- Exec_Latencies[1] = 374;//231583;
- Exec_Latencies[2] = 252;//157966;
- Exec_Latencies[3] = 186;//121222;
- Exec_Latencies[4] = 149;//101208;
- Exec_Latencies[5] = 120;//87852;
- Exec_Latencies[6] = 110;//78093;
- #else
- Exec_Speedup[0] = 1.0;
- Exec_Speedup[1] = 1.517;
- Exec_Speedup[2] = 1.958;
- Exec_Speedup[3] = 2.112;
- Exec_Speedup[4] = 2.878;
- Exec_Speedup[5] = 3.338;
- Exec_Speedup[6] = 4.241;
- Exec_Speedup[7] = 0.0;//5.073;
-
- Exec_Latencies[0] = 100000000;//384005;
- Exec_Latencies[1] = 431;//231583;
- Exec_Latencies[2] = 284;//157966;
- Exec_Latencies[3] = 220;//121222;
- Exec_Latencies[4] = 204;//101208;
- Exec_Latencies[5] = 150;//87852;
- Exec_Latencies[6] = 129;//78093;
- Exec_Latencies[7] = 102;//75690;
- #endif
- } else {
- printf("Unknown array size\n");
- exit(0);
- }
- } else if (executed_app == SVM) {
- /* N_sv 4096 D_sv 4096 */
- Exec_Speedup[0] = 1.0; /* 1 worker */
- Exec_Speedup[1] = 1.959;
- Exec_Speedup[2] = 2.919;
- Exec_Speedup[3] = 3.853;
- Exec_Speedup[4] = 4.777;
- Exec_Speedup[5] = 5.723;
- Exec_Speedup[6] = 6.644;
- Exec_Speedup[7] = 0.0;
-
- Exec_Latencies[0] = 578;
- Exec_Latencies[1] = 295;
- Exec_Latencies[2] = 198;
- Exec_Latencies[3] = 150;
- Exec_Latencies[4] = 121;
- Exec_Latencies[5] = 101;
- Exec_Latencies[6] = 87;
- Exec_Latencies[7] = 6; /* Irrelevant */
- } else if (executed_app == FFT) {
- Exec_Speedup[0] = 1.0; /* 1 worker */
- Exec_Speedup[1] = 1.55;
- Exec_Speedup[2] = 0;
- Exec_Speedup[3] = 0;
- Exec_Speedup[4] = 0;
- Exec_Speedup[5] = 0;
- Exec_Speedup[6] = 0;
- Exec_Speedup[7] = 0;
-
- Exec_Latencies[0] = 772;
- Exec_Latencies[1] = 498;
- Exec_Latencies[2] = 0;
- Exec_Latencies[3] = 0;
- Exec_Latencies[4] = 0;
- Exec_Latencies[5] = 0;
- Exec_Latencies[6] = 0;
- Exec_Latencies[7] = 0;
- } if (executed_app == ARTIFICIAL) {
- }
- }
- void app_init (char scen_directory[SCEN_DIR_SIZE], char scen_num[SCEN_NUM_SIZE]) {
- int i, j, pad_length = PADLENGTH;
- char buf[MAX_STR_NAME_SIZE], *buffer;
- FILE *matrix_input, *support_vectors_file, *coef_file, *test_vector_file, *umain_file, *umain2_file, *x_local_file;
- size_t bufsize = 32;
-
- if (executed_app == MATRIX_MUL) {
- cur_agent.array_size = MATRIX_ARRAY_SIZE;
- matrix = (int **) malloc(cur_agent.array_size * sizeof(int *));
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf, scen_directory);
- strcat(buf, "/MATRIX-inputs/");
- strcat(buf, itoa(cur_agent.array_size));
- fprintf(log_file,"matrix file path = %s\n",buf);
- if ((matrix_input = fopen(buf, "r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open matrix_input");
- }
- for (i=0; i<cur_agent.array_size; i++) {
- matrix[i] = (int *) malloc(cur_agent.array_size * sizeof(int));
- for (j=0; j<cur_agent.array_size; j++)
- fscanf(matrix_input,"%d",&matrix[i][j]);
- }
- vector = (int *) malloc(cur_agent.array_size * sizeof(int));
- for (j=0; j<cur_agent.array_size; j++)
- fscanf(matrix_input,"%d",&vector[j]);
-
- fclose(matrix_input);
- } else if (executed_app == SVM) {
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf,scen_directory);
- //strcat(buf,"/");
- //strcat(buf,scen_num);
- strcat(buf,"/SVM-inputs/support_vectors_N_sv_");
- strcat(buf,itoa(N_sv));
- strcat(buf,"_D_sv_");
- strcat(buf,itoa(D_sv));
- strcat(buf,".dat");
- fprintf(log_file,"svm file path = %s\n",buf);
-
- if ((support_vectors_file = fopen(buf,"r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open svm_input");
- }
-
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf,scen_directory);
- //strcat(buf,"/");
- //strcat(buf,scen_num);
- strcat(buf,"/SVM-inputs/sv_coef_N_sv_");
- strcat(buf,itoa(N_sv));
- strcat(buf,"_D_sv_");
- strcat(buf,itoa(D_sv));
- strcat(buf,".dat");
- fprintf(log_file,"svm_coef file path = %s\n",buf);
-
- if ((coef_file = fopen(buf,"r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open svm_input");
- }
-
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf,scen_directory);
- //strcat(buf,"/");
- //strcat(buf,scen_num);
- strcat(buf,"/SVM-inputs/test_vector_D_sv_");
- strcat(buf,itoa(D_sv));
- strcat(buf,".dat");
- fprintf(log_file,"test_vector file path = %s\n",buf);
- if ((test_vector_file = fopen(buf,"r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open svm_input");
- }
- svm_vectors = (float **)malloc(N_sv*sizeof(float *));
- if (svm_vectors == NULL){
- printf("--%d-- svm_vectors malloc fail!!\n", node_id);
- perror("malloc error");
- }
- svm_coef = (float *)malloc(N_sv*sizeof(float));
- if (svm_coef == NULL){
- printf("--%d-- svm_coef malloc fail!!\n", node_id);
- perror("malloc error");
- }
-
- buffer = (char *)malloc(bufsize * sizeof(char));
- for (i = 0; i < N_sv; i++) {
- svm_vectors[i] = (float *)malloc(D_sv*sizeof(float));
- if (svm_vectors[i] == NULL) {
- printf("--%d-- svm_vectors[%d] malloc fail!!\n", node_id, i);
- perror("malloc error");
- } else {
- for (j = 0; j < D_sv; j++) {
- /* Read support svm_vectors */
- if (j < D_sv){
- fscanf(support_vectors_file,"%f",&svm_vectors[i][j]);
- fgetc(support_vectors_file);
- }else{
- getline(&buffer,&bufsize,support_vectors_file);
- }
- }
- }
- }
- for (j = 0; j < N_sv; j++) {
- /* Read coefficients */
- fscanf(coef_file,"%f",&svm_coef[j]);
- fgetc(coef_file);
- }
- for (j = 0; j < D_sv; j++) {
- /* Read coefficients */
- fscanf(test_vector_file,"%f",&input_vector[j]);
- }
- cur_agent.array_size = -1;
- fclose(support_vectors_file);
- fclose(coef_file);
- fclose(test_vector_file);
- free(buffer);
- } else if (executed_app == FFT) {
- fprintf(log_file,"Initializing FFT application\n");
-
- x_local = (float *)malloc(2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
- if (x_local == NULL){
- printf("Malloc error for x_local\n");
- perror("malloc error");
- exit(-1);
- }
-
- trans = (float *)malloc(2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
- if (trans == NULL){
- printf("Malloc error for trans\n");
- perror("malloc error");
- exit(-1);
- }
-
- umain = (float *)malloc(2*rootN*sizeof(float));
- if (umain == NULL){
- printf("Malloc error for umain\n");
- perror("malloc error");
- exit(-1);
- }
-
- umain2 = (float *)malloc(2*(N+rootN*pad_length)*sizeof(float)+PAGE_SIZE);
- if (umain2 == NULL){
- printf("Malloc error for umain2\n");
- perror("malloc error");
- exit(-1);
- }
-
- upriv = (float *)malloc(2*(rootN-1)*sizeof(float));
- if (upriv == NULL){
- printf("--%d-- Malloc error for upriv\n", node_id);
- perror("malloc error");
- exit(-1);
- }
-
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf,scen_directory);
- //strcat(buf,"/");
- //strcat(buf,scen_num);
- strcat(buf,"/FFT-inputs/umain_file");
- fprintf(log_file,"umain_file file path = %s\n",buf);
-
- if ((umain_file = fopen(buf,"r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open fft_input");
- }
-
- for (i=0; i<2*rootN; i++) {
- fscanf(umain_file,"%f",&umain[i]);
- }
- fclose(umain_file);
-
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf,scen_directory);
- //strcat(buf,"/");
- //strcat(buf,scen_num);
- strcat(buf,"/FFT-inputs/umain2_file");
- fprintf(log_file,"umain2_file file path = %s\n",buf);
-
- if ((umain2_file = fopen(buf,"r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open umain_file");
- }
-
- //for (i=0; i<2*(N+rootN*pad_length)+PAGE_SIZE; i++) {
- for (i=0; i<2*(N+rootN*pad_length); i++) {
- fscanf(umain2_file,"%f",&umain2[i]);
- }
- fclose(umain2_file);
-
- #ifdef PLAT_SCC
- strcpy(buf, "/shared/herc/");
- #else
- strcpy(buf, "../");
- #endif
- strcat(buf,scen_directory);
- //strcat(buf,"/");
- //strcat(buf,scen_num);
- strcat(buf,"/FFT-inputs/x_local_file");
- fprintf(log_file,"x_local_file file path = %s\n",buf);
-
- if ((x_local_file = fopen(buf,"r")) == NULL){
- printf("Cannot open input file with file path = %s ",buf);
- perror("open x_local_file");
- }
-
- //for (i=0;i<2*(N+rootN*pad_length)+PAGE_SIZE;i++) {
- for (i=0; i<2*(N+rootN*pad_length); i++) {
- fscanf(x_local_file,"%f",&x_local[i]);
- }
- fclose(x_local_file);
-
- for (i = 0; i < 2*(rootN-1); i++){
- upriv[i] = umain[i];
- }
-
- } else if (executed_app == MATRIX_MUL) {
- }
- }
- int get_max_cores_count(app cur_app){
-
- /*if (cur_app.var < 1.0)
- return (int) ceilf(2.0*cur_app.A - 1);
- else
- return (int) ceilf(cur_app.A + cur_app.A*cur_app.var - cur_app.var);*/
- #ifdef SINGLE_WORKER
- return 2;
- #else
- if (executed_app == FFT) {
- return 3;
- } else {
- return MAX_WORKERS_COUNT;
- }
- #endif
- }
- float Speedup_Artificial_App(app cur_app, int num_of_cores) {
- float res=0;
-
- if (num_of_cores > 0) {
- if (cur_app.var < 1.0) {
- if (num_of_cores == 1) {
- res = 1;
- } else if ((num_of_cores > 1) && (num_of_cores < cur_app.A)) {
- res = (num_of_cores*cur_app.A) / (cur_app.A + (cur_app.var / 2.0*(num_of_cores-1)));
- } else if ((num_of_cores >= cur_app.A) && (num_of_cores < 2.0*cur_app.A - 1)) {
- res = (num_of_cores*cur_app.A) / (cur_app.var*(cur_app.A -0.5) + num_of_cores*(1.0 - 0.5*cur_app.var));
- } else {
- res = cur_app.A;
- }
- } else {
- if ((num_of_cores >= 1) && (num_of_cores <= (cur_app.A + cur_app.A*cur_app.var - cur_app.var))) {
- res = (num_of_cores*cur_app.A*(cur_app.var + 1)) / (cur_app.A + cur_app.var*(num_of_cores-1 + cur_app.A));
- } else {
- res = cur_app.A;
- }
- }
- }
- return res;
- }
- float Speedup(app cur_app, int num_of_cores) {
- if ((num_of_cores < 2) || (num_of_cores > get_max_cores_count(cur_app))) {
- return 0;
- } else {
- #ifndef ARTIFICIAL_APPS_SIM
- return Exec_Speedup[num_of_cores-2];
- #else
- return Speedup_Artificial_App(cur_app, num_of_cores-1);
- #endif
- }
- }
- int get_times(app cur_app, int num_of_cores) {
- /*
- int type;
- if (cur_app.array_size == 1024) type = 0;
- else if (cur_app.array_size == 2048) type = 1;
- else if (cur_app.array_size == 4096) type = 2;
- else {
- fprintf(log_file, "Unknown array size = %d\n",cur_app.array_size);
- fflush(log_file);
- return 0.0;
- }
- return (cur_app.workld * matr_times[type][num_of_cores-2]);
- */
- return (cur_app.workld * Exec_Latencies[num_of_cores-2]);
- }
- void matrix_transpose(int n1, float *src, float *dest, int node_id, int myFirst, int myLast, int pad_length){
- int i;
- int j;
- int k;
- int l;
- int m;
- int blksize;
- int numblks;
- int firstfirst;
- int h_off;
- int v_off;
- int v;
- int h;
- int n1p;
- int row_count;
- //fprintf(log_file,"I am inside matrix_transpose-0 node_id is %d n1 %d\n",node_id,n1);
- blksize = myLast-myFirst;
- numblks = (2*blksize)/num_cache_lines;
- if (numblks * num_cache_lines != 2 * blksize) {
- numblks ++;
- }
- blksize = blksize / numblks;
- firstfirst = myFirst;
- row_count = n1/P;
- n1p = n1+pad_length;
- for (l=node_id+1;l<P;l++) {
- v_off = l*row_count;
- for (k=0; k<numblks; k++) {
- h_off = firstfirst;
- for (m=0; m<numblks; m++) {
- for (i=0; i<blksize; i++) {
- v = v_off + i;
- for (j=0; j<blksize; j++) {
- h = h_off + j;
- //fprintf(log_file,"Index dest is %d\n",2*(h*n1p+v));
- //fprintf(log_file,"Index src is %d\n",2*(v*n1p+h));
- //fprintf(log_file,"src = %f\n",src[2*(v*n1p+h)]);
- //fprintf(log_file,"src + 1 = %f\n",src[2*(v*n1p+h)+1]);
- //fprintf(log_file,"dest = %f\n",dest[2*(h*n1p+v)]);
- //fprintf(log_file,"dest + 1 = %f\n",dest[2*(h*n1p+v)+1]);
- //fflush(log_file);
- dest[2*(h*n1p+v)] = src[2*(v*n1p+h)];
- dest[2*(h*n1p+v)+1] = src[2*(v*n1p+h)+1];
- //fprintf(log_file,"yolo\n");
- }
- }
- h_off += blksize;
- }
- v_off+=blksize;
- }
- }
- //fprintf(log_file,"I am inside matrix_transpose-A\n");
-
- for (l=0;l<node_id;l++) {
- v_off = l*row_count;
- for (k=0; k<numblks; k++) {
- h_off = firstfirst;
- for (m=0; m<numblks; m++) {
- for (i=0; i<blksize; i++) {
- v = v_off + i;
- for (j=0; j<blksize; j++) {
- h = h_off + j;
- dest[2*(h*n1p+v)] = src[2*(v*n1p+h)];
- dest[2*(h*n1p+v)+1] = src[2*(v*n1p+h)+1];
- }
- }
- h_off += blksize;
- }
- v_off+=blksize;
- }
- }
- //fprintf(log_file,"I am inside matrix_transpose-B\n");
-
- v_off = node_id*row_count;
- for (k=0; k<numblks; k++) {
- h_off = firstfirst;
- for (m=0; m<numblks; m++) {
- for (i=0; i<blksize; i++) {
- v = v_off + i;
- for (j=0; j<blksize; j++) {
- h = h_off + j;
- dest[2*(h*n1p+v)] = src[2*(v*n1p+h)];
- dest[2*(h*n1p+v)+1] = src[2*(v*n1p+h)+1];
- }
- }
- h_off += blksize;
- }
- v_off+=blksize;
- }
- //fprintf(log_file,"I am inside matrix_transpose-C\n");
- }
- //FFT1D(1, M, N, x_local, trans, upriv, umain2, work_id, lower_bound, upper_bound, pad_length, P);
/*
 * One worker's part of a 1D FFT of size N = 2^M, computed as a 2D problem
 * on an n1 x n1 grid (n1 = 2^(M/2)) with padded rows:
 *   1. transpose x into scratch,
 *   2. FFT and twiddle-scale each column in [myFirst, myLast) of scratch,
 *   3. transpose scratch back into x,
 *   4. FFT each column in [myFirst, myLast) of x,
 *   5. transpose x into scratch, where the result is left.
 *
 * direction is forwarded to the column FFT and twiddle steps. upriv and
 * umain2 supply the roots of unity. The P parameter is not used here.
 */
void FFT1D(int direction, int M, int N, float *x, float *scratch, float *upriv, float *umain2, int node_id, int myFirst, int myLast, int pad_length, int P){
    const int half_bits = M/2;
    const int side = 1 << half_bits;
    int col;

    (void) P;

    matrix_transpose(side, x, scratch, node_id, myFirst, myLast, pad_length);

    /* First pass: column FFTs followed by the twiddle multiplication. */
    for (col = myFirst; col < myLast; col++) {
        float *col_data = &scratch[2*col*(side+pad_length)];

        single_FFT1D(direction, half_bits, side, upriv, col_data);
        twiddle_Col(direction, side, N, col, umain2, col_data, pad_length);
    }

    matrix_transpose(side, scratch, x, node_id, myFirst, myLast, pad_length);

    /* Second pass: column FFTs only. */
    for (col = myFirst; col < myLast; col++) {
        single_FFT1D(direction, half_bits, side, upriv, &x[2*col*(side+pad_length)]);
    }

    matrix_transpose(side, x, scratch, node_id, myFirst, myLast, pad_length);
}
/*
 * Copies n1 complex values (2*n1 consecutive floats) from src to dest,
 * front to back.
 */
void copyColumn(int n1, float *src, float *dest){
    int k;

    for (k = 0; k < 2*n1; k++) {
        dest[k] = src[k];
    }
}
/*
 * In-place radix-2 FFT of N = 2^M complex values stored as interleaved
 * (re, im) floats in x.
 *
 * x is first permuted by reverse(); then M butterfly stages follow. For a
 * stage with butterfly span L, the twiddle factors are read from u starting
 * at complex index L/2 - 1. The imaginary part of every twiddle factor is
 * multiplied by direction.
 */
void single_FFT1D(int direction, int M, int N, float *u, float *x){
    int stage;

    reverse(N, M, x);

    for (stage = 1; stage <= M; stage++) {
        const int span = 1 << stage;
        const int groups = N / span;
        const int half = span / 2;
        float *twiddle = &u[2*(half-1)];
        int g;

        for (g = 0; g < groups; g++) {
            float *lo = &x[2*(g*span)];
            float *hi = &x[2*(g*span+half)];
            int b;

            for (b = 0; b < half; b++) {
                const float w_re = twiddle[2*b];
                const float w_im = direction*twiddle[2*b+1];
                const float hi_re = hi[2*b];
                const float hi_im = hi[2*b+1];
                /* t = w * hi (complex product) */
                const float t_re = w_re*hi_re - w_im*hi_im;
                const float t_im = w_re*hi_im + w_im*hi_re;
                const float lo_re = lo[2*b];
                const float lo_im = lo[2*b+1];

                hi[2*b] = lo_re - t_re;
                hi[2*b+1] = lo_im - t_im;
                lo[2*b] = lo_re + t_re;
                lo[2*b+1] = lo_im + t_im;
            }
        }
    }
}
/*
 * Multiplies the n1 complex values in x, in place, by the factors stored in
 * row j of u, where each row of u holds n1 + pad_length complex entries.
 * The imaginary part of every factor is multiplied by direction.
 * N is not used.
 */
void twiddle_Col(int direction, int n1, int N, int j, float *u, float *x, int pad_length){
    const int row_base = j*(n1+pad_length);
    int idx;

    for (idx = 0; idx < n1; idx++) {
        const float w_re = u[2*(row_base+idx)];
        const float w_im = direction*u[2*(row_base+idx)+1];
        const float in_re = x[2*idx];
        const float in_im = x[2*idx+1];

        x[2*idx] = w_re*in_re - w_im*in_im;
        x[2*idx+1] = w_re*in_im + w_im*in_re;
    }
}
/*
 * Permutes the N complex values in x (interleaved re/im floats) so that
 * every element trades places with the element at its M-bit reversed
 * index. Each pair is exchanged once, when the reversed index is larger.
 */
void reverse(int N, int M, float *x){
    int idx;

    for (idx = 0; idx < N; idx++) {
        const int mirrored = reverse_bit(M, idx);
        float held;

        if (mirrored <= idx) {
            continue;
        }

        held = x[2*mirrored];
        x[2*mirrored] = x[2*idx];
        x[2*idx] = held;

        held = x[2*mirrored+1];
        x[2*mirrored+1] = x[2*idx+1];
        x[2*idx+1] = held;
    }
}
/*
 * Returns the value obtained by reversing the order of the lowest M bits
 * of k. Bits of k above the lowest M are ignored; M <= 0 yields 0.
 */
int reverse_bit(int M, int k){
    int out = 0;
    int bits_left;

    for (bits_left = M; bits_left > 0; bits_left--) {
        out = 2*out + (k & 0x1);
        k = k >> 1;
    }

    return out;
}
|