my_rtrm.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503
  1. #include "my_rtrm.h"
  2. #include "idag_defs.h"
  3. #include "libfunctions.h"
  4. #include "noc_functions.h"
  5. #include "sig_aux.h"
  6. #include "controller_core.h"
  7. #include "common_core.h"
  8. #include "signal_handlers.h"
  9. #include "scc_signals.h"
  10. #include "idag_defs.h"
  11. #include "paxos_signal_handlers.h"
  12. #include "apps.h"
  /*
   * File-scope state of the run-time resource manager process.
   *
   * The first part is platform dependent: under PLAT_SCC the per-core tables
   * are fixed-size arrays and synchronisation uses RCCE flags; otherwise the
   * tables are pointers that main() allocates (malloc / System V shared
   * memory) and synchronisation uses POSIX semaphores.
   * The remaining globals are common to both builds.
   */
  13. #ifdef PLAT_SCC
  14. int idag_mask[X_max*Y_max]; /* main() treats idag_mask[i] == node_id as "core i belongs to this idag" */
  15. int low_voltage_core[X_max*Y_max];
  16. int timer_schedule[X_max*Y_max]; /* zeroed per core by main() */
  17. volatile int *manager_result_out; /* RCCE_shmalloc'ed in main(); size depends on executed_app */
  18. volatile int *index_bottom; /* RCCE_shmalloc'ed in main(), NUES ints */
  19. RCCE_FLAG flag_data_written; /* allocated and written RCCE_FLAG_UNSET in main() */
  20. RCCE_FLAG proposal_number_lock; /* allocated and written RCCE_FLAG_UNSET in main() */
  21. int num_idags_x = 1; //FIXME
  22. #else /* !PLAT_SCC */
  23. int X_max, Y_max; /* grid dimensions: main() sets them to 6 * <-x arg> and 8 * <-y arg> under PLAT_LINUX */
  24. int *pid_num = NULL; /* shared segment of NUES * sizeof(pid_t) bytes, attached in main() */
  25. int *idag_mask = NULL; /* malloc'ed in main(), X_max*Y_max ints; idag_mask[i] == node_id means core i belongs to this idag */
  26. int *low_voltage_core = NULL; /* malloc'ed in main(), X_max*Y_max ints */
  27. int *timer_schedule = NULL; /* malloc'ed in main(), X_max*Y_max ints, zeroed per core */
  28. int *manager_result_out = NULL; /* shared segment attached in main(); size depends on executed_app */
  29. int *index_bottom = NULL; /* shared segment of NUES ints, attached in main() */
  30. int num_idags_x;
  31. sem_t *flag_data_written; /* shared array sized NUES * sizeof(sem_t); main() inits entry [node_id] to 0 */
  32. sem_t *scc_lock; /* shared array sized NUES * sizeof(sem_t); main() inits entry [node_id] to 1 */
  33. sem_t *proposal_number_lock; /* process-shared semaphore, initialised to 1 in main() */
  34. #endif /* PLAT_SCC */
  35. int node_id = -1; /* -1 until assigned: RCCE_ue() on SCC; 0, or a per-child id after fork(), on Linux */
  36. int my_idag = -1;
  37. int num_idags = 0; /* main() forks num_idags-1 further idag processes on Linux */
  38. int DDS_count = 0; /* number of entries in the DDS list */
  39. int my_cores_count = 0; /* number of entries in the my_cores list */
  40. int nodes_ended_cnt = 0;
  41. int nodes_initialised = 0; /* main() waits until this equals my_cores_count-1 */
  42. int stats_replied = 0;
  43. int paxos_stats_replied = 0;
  44. int num_apps_terminated = 0; /* compared with num_apps in the IDLE_IDAG state of main() */
  45. int num_apps = 0; /* incremented by main() for each application read from the input file */
  46. int idags_replied = 0;
  47. int index_top = 0;
  48. int last_index_bottom = 0;
  49. int NUES; /* number of nodes: RCCE_num_ues() on SCC, X_max * Y_max otherwise */
  50. int manager_to_fail = -1; /* if still -1 and MANAGER is defined, main() sets it to rand() % 8 */
  51. int *sig_array = NULL; /* signal slots (LINE_SIZE ints each), allocated in main() and filled with NO_SIG */
  52. int *data_array = NULL; /* data slots (LINE_SIZE ints each), allocated in main() */
  53. int *idag_id_arr = NULL; /* node ids of the idags; main() compares node_id against idag_id_arr[0] */
  54. int *proposal_number_global; /* shared int, set to 0 in main() */
  55. char paxos_scen[MAX_STR_NAME_SIZE]; /* value of the -p option */
  56. char app_input_file[MAX_STR_NAME_SIZE]; /* value of the -a option */
  57. FILE *log_file = NULL; /* opened in main() via create_log_file() */
  58. FILE *init_ack_file = NULL; /* opened in main() on <scen_dir>/<scen_num>/init_ack.txt */
  59. core_states state; /* current state of the loop in main() (IDLE_IDAG, CHK_APP_FILE, ...) */
  60. inter_list **core_inter_head; /* per-core interaction list heads; X_max*Y_max pointers allocated in main() */
  61. inter_list **core_inter_tail; /* per-core interaction list tails, same size as core_inter_head */
  62. inter_list *init_pending_head; /* INIT_APP requests queued by main() while the target core already has one in its list */
  63. inter_list *init_pending_tail;
  64. app my_app;
  65. metrics my_stats = {0,0,0,0,0,0,0,0}; /* main() updates msg_count and distance when it sends SIG_INIT_APP */
  66. metrics total_stats = {0,0,0,0,0,0,0,0};
  67. metrics paxos_node_stats = {0,0,0,0,0,0,0,0};
  68. metrics paxos_total_stats = {0,0,0,0,0,0,0,0};
  69. DDS_list *DDS = NULL; /* head of the DDS list; main() creates the first entry for node_id */
  70. DDS_list *DDS_tail = NULL;
  71. core_list *my_cores = NULL; /* head of the list of cores; the first entry is node_id itself */
  72. core_list *my_cores_tail = NULL;
  73. time_t cur_time;
  74. timer_t timerid; /* created in main() with signal SIG_TIMER */
  75. timer_t inter_timer;
  76. timer_t controller_timer;
  77. timer_t epfd_timer; /* created in main() with SIG_EPFD_TIMER when EPFD or tEPFD is defined */
  78. timer_t pfd_timer; /* created in main() with SIG_PFD_TIMER when PFD or tPFD is defined */
  79. struct tm *cur_t; /* result of the most recent localtime() call, used for log timestamps */
  80. struct sigevent sev; /* reused by main() for every timer_create() call */
  81. struct itimerspec its; /* reused by main() for timer_settime() on timerid */
  82. struct itimerspec chk_timer;
  83. struct timeval time_val; /* NOTE(review): main() declares a local with the same name that shadows this one */
  84. /* Dimos' variables */
  85. int PREPARE_ACCEPT_SENT = 0;
  86. int controllers_replied = 0;
  87. int delay = 500; /* printed at start-up as "DELAY = %d"; unit not visible here - TODO confirm */
  88. char *tabs;
  89. #ifdef PLAT_SCC
  90. int RCCE_APP(int argc, char *argv[]) {
  91. char error_str[64];
  92. int error, str_len, sig_array_local[LINE_SIZE];
  93. RCCE_init(&argc, &argv);
  94. node_id = RCCE_ue();
  95. #else
  96. int main(int argc, char *argv[]) {
  97. int segment_id;
  98. #endif
  99. int num_of_bytes;
  100. int i;
  101. int j;
  102. int k;
  103. int c;
  104. int one_idag;
  105. int one_core;
  106. int app_cnt = 0;
  107. int time_passed = -1;
  108. int time_next;
  109. int init_core;
  110. int timer_init_null = 0;
  111. int executed_app_type_number = 0; /* Used for the initialization of init_app type */
  112. int executed_app_array_size = 0; /* Used for the initialization of init_app type */
  113. float avg_cluster_util;
  114. char scen_directory[MAX_STR_NAME_SIZE];
  115. char scen_num[MAX_STR_NAME_SIZE];
  116. char idag_defs_file_name[MAX_STR_NAME_SIZE];
  117. char app_input_file_name[MAX_STR_NAME_SIZE];
  118. char time_log_file_name[MAX_STR_NAME_SIZE];
  119. char init_ack_file_name[MAX_STR_NAME_SIZE];
  120. inter_list *tmp_inter_list;
  121. inter_list *tmp_inter_prev;
  122. inter_list *tmp_pending_head;
  123. FILE *app_input;
  124. FILE *time_log;
  125. struct tm start_time_of_day, *end_time_of_day;
  126. pid_t p;
  127. DDS_list *tmp_DDS;
  128. core_list *tmp_cores_list;
  129. struct timeval time_val;
  130. setvbuf(stdout, NULL, _IONBF, 0);
  131. if (argc < 17) {
  132. printf("USAGE: ./my_rtrm <options>\n\n");
  133. printf("options:\n");
  134. printf("\t-d <scen_dir> : <scen_dir> = Directory of all scenarios.\n");
  135. printf("\t-n <scen_num> : <scen_num> = Scenario number to be executed.\n");
  136. printf("\t-i <idag_defs> : <idag_defs> = File with the definitions of grid and idags.\n");
  137. printf("\t-a <app_input> : <app_input> = File with application characteristics.\n");
  138. printf("\t-p <paxos_scen> : <paxos_scen> = File with failure characteristics.\n");
  139. printf("\t-x X : X = X * 6 will be the horizontal dimension of the grid.\n");
  140. printf("\t-y Y : Y = Y * 8 will be the vertical dimension of the grid.\n");
  141. printf("\t-t <app_type> : <app_type> M for MATRIX_MUL, S for SVM, F for FFT \n");
  142. exit(0);
  143. }
  144. while ((c = getopt(argc, argv, "d:n:i:x:y:a:p:t:h")) != -1){
  145. switch(c){
  146. case 'd':
  147. strcpy(scen_directory, optarg);
  148. if (node_id == 0)
  149. printf("Scenario directory : %s...\n",scen_directory);
  150. break;
  151. case 'n':
  152. strcpy(scen_num, optarg);
  153. if (node_id == 0)
  154. printf("Scenario number : %s...\n",scen_num);
  155. break;
  156. case 'i':
  157. strcpy(idag_defs_file_name,optarg);
  158. if (node_id == 0)
  159. printf("Idag definitions file name : %s...\n",idag_defs_file_name);
  160. break;
  161. case 'x':
  162. #ifdef PLAT_LINUX
  163. X_max = atoi(optarg) * 6;
  164. #endif
  165. if (node_id == 0)
  166. printf("X = %d...\n",X_max);
  167. break;
  168. case 'y':
  169. #ifdef PLAT_LINUX
  170. Y_max = atoi(optarg) * 8;
  171. #endif
  172. if (node_id == 0)
  173. printf("Y = %d...\n",Y_max);
  174. break;
  175. case 'a':
  176. strcpy(app_input_file, optarg);
  177. if (node_id == 0)
  178. printf("Application input file %s...\n",app_input_file);
  179. break;
  180. case 'p':
  181. strcpy(paxos_scen, optarg);
  182. if (node_id == 0)
  183. printf("Paxos scenario file name %s...\n", paxos_scen);
  184. break;
  185. case 't':
  186. if (!strcmp(optarg,"M")) {
  187. executed_app = MATRIX_MUL;
  188. executed_app_type_number = 0;
  189. executed_app_array_size = MATRIX_ARRAY_SIZE;
  190. //printf("Input application type is Matrix mul\n");
  191. } else if (!strcmp(optarg,"S")) {
  192. executed_app = SVM;
  193. executed_app_type_number = 1;
  194. executed_app_array_size = SVM_ARRAY_SIZE;
  195. //printf("Input application type is SVM\n");
  196. } else if (!strcmp(optarg,"F")) {
  197. executed_app = FFT;
  198. executed_app_type_number = 2;
  199. executed_app_array_size = FFT_ARRAY_SIZE;
  200. //printf("Input application type is FFT\n");
  201. } else {
  202. printf("Input application type is wrong. Options are M,S,F\n");
  203. }
  204. break;
  205. /*
  206. if (cur_agent.app_type == 0)
  207. executed_app = MATRIX_MUL;
  208. else if (cur_agent.app_type == 1)
  209. executed_app = SVM;
  210. else if (cur_agent.app_type == 2)
  211. executed_app = FFT;
  212. */
  213. case 'h':
  214. printf("USAGE: ./my_rtrm <options>\n\n");
  215. printf("options:\n");
  216. printf("\t-d <scen_dir> : <scen_dir> = Directory of all scenarios.\n");
  217. printf("\t-n <scen_num> : <scen_num> = Scenario number to be executed.\n");
  218. printf("\t-i <idag_defs> : <idag_defs> = File with the definitions of grid and idags.\n");
  219. printf("\t-a <app_input> : <app_input> = File with application characteristics.\n");
  220. printf("\t-p <paxos_scen> : <paxos_scen> = File with failure characteristics.\n");
  221. printf("\t-x X : X = X * 6 will be the horizontal dimension of the grid.\n");
  222. printf("\t-y Y : Y = Y * 8 will be the vertical dimension of the grid.\n");
  223. printf("\t-t <app_type> : <app_type> M for MATRIX_MUL, S for SVM, F for FFT \n");
  224. exit(0);
  225. break;
  226. }
  227. }
  228. #ifdef PLAT_SCC
  229. NUES = RCCE_num_ues();
  230. RCCE_flag_alloc(&flag_data_written);
  231. RCCE_flag_write(&flag_data_written, RCCE_FLAG_UNSET, node_id);
  232. RCCE_flag_alloc(&proposal_number_lock);
  233. RCCE_flag_write(&proposal_number_lock, RCCE_FLAG_UNSET, node_id);
  234. sig_array = (int *) RCCE_malloc(MAX_SIGNAL_LIST_LEN * LINE_SIZE * sizeof(int));//NUES * NUES
  235. data_array = (int *) RCCE_malloc(MAX_DATA_LIST_LEN * LINE_SIZE * sizeof(int));
  236. if (executed_app == MATRIX_MUL) {
  237. num_of_bytes = NUES * MAX_ARRAY_SIZE * sizeof(int);
  238. } else if (executed_app == SVM) {
  239. num_of_bytes = NUES * SVM_ARRAY_SIZE * sizeof(float);
  240. } else if (executed_app == FFT) {
  241. num_of_bytes = NUES * FFT_ARRAY_SIZE * sizeof(float);
  242. }
  243. manager_result_out = (volatile int*) RCCE_shmalloc(num_of_bytes);
  244. num_of_bytes = NUES * sizeof(int);
  245. index_bottom = (volatile int*) RCCE_shmalloc(num_of_bytes);
  246. proposal_number_global = (int *)RCCE_shmalloc(sizeof(int *));
  247. *proposal_number_global = 0;
  248. for (i=0; i<LINE_SIZE; i++)
  249. sig_array_local[i] = NO_SIG;
  250. for (i=0; i<MAX_SIGNAL_LIST_LEN; i++) {
  251. error = RCCE_put((t_vcharp)(&sig_array[i*LINE_SIZE]), (t_vcharp)(&sig_array_local[0]), LINE_SIZE * sizeof(int), node_id);
  252. if (error != RCCE_SUCCESS) {
  253. RCCE_error_string(error, error_str, &str_len);
  254. fprintf(log_file,"I got an error in put 2 with descr %s\n",error_str);
  255. fflush(log_file);
  256. }
  257. }
  258. #else
  259. idag_mask = (int *) malloc(X_max*Y_max*sizeof(int));
  260. low_voltage_core = (int *) malloc(X_max*Y_max*sizeof(int));
  261. timer_schedule = (int *) malloc(X_max*Y_max*sizeof(int));
  262. NUES = X_max * Y_max;
  263. node_id = 0;
  264. num_of_bytes = NUES * sizeof(pid_t);
  265. segment_id = shmget (IPC_PRIVATE, num_of_bytes, IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
  266. pid_num = (int *) shmat (segment_id, NULL, 0);
  267. num_of_bytes = NUES * MAX_SIGNAL_LIST_LEN * LINE_SIZE * sizeof(int);
  268. segment_id = shmget (IPC_PRIVATE, num_of_bytes, IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
  269. sig_array = (int *) shmat (segment_id, NULL, 0); //(int *) RCCE_malloc(MAX_SIGNAL_LIST_LEN * LINE_SIZE * sizeof(int));//NUES * NUES
  270. for (i = 0; i < MAX_SIGNAL_LIST_LEN*LINE_SIZE; i++) {
  271. sig_array[i] = NO_SIG;
  272. }
  273. num_of_bytes = NUES * MAX_DATA_LIST_LEN * LINE_SIZE * sizeof(int);
  274. segment_id = shmget (IPC_PRIVATE, num_of_bytes, IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
  275. data_array = (int *) shmat (segment_id, NULL, 0); //(int *) RCCE_malloc(3 * LINE_SIZE * sizeof(int));
  276. if (executed_app == MATRIX_MUL) {
  277. num_of_bytes = NUES * MAX_ARRAY_SIZE * sizeof(int);
  278. } else if (executed_app == SVM) {
  279. num_of_bytes = NUES * SVM_ARRAY_SIZE * sizeof(int);
  280. } else if (executed_app == FFT) {
  281. num_of_bytes = NUES * FFT_ARRAY_SIZE * sizeof(float);
  282. }
  283. segment_id = shmget (IPC_PRIVATE, num_of_bytes, IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
  284. manager_result_out = (int *) shmat (segment_id, NULL, 0); //(volatile int*) RCCE_shmalloc(num_of_bytes);
  285. num_of_bytes = NUES * sizeof(int);
  286. segment_id = shmget (IPC_PRIVATE, num_of_bytes, IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR);
  287. index_bottom = (int *) shmat (segment_id, NULL, 0); //(volatile int*) RCCE_shmalloc(num_of_bytes);
  288. //for (i=0; i<LINE_SIZE; i++)
  289. // sig_array_local[i] = NO_SIG;
  290. //#if defined(BASIC_PAXOS)
  291. num_of_bytes = sizeof(sem_t *);
  292. if ((segment_id = shmget(IPC_PRIVATE, num_of_bytes, IPC_CREAT | S_IRUSR | S_IWUSR)) < 0){
  293. printf("Unable to allocate shared memory!\n");
  294. exit(1);
  295. }
  296. proposal_number_lock = (sem_t *) shmat (segment_id, NULL, 0);
  297. if (sem_init(proposal_number_lock, 1, 1) == -1){
  298. printf("I am %d error\n",node_id);
  299. perror("sem_init");
  300. }
  301. num_of_bytes = sizeof(sem_t *);
  302. if ((segment_id = shmget(IPC_PRIVATE, num_of_bytes, IPC_CREAT | S_IRUSR | S_IWUSR)) < 0){
  303. printf("Unable to allocate shared memory!\n");
  304. exit(1);
  305. }
  306. num_of_bytes = sizeof(int *);
  307. if ((segment_id = shmget(IPC_PRIVATE, num_of_bytes, IPC_CREAT | S_IRUSR | S_IWUSR)) < 0){
  308. printf("Unable to allocate shared memory!\n");
  309. exit(1);
  310. }
  311. proposal_number_global = (int *) shmat (segment_id, NULL, 0);
  312. *proposal_number_global = 0;
  313. //#endif
  314. num_of_bytes = NUES * sizeof(sem_t);
  315. segment_id = shmget (IPC_PRIVATE, num_of_bytes, IPC_CREAT | S_IRUSR | S_IWUSR);
  316. scc_lock = (sem_t *) shmat (segment_id, NULL, 0);
  317. if (sem_init(&scc_lock[node_id], 1, 1) == -1){
  318. printf("I am %d error\n",node_id);
  319. perror("sem_init");
  320. }
  321. segment_id = shmget (IPC_PRIVATE, num_of_bytes,
  322. IPC_CREAT | S_IRUSR | S_IWUSR);
  323. flag_data_written = (sem_t *) shmat (segment_id, NULL, 0);
  324. if (sem_init(&flag_data_written[node_id], 1, 0) == -1){
  325. printf("I am %d error\n",node_id);
  326. perror("sem_init");
  327. }
  328. #endif
  329. init_speedup_structs();
  330. core_inter_head = (inter_list **) malloc(X_max*Y_max*sizeof(inter_list *));
  331. core_inter_tail = (inter_list **) malloc(X_max*Y_max*sizeof(inter_list *));
  332. for (i=0; i<X_max*Y_max; i++){
  333. core_inter_head[i] = NULL;
  334. core_inter_tail[i] = NULL;
  335. timer_schedule[i] = 0;
  336. }
  337. read_idag_defs(scen_directory, scen_num, idag_defs_file_name, paxos_scen);
  338. global_idag_defs();
  339. /* Initialise structs for mapping of scc coordinates to normal ones*/
  340. create_scc2grid_mapping(scen_directory, scen_num);
  341. create_grid2scc_mapping(scen_directory, scen_num);
  342. initialize_PAXOS_data(scen_directory,scen_num);
  343. #ifdef MANAGER
  344. if (manager_to_fail == -1){
  345. time_t t;
  346. srand((unsigned) time(&t));
  347. manager_to_fail = rand() % 8;
  348. //printf("MANAGER TO FAIL IS OF APP %d\n",manager_to_fail);
  349. }
  350. #endif
  351. /* Create rest of idags and assign only their node id */
  352. #ifdef PLAT_LINUX
  353. printf("I an idag with node_id = %d, pid = %d\n",0,getpid());
  354. for (i=1; i < num_idags; i++) {
  355. p = fork();
  356. if (p == 0) {
  357. node_id = idag_id_arr[i];
  358. idle_agent_actions(scen_directory,scen_num);
  359. printf("I am newly created idag with i = %d and node_id = %d\n",i,node_id);
  360. fflush(stdout);
  361. break;
  362. }
  363. }
  364. #endif
  365. if ((node_id != idag_id_arr[0]) && (is_core_idag(node_id) == 1)) {
  366. idle_agent_actions(scen_directory,scen_num); //dimos comment out
  367. } else if (node_id != idag_id_arr[0]) {
  368. common_node_actions(scen_directory,scen_num);
  369. } else {
  370. index_bottom[node_id] = 0;
  371. install_signal_handlers();
  372. #ifdef PLAT_LINUX
  373. sig_SEGV_enable();
  374. #endif
  375. sev.sigev_notify = SIGEV_SIGNAL;
  376. sev.sigev_signo = SIG_TIMER;
  377. sev.sigev_value.sival_ptr = &timerid;
  378. if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) printf("timer_create error\n");
  379. if (my_cores == NULL) {
  380. my_cores = (core_list *) malloc(sizeof(core_list));
  381. my_cores_tail = my_cores;
  382. }
  383. my_cores_count++;
  384. my_cores_tail->core_id = node_id;
  385. my_cores_tail->offered_to = -1;
  386. my_cores_tail->next = NULL;
  387. DDS = (DDS_list *) malloc(sizeof(DDS_list));
  388. DDS->agent_id = node_id;
  389. DDS->next = NULL;
  390. DDS_tail = DDS;
  391. DDS_count++;
  392. for (i = 0; i < NUES; i++)
  393. if (node_id != i && idag_mask[i] == node_id){
  394. if (my_cores == NULL) {
  395. my_cores = (core_list *) malloc(sizeof(core_list));
  396. my_cores_tail = my_cores;
  397. } else {
  398. my_cores_tail->next = (core_list *) malloc(sizeof(core_list));
  399. my_cores_tail = my_cores_tail->next;
  400. }
  401. my_cores_count++;
  402. my_cores_tail->core_id = i;
  403. my_cores_tail->offered_to = -1;
  404. my_cores_tail->next = NULL;
  405. #ifdef PLAT_LINUX
  406. p = fork();
  407. if (p==0){
  408. node_id = i;
  409. common_node_actions(scen_directory, scen_num);
  410. }
  411. #endif
  412. }
  413. DDS->num_of_cores = my_cores_count;
  414. log_file = create_log_file(node_id, 0, scen_directory, scen_num);
  415. setbuf(log_file, NULL);
  416. alive = (int *)malloc(X_max*Y_max*sizeof(int));
  417. suspected = (int *)malloc(X_max*Y_max*sizeof(int));
  418. for (i = 0; i < X_max*Y_max; i++){
  419. alive[i] = 0;
  420. suspected[i] = 0;
  421. }
  422. printf("DELAY = %d\n",delay);
  423. #if defined(EPFD) || defined(tEPFD)
  424. sev.sigev_notify = SIGEV_SIGNAL;
  425. sev.sigev_signo = SIG_EPFD_TIMER;
  426. sev.sigev_value.sival_ptr = &epfd_timer;
  427. if (timer_create(CLOCK_REALTIME, &sev, &epfd_timer) == -1)
  428. printf("timer_create error\n");
  429. else
  430. fprintf(log_file,"I succesfully created epfd_timer\n");
  431. #endif
  432. #if defined(PFD) || defined(tPFD)
  433. sev.sigev_notify = SIGEV_SIGNAL;
  434. sev.sigev_signo = SIG_PFD_TIMER;
  435. sev.sigev_value.sival_ptr = &pfd_timer;
  436. if (timer_create(CLOCK_REALTIME, &sev, &pfd_timer) == -1)
  437. printf("timer_create error\n");
  438. else
  439. fprintf(log_file,"I succesfully created pfd_timer\n");
  440. #endif
  441. cur_time = time(NULL);
  442. cur_t = localtime(&cur_time);
  443. fprintf(log_file, "[%d:%d:%d]: Initialized node_id=%d\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,node_id);
  444. #ifdef PLAT_SCC
  445. RCCE_barrier(&RCCE_COMM_WORLD);
  446. #else
  447. sleep(1);
  448. #endif
  449. for (tmp_cores_list = my_cores->next; tmp_cores_list != NULL; tmp_cores_list = tmp_cores_list->next) {
  450. one_core = tmp_cores_list->core_id;
  451. if (core_inter_head[one_core] == NULL){
  452. core_inter_head[one_core] = (inter_list *) malloc(sizeof(inter_list));
  453. core_inter_tail[one_core] = core_inter_head[one_core];
  454. } else {
  455. core_inter_tail[one_core]->next = (inter_list *) malloc(sizeof(inter_list));
  456. core_inter_tail[one_core] = core_inter_tail[one_core]->next;
  457. }
  458. core_inter_tail[one_core]->type = INIT_CORE;
  459. core_inter_tail[one_core]->next = NULL;
  460. scc_kill(one_core, SIG_INIT, core_inter_head[one_core]);
  461. }
  462. while (nodes_initialised != my_cores_count-1) {
  463. scc_pause();
  464. scc_signals_check();
  465. }
  466. #ifdef PLAT_SCC
  467. RCCE_barrier(&RCCE_COMM_WORLD);
  468. #else
  469. sleep(1);
  470. #endif
  471. printf("End of initialisation\n");
  472. //print_grid();
  473. /* Open the Time Log File*/
  474. #ifdef PLAT_SCC
  475. strcpy(time_log_file_name, "/shared/herc/");
  476. #else
  477. strcpy(time_log_file_name, "../");
  478. #endif
  479. strcat(time_log_file_name,scen_directory);
  480. strcat(time_log_file_name ,"/");
  481. strcat(time_log_file_name ,scen_num);
  482. strcat(time_log_file_name, "/times_log.txt");
  483. printf("Time Log File Path = %s ... ",time_log_file_name);
  484. fflush(stdout);
  485. fprintf(log_file,"time log file path = %s\n",time_log_file_name);
  486. fflush(log_file);
  487. if ((time_log = fopen(time_log_file_name, "w")) == NULL){
  488. printf("Error!\n");
  489. fflush(stdout);
  490. perror("open time_log");
  491. }else{
  492. printf("Success!\n");
  493. fflush(stdout);
  494. }
  495. /* done */
  496. /* Init app file is kept in linux for compatibility of result gathering scripts */
  497. #ifdef PLAT_SCC
  498. strcpy(init_ack_file_name, "/shared/herc/");
  499. #else
  500. strcpy(init_ack_file_name, "../");
  501. #endif
  502. strcat(init_ack_file_name, scen_directory);
  503. strcat(init_ack_file_name, "/");
  504. strcat(init_ack_file_name, scen_num);
  505. strcat(init_ack_file_name, "/init_ack.txt");
  506. //printf("file path = %s\n",app_input_file_name);
  507. if ((init_ack_file = fopen(init_ack_file_name, "w")) == NULL){
  508. printf("Cannot open input file with file path = %s ",init_ack_file_name);
  509. perror("open app_input");
  510. }
  511. /* Open the Application Input File*/
  512. #ifdef PLAT_SCC
  513. strcpy(app_input_file_name, "/shared/herc/");
  514. #else
  515. strcpy(app_input_file_name, "../");
  516. #endif
  517. strcat(app_input_file_name, scen_directory);
  518. strcat(app_input_file_name, "/");
  519. strcat(app_input_file_name, scen_num);
  520. strcat(app_input_file_name, "/inputs/");
  521. strcat(app_input_file_name, app_input_file);
  522. printf("App Input File Path = %s ...",app_input_file_name);
  523. fprintf(log_file,"file path = %s\n",app_input_file_name);
  524. if ((app_input = fopen(app_input_file_name, "r")) == NULL){
  525. printf("Error!\n");
  526. perror("open app_input");
  527. }else{
  528. printf("Success!\n");
  529. fflush(stdout);
  530. }
  531. /* done */
  532. fscanf(app_input,"%d",&time_next);
  533. state = IDLE_CHK_APP_FILE;
  534. its.it_interval.tv_sec = 0;
  535. its.it_interval.tv_nsec = 0;
  536. its.it_value.tv_sec = 0;
  537. its.it_value.tv_nsec = 10 * MS;
  538. if (timer_settime(timerid, 0, &its, NULL) == -1) perror("timer_settime error9");
  539. gettimeofday(&time_val, NULL);
  540. cur_t = localtime(&time_val.tv_sec);
  541. start_time_of_day = *cur_t;
  542. while (state != IDAG_ENDING)
  543. if (state == IDLE_IDAG) {
  544. scc_pause();
  545. scc_signals_check();
  546. if (num_apps_terminated == num_apps)
  547. state = USER_INPUT;
  548. else if (!timer_init_null && init_pending_head != NULL) {
  549. its.it_value.tv_sec = 10;
  550. its.it_value.tv_nsec = 0;
  551. if (timer_settime(timerid, 0, &its, NULL) == -1) perror("timer_settime error9");
  552. timer_init_null = 1;
  553. }
  554. } else if (state == IDLE_IDAG_INIT_SEND) {
  555. signals_enable();
  556. tmp_inter_prev = NULL;
  557. tmp_pending_head = init_pending_head;
  558. while (tmp_pending_head != NULL) {
  559. init_core = tmp_pending_head->data.new_app.num_of_cores;
  560. for (tmp_inter_list = core_inter_head[init_core]; tmp_inter_list != NULL; tmp_inter_list = tmp_inter_list->next)
  561. if (tmp_inter_list->type == INIT_APP) break;
  562. if (tmp_inter_list == NULL) {
  563. fprintf(log_file,"I am sending an aborted init_app to %d with id %d\n",init_core,tmp_pending_head->data.new_app.id);
  564. fflush(log_file);
  565. if (core_inter_head[init_core] == NULL){
  566. core_inter_head[init_core] = (inter_list *) malloc(sizeof(inter_list));
  567. core_inter_tail[init_core] = core_inter_head[init_core];
  568. } else {
  569. core_inter_tail[init_core]->next = (inter_list *) malloc(sizeof(inter_list));
  570. core_inter_tail[init_core] = core_inter_tail[init_core]->next;
  571. }
  572. core_inter_tail[init_core]->type = INIT_APP;
  573. core_inter_tail[init_core]->data.new_app = tmp_pending_head->data.new_app;
  574. core_inter_tail[init_core]->data.new_app.num_of_cores = 0;
  575. core_inter_tail[init_core]->next = NULL;
  576. if (core_inter_head[init_core]->next == NULL) {
  577. //kill(pid_num[sender_id],SIG_INIT_APP);
  578. scc_kill(init_core, SIG_INIT_APP, core_inter_head[init_core]);
  579. my_stats.msg_count++;
  580. my_stats.distance += distance(node_id,init_core);
  581. }
  582. if (tmp_inter_prev == NULL) {
  583. tmp_inter_list = init_pending_head;
  584. init_pending_head = init_pending_head->next;
  585. tmp_pending_head = init_pending_head;
  586. //free(tmp_inter_list);
  587. } else {
  588. tmp_inter_list = tmp_inter_prev->next;
  589. tmp_inter_prev->next = tmp_inter_list->next;
  590. tmp_pending_head = tmp_inter_prev->next;
  591. if (tmp_inter_prev->next == NULL) init_pending_tail = tmp_inter_prev;
  592. }
  593. free(tmp_inter_list);
  594. } else {
  595. tmp_inter_prev = tmp_pending_head;
  596. tmp_pending_head = tmp_pending_head->next;
  597. }
  598. }
  599. timer_init_null = 0;
  600. state = IDLE_IDAG;
  601. fprintf(log_file,"I exit this fuck\n");
  602. fflush(log_file);
  603. } else if (state == IDLE_CHK_APP_FILE) {
  604. scc_pause();
  605. scc_signals_check();
  606. } else if (state == CHK_APP_FILE) {
  607. signals_disable();
  608. time_passed++;
  609. //if (time_for_farman > 0) time_for_farman -= 10;a
  610. //printf("time passed=%d | time next=%d\n",time_passed,time_next);
  611. if (time_next == time_passed) {
  612. fscanf(app_input,"%d",&init_core);
  613. num_apps++;
  614. fprintf(log_file,"idag_mask[%d] = %d\n",init_core,idag_mask[init_core]);
  615. if (idag_mask[init_core] == init_core){
  616. fprintf(log_file,"init_core %d is a controller. New init_core = %d\n",init_core,init_core+1);
  617. printf("init_core %d is a controller. New init_core = %d\n",init_core,init_core+1);
  618. init_core++;
  619. }
  620. printf("time = %d, id = %d\n",time_passed,app_cnt);
  621. gettimeofday(&time_val, NULL);
  622. cur_t = localtime(&time_val.tv_sec);
  623. fprintf(log_file, "[%d:%d:%d]: Initialising app_id=%d\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,app_cnt);
  624. fflush(log_file);
  625. fprintf(time_log, "[%d:%d:%d:%ld] app_id=%d\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,time_val.tv_usec,app_cnt);
  626. fflush(time_log);
  627. for (tmp_inter_list = core_inter_head[init_core]; tmp_inter_list != NULL; tmp_inter_list = tmp_inter_list->next)
  628. if (tmp_inter_list->type == INIT_APP) break;
  629. signals_enable();
  630. if (tmp_inter_list == NULL) {
  631. if (core_inter_head[init_core] == NULL){
  632. core_inter_head[init_core] = (inter_list *) malloc(sizeof(inter_list));
  633. core_inter_tail[init_core] = core_inter_head[init_core];
  634. } else {
  635. core_inter_tail[init_core]->next = (inter_list *) malloc(sizeof(inter_list));
  636. core_inter_tail[init_core] = core_inter_tail[init_core]->next;
  637. }
  638. core_inter_tail[init_core]->type = INIT_APP;
  639. /* FIXME scanned for old app inputs compatibility reasons but it is discarded */
  640. fscanf(app_input,"%d",&core_inter_tail[init_core]->data.new_app.array_size);
  641. core_inter_tail[init_core]->data.new_app.array_size = executed_app_array_size;
  642. fscanf(app_input,"%d",&core_inter_tail[init_core]->data.new_app.workld);
  643. core_inter_tail[init_core]->data.new_app.app_type = executed_app_type_number;
  644. /* FIXME for the time being, do not scan for app type. All apps are of the same type, read from argv */
  645. /*
  646. fscanf(app_input,"%d",&temp_scan);
  647. core_inter_tail[init_core]->data.new_app.app_type = temp_scan;
  648. if (temp_scan == 0){
  649. printf("MATRIX_MUL\n");
  650. }
  651. else if (temp_scan == 1){
  652. printf("SVM\n");
  653. }
  654. else if (temp_scan == 2){
  655. printf("FFT\n");
  656. }
  657. */
  658. core_inter_tail[init_core]->data.new_app.id = app_cnt++;
  659. core_inter_tail[init_core]->data.new_app.num_of_cores = 0;
  660. core_inter_tail[init_core]->next = NULL;
  661. /*printf("time = %d, id = %d, workld = %d\n",time_passed,core_inter_tail[init_core]->data.new_app.id,core_inter_tail[init_core]->data.new_app.workld);
  662. gettimeofday(&time_val, NULL);
  663. cur_t = localtime(&time_val.tv_sec);
  664. fprintf(log_file, "[%d:%d:%d]: Initialising app_id=%d\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,core_inter_tail[init_core]->data.new_app.id);
  665. fflush(log_file);
  666. fprintf(time_log, "[%d:%d:%d:%ld] app_id=%d\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,time_val.tv_usec,core_inter_tail[init_core]->data.new_app.id);
  667. fflush(time_log);*/
  668. if (core_inter_head[init_core]->next == NULL) {
  669. //kill(pid_num[init_core],SIG_INIT_APP);
  670. scc_kill(init_core, SIG_INIT_APP, core_inter_head[init_core]);
  671. my_stats.msg_count++;
  672. my_stats.distance += distance(node_id,init_core);
  673. }
  674. } else {
  675. if (init_pending_head == NULL){
  676. init_pending_head = (inter_list *) malloc(sizeof(inter_list));
  677. init_pending_tail = init_pending_head;
  678. } else {
  679. init_pending_tail->next = (inter_list *) malloc(sizeof(inter_list));
  680. init_pending_tail = init_pending_tail->next;
  681. }
  682. init_pending_tail->type = INIT_APP;
  683. /* FIXME scanned for old app inputs compatibility reasons but it is discarded */
  684. fscanf(app_input,"%d",&init_pending_tail->data.new_app.array_size);
  685. init_pending_tail->data.new_app.array_size = executed_app_array_size;
  686. fscanf(app_input,"%d",&init_pending_tail->data.new_app.workld);
  687. core_inter_tail[init_core]->data.new_app.app_type = executed_app_type_number;
  688. /* FIXME for the time being, do not scan for app type. All apps are of the same type, read from argv */
  689. init_pending_tail->data.new_app.id = app_cnt++;
  690. init_pending_tail->data.new_app.num_of_cores = init_core;
  691. init_pending_tail->next = NULL;
  692. }
  693. if (fscanf(app_input,"%d",&time_next) == EOF) {
  694. //state = USER_INPUT;
  695. state = IDLE_IDAG;
  696. time_passed = -1;
  697. } else {
  698. its.it_value.tv_sec = 0;
  699. its.it_value.tv_nsec = 10 * MS;
  700. if (timer_settime(timerid, 0, &its, NULL) == -1) printf("timer_settime error11\n");
  701. state = IDLE_CHK_APP_FILE;
  702. }
  703. } else {
  704. state = IDLE_CHK_APP_FILE;
  705. its.it_value.tv_sec = 0;
  706. its.it_value.tv_nsec = 10 * MS;
  707. if (timer_settime(timerid, 0, &its, NULL) == -1) printf("timer_settime error92\n");
  708. }
  709. signals_enable();
  710. } else if (state == USER_INPUT) {
  711. //while (num_apps_terminated != num_apps) {//pause(); my_cores_count
  712. // scc_pause();
  713. // scc_signals_check();
  714. //}
  715. for (k=0; k<15000; k++) {//pause(); my_cores_count
  716. scc_pause();
  717. scc_signals_check();
  718. }
  719. for (j=0; j<num_idags; j++) {
  720. one_idag = idag_id_arr[j];
  721. fprintf(log_file, "Sending to controller core %d\n",one_idag);
  722. if (one_idag != idag_id_arr[0]) {
  723. if (core_inter_head[one_idag] == NULL){
  724. core_inter_head[one_idag] = (inter_list *) malloc(sizeof(inter_list));
  725. core_inter_tail[one_idag] = core_inter_head[one_idag];
  726. } else {
  727. core_inter_tail[one_idag]->next = (inter_list *) malloc(sizeof(inter_list));
  728. core_inter_tail[one_idag] = core_inter_tail[one_idag]->next;
  729. }
  730. core_inter_tail[one_idag]->type = DEBUG_IDAG_REQ_DDS;
  731. core_inter_tail[one_idag]->data.reg.C = -1;
  732. core_inter_tail[one_idag]->data.reg.r = 0;
  733. core_inter_tail[one_idag]->next = NULL;
  734. if (core_inter_head[one_idag]->next == NULL)
  735. scc_kill(one_idag, SIG_REQ_DDS, core_inter_head[one_idag]);
  736. else {
  737. fprintf(log_file,"interaction in debug req dds is %d\n",core_inter_head[one_idag]->type);
  738. fflush(log_file);
  739. }
  740. } else {
  741. printf("Number of agents in region = %d\n",DDS_count);
  742. tmp_DDS = DDS;
  743. i=0;
  744. while (tmp_DDS != NULL) {
  745. printf("Agent no %d is %d with %d cores\n",i,tmp_DDS->agent_id,tmp_DDS->num_of_cores);
  746. tmp_DDS = tmp_DDS->next;
  747. i++;
  748. }
  749. }
  750. }
  751. while (idags_replied < num_idags - 1) {
  752. scc_pause();
  753. scc_signals_check();
  754. }
  755. fprintf(log_file,"killing\n");
  756. fflush(log_file);
  757. for (i=1; i<num_idags; i++){
  758. printf("i am killing %d\n",idag_id_arr[i]);
  759. one_core = idag_id_arr[i];
  760. if (core_inter_head[one_core] == NULL){
  761. core_inter_head[one_core] = (inter_list *) malloc(sizeof(inter_list));
  762. core_inter_tail[one_core] = core_inter_head[one_core];
  763. } else {
  764. core_inter_tail[one_core]->next = (inter_list *) malloc(sizeof(inter_list));
  765. core_inter_tail[one_core] = core_inter_tail[one_core]->next;
  766. fprintf(log_file,"I am still doing smth with idag %d interaction = %d\n",one_core,core_inter_head[one_core]->type);
  767. fflush(log_file);
  768. }
  769. core_inter_tail[one_core]->type = TERMINATION_STATS;
  770. core_inter_tail[one_core]->next = NULL;
  771. signals_disable();
  772. scc_kill(one_core, SIG_TERMINATE, core_inter_head[one_core]);
  773. signals_enable();
  774. }
  775. tmp_cores_list = my_cores;
  776. my_cores = my_cores->next;
  777. free(tmp_cores_list);
  778. inter_list tmp_inter_list;
  779. for (; my_cores != NULL; my_cores = my_cores->next) {
  780. tmp_cores_list = my_cores;
  781. one_core = my_cores->core_id;
  782. if (core_inter_head[one_core] == NULL){
  783. core_inter_head[one_core] = (inter_list *) malloc(sizeof(inter_list));
  784. core_inter_tail[one_core] = core_inter_head[one_core];
  785. } else {
  786. core_inter_tail[one_core]->next = (inter_list *) malloc(sizeof(inter_list));
  787. core_inter_tail[one_core] = core_inter_tail[one_core]->next;
  788. fprintf(log_file,"I am still doing smth with my node %d interaction = %d\n",one_core,core_inter_head[one_core]->type);
  789. fflush(log_file);
  790. }
  791. core_inter_tail[one_core]->type = TERMINATION_STATS;
  792. core_inter_tail[one_core]->next = NULL;
  793. //kill(pid_num[one_core], SIG_TERMINATE);
  794. signals_disable();
  795. scc_kill(one_core, SIG_TERMINATE, core_inter_head[one_core]);
  796. signals_enable();
  797. /* 8.7.2016 Paxos Stats */
  798. tmp_inter_list.next = NULL;
  799. tmp_inter_list.type = PAXOS_STATS_REQ;
  800. scc_kill(one_core, SIG_PAXOS_STATS_REQ, &tmp_inter_list);
  801. paxos_node_stats.msg_count++;
  802. paxos_node_stats.distance += distance(node_id,one_core);
  803. my_stats.msg_count++;
  804. my_stats.distance += distance(node_id,one_core);
  805. free(tmp_cores_list);
  806. }
  807. state = IDAG_ENDING;
  808. } else {
  809. printf("my_rtrm.c : Unknown state node_id = %d state = %d\n",node_id,state);
  810. state = IDLE_IDAG;
  811. }
  812. while (state == IDAG_ENDING) {
  813. scc_pause();
  814. scc_signals_check();
  815. if (stats_replied == my_cores_count+num_idags-2 && paxos_stats_replied == my_cores_count+num_idags-2) state = TERMINATED;
  816. }
  817. #ifdef PLAT_LINUX
  818. for (i=0; i<my_cores_count-1; i++)
  819. wait(NULL); //wait for children
  820. for (i=0; i<num_idags-1; i++)
  821. wait(NULL);//wait for the other idags
  822. #endif
  823. total_stats.msg_count += my_stats.msg_count;
  824. total_stats.message_size += my_stats.message_size;
  825. total_stats.distance += my_stats.distance;
  826. total_stats.app_turnaround += my_stats.app_turnaround;
  827. total_stats.comp_effort += my_stats.comp_effort;
  828. total_stats.cores_utilized += my_stats.cores_utilized;
  829. total_stats.times_accessed += my_stats.times_accessed;
  830. paxos_total_stats.msg_count += paxos_node_stats.msg_count;
  831. paxos_total_stats.fd_msg_count += paxos_node_stats.fd_msg_count;
  832. avg_cluster_util = (float) my_stats.cores_utilized / (my_stats.times_accessed * (my_cores_count-1));
  833. printf("I am %d with cores_utilized = %d times_accessed = %d my_cores_count = %d and avg_cluster_util = %0.2f\n",
  834. node_id,my_stats.cores_utilized,my_stats.times_accessed,my_cores_count,avg_cluster_util);
  835. fprintf(log_file,"cores_utilized = %d times_accessed = %d my_cores_count = %d and avg_cluster_util = %0.2f\n",
  836. my_stats.cores_utilized,my_stats.times_accessed,my_cores_count,avg_cluster_util);
  837. gettimeofday(&time_val, NULL);
  838. end_time_of_day = localtime(&time_val.tv_sec);
  839. fprintf(log_file,"================= DRTRM STATS ==================\n");
  840. fprintf(log_file,"Total message count = %lld\n",total_stats.msg_count);
  841. fprintf(log_file,"Total message size = %d\n",total_stats.message_size);
  842. fprintf(log_file,"Total distance = %d\n",total_stats.distance);
  843. fprintf(log_file,"Total app turnaround time = %d\n",total_stats.app_turnaround);
  844. fprintf(log_file,"Total computational effort = %d\n",total_stats.comp_effort);
  845. fprintf(log_file,"Total cores_utilized = %d\n",total_stats.cores_utilized);
  846. fprintf(log_file,"Total times_accessed = %d\n",total_stats.times_accessed);
  847. fprintf(log_file,"================= PAXOS STATS ==================\n");
  848. fprintf(log_file,"Total message count = %lld\n",paxos_total_stats.msg_count);
  849. printf("================= DRTRM STATS ==================\n");
  850. printf("Total message count = %lld\n",total_stats.msg_count);
  851. printf("Total message size = %d\n",total_stats.message_size);
  852. printf("Total distance = %d\n",total_stats.distance);
  853. printf("Total app turnaround time = %d\n",total_stats.app_turnaround);
  854. printf("Total computational effort = %d\n",total_stats.comp_effort);
  855. printf("Total cores_utilized = %d\n",total_stats.cores_utilized);
  856. printf("Total times_accessed = %d\n",total_stats.times_accessed);
  857. printf("================= PAXOS STATS ==================\n");
  858. printf("Start Time: [%d:%d:%d]\n",start_time_of_day.tm_hour,start_time_of_day.tm_min,start_time_of_day.tm_sec);
  859. printf("End Time: [%d:%d:%d]\n",end_time_of_day->tm_hour,end_time_of_day->tm_min,end_time_of_day->tm_sec);
  860. #ifdef PFD
  861. printf("Perfect Failure Detector\n");
  862. #elif tPFD
  863. printf("tweaked Perfect Failure Detector\n");
  864. #endif
  865. printf("PAXOS Total message count = %lld\n",paxos_total_stats.msg_count);\
  866. printf("Failure Detection Total message count = %d\n",paxos_total_stats.fd_msg_count);
  867. for (i=0; i<NUES; i++){
  868. free(core_inter_head[i]);
  869. free(core_inter_tail[i]);
  870. }
  871. free(core_inter_head);
  872. free(core_inter_tail);
  873. #ifdef PLAT_SCC
  874. RCCE_flag_free(&flag_data_written);
  875. RCCE_free((t_vcharp) sig_array);
  876. RCCE_free((t_vcharp) data_array);
  877. RCCE_free((t_vcharp) proposal_number_global);
  878. fclose(init_ack_file);
  879. #else
  880. shmdt(pid_num);
  881. for (i=0; i<NUES; i++) {
  882. sem_destroy(&flag_data_written[i]);
  883. sem_destroy(&scc_lock[i]);
  884. }
  885. shmdt(flag_data_written);
  886. shmdt(scc_lock);
  887. shmdt(manager_result_out);
  888. shmdt(index_bottom);
  889. #endif
  890. cur_time = time(NULL);
  891. cur_t = localtime(&cur_time);
  892. fprintf(log_file, "[%d:%d:%d]: I ended well\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec);
  893. fclose(log_file);
  894. printf("[%d:%d:%d]: I ended well\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec);
  895. //}
  896. }
  897. return 0;
  898. }
  /* Speedup is calculated correctly inside the Speedup function */
  900. int offer_cores(core_list *cores, app req_app, region req_reg, int *Offered_cores, int req_id) {
  901. int Of_cores_num=0, min_dist=0, cur_dist=0;
  902. float gain_total=0.1, base_receiver=0.0, base_giver=0.0, gain_receiver=0.0, loss_giver=0.0, share_giver=0.0, new_gain=0.0;
  903. int Cores_receiver = req_app.num_of_cores, Cores_giver = my_app.num_of_cores;
  904. core_list *tmp, *GreedyChoice;
  905. int offered_cnt=0, counted_cores=0;
  906. for (tmp=cores; tmp!=NULL; tmp=tmp->next) {
  907. if (distance(req_reg.C, tmp->core_id) <= req_reg.r) share_giver++;
  908. counted_cores++;
  909. if (tmp->offered_to != -1) offered_cnt++;
  910. fprintf(log_file,"Core %d is offered to %d\n",tmp->core_id,tmp->offered_to);
  911. }
  912. fprintf(log_file,"Proceeding\n");
  913. fflush(log_file);
  914. if (offered_cnt == (counted_cores-2) && my_idag != -1) {
  915. fprintf(log_file,"I did not give up my only not offered core\n");
  916. fflush(log_file);
  917. return 0;
  918. }
  919. share_giver = share_giver / (float) region_count(req_reg);
  920. if (my_idag == -1) {
  921. while (gain_total > 0.0) {
  922. gain_total = 0.0;
  923. GreedyChoice = NULL;//-1;
  924. min_dist = -1;
  925. base_giver = 0;
  926. tmp = cores->next;//very important!!! that way i avoid giving up my agent core
  927. while (tmp != NULL) {
  928. cur_dist = distance(req_reg.C, tmp->core_id);
  929. if (tmp->offered_to == -1 && cur_dist <= req_reg.r) {
  930. //Of_cores_num == 0 to be the first offered core
  931. //Cores_receiver == 0 to avoid providing the core to an non-initial core search
  932. if (low_voltage_core[tmp->core_id] && Of_cores_num == 0 && Cores_receiver == 0) {
  933. if ((Cores_receiver + Of_cores_num) == 0) {
  934. gain_receiver = 1000; //0 sto init_app
  935. } else if ((Cores_receiver + Of_cores_num) == 1) {
  936. gain_receiver = 100; //no worker cores
  937. } else { /* (Cores_receiver + Of_cores_num) > 1 */
  938. base_receiver = Speedup(req_app, Cores_receiver + Of_cores_num);
  939. //gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num + 1) - base_receiver);
  940. gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num + 1) - base_receiver);
  941. }
  942. loss_giver = 0;
  943. new_gain = gain_receiver - loss_giver;
  944. gain_total = new_gain;
  945. GreedyChoice = tmp;//->core_id;
  946. break;
  947. #ifdef LOW_VOLTAGE_ISLANDS_4
  948. } else if (low_voltage_core[tmp->core_id] && Of_cores_num == 0 && Cores_receiver == 1) {
  949. if (Cores_receiver == 0 && Of_cores_num == 0) gain_receiver = 1000; //0 sto init_app
  950. else gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num + 1) - base_receiver);
  951. loss_giver = 0;
  952. new_gain = gain_receiver - loss_giver;
  953. gain_total = new_gain;
  954. GreedyChoice = tmp;//->core_id;
  955. break;
  956. #endif
  957. } else if (low_voltage_core[tmp->core_id] == 0) {
  958. if ((Cores_receiver + Of_cores_num) == 0) {
  959. gain_receiver = 1000; //0 sto init_app
  960. } else if ((Cores_receiver + Of_cores_num) == 1) {
  961. gain_receiver = 100; //no worker cores
  962. } else { /* (Cores_receiver + Of_cores_num) > 1 */
  963. base_receiver = Speedup(req_app, Cores_receiver + Of_cores_num);
  964. gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num + 1) - base_receiver);
  965. }
  966. loss_giver = 0;
  967. new_gain = gain_receiver - loss_giver;
  968. if (new_gain > gain_total){
  969. gain_total = new_gain;
  970. min_dist = cur_dist;
  971. GreedyChoice = tmp;//->core_id;
  972. } else if (new_gain == gain_total && cur_dist < min_dist) {
  973. //printf("I am %d and i change offer to %d with cores %d->%d with distances %d->%d\n",
  974. // node_id,req_id,GreedyChoice->core_id,tmp->core_id,min_dist,cur_dist);
  975. min_dist = cur_dist;
  976. GreedyChoice = tmp;
  977. }
  978. }
  979. }
  980. tmp = tmp->next;
  981. }
  982. if (gain_total > 0.0) {
  983. Offered_cores[Of_cores_num++] = GreedyChoice->core_id;
  984. GreedyChoice->offered_to = req_id;
  985. }
  986. }
  987. }
  988. #ifndef GREEDY_MANAGER
  989. else {
  990. while (gain_total > 0.0) {
  991. gain_total = 0.0;
  992. GreedyChoice = NULL;//-1;
  993. min_dist = -1;
  994. base_giver = Speedup(my_app, Cores_giver - Of_cores_num);
  995. tmp = cores->next->next;//very important!!! that way i avoid giving up my only working core
  996. while (tmp != NULL) {
  997. if (core_inter_head[tmp->core_id] != NULL &&
  998. (core_inter_head[tmp->core_id]->type == INIT_WORK_NODE_PENDING || core_inter_head[tmp->core_id]->type == INIT_WORK_NODE)) {
  999. fprintf(log_file,"Core %d is about to start work type = %d\n",tmp->core_id,core_inter_head[tmp->core_id]->type);
  1000. fflush(log_file);
  1001. tmp = tmp->next;
  1002. } else {
  1003. cur_dist = distance(req_reg.C, tmp->core_id);
  1004. if (tmp->offered_to == -1 && cur_dist <= req_reg.r) {
  1005. if ((Cores_receiver + Of_cores_num) == 0) {
  1006. gain_receiver = 1000; //0 sto init_app
  1007. } else if ((Cores_receiver + Of_cores_num) == 1) {
  1008. gain_receiver = 100; //no worker cores -- in case I have only one worker core then I should not be here
  1009. } else { /* (Cores_receiver + Of_cores_num) > 1 */
  1010. base_receiver = Speedup(req_app, Cores_receiver + Of_cores_num);
  1011. gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num + 1) - base_receiver); /* + 1 is ommited due to workload convention */
  1012. }
  1013. loss_giver = base_giver - Speedup(my_app, Cores_giver - (Of_cores_num + 1));
  1014. new_gain = gain_receiver - loss_giver;
  1015. if (new_gain > gain_total){
  1016. gain_total = new_gain;
  1017. min_dist = cur_dist;
  1018. GreedyChoice = tmp;//->core_id;
  1019. } else if (new_gain == gain_total && cur_dist < min_dist) {
  1020. //printf("I am %d and i change offer to %d with cores %d->%d with distances %d->%d\n",
  1021. // node_id,req_id,GreedyChoice->core_id,tmp->core_id,min_dist,cur_dist);
  1022. min_dist = cur_dist;
  1023. GreedyChoice = tmp;
  1024. }
  1025. }
  1026. tmp = tmp->next;
  1027. }
  1028. }
  1029. if (gain_total > 0.0) {
  1030. Offered_cores[Of_cores_num++] = GreedyChoice->core_id;
  1031. GreedyChoice->offered_to = req_id;
  1032. }
  1033. }
  1034. }
  1035. #endif
  1036. fprintf(log_file,"I will offer %d cores\n",Of_cores_num);
  1037. fflush(log_file);
  1038. return Of_cores_num;
  1039. }
  1040. // int offer_cores_fft(core_list *cores, app req_app, region req_reg, int *Offered_cores, int req_id) {
  1041. // int Of_cores_num=0, min_dist=0, cur_dist=0;
  1042. // float gain_total=0.1,base_receiver=0.0,base_giver=0.0,gain_receiver=0.0,loss_giver=0.0,share_giver=0.0,new_gain=0.0;
  1043. // int Cores_receiver = req_app.num_of_cores, Workers_giver = my_app.num_of_cores-1;
  1044. // core_list *tmp, *GreedyChoice;
  1045. // int offered_cnt=0, counted_cores=0;
  1046. //
  1047. // for (tmp=cores; tmp!=NULL; tmp=tmp->next) {
  1048. // if (distance(req_reg.C, tmp->core_id) <= req_reg.r) share_giver++;
  1049. // counted_cores++;
  1050. // if (tmp->offered_to != -1) offered_cnt++;
  1051. // fprintf(log_file,"Core %d is offered to %d\n",tmp->core_id,tmp->offered_to);
  1052. // }
  1053. // fflush(log_file);
  1054. //
  1055. // if (offered_cnt == (counted_cores-2) && my_idag != -1) {
  1056. // fprintf(log_file,"I did not give up my only not offered core\n");
  1057. // fflush(log_file);
  1058. // return 0;
  1059. // }
  1060. // share_giver = share_giver / (float) region_count(req_reg);
  1061. //
  1062. // if (my_idag == -1) {
  1063. // while (gain_total > 0.0) {
  1064. // gain_total = 0.0;
  1065. // GreedyChoice = NULL;//-1;
  1066. // min_dist = -1;
  1067. // base_giver = 0;
  1068. // tmp = cores->next;//very important!!! that way i avoid giving up my agent core
  1069. //
  1070. // while (tmp != NULL) {
  1071. // cur_dist = distance(req_reg.C, tmp->core_id);
  1072. // if (tmp->offered_to == -1 && cur_dist <= req_reg.r) {
  1073. // //Of_cores_num == 0 to be the first offered core
  1074. // //Cores_receiver == 0 to avoid providing the core to an non-initial core search
  1075. // if (low_voltage_core[tmp->core_id] && Of_cores_num == 0 && Cores_receiver == 0) {
  1076. // if ((Cores_receiver + Of_cores_num) == 0) {
  1077. // gain_receiver = 1000; //0 sto init_app
  1078. // } else if ((Cores_receiver + Of_cores_num) == 1) {
  1079. // gain_receiver = 100; //no worker cores -- in case I have only one worker core then I should not be here
  1080. // } else { /* (Cores_receiver + Of_cores_num) > 1 */
  1081. // base_receiver = Speedup(req_app, Cores_receiver + Of_cores_num - 1);
  1082. // gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num) - base_receiver); /* + 1 is ommited due to workload convention */
  1083. // }
  1084. //
  1085. // loss_giver = 0;
  1086. // new_gain = gain_receiver - loss_giver;
  1087. // gain_total = new_gain;
  1088. // GreedyChoice = tmp;//->core_id;
  1089. // break;
  1090. // #ifdef LOW_VOLTAGE_ISLANDS_4
  1091. // } else if (low_voltage_core[tmp->core_id] && Of_cores_num == 0 && Cores_receiver == 1) {
  1092. // if (Cores_receiver == 0 && Of_cores_num == 0) gain_receiver = 1000; //0 sto init_app
  1093. // else gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num + 1) - base_receiver); /* +1 stands for the possibly offered core */
  1094. //
  1095. // loss_giver = 0;
  1096. // new_gain = gain_receiver - loss_giver;
  1097. // gain_total = new_gain;
  1098. // GreedyChoice = tmp;//->core_id;
  1099. // break;
  1100. // #endif
  1101. // } else if (low_voltage_core[tmp->core_id] == 0) {
  1102. // if ((Cores_receiver + Of_cores_num) == 0) {
  1103. // gain_receiver = 1000; //0 sto init_app
  1104. // } else if ((Cores_receiver + Of_cores_num) == 1) {
  1105. // gain_receiver = 100; //no worker cores -- in case I have only one worker core then I should not be here
  1106. // } else { /* (Cores_receiver + Of_cores_num) > 1 */
  1107. // base_receiver = Speedup(req_app, Cores_receiver + Of_cores_num - 1);
  1108. // gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num) - base_receiver); /* + 1 is ommited due to workload convention */
  1109. // }
  1110. //
  1111. // loss_giver = 0;
  1112. // new_gain = gain_receiver - loss_giver;
  1113. // if (new_gain > gain_total){
  1114. // gain_total = new_gain;
  1115. // min_dist = cur_dist;
  1116. // GreedyChoice = tmp;//->core_id;
  1117. // } else if (new_gain == gain_total && cur_dist < min_dist) {
  1118. // //printf("I am %d and i change offer to %d with cores %d->%d with distances %d->%d\n",
  1119. // // node_id,req_id,GreedyChoice->core_id,tmp->core_id,min_dist,cur_dist);
  1120. // min_dist = cur_dist;
  1121. // GreedyChoice = tmp;
  1122. // }
  1123. // }
  1124. // }
  1125. //
  1126. // tmp = tmp->next;
  1127. // }
  1128. //
  1129. // if (gain_total > 0.0) {
  1130. // Offered_cores[Of_cores_num++] = GreedyChoice->core_id;
  1131. // GreedyChoice->offered_to = req_id;
  1132. // }
  1133. // }
  1134. //
  1135. // /* FFT app requires only power of 2 exec cores plus its manager
  1136. // * I do not include higher than 5 because it will create no speedup
  1137. // */
  1138. // if ((Cores_receiver + Of_cores_num) == 4) {
  1139. // for (tmp = my_cores->next; tmp!=NULL; tmp=tmp->next) {
  1140. // if (tmp->core_id == Offered_cores[Of_cores_num-1]) {
  1141. // fprintf(log_file,"Abandoning offered core %d because FFT needs 2 cores\n",tmp->core_id);
  1142. // tmp->offered_to = -1;
  1143. // Of_cores_num--;
  1144. // break;
  1145. // }
  1146. // }
  1147. // }
  1148. // /*
  1149. // else if (Of_cores_num > 4) {
  1150. //
  1151. // }
  1152. // */
  1153. // }
  1154. // #ifndef GREEDY_MANAGER
  1155. // else {
  1156. //
  1157. // if (((Workers_giver == 4) && (Cores_receiver < 3)) || ((Workers_giver == 2) && (Cores_receiver < 2))) {
  1158. //
  1159. // while (gain_total > 0.0) {
  1160. // gain_total = 0.0;
  1161. // GreedyChoice = NULL;//-1;
  1162. // min_dist = -1;
  1163. // base_giver = Speedup(my_app, Workers_giver - Of_cores_num);
  1164. //
  1165. // tmp = cores->next->next;//very important!!! that way i avoid giving up my only working core
  1166. //
  1167. // while (tmp != NULL) {
  1168. // if (core_inter_head[tmp->core_id] != NULL &&
  1169. // (core_inter_head[tmp->core_id]->type == INIT_WORK_NODE_PENDING || core_inter_head[tmp->core_id]->type == INIT_WORK_NODE)) {
  1170. // fprintf(log_file,"Core %d is about to start work type = %d\n",tmp->core_id,core_inter_head[tmp->core_id]->type);
  1171. // fflush(log_file);
  1172. // tmp = tmp->next;
  1173. // } else {
  1174. // cur_dist = distance(req_reg.C, tmp->core_id);
  1175. // if (tmp->offered_to == -1 && cur_dist <= req_reg.r) {
  1176. // if ((Cores_receiver + Of_cores_num) == 0) {
  1177. // gain_receiver = 1000; //0 sto init_app
  1178. // } else if ((Cores_receiver + Of_cores_num) == 1) {
  1179. // gain_receiver = 100; //no worker cores -- in case I have only one worker core then I should not be here
  1180. // } else { /* (Cores_receiver + Of_cores_num) > 1 */
  1181. // base_receiver = Speedup(req_app, Cores_receiver + Of_cores_num - 1);
  1182. // gain_receiver = share_giver * (Speedup(req_app, Cores_receiver + Of_cores_num) - base_receiver); /* + 1 is ommited due to workload convention */
  1183. // }
  1184. //
  1185. // loss_giver = base_giver - Speedup(my_app, Workers_giver - Of_cores_num - 1);
  1186. //
  1187. // new_gain = gain_receiver - loss_giver;
  1188. // if (new_gain > gain_total){
  1189. // gain_total = new_gain;
  1190. // min_dist = cur_dist;
  1191. // GreedyChoice = tmp;//->core_id;
  1192. // } else if (new_gain == gain_total && cur_dist < min_dist) {
  1193. // //printf("I am %d and i change offer to %d with cores %d->%d with distances %d->%d\n",
  1194. // // node_id,req_id,GreedyChoice->core_id,tmp->core_id,min_dist,cur_dist);
  1195. // min_dist = cur_dist;
  1196. // GreedyChoice = tmp;
  1197. // }
  1198. // }
  1199. //
  1200. // tmp = tmp->next;
  1201. // }
  1202. // }
  1203. //
  1204. // if (gain_total > 0.0) {
  1205. // Offered_cores[Of_cores_num++] = GreedyChoice->core_id;
  1206. // GreedyChoice->offered_to = req_id;
  1207. // }
  1208. // }
  1209. //
  1210. // if ((Cores_receiver + Of_cores_num) == 4) {
  1211. // for (tmp = my_cores->next; tmp!=NULL; tmp=tmp->next) {
  1212. // if (tmp->core_id == Offered_cores[Of_cores_num-1]) {
  1213. // fprintf(log_file,"Abandoning offered core %d because FFT needs 2 cores\n",tmp->core_id);
  1214. // tmp->offered_to = -1;
  1215. // Of_cores_num--;
  1216. // break;
  1217. // }
  1218. // }
  1219. // }
  1220. //
  1221. // }
  1222. // }
  1223. // #endif
  1224. //
  1225. // /* FFT app requires only power of 2 exec cores plus its manager */
  1226. // /*
  1227. // if (my_idag == -1) {
  1228. //
  1229. // if (executed_app == FFT) {
  1230. // if (Of_cores_num == 3) {
  1231. // for (tmp = my_cores->next; tmp!=NULL; tmp=tmp->next) {
  1232. // if (tmp->core_id == Offered_cores[Of_cores_num-1]) {
  1233. // fprintf(log_file,"Abandoning offered core %d because FFT needs 2 cores\n",tmp->core_id);
  1234. // tmp->offered_to = -1;
  1235. // Of_cores_num--;
  1236. // break;
  1237. // }
  1238. // }
  1239. // } else if (Of_cores_num > 4) {
  1240. //
  1241. // }
  1242. // }
  1243. // */
  1244. // return Of_cores_num;
  1245. // }
  1246. void send_next_signal(inter_list *head, int node_num){
  1247. inter_list *tmp_inter_list = NULL;
  1248. signals_disable();
  1249. if (head->type == IDAG_FIND_IDAGS ||
  1250. head->type == SELFOPT_IDAG_FIND_IDAGS ||
  1251. head->type == REP_IDAG_FIND_IDAGS ||
  1252. head->type == SELFOPT_IDAG_FIND_IDAGS_PENDING ||
  1253. head->type == IDAG_FIND_IDAGS_PENDING){
  1254. scc_kill(node_num, SIG_IDAG_FIND_IDAGS, head);
  1255. }else if (head->type == IDAG_REQ_DDS ||
  1256. head->type == SELFOPT_IDAG_REQ_DDS ||
  1257. head->type == DEBUG_IDAG_REQ_DDS ||
  1258. head->type == SELFOPT_IDAG_REQ_DDS_PENDING ||
  1259. head->type == IDAG_REQ_DDS_PENDING){
  1260. scc_kill(node_num, SIG_REQ_DDS, head);
  1261. }else if (head->type == AGENT_REQ_CORES ||
  1262. head->type == SELFOPT_REQ_CORES ||
  1263. head->type == AGENT_REQ_CORES_PENDING ||
  1264. head->type == SELFOPT_REQ_CORES_PENDING){
  1265. scc_kill(node_num, SIG_REQ_CORES, head);
  1266. }else if (head->type == IDAG_ADD_CORES_DDS){
  1267. scc_kill(node_num, SIG_ADD_CORES_DDS, head);
  1268. }else if (head->type == IDAG_REM_CORES_DDS){
  1269. scc_kill(node_num, SIG_REM_CORES_DDS, head);
  1270. }else if (head->type == INIT_WORK_NODE ||
  1271. head->type == APPOINT_WORK_NODE){
  1272. scc_kill(node_num, SIG_APPOINT_WORK, head);
  1273. }else if (head->type == REMOVE_APP){
  1274. scc_kill(node_num, SIG_FINISH, head);
  1275. }else if (head->type == INIT_APP){
  1276. scc_kill(node_num, SIG_INIT_APP, head);
  1277. }else if (head->type == REP_AGENT_REQ_CORES){
  1278. scc_kill(node_num, SIG_REQ_CORES, head);
  1279. }else if (head->type == INIT_AGENT){
  1280. scc_kill(node_num, SIG_INIT_AGENT, head);
  1281. }else if (head->type == APPOINT_WORK_NODE_PENDING){
  1282. fprintf(log_file,"\nI have unpredictable interaction with node %d with interaction = %d\n",node_num,head->type);
  1283. tmp_inter_list = core_inter_head[node_num];
  1284. core_inter_head[node_num] = core_inter_head[node_num]->next;
  1285. if (core_inter_head[node_num] == NULL){
  1286. core_inter_tail[node_num] = NULL;
  1287. }else{
  1288. send_next_signal(core_inter_head[node_num], node_num);
  1289. }
  1290. free(tmp_inter_list);
  1291. }else if (head->type == DECLARE_INIT_AVAILABILITY) {
  1292. scc_kill(node_num, SIG_INIT_APP,head);
  1293. tmp_inter_list = core_inter_head[node_num];
  1294. core_inter_head[node_num] = core_inter_head[node_num]->next;
  1295. if (core_inter_head[node_num] == NULL) core_inter_tail[node_num] = NULL;
  1296. else send_next_signal(core_inter_head[node_num], node_num);
  1297. free(tmp_inter_list);
  1298. }
  1299. my_stats.msg_count++;
  1300. my_stats.distance += distance(node_id,node_num);
  1301. signals_enable();
  1302. }