/* paxos_signal_handlers.c -- signal handlers for the Paxos-based election of a replacement core. */
#include "paxos_signal_handlers.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "my_rtrm.h"
#include "libfunctions.h"
//#include "noc_functions.h"
#include "sig_aux.h"
#include "scc_signals.h"
#include "controller_core.h"
#include "common_core.h"
#include "idag_defs.h"
#include "signal_handlers.h"
#include "variables.h"
#include "macros.h"
#include "structs.h"
/* Id of the core believed to have failed; -1 until one is known. The handlers
 * index idag_mask[] with it to tell a controller failure from a manager failure. */
int faulty_core = -1;
/* NOTE(review): not referenced in the visible part of this file -- confirm use. */
int first_time = 0;
/* NOTE(review): not referenced in the visible part of this file -- confirm use. */
int pending_workload[2] = {-1,-1};
/* Proposal number this core took from the shared counter in sig_PAXOS_INIT_handler. */
int proposal_number_personal;
/* Set to PAXOS_ACTIVE when sig_PAXOS_INIT_handler runs. */
core_states paxos_state;
/* Acceptor-side bookkeeping (highest_proposed_n, highest_acc_n, highest_acc_value);
 * every field starts at -1, which the handlers treat as "nothing seen yet". */
acceptor_var acceptor_vars = {-1,-1,-1};
/* Proposer-side bookkeeping (highest replied number/value, promise and accept
 * counters, per-core score table). Presumably the initializer order matches
 * that field order -- verify against the proposer_var declaration. */
proposer_var proposer_vars = {-1,-1,0,0,NULL};
/* NOTE(review): not referenced in the visible part of this file -- confirm use. */
int fail_flag = 0;
/* NOTE(review): not referenced in the visible part of this file -- confirm use. */
struct timeval fail_time_val;
/* Private copies of the scenario directory and number, filled by initialize_PAXOS_data(). */
static char local_scen_directory[SCEN_DIR_SIZE], local_scen_num[SCEN_NUM_SIZE];
#ifdef PLAT_SCC
/* SCC-only scratch variables; not referenced in the visible part of this file. */
char error_str[64];
int error, str_len, sig_array_local[LINE_SIZE];
#endif
  28. int leader_preference(){
  29. coworkers_list *tmp_cowork_list;
  30. int num_of_coworkers = 0;
  31. if (state == IDLE_CORE){
  32. return 60;
  33. }
  34. else if (state == INIT_MANAGER ||
  35. state == INIT_MANAGER_SEND_OFFERS ||
  36. state == IDLE_INIT_MAN ||
  37. state == INIT_MAN_CHK_OFFERS){
  38. return 50;
  39. }
  40. else if (state == WORKING_NODE && pending_state == NO_PENDING_STATE){
  41. if (coworkers != NULL)
  42. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  43. if (num_of_coworkers == 1)
  44. return 1;
  45. else
  46. return (40 + num_of_coworkers);
  47. }
  48. else if (state == WORKING_NODE && (
  49. pending_state == INIT_MANAGER ||
  50. pending_state == INIT_MANAGER_SEND_OFFERS ||
  51. pending_state == IDLE_INIT_MAN ||
  52. pending_state == INIT_MAN_CHK_OFFERS)){
  53. if (coworkers != NULL)
  54. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  55. if (num_of_coworkers == 1)
  56. return 1;
  57. else{
  58. return (30+num_of_coworkers);
  59. }
  60. }
  61. else if (state == WORKING_NODE && (
  62. pending_state == IDLE_AGENT ||
  63. pending_state == IDLE_AGENT_WAITING_OFF ||
  64. pending_state == AGENT_INIT_STATE ||
  65. pending_state == AGENT_SELF_OPT ||
  66. pending_state == AGENT_SELF_CHK_OFFERS ||
  67. pending_state == AGENT_ENDING ||
  68. pending_state == IDAG_ENDING ||
  69. pending_state == AGENT_ZOMBIE ||
  70. pending_state == AGENT_INIT_APP_INIT ||
  71. pending_state == AGENT_INIT_CHK_OFFERS ||
  72. pending_state == AGENT_INIT_IDLE_INIT ||
  73. pending_state == IDLE_INIT_IDLE_AGENT ||
  74. pending_state == IDLE_INIT_AGENT_SELFOPT ||
  75. pending_state == INIT_CHK_OFFERS_IDLE_AGENT ||
  76. pending_state == INIT_CHK_OFFERS_SELFOPT)){
  77. if (coworkers != NULL)
  78. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  79. if (num_of_coworkers == 1)
  80. return 1;
  81. else{
  82. return (20+num_of_coworkers);
  83. }
  84. }
  85. else if (state == IDLE_AGENT ||
  86. state == IDLE_AGENT_WAITING_OFF ||
  87. state == AGENT_INIT_STATE ||
  88. state == AGENT_SELF_OPT ||
  89. state == AGENT_SELF_CHK_OFFERS ||
  90. state == AGENT_ENDING ||
  91. state == IDAG_ENDING ||
  92. state == AGENT_ZOMBIE){
  93. return 10;
  94. }
  95. return 0;
  96. }
  97. void initialize_PAXOS_data (char scen_directory[SCEN_DIR_SIZE], char scen_num[SCEN_NUM_SIZE]) {
  98. //printf("Initializing Paxos data...\n");
  99. strcpy(local_scen_directory, scen_directory);
  100. strcpy(local_scen_num, scen_num);
  101. }
  102. void sig_PAXOS_INIT_handler(){
  103. int i;
  104. int num_of_coworkers = 0;
  105. inter_list tmp_inter_list;
  106. coworkers_list *tmp_cowork_list;
  107. handler_Enter(-1, "sig_PAXOS_INIT_handler");
  108. paxos_state = PAXOS_ACTIVE;
  109. #ifdef PLAT_SCC
  110. RCCE_wait_until(proposal_number_lock,RCCE_FLAG_UNSET);
  111. RCCE_flag_write(&proposal_number_lock,RCCE_FLAG_SET,node_id);
  112. proposal_number_personal = *proposal_number_global;
  113. *proposal_number_global += 1;
  114. RCCE_flag_write(&proposal_number_lock,RCCE_FLAG_UNSET,node_id);
  115. #else
  116. /* Pick up my personal proposal number */
  117. sem_wait(proposal_number_lock);
  118. proposal_number_personal = *proposal_number_global;
  119. *proposal_number_global += 1;
  120. sem_post(proposal_number_lock);
  121. #endif
  122. fprintf(log_file, "\t\tI am %d, and my proposal number is %d\n",node_id,proposal_number_personal);
  123. tmp_inter_list.next = NULL;
  124. proposer_vars.core_states = (int *)malloc((X_max*Y_max)*sizeof(int));
  125. for (i = 0; i < X_max*Y_max; i++){
  126. proposer_vars.core_states[i] = -1;
  127. }
  128. /* Case where controller fails */
  129. if (idag_mask[faulty_core] == faulty_core){
  130. for (i = 0; i < X_max*Y_max; i++){
  131. if ((i != my_idag) && (idag_mask[i] == my_idag)){
  132. tmp_inter_list.data.proposal_number = proposal_number_personal;
  133. tmp_inter_list.type = PREPARE_REQUEST;
  134. scc_kill(i,SIG_PREPARE_REQUEST,&tmp_inter_list);
  135. }
  136. }
  137. }
  138. /* Case where manager fails */
  139. else{
  140. fprintf(log_file,"\t\tcoworkers List: ");
  141. FOR_MY_COWORKERS_LIST{
  142. fprintf(log_file,"%d --> ", tmp_cowork_list->core_id);
  143. }
  144. printf("\n");
  145. FOR_MY_COWORKERS_LIST{
  146. num_of_coworkers++;
  147. tmp_inter_list.data.proposal_number = proposal_number_personal;
  148. tmp_inter_list.type = PREPARE_REQUEST;
  149. scc_kill(tmp_cowork_list->core_id, SIG_PREPARE_REQUEST, &tmp_inter_list);
  150. }
  151. fprintf(log_file,"num_of_coworkers=%d\n",num_of_coworkers);
  152. printf("num_of_coworkers=%d\n",num_of_coworkers);
  153. }
  154. handler_Exit(-1, "sig_PAXOS_INIT_handler");
  155. return;
  156. }
/*
 * If a core does not receive a signal for a period of time, it assumes the
 * controller has died and sends a PREPARE_REQUEST signal.
 */
  160. void sig_PREPARE_REQUEST_handler(int sender_id){
  161. int received_proposal_number = sig_read_ar[2];
  162. inter_list tmp_inter_list;
  163. int score;
  164. handler_Enter(sender_id,"sig_PREPARE_REQUEST_handler");
  165. fprintf(log_file, "\t\tReceived SIG_PREPARE_REQUEST from %d with Proposal Number %d\n", sender_id, received_proposal_number);
  166. /* The acceptor has seen a higher proposal number */
  167. /* Prepare Request Rejected */
  168. if (acceptor_vars.highest_proposed_n > received_proposal_number){
  169. fprintf(log_file, "\t\tI have seen a higher proposal number request --> REJECTED!\n");
  170. return;
  171. }
  172. /* The acceptor has not seen another prepare request before so it promises never to accept a prepare request with proposal number
  173. lower than this one. Replies with PREPARE_ACCEPT_NO_PREVIOUS */
  174. /* UPDATE 13.10.2016 -- Also send my state in order to elect core with minimum utilization */
  175. else if (acceptor_vars.highest_proposed_n == -1){
  176. score = leader_preference();
  177. printf("My score is %d and my state %s\n",score, id2string(state));
  178. fprintf(log_file, "\t\tI haven't accepted any values yet!\n");
  179. acceptor_vars.highest_proposed_n = received_proposal_number;
  180. fprintf(log_file, "\t\tUpdated: acceptor_vars.highest_proposed_n = %d\n", acceptor_vars.highest_proposed_n);
  181. tmp_inter_list.next = NULL;
  182. tmp_inter_list.type = PREPARE_ACCEPT_NO_PREVIOUS;
  183. tmp_inter_list.data.accepted_values[0] = -1;
  184. tmp_inter_list.data.accepted_values[1] = -1;
  185. tmp_inter_list.data.accepted_values[2] = score;
  186. scc_kill(sender_id,SIG_PREPARE_ACCEPT_NO_PREVIOUS,&tmp_inter_list);
  187. }
  188. /* Proposal number received > Highest proposal number seen */
  189. /* Acceptor replies with highest proposal number seen and its value if any and updates the values*/
  190. else if (acceptor_vars.highest_proposed_n < received_proposal_number){
  191. fprintf(log_file, "\t\treceived_proposal_number higher than highest_proposed_n\n");
  192. acceptor_vars.highest_proposed_n = received_proposal_number;
  193. fprintf(log_file, "\t\tUpdated: acceptor_vars.highest_proposed_n = %d\n", acceptor_vars.highest_proposed_n);
  194. /*If the core has accepted any value it sends that back along with the proposal number of this value*/
  195. if (acceptor_vars.highest_acc_value == -1){
  196. fprintf(log_file, "\t\tI haven't accepted any values yet!\n");
  197. tmp_inter_list.next = NULL;
  198. tmp_inter_list.type = PREPARE_ACCEPT_NO_PREVIOUS;
  199. tmp_inter_list.data.accepted_values[0] = -1;
  200. tmp_inter_list.data.accepted_values[1] = -1;
  201. tmp_inter_list.data.accepted_values[2] = score;
  202. scc_kill(sender_id,SIG_PREPARE_ACCEPT_NO_PREVIOUS,&tmp_inter_list);
  203. }else{
  204. fprintf(log_file, "\t\tI have already accepted the value %d!\n", acceptor_vars.highest_acc_value);
  205. tmp_inter_list.next = NULL;
  206. tmp_inter_list.type = PREPARE_ACCEPT;
  207. tmp_inter_list.data.accepted_values[0] = acceptor_vars.highest_acc_n;
  208. tmp_inter_list.data.accepted_values[1] = acceptor_vars.highest_acc_value;
  209. tmp_inter_list.data.accepted_values[2] = score;
  210. scc_kill(sender_id, SIG_PREPARE_ACCEPT, &tmp_inter_list);
  211. }
  212. }
  213. handler_Exit(sender_id,"sig_PREPARE_REQUEST_handler");
  214. return;
  215. }
  216. void sig_PREPARE_ACCEPT_NO_PREVIOUS_handler(int sender_id){
  217. int k,i;
  218. int num_of_coworkers;
  219. int replied_score = sig_read_ar[4];
  220. int max_score,index;
  221. inter_list tmp_inter_list;
  222. coworkers_list *tmp_cowork_list;
  223. handler_Enter(sender_id,"sig_PREPARE_ACCEPT_NO_PREVIOUS_handler");
  224. proposer_vars.core_states[sender_id] = replied_score;
  225. proposer_vars.cores_promised++;
  226. fprintf(log_file, "\t\t Updated state of %d to %s\n", sender_id, id2string(proposer_vars.core_states[sender_id]));
  227. tmp_inter_list.next = NULL;
  228. /* Case where controller fails */
  229. if (idag_mask[faulty_core] == faulty_core){
  230. /*Received reply from majority */
  231. if (proposer_vars.cores_promised >= majority(CLUSTER_SIZE)){
  232. if (PREPARE_ACCEPT_SENT == 0){
  233. for (i=0; i < X_max*Y_max;i++){
  234. if (proposer_vars.core_states[i] > max_score){
  235. max_score = proposer_vars.core_states[i];
  236. index = i;
  237. }
  238. }
  239. PREPARE_ACCEPT_SENT = 1;
  240. fprintf(log_file,"\t\t-------- LAST KNOWN CORE STATES --------\n");
  241. FOR_NUES{
  242. fprintf(log_file,"\t\t%d --> %s\n",k,id2string(proposer_vars.core_states[k]));
  243. }
  244. fprintf(log_file,"\t\t----------------------------------------\n");
  245. fprintf(log_file,"\n");
  246. //TODO Decide proposing core based on core states
  247. fprintf(log_file, "\t\tCONTROLLER CASE ; RECEIVED ACCEPT FROM MAJORITY!\n");
  248. for (k = 0; k < X_max*Y_max; k++){
  249. /* I send SIG_ACCEPT_REQUEST to cores inside my cluster */
  250. if ((k != my_idag) && (idag_mask[k] == my_idag)){
  251. tmp_inter_list.type = ACCEPT_REQUEST;
  252. tmp_inter_list.data.accepted_values[PROP_NW] = proposal_number_personal;
  253. /*The acceptor hasn't received any reply with accepted value so it will propose itself for leader*/
  254. if (proposer_vars.highest_replied_value == -1){
  255. /*if (im_manager() != 1){
  256. fprintf(log_file,"\t\tI am not a manager. I propose myself as the new controller\n");
  257. proposer_vars.highest_replied_value = node_id;
  258. tmp_inter_list.data.accepted_values[VALUE_W] = proposer_vars.highest_replied_value;
  259. }else{
  260. fprintf(log_file,"\t\tI am a manager. I propose a worker of mine as the new controller.That is %d\n",my_cores->next->core_id);
  261. tmp_inter_list.data.accepted_values[VALUE_W] = my_cores->next->core_id;
  262. }*/
  263. /* RANDOM PAXOS */
  264. //proposer_vars.highest_replied_value = node_id;
  265. //tmp_inter_list.data.accepted_values[VALUE_W] = node_id;
  266. /****************/
  267. /* MODIFIED PAXOS */
  268. proposer_vars.highest_replied_value = index;
  269. tmp_inter_list.data.accepted_values[VALUE_W] = index;
  270. /****************/
  271. }else{
  272. tmp_inter_list.data.accepted_values[VALUE_W] = proposer_vars.highest_replied_value;
  273. }
  274. scc_kill(k,SIG_ACCEPT_REQUEST,&tmp_inter_list);
  275. }
  276. }
  277. }
  278. }
  279. /* Case where manager fails */
  280. }else{
  281. num_of_coworkers = 0;
  282. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  283. fprintf(log_file,"num_of_coworkers=%d\n",num_of_coworkers);
  284. if (proposer_vars.cores_promised > majority(num_of_coworkers)){
  285. if (PREPARE_ACCEPT_SENT == 0){
  286. PREPARE_ACCEPT_SENT = 1;
  287. fprintf(log_file, "\t\tMANAGER CASE ; RECEIVED ACCEPT FROM MAJORITY!\n");
  288. FOR_MY_COWORKERS_LIST{
  289. tmp_inter_list.type = ACCEPT_REQUEST;
  290. tmp_inter_list.data.accepted_values[PROP_NW] = proposal_number_personal;
  291. if (proposer_vars.highest_replied_value == -1)
  292. proposer_vars.highest_replied_value = node_id;
  293. tmp_inter_list.data.accepted_values[VALUE_W] = proposer_vars.highest_replied_value;
  294. scc_kill(tmp_cowork_list->core_id,SIG_ACCEPT_REQUEST,&tmp_inter_list);
  295. }
  296. }
  297. }
  298. }
  299. handler_Exit(sender_id,"sig_PREPARE_ACCEPT_NO_PREVIOUS_handler");
  300. return;
  301. }
/*
 * The other cores accept the proposal only if they have not seen a higher
 * proposal number.
 */
  305. void sig_PREPARE_ACCEPT_handler(int sender_id){
  306. int replied_proposal_number = sig_read_ar[PROP_NR];
  307. int replied_value = sig_read_ar[VALUE_R];
  308. int k;
  309. int num_of_coworkers;
  310. inter_list tmp_inter_list;
  311. coworkers_list *tmp_cowork_list;
  312. handler_Enter(sender_id, "sig_PREPARE_ACCEPT_handler");
  313. proposer_vars.core_states[sender_id] = sig_read_ar[4];
  314. proposer_vars.cores_promised++;
  315. fprintf(log_file, "\t\t Updated state of %d to %s\n", sender_id, id2string(proposer_vars.core_states[sender_id]));
  316. tmp_inter_list.next = NULL;
  317. /*Save the values if proposal number is higher than the highest replied so far*/
  318. if (replied_proposal_number > proposer_vars.highest_replied_n){
  319. proposer_vars.highest_replied_n = replied_proposal_number;
  320. proposer_vars.highest_replied_value = replied_value;
  321. fprintf(log_file, "\t\t Updated proposer_vars.highest_replied_n = %d\n", proposer_vars.highest_replied_n);
  322. fprintf(log_file, "\t\t Updated proposer_vars.highest_replied_value = %d\n", proposer_vars.highest_replied_value);
  323. }
  324. /* Case where controller fails */
  325. if (idag_mask[faulty_core] == faulty_core){
  326. if (proposer_vars.cores_promised >= majority(CLUSTER_SIZE)){
  327. if (PREPARE_ACCEPT_SENT == 0){
  328. PREPARE_ACCEPT_SENT = 1;
  329. fprintf(log_file,"\t\t-------- LAST KNOWN CORE STATES --------\n");
  330. FOR_NUES{
  331. fprintf(log_file,"\t\t%d --> %s\n",k,id2string(proposer_vars.core_states[k]));
  332. }
  333. fprintf(log_file,"\n");
  334. fprintf(log_file,"\t\t----------------------------------------\n");
  335. fprintf(log_file, "\t\tRECEIVED ACCEPT FROM MAJORITY!\n");
  336. for (k = 0; k < X_max*Y_max; k++){
  337. if ((k != my_idag) && (idag_mask[k] == my_idag)){
  338. tmp_inter_list.type = ACCEPT_REQUEST;
  339. tmp_inter_list.data.accepted_values[PROP_NW] = proposal_number_personal;
  340. tmp_inter_list.data.accepted_values[VALUE_W] = proposer_vars.highest_replied_value;
  341. scc_kill(k,SIG_ACCEPT_REQUEST,&tmp_inter_list);
  342. }
  343. }
  344. }
  345. }
  346. /* Case where manager fails */
  347. }else{
  348. num_of_coworkers = 0;
  349. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  350. fprintf(log_file,"num_of_coworkers=%d\n",num_of_coworkers);
  351. if (proposer_vars.cores_promised >= majority(num_of_coworkers)){
  352. if (PREPARE_ACCEPT_SENT == 0){
  353. PREPARE_ACCEPT_SENT = 1;
  354. fprintf(log_file, "\t\tRECEIVED ACCEPT FROM MAJORITY!\n");
  355. FOR_MY_COWORKERS_LIST{
  356. tmp_inter_list.type = ACCEPT_REQUEST;
  357. tmp_inter_list.data.accepted_values[PROP_NW] = proposal_number_personal;
  358. tmp_inter_list.data.accepted_values[VALUE_W] = proposer_vars.highest_replied_value;
  359. scc_kill(tmp_cowork_list->core_id,SIG_ACCEPT_REQUEST,&tmp_inter_list);
  360. }
  361. }
  362. }
  363. }
  364. handler_Exit(sender_id, "sig_PREPARE_ACCEPT_handler");
  365. return;
  366. }
  367. void sig_ACCEPT_REQUEST_handler(int sender_id){
  368. int proposer_proposal_number = sig_read_ar[PROP_NR];
  369. inter_list tmp_inter_list;
  370. handler_Enter(sender_id,"sig_ACCEPT_REQUEST_handler");
  371. tmp_inter_list.next = NULL;
  372. if (proposer_proposal_number < acceptor_vars.highest_proposed_n){
  373. fprintf(log_file, "\t\t ACCEPT_REQUEST proposal number lower than highest_proposed_n %d -> REJECTED\n", acceptor_vars.highest_proposed_n);
  374. }else{
  375. acceptor_vars.highest_acc_n = proposer_proposal_number;
  376. fprintf(log_file, "\t\t Updated acceptor_vars.highest_acc_n = %d\n", acceptor_vars.highest_acc_n);
  377. acceptor_vars.highest_acc_value = sig_read_ar[VALUE_R];
  378. fprintf(log_file, "\t\t Updated acceptor_vars.highest_acc_value = %d\n", acceptor_vars.highest_acc_value);
  379. acceptor_vars.highest_proposed_n = proposer_proposal_number;
  380. fprintf(log_file, "\t\t Updated acceptor_vars.highest_proposed_n = %d\n", acceptor_vars.highest_proposed_n);
  381. tmp_inter_list.type = ACCEPTED;
  382. tmp_inter_list.data.accepted_values[VALUE_W] = acceptor_vars.highest_acc_value;
  383. scc_kill(sender_id, SIG_ACCEPTED, &tmp_inter_list);
  384. }
  385. handler_Exit(sender_id,"sig_ACCEPT_REQUEST_handler");
  386. return;
  387. }
  388. void sig_ACCEPTED_handler(int sender_id){
  389. int k;
  390. int received_value = sig_read_ar[VALUE_R];
  391. int num_of_coworkers;
  392. inter_list tmp_inter_list;
  393. coworkers_list *tmp_cowork_list;
  394. handler_Enter(sender_id,"sig_ACCEPTED_handler");
  395. proposer_vars.cores_accepted++;
  396. tmp_inter_list.next = NULL;
  397. /****************************************************************/
  398. /***************** Case where controller failed *****************/
  399. /****************************************************************/
  400. if (idag_mask[faulty_core] == faulty_core && idag_mask[faulty_core] != -1){
  401. if ((proposer_vars.cores_accepted >= majority(CLUSTER_SIZE)) && (SIG_LEARN_SENT == 0)){
  402. fprintf(log_file, "\t\tRECEIVED ACCEPTED FROM MAJORITY!\n");
  403. SIG_LEARN_SENT = 1;
  404. tmp_inter_list.type = LEARN;
  405. tmp_inter_list.data.learn_ack_info[VALUE_W] = received_value;
  406. tmp_inter_list.data.learn_ack_info[PREV_CW] = faulty_core;
  407. scc_kill(node_id,SIG_LEARN,&tmp_inter_list);
  408. for (k = 0; k < X_max*Y_max; k++){
  409. if ((k != my_idag) && (k != node_id)){
  410. tmp_inter_list.type = LEARN;
  411. tmp_inter_list.data.learn_ack_info[VALUE_W] = received_value;
  412. tmp_inter_list.data.learn_ack_info[PREV_CW] = faulty_core;
  413. scc_kill(k,SIG_LEARN,&tmp_inter_list);
  414. }
  415. }
  416. }
  417. /****************************************************************/
  418. /******************* Case where manager failed ******************/
  419. /****************************************************************/
  420. }else{
  421. num_of_coworkers=0;
  422. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  423. printf("num_of_coworkers=%d\n",num_of_coworkers);
  424. if ((proposer_vars.cores_accepted >= majority(num_of_coworkers)) && (SIG_LEARN_SENT == 0)){
  425. fprintf(log_file, "\t\t MANAGER CASE ; RECEIVED ACCEPTED FROM MAJORITY!\n");
  426. SIG_LEARN_SENT = 1;
  427. tmp_inter_list.type = LEARN;
  428. tmp_inter_list.data.learn_ack_info[VALUE_W] = received_value;
  429. tmp_inter_list.data.learn_ack_info[PREV_CW] = faulty_core;
  430. scc_kill(node_id,SIG_LEARN,&tmp_inter_list);
  431. for (k = 0; k < X_max*Y_max; k++){
  432. if (k != node_id && k != faulty_core){
  433. tmp_inter_list.type = LEARN;
  434. tmp_inter_list.data.learn_ack_info[VALUE_W] = received_value;
  435. tmp_inter_list.data.learn_ack_info[PREV_CW] = faulty_core;
  436. scc_kill(k,SIG_LEARN,&tmp_inter_list);
  437. }
  438. }
  439. }
  440. }
  441. handler_Exit(sender_id,"sig_ACCEPTED_handler");
  442. return;
  443. }
  444. void sig_LEARN_handler(int sender_id){
  445. int received_value = sig_read_ar[VALUE_R];
  446. int failed_core = sig_read_ar[PREV_CR];
  447. int k;
  448. int i;
  449. int cluster_idag;
  450. int counter;
  451. int selfopt_r;
  452. int failed_interaction = 0; /* 0 nothing, 1 init_search, 2 manager_search */
  453. int one_core;
  454. core_list *tmp_core_list;
  455. core_list *tmp_core_list_prev;
  456. DDS_list *tmp_dds;
  457. DDS_list *tmp_prev_dds;
  458. inter_list tmp_inter_list;
  459. coworkers_list *tmp_cowork_list;
  460. inter_list *tmp_inter;
  461. core_states new_state = NO_PENDING_STATE;
  462. handler_Enter(sender_id,"sig_LEARN_handler");
  463. fprintf(log_file,"\t\t Received_value = %d and failed_core=%d\n",received_value,failed_core);
  464. suspected[received_value] = -1;
  465. //faulty_core = failed_core;
  466. /****************************************************************/
  467. /***************** Case where controller failed *****************/
  468. /****************************************************************/
  469. i = 0;
  470. /* FIXED IDs */
  471. for (i=0; i < X_max*Y_max;i++)
  472. if (idag_mask[i] == failed_core && i != failed_core){
  473. if (i == node_id)
  474. printf("I am the new controller and my current state is: %s\n\n",id2string(state));
  475. break;
  476. }
  477. if (idag_mask[node_id] == idag_mask[failed_core])
  478. printf("%d : %s\n",node_id,id2string(state));
  479. exit(0);
  480. /* I am checking the interactions i had with the new controller */
  481. if (failed_core != node_id && idag_mask[failed_core] != -1){
  482. if (core_inter_head[failed_core] == NULL){
  483. fprintf(log_file,"\t\tI had no interactions with failed core %d\n", failed_core);
  484. }else{
  485. fprintf(log_file, "\t\tMy interactions with failed core %d were:\n",failed_core);
  486. for (tmp_inter = core_inter_head[failed_core]; tmp_inter != NULL; tmp_inter = tmp_inter->next){
  487. fprintf(log_file,"\t\t\t%d. %s\n",i,inter2string(tmp_inter->type));
  488. if (tmp_inter->type == IDAG_FIND_IDAGS ||
  489. tmp_inter->type == IDAG_FIND_IDAGS_PENDING){
  490. failed_interaction = 1;
  491. }
  492. if (tmp_inter->type == SELFOPT_IDAG_FIND_IDAGS ||
  493. tmp_inter->type == SELFOPT_IDAG_FIND_IDAGS_PENDING){
  494. failed_interaction = 2;
  495. }
  496. i++;
  497. }
  498. }
  499. }
  500. if (core_inter_head[failed_core] != NULL &&
  501. (core_inter_head[failed_core]->type == IDAG_REQ_DDS ||
  502. core_inter_head[failed_core]->type == IDAG_REQ_DDS_PENDING ||
  503. core_inter_head[failed_core]->type == SELFOPT_IDAG_REQ_DDS ||
  504. core_inter_head[failed_core]->type == SELFOPT_IDAG_REQ_DDS_PENDING)){
  505. trigger_shit(failed_core);
  506. }
  507. /* Controller Failure and First Time i receive SIG_LEARN */
  508. if (idag_mask[failed_core] == failed_core && idag_mask[failed_core] != -1){
  509. /* Update idag_mask and idag_id_arr in any subcase */
  510. fprintf(log_file,"\t\tUpdating idag_mask and idag_id_arr with new controller %d... ",received_value);
  511. for (k = 0; k < X_max*Y_max; k++){
  512. if (idag_mask[k] == failed_core)
  513. idag_mask[k] = received_value;
  514. }
  515. for (k = 0; k < num_idags; k++){
  516. if (idag_id_arr[k] == failed_core)
  517. idag_id_arr[k] = received_value;
  518. }
  519. idag_mask[failed_core] = -1;
  520. fprintf(log_file,"DONE\n\n");
  521. fprintf(log_file,"\t\tMy interactions with the new controller are:\n");
  522. i = 0;
  523. tmp_inter = core_inter_head[received_value];
  524. while (tmp_inter != NULL && node_id != received_value)
  525. {
  526. fprintf(log_file, "\t\t\t%d. %s...\n",i,inter2string(tmp_inter->type));
  527. tmp_inter = tmp_inter->next;
  528. i++;
  529. }
  530. /***** I am a newly elected controller *****/
  531. coworkers_list *tmp_cowork_list;
  532. int num_of_coworkers;
  533. if (node_id == received_value){
  534. if (tmp_cowork_list != NULL)
  535. FOR_MY_COWORKERS_LIST num_of_coworkers++;
  536. printf("I am the new controller : %d -- Current state : %s - %d!\n", received_value,id2string(state),num_of_coworkers);
  537. fprintf(log_file, "\t\tI am the new controller : %d -- Current state : %s!\n", received_value,id2string(state));
  538. my_idag = -1;
  539. /***** Create my cores list *****/
  540. if (my_cores != NULL){
  541. printf("my_cores list is not NULL...\n");
  542. fprintf(log_file,"\t\tmy_cores list is not NULL...\n");
  543. for (tmp_core_list = my_cores->next; tmp_core_list != NULL; tmp_core_list=tmp_core_list->next){
  544. printf("\t\tCore_id : %d | Offered_to : %d ... %sREMOVED%s\n",my_cores->core_id,my_cores->offered_to,KRED,KNRM);
  545. fprintf(log_file,"\t\t\tCore_id : %d | Offered_to : %d ... %sREMOVED%s\n",my_cores->core_id,my_cores->offered_to,KRED,KNRM);
  546. free(my_cores);
  547. my_cores = tmp_core_list;
  548. }
  549. printf("\t\tCore_id : %d | Offered_to : %d ... %sREMOVED%s\n",my_cores->core_id,my_cores->offered_to,KRED,KNRM);
  550. fprintf(log_file,"\t\t\tCore_id : %d | Offered_to : %d ... %sREMOVED%s\n",my_cores->core_id,my_cores->offered_to,KRED,KNRM);
  551. free(my_cores);
  552. my_cores = NULL;
  553. }
  554. if (my_cores == NULL){
  555. printf("\t\tCreating my_cores list... ");
  556. fprintf(log_file,"\t\tCreating my_cores list... ");
  557. my_cores = (core_list *) malloc(sizeof(core_list));
  558. my_cores_count = 0;
  559. if (my_cores != NULL){
  560. printf("%sSuccess!%s\n",KGRN,KNRM);
  561. my_cores_tail = my_cores;
  562. my_cores_count++;
  563. my_cores_tail->core_id = node_id;
  564. my_cores_tail->offered_to = -1;
  565. my_cores_tail->next = NULL;
  566. my_cores_tail->workload[0] = -1;
  567. my_cores_tail->workload[1] = -1;
  568. printf("\t\t\tAdded Core_id : %d | Offered_to : %d\n",my_cores_tail->core_id,my_cores_tail->offered_to);
  569. fprintf(log_file,"\t\t\tAdded Core_id : %d | Offered_to : %d\n",my_cores_tail->core_id,my_cores_tail->offered_to);
  570. for (i = 0; i < X_max*Y_max; i++){
  571. if (idag_mask[i] == node_id && i != node_id){
  572. my_cores_tail->next = (core_list *) malloc(sizeof(core_list));
  573. if (my_cores_tail->next != NULL){
  574. my_cores_tail = my_cores_tail->next;
  575. my_cores_tail->next = NULL;
  576. my_cores_tail->core_id = i;
  577. my_cores_tail->offered_to = -1;
  578. printf("\t\t\tAdded Core_id : %d | Offered_to : %d\n",my_cores_tail->core_id,my_cores_tail->offered_to);
  579. fprintf(log_file,"\t\t\tAdded Core_id : %d | Offered_to : %d\n",my_cores_tail->core_id,my_cores_tail->offered_to);
  580. my_cores_count++;
  581. }else printf("--%d-- error allocating memory for my_cores\n",node_id);
  582. }
  583. }
  584. }else{
  585. printf("--%d-- error allocating memory for my_cores\n",node_id);
  586. fprintf(log_file, "--%d-- error allocating memory for my_cores\n",node_id);
  587. }
  588. }
  589. /***** Create my DDS List *****/
  590. if (DDS == DDS_tail && DDS != NULL){
  591. printf("\t\tDDS = DDS_tail with value: \n\t\t\tAgent_id : %d | Cores in cluster : %d\n",DDS->agent_id,DDS->num_of_cores);
  592. printf("\t\tReconfiguring DDS...\n");
  593. fprintf(log_file,"\t\tDDS = DDS_tail with value: \n\t\t\tAgent_id : %d | Cores in cluster : %d\n",DDS->agent_id,DDS->num_of_cores);
  594. fprintf(log_file,"\t\tReconfiguring DDS...\n");
  595. free(DDS);
  596. DDS = NULL;
  597. DDS_count = 0;
  598. }
  599. if (DDS == NULL){
  600. printf("\t\tCreating DDS list... ");
  601. fflush(stdout);
  602. DDS = (DDS_list *) malloc(sizeof(DDS_list));
  603. DDS_count = 0;
  604. if (DDS != NULL){
  605. printf("%sSuccess!%s\n",KGRN,KNRM);
  606. fflush(stdout);
  607. DDS->agent_id = node_id;
  608. DDS->next = NULL;
  609. DDS_tail = DDS;
  610. DDS_count++;
  611. DDS->num_of_cores = my_cores_count;
  612. printf("\t\t\tAdded Agent_id : %d | Cores in cluster : %d\n\n",DDS->agent_id, DDS->num_of_cores);
  613. fprintf(log_file,"\t\t\tAdded Agent_id : %d | Cores in cluster : %d\n\n",DDS->agent_id, DDS->num_of_cores);
  614. }else{
  615. printf("--%d-- error allocating memory for my_cores\n",node_id);
  616. fprintf(log_file,"--%d-- error allocating memory for my_cores\n",node_id);
  617. }
  618. }else{
  619. DDS_list *tmp_dds;
  620. printf("\t\tDDS list of %d:\n",node_id);
  621. fprintf(log_file,"\t\tDDS list of %d:\n",node_id);
  622. tmp_dds = DDS;
  623. while (tmp_dds != NULL){
  624. printf("\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id, tmp_dds->num_of_cores);
  625. fprintf(log_file,"\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id, tmp_dds->num_of_cores);
  626. tmp_dds = tmp_dds->next;
  627. }
  628. printf("\n\n");
  629. }
  630. paxos_state = NEW_IDAG;
  631. fprintf(log_file,"Changed Paxos State to %s\n", id2string(paxos_state));
  632. }
  633. /***** I am in the same cluster as the new controller *****/
  634. else if ((my_idag != -1) && (my_idag == failed_core)){
  635. fprintf(log_file, "\t\t I am in the same cluster as %d. My new controller is %d\n", sender_id, received_value);
  636. my_idag = received_value;
  637. if (failed_interaction == 1) {
  638. fprintf(log_file,"I have to resend signal SIG_IDAG_FIND_IDAGS to new controller\n");
  639. if (core_inter_head[my_idag] == NULL){
  640. core_inter_head[my_idag] = (inter_list *) malloc(sizeof(inter_list));
  641. core_inter_tail[my_idag] = core_inter_head[my_idag];
  642. } else {
  643. core_inter_tail[my_idag]->next = (inter_list *) malloc(sizeof(inter_list));
  644. core_inter_tail[my_idag] = core_inter_tail[my_idag]->next;
  645. }
  646. selfopt_r = (int) (1.5 * (X_max / num_idags_x));
  647. core_inter_tail[my_idag]->type = IDAG_FIND_IDAGS_PENDING;
  648. core_inter_tail[my_idag]->data.reg.C = node_id-1;
  649. core_inter_tail[my_idag]->data.reg.r = selfopt_r;
  650. core_inter_tail[my_idag]->next = NULL;
  651. if (core_inter_head[my_idag]->next == NULL) {
  652. paxos_node_stats.msg_count++;
  653. scc_kill(my_idag, SIG_IDAG_FIND_IDAGS, core_inter_head[my_idag]);
  654. } else {
  655. fprintf(log_file, "paxos_signal_handlers.c: Did not send idag_find_idags with interaction %s no2 %s\n",inter2string(core_inter_head[my_idag]->type),inter2string(core_inter_head[my_idag]->next->type));
  656. }
  657. if (selfopt_time_rem != -1) {
  658. selfopt_time_rem = my_gettimer();
  659. if (selfopt_time_rem > 0)
  660. my_settimer(0);
  661. }
  662. if (pending_state == WORKING_NODE) {
  663. fprintf(log_file, "I change to working idle init\n");
  664. state = WORKING_NODE;
  665. pending_state = IDLE_INIT_MAN;
  666. }else{
  667. state = IDLE_INIT_MAN;
  668. }
  669. }
  670. if (failed_interaction == 2){
  671. if (core_inter_head[my_idag] == NULL){
  672. core_inter_head[my_idag] = (inter_list *) malloc(sizeof(inter_list));
  673. core_inter_tail[my_idag] = core_inter_head[my_idag];
  674. } else {
  675. core_inter_tail[my_idag]->next = (inter_list *) malloc(sizeof(inter_list));
  676. core_inter_tail[my_idag] = core_inter_tail[my_idag]->next;
  677. }
  678. selfopt_r = (int) (1.5 * (X_max / num_idags_x));
  679. core_inter_tail[my_idag]->type = SELFOPT_IDAG_FIND_IDAGS_PENDING;
  680. core_inter_tail[my_idag]->data.reg.C = node_id;
  681. core_inter_tail[my_idag]->data.reg.r = selfopt_r;
  682. core_inter_tail[my_idag]->next = NULL;
  683. if (core_inter_head[my_idag]->next == NULL){
  684. paxos_node_stats.msg_count++;
  685. scc_kill(my_idag, SIG_IDAG_FIND_IDAGS, core_inter_head[my_idag]);
  686. } else {
  687. fprintf(log_file,"paxos_signal_handlers.c: Did not send sig_find_idags with inter1 = %s, inter2 = %s\n",inter2string(core_inter_head[my_idag]->type),inter2string(core_inter_head[my_idag]->next->type));
  688. }
  689. if (selfopt_interval != MAX_SELF_OPT_INTERVAL_MS){
  690. selfopt_interval = 2 * selfopt_interval;
  691. }else{
  692. selfopt_interval = -1;
  693. }
  694. new_state = IDLE_AGENT_WAITING_OFF;
  695. }
  696. }
  697. /***** I am a controller so i have to update idag_id_arr and reply with LEARN_ACK_CONTR *****/
  698. /*else if (im_controller() == 1){
  699. for (k = 0; k < num_idags; k++)
  700. if (idag_id_arr[k] == failed_core)
  701. idag_id_arr[k] = received_value;
  702. else if (idag_id_arr[k] == node_id)
  703. tmp_inter_list.data.controller_index = k;
  704. fprintf(log_file, "\t\tI am a Controller. Changed controller %d with %d\n",failed_core,sender_id);
  705. tmp_inter_list.next = NULL;
  706. tmp_inter_list.type = LEARN_ACK_CONTR;
  707. scc_kill(received_value,SIG_LEARN_ACK_CONTR,&tmp_inter_list);
  708. fprintf(log_file, "\t\tUpdated idag_id_arr\n");
  709. }*/
  710. /* I am a manager so i have to reply with ADD_AGENT_TO_DDS. In addition if the new controller was a worker of mine i remove him from my_cores list */
  711. if (im_manager() == 1){
  712. if (new_state == IDLE_AGENT_WAITING_OFF) state = IDLE_AGENT_WAITING_OFF;
  713. counter = 0;
  714. FOR_MY_CORES_LIST{
  715. cluster_idag = idag_mask[tmp_core_list->core_id];
  716. if (cluster_idag == received_value){
  717. fprintf(log_file,"\t\tI am manager %d and my core %d utilizes in cluster with idag %d\n", node_id, tmp_core_list->core_id,cluster_idag);
  718. /* if the new controller was a worker of mine i do not send him */
  719. if (tmp_core_list->core_id != received_value)
  720. tmp_inter_list.data.workers_info[++counter] = tmp_core_list->core_id;
  721. }
  722. }
  723. if (counter > 0){
  724. tmp_inter_list.next = NULL;
  725. tmp_inter_list.type = ADD_TO_DDS;
  726. tmp_inter_list.data.workers_info[0] = counter;
  727. fprintf(log_file,"\t\tNUMBER OF WORKERS: %d\n", counter);
  728. scc_kill(received_value,SIG_ADD_TO_DDS,&tmp_inter_list);
  729. }
  730. /***** If the new controller was a worker of mine i remove him from my_cores list and reappoing workload *****/
  731. tmp_core_list = my_cores->next;
  732. tmp_core_list_prev = my_cores;
  733. while (tmp_core_list != NULL){
  734. if (tmp_core_list->core_id == received_value){
  735. fprintf(log_file,"\t\tNew controller was a worker of mine! I remove him from my cores_list!\n");
  736. my_cores_count--;
  737. tmp_core_list_prev->next = tmp_core_list->next;
  738. pending_workload[0] = tmp_core_list->workload[0];
  739. pending_workload[1] = tmp_core_list->workload[1];
  740. fprintf(log_file,"Pending workload of new controller was: %d %d\n", pending_workload[0], pending_workload[1]);
  741. free(tmp_core_list);
  742. if (pending_workload[0] != -1 || pending_workload[1] != -1){
  743. //active_working_cores--;
  744. tmp_core_list = my_cores->next;
  745. fprintf(log_file,"\t\tI am reassigning the workload!\n");
  746. fprintf(log_file,"\t\t-------- CURRENT WORKLOADS --------\n");
  747. while (tmp_core_list != NULL){
  748. fprintf(log_file,"\t\t%d\t|\t%d\t|\t%d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  749. printf("\t\t%d\t|\t%d\t|\t%d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  750. tmp_core_list = tmp_core_list->next;
  751. }
  752. fprintf(log_file,"\t\t-----------------------------------\n");
  753. tmp_core_list = my_cores->next;
  754. while (tmp_core_list != NULL){
  755. if ((tmp_core_list->workload[0] == -1) && (tmp_core_list->workload[1] == -1)){
  756. one_core = tmp_core_list->core_id;
  757. fprintf(log_file,"\t\tpaxos_signal_handlers.c : I have pending workload %d | %d\n",pending_workload[0],pending_workload[1]);
  758. fprintf(log_file,"\t\tpaxos_signal_handlers.c : I am assigning workload to %d\n",one_core);
  759. if (core_inter_head[one_core] == NULL){
  760. core_inter_head[one_core] = (inter_list *) malloc(sizeof(inter_list));
  761. core_inter_tail[one_core] = core_inter_head[one_core];
  762. } else {
  763. core_inter_tail[one_core]->next = (inter_list *) malloc(sizeof(inter_list));
  764. core_inter_tail[one_core] = core_inter_tail[one_core]->next;
  765. }
  766. core_inter_tail[one_core]->type = APPOINT_WORK_NODE;
  767. core_inter_tail[one_core]->data.work_bounds[0] = pending_workload[0];
  768. core_inter_tail[one_core]->data.work_bounds[1] = pending_workload[1];
  769. fprintf(app_log_file,"%d (%d, %d), ",one_core,core_inter_tail[one_core]->data.work_bounds[0],core_inter_tail[one_core]->data.work_bounds[1]);
  770. core_inter_tail[one_core]->next = NULL;
  771. if (core_inter_head[one_core]->next == NULL) {
  772. paxos_node_stats.msg_count++;
  773. scc_kill(one_core, SIG_APPOINT_WORK, core_inter_head[one_core]);
  774. } else {
  775. fprintf(log_file,"I am doing smth else with my working node %d in init inter1 = %d inter2 = %d\n",one_core,core_inter_head[one_core]->type,core_inter_head[one_core]->next->type);
  776. printf("ASDASDASDASDASDAS\n");
  777. }
  778. pending_workload[0] = -1;
  779. pending_workload[1] = -1;
  780. break;
  781. }
  782. tmp_core_list = tmp_core_list->next;
  783. }
  784. break;
  785. }
  786. }else{
  787. tmp_core_list = tmp_core_list->next;
  788. tmp_core_list_prev = tmp_core_list_prev->next;
  789. }
  790. }
  791. }
  792. /****************************************************************/
  793. /******************* Case where manager failed ******************/
  794. /****************************************************************/
  795. }
  796. else{
  797. /***** I am the newly elected manager *****/
  798. if (node_id == received_value && idag_mask[failed_core] != -1){
  799. idag_mask[failed_core] = -1;
  800. printf("I am the new manager : %d -- Current state : %s!\n", received_value,id2string(state));
  801. if (my_cores != NULL){
  802. printf("my_cores list is not NULL...\n");
  803. for (tmp_core_list = my_cores->next; tmp_core_list != NULL; tmp_core_list=tmp_core_list->next){
  804. printf("\t\tCore_id : %d | Offered_to : %d ... %sREMOVED%s\n",my_cores->core_id,my_cores->offered_to,KRED,KNRM);
  805. free(my_cores);
  806. my_cores = tmp_core_list;
  807. }
  808. printf("\t\tCore_id : %d | Offered_to : %d ... %sREMOVED%s\n",my_cores->core_id,my_cores->offered_to,KRED,KNRM);
  809. free(my_cores);
  810. my_cores = NULL;
  811. }
  812. if (my_cores == NULL){
  813. printf("\t\tCreating my_cores list... ");
  814. fflush(stdout);
  815. my_cores = (core_list *) malloc(sizeof(core_list));
  816. my_cores_count = 0;
  817. if (my_cores != NULL){
  818. printf("%sSuccess!%s\n",KGRN,KNRM);
  819. my_cores_tail = my_cores;
  820. my_cores_tail->core_id = node_id;
  821. my_cores_tail->offered_to = -1;
  822. my_cores_tail->workload[0] = -1;
  823. my_cores_tail->workload[1] = -1;
  824. my_cores_tail->next = NULL;
  825. my_cores_count++;
  826. printf("\t\t\tAdded Core_id : %d | Offered_to : %d\n",my_cores_tail->core_id,my_cores_tail->offered_to);
  827. FOR_MY_COWORKERS_LIST{
  828. my_cores_tail->next = (core_list *) malloc(sizeof(core_list));
  829. if (my_cores_tail->next != NULL){
  830. my_cores_tail = my_cores_tail->next;
  831. my_cores_tail->next = NULL;
  832. my_cores_tail->core_id = tmp_cowork_list->core_id;
  833. my_cores_tail->offered_to = -1;
  834. my_cores_tail->workload[0] = -1;
  835. my_cores_tail->workload[1] = -1;
  836. printf("\t\t\tAdded Core_id : %d | Offered_to : %d\n",my_cores_tail->core_id,my_cores_tail->offered_to);
  837. my_cores_count++;
  838. }else printf("--%d-- error allocating memory for my_cores\n",node_id);
  839. }
  840. }else printf("--%d-- error allocating memory for my_cores\n",node_id);
  841. }
  842. /**** I have to send SIG_ADD_TO_DDS to the controllers of my workers ****/
  843. for (i = 0; i < X_max*Y_max; i++){
  844. if (idag_mask[i] == i){
  845. counter = 0;
  846. cluster_idag = idag_mask[i];
  847. FOR_MY_CORES_LIST{
  848. if (cluster_idag == idag_mask[tmp_core_list->core_id]){
  849. fprintf(log_file,"\t\tI am manager %d and my core %d utilizes in cluster with idag %d\n", node_id, tmp_core_list->core_id,cluster_idag);
  850. tmp_inter_list.data.workers_info[++counter] = tmp_core_list->core_id;
  851. }
  852. }
  853. if (counter > 0){
  854. tmp_inter_list.next = NULL;
  855. tmp_inter_list.type = ADD_TO_DDS;
  856. tmp_inter_list.data.workers_info[0] = counter;
  857. fprintf(log_file,"\t\tNUMBER OF WORKERS: %d\n", counter);
  858. scc_kill(i,SIG_ADD_TO_DDS,&tmp_inter_list);
  859. }
  860. }
  861. }
  862. state = AGENT_INIT_STATE;
  863. paxos_state = NEW_AGENT;
  864. printf("I was working for app: %d\n", worker_app_id);
  865. my_app.id = worker_app_id;
  866. my_app.num_of_cores = my_cores_count-1;
  867. find_app_info();
  868. printf("Found array size = %d\n", my_app.array_size);
  869. fprintf(log_file,"Found array size = %d\n", my_app.array_size);
  870. printf("Found remaining workload = %d\n", my_app.workld);
  871. fprintf(log_file,"Found remaining workload = %d\n", my_app.workld);
  872. printf("App number of cores = %d\n", my_app.num_of_cores);
  873. fprintf(log_file,"App number of cores = %d\n", my_app.num_of_cores);
  874. FOR_MY_CORES_LIST{
  875. fprintf(log_file,"\t\t\tWorker_id : %d | Workload : %d %d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  876. printf("\t\t\tWorker_id : %d | Workload : %d %d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  877. }
  878. /***** I am controller i have to remove the failed_core from my DDS and cores list *****/
  879. }else if (my_idag == -1){
  880. if (idag_mask[failed_core] == node_id){
  881. printf("--%d-- I received SIG_LEARN from %d\n",node_id,sender_id);
  882. tmp_core_list = my_cores->next;
  883. tmp_core_list_prev = my_cores;
  884. while (tmp_core_list != NULL){
  885. if (tmp_core_list->core_id == failed_core){
  886. my_cores_count--;
  887. tmp_core_list_prev->next = tmp_core_list->next;
  888. free(tmp_core_list);
  889. break;
  890. }
  891. tmp_core_list = tmp_core_list->next;
  892. tmp_core_list_prev = tmp_core_list_prev->next;
  893. }
  894. printf("\t\tUpdated my_cores list:\n");
  895. fprintf(log_file,"\t\tUpdated my_cores list:\n");
  896. counter = 0;
  897. FOR_MY_CORES_LIST{
  898. if (tmp_core_list->offered_to == failed_core){
  899. tmp_core_list->offered_to = -1;
  900. counter++;
  901. }
  902. printf("\t\t\tCore_id : %d | Offered_to : %d\n",tmp_core_list->core_id,tmp_core_list->offered_to);
  903. fprintf(log_file,"\t\t\tCore_id : %d | Offered_to : %d\n",tmp_core_list->core_id,tmp_core_list->offered_to);
  904. }
  905. printf("\t\t\tmy_cores_count = %d\n",my_cores_count);
  906. tmp_dds = DDS->next;
  907. tmp_prev_dds = DDS;
  908. while (tmp_dds != NULL){
  909. if (tmp_dds->agent_id == failed_core){
  910. fprintf(log_file,"\t\t Removed failed core %d from DDS\n",tmp_dds->agent_id);
  911. tmp_prev_dds->next = tmp_dds->next;
  912. if (tmp_dds->next == NULL){
  913. DDS_tail = tmp_prev_dds;
  914. }
  915. DDS_count--;
  916. free(tmp_dds);
  917. DDS->num_of_cores = DDS->num_of_cores + counter;
  918. break;
  919. }else{
  920. tmp_prev_dds = tmp_dds;
  921. tmp_dds = tmp_dds->next;
  922. }
  923. }
  924. printf("\t\tUpdated DDS list:\n");
  925. fprintf(log_file,"\t\tUpdated DDS list:\n");
  926. FOR_MY_DDS_LIST{
  927. printf("\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id,tmp_dds->num_of_cores);
  928. fprintf(log_file,"\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id,tmp_dds->num_of_cores);
  929. }
  930. printf("\t\t\tDDS_count = %d\n",DDS_count);
  931. }
  932. /***** I was working for the failed manager *****/
  933. }else if (cur_agent.my_agent == failed_core){
  934. cur_agent.my_agent = -1;
  935. }
  936. }
  937. cur_time = time(NULL);
  938. cur_t = localtime(&cur_time);
  939. fprintf(log_file, "[%d:%d:%d]: I ended sig_LEARN_handler with sender = %d state = %s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  940. //exit(0);
  941. return;
  942. }
  943. void sig_REINIT_APP_handler(int sender_id){
  944. inter_list *tmp_inter_list;
  945. cur_time = time(NULL);
  946. cur_t = localtime(&cur_time);
  947. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_REINIT_APP_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  948. printf("Received SIG_REINIT_APP from %d\n", sender_id);
  949. //int i, data_array_local[LINE_SIZE];
  950. for (tmp_inter_list = core_inter_head[0]; tmp_inter_list != NULL; tmp_inter_list = tmp_inter_list->next)
  951. if (tmp_inter_list->type == INIT_APP) {
  952. printf("i not null\n");
  953. break;
  954. }
  955. if (core_inter_head[12] == NULL){
  956. core_inter_head[12] = (inter_list *)malloc(sizeof(inter_list));
  957. core_inter_tail[12] = core_inter_head[12];
  958. }else{
  959. core_inter_tail[12]->next = (inter_list *)malloc(sizeof(inter_list));
  960. core_inter_tail[12] = core_inter_tail[12]->next;
  961. }
  962. core_inter_tail[12]->next = NULL;
  963. core_inter_tail[12]->type = INIT_APP;
  964. core_inter_tail[12]->data.new_app.id = sig_read_ar[2];
  965. core_inter_tail[12]->data.new_app.array_size = sig_read_ar[3];
  966. core_inter_tail[12]->data.new_app.workld = sig_read_ar[4];
  967. core_inter_tail[12]->data.new_app.num_of_cores = sig_read_ar[5];
  968. if (core_inter_head[12]->next == NULL){
  969. scc_kill(12, SIG_INIT_APP, core_inter_head[12]);
  970. }
  971. cur_time = time(NULL);
  972. cur_t = localtime(&cur_time);
  973. fprintf(log_file, "[%d:%d:%d]: I ended sig_REINIT_APP_handler with sender = %d state=%s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  974. return;
  975. }
  976. void find_app_info(){
  977. //int sz;
  978. char app_log_file_name[64];
  979. char buffer[64];
  980. int temp;
  981. strcpy(app_log_file_name, "../");
  982. strcat(app_log_file_name,local_scen_directory);
  983. strcat(app_log_file_name, "/");
  984. strcat(app_log_file_name,local_scen_num);
  985. strcat(app_log_file_name,"/app_logs/");
  986. strcat(app_log_file_name, itoa(worker_app_id));
  987. strcat(app_log_file_name, ".txt");
  988. printf("Trying to open file %s... ", app_log_file_name);
  989. if ((app_log_file = fopen(app_log_file_name, "r")) == NULL){
  990. printf("%sError%s\n",KRED,KNRM);
  991. printf("paxos_signal_handlers.c : Cannot open input file with file path = %s ",app_log_file_name);
  992. perror("open app_log_file");
  993. }else{
  994. printf("%sSuccess%s\n",KGRN,KNRM);
  995. }
  996. while (fscanf(app_log_file,"%s", buffer) != EOF){
  997. if (strcmp(buffer,"array_size") == 0){
  998. fscanf(app_log_file,"%s",buffer);
  999. fscanf(app_log_file,"%d", &temp);
  1000. my_app.array_size = temp;
  1001. }
  1002. if (strcmp(buffer,"workload") == 0){
  1003. fscanf(app_log_file,"%s",buffer);
  1004. fscanf(app_log_file,"%d",&temp);
  1005. my_app.workld = temp;
  1006. }
  1007. }
  1008. fclose(app_log_file);
  1009. }
  1010. void rollback(){
  1011. offer_list *tmp_offer_list;
  1012. inter_list tmp_inter_list;
  1013. core_list *tmp_core_list;
  1014. int core_idag;
  1015. cur_time = time(NULL);
  1016. cur_t = localtime(&cur_time);
  1017. tmp_inter_list.next = NULL;
  1018. fprintf(log_file, "Rolling back... %s\n", id2string(state));
  1019. //If i am the new controller and i have an app to initialize i send SIG_REINIT_APP to 0.
  1020. if ((paxos_state == NEW_IDAG) && (state == INIT_MANAGER || state == INIT_MANAGER_SEND_OFFERS || state == IDLE_INIT_MAN || state == INIT_MAN_CHK_OFFERS || pending_state == INIT_MANAGER
  1021. || pending_state == INIT_MANAGER_SEND_OFFERS || pending_state == INIT_MAN_CHK_OFFERS || pending_state == AGENT_INIT_CHK_OFFERS || pending_state == IDLE_INIT_MAN
  1022. || pending_state == IDLE_INIT_IDLE_AGENT || pending_state == IDLE_INIT_AGENT_SELFOPT || pending_state == INIT_CHK_OFFERS_IDLE_AGENT || pending_state == INIT_CHK_OFFERS_SELFOPT)){
  1023. printf("i am the new controller and i have an app to initialize i send SIG_REINIT_APP to 0\n");
  1024. if (init_man_offers != NULL){
  1025. tmp_offer_list = init_man_offers;
  1026. while (tmp_offer_list != NULL){
  1027. *tmp_offer_list->answer = 0;
  1028. tmp_offer_list = tmp_offer_list->next;
  1029. }
  1030. }
  1031. while (init_man_offers != NULL){
  1032. if (core_inter_head[init_man_offers->sender] != NULL) {
  1033. if (core_inter_head[init_man_offers->sender]->type == REP_AGENT_OFFER_PENDING){
  1034. core_inter_head[init_man_offers->sender]->type = REP_AGENT_OFFER_SENT;
  1035. //kill(pid_num[init_man_offers->sender], SIG_REP_OFFERS);
  1036. paxos_node_stats.msg_count++;
  1037. scc_kill(init_man_offers->sender, SIG_REP_OFFERS, core_inter_head[init_man_offers->sender]);
  1038. //my_stats.msg_count++;
  1039. //my_stats.distance += distance(node_id,init_man_offers->sender);
  1040. } else {
  1041. printf("gamietai b = %d",init_man_offers->sender);
  1042. fprintf(log_file,"gamietai b = %d",init_man_offers->sender);
  1043. }
  1044. tmp_offer_list = init_man_offers;
  1045. init_man_offers = init_man_offers->next;
  1046. free(tmp_offer_list);
  1047. }
  1048. }
  1049. fprintf(log_file, "Replied to all my offers negatively\n");
  1050. tmp_inter_list.type = REINIT_APP;
  1051. tmp_inter_list.data.reappointed_app = init_app;
  1052. scc_kill(0,SIG_REINIT_APP,&tmp_inter_list);
  1053. }
  1054. /* If i were a manager i have to inform that i am no longer manager and also add other managers to my DDS */
  1055. else if (im_manager() == 1){
  1056. printf("New controller was a manager before paxos! Remove him from dds lists and create his dds list...\n");
  1057. //TODO remove from dds and add managers to dds
  1058. tmp_core_list = my_cores;
  1059. while (tmp_core_list != NULL){
  1060. //idag_mask[tmp_core_list->core_id] -> idag id
  1061. core_idag = idag_mask[tmp_core_list->core_id];
  1062. tmp_inter_list.next = NULL;
  1063. tmp_inter_list.type = REMOVE_FROM_DDS;
  1064. scc_kill(core_idag, SIG_REMOVE_FROM_DDS, &tmp_inter_list);
  1065. tmp_core_list = tmp_core_list->next;
  1066. }
  1067. }else
  1068. //printf("New controller was an idle core before paxos! Just create his dds list...\n");
  1069. //TODO add managers to dds
  1070. //my_cores = NULL;
  1071. return;
  1072. }
  1073. void sig_ADD_TO_DDS_handler(int sender_id, int *inc_cnt, int cur_index_top){
  1074. DDS_list *tmp_dds = NULL;
  1075. core_list *tmp_core_list;
  1076. int num_of_workers = 0, flag = 0, current = 0;
  1077. handler_Enter(sender_id,"sig_ADD_TO_DDS_handler");
  1078. num_of_workers = sig_read_ar[2];
  1079. fprintf(log_file,"--%d-- [%d:%d:%d]:I received SIG_ADD_TO_DDS from %d with num_of_workers = %d\n",node_id,cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,num_of_workers);
  1080. if (num_of_workers > 5){
  1081. *inc_cnt = *inc_cnt + 1;
  1082. #ifdef PLAT_SCC
  1083. error = RCCE_get((t_vcharp)(&sig_read_ar[LINE_SIZE]), (t_vcharp)(&sig_array[(cur_index_top+1)*LINE_SIZE]), LINE_SIZE * sizeof(int), node_id);
  1084. if (error != RCCE_SUCCESS) {
  1085. RCCE_error_string(error, error_str, &str_len);
  1086. fprintf(log_file,"I got an error in get data in sig_ADD_CORES_DDS_handler from %d with descr %s\n",sender_id,error_str);
  1087. fflush(log_file);
  1088. }
  1089. #else
  1090. new_RCCE_get(sig_read_ar, sig_array, cur_index_top, LINE_SIZE, node_id);
  1091. #endif
  1092. }
  1093. if (DDS == NULL){
  1094. DDS_count=0;
  1095. DDS = (DDS_list *) malloc(sizeof(DDS_list));
  1096. DDS->agent_id = node_id;
  1097. DDS->next = NULL;
  1098. DDS_tail = DDS;
  1099. DDS_count++;
  1100. flag = 0;
  1101. }else{
  1102. FOR_MY_DDS_LIST{
  1103. if (tmp_dds->agent_id == sender_id){
  1104. fprintf(log_file,"\t\t%d is already in my DDS. %d->num_of_cores++ && DDS->num_of_cores--\n",sender_id,sender_id);
  1105. DDS->num_of_cores--;
  1106. tmp_dds->num_of_cores++;
  1107. break;
  1108. flag = 1;
  1109. }
  1110. }
  1111. }
  1112. if (flag == 0){ /* Sender was not in my DDS */
  1113. fprintf(log_file,"\t\t%d is not in my DDS. DDS_count++\n",sender_id);
  1114. DDS_tail->next = (DDS_list *)malloc(sizeof(DDS_list));
  1115. DDS_tail = DDS_tail->next;
  1116. DDS_tail->next = NULL;
  1117. DDS_tail->agent_id = sender_id;
  1118. DDS_tail->num_of_cores = num_of_workers;
  1119. DDS->num_of_cores = DDS->num_of_cores - num_of_workers;
  1120. DDS_count++;
  1121. }
  1122. fprintf(log_file, "\t\t%d utilizes %d cores in my cluster\n",sender_id, num_of_workers);
  1123. while (num_of_workers > 0){
  1124. current = sig_read_ar[2+num_of_workers];
  1125. FOR_MY_CORES_LIST{
  1126. if (tmp_core_list->core_id == current){
  1127. fprintf(log_file,"\t\tChanged %d->offered_to = %d\n",tmp_core_list->core_id,sender_id);
  1128. tmp_core_list->offered_to = sender_id;
  1129. }
  1130. }
  1131. num_of_workers--;
  1132. }
  1133. printf("\t\tUpdated my_cores list:\n");
  1134. fprintf(log_file,"\t\tUpdated my_cores list:\n");
  1135. FOR_MY_CORES_LIST{
  1136. printf("\t\t\tCore_id : %d | Offered_to : %d\n",tmp_core_list->core_id,tmp_core_list->offered_to);
  1137. fprintf(log_file,"\t\t\tCore_id : %d | Offered_to : %d\n",tmp_core_list->core_id,tmp_core_list->offered_to);
  1138. }
  1139. printf("\t\tUpdated DDS list:\n");
  1140. fprintf(log_file,"\t\tUpdated DDS list:\n");
  1141. FOR_MY_DDS_LIST{
  1142. printf("\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id,tmp_dds->num_of_cores);
  1143. fprintf(log_file,"\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id,tmp_dds->num_of_cores);
  1144. }
  1145. cur_time = time(NULL);
  1146. cur_t = localtime(&cur_time);
  1147. fprintf(log_file, "\n[%d:%d:%d]: I ended sig_ADD_TO_DDS_handler with sender = %d state = %s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1148. return;
  1149. }
  1150. void sig_REMOVE_FROM_DDS_handler(int sender_id){
  1151. core_list *tmp_core_list;
  1152. DDS_list *tmp_dds, *tmp_prev_dds;
  1153. cur_time = time(NULL);
  1154. cur_t = localtime(&cur_time);
  1155. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_REMOVE_FROM_DDS_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1156. tmp_core_list = my_cores;
  1157. while (tmp_core_list != NULL){
  1158. if (tmp_core_list->offered_to == sender_id){
  1159. fprintf(log_file,"\t\tChanged %d->offered_to to -1\n",tmp_core_list->core_id);
  1160. tmp_core_list->offered_to = -1;
  1161. }
  1162. tmp_core_list = tmp_core_list->next;
  1163. }
  1164. tmp_dds = DDS->next;
  1165. tmp_prev_dds = DDS;
  1166. while (tmp_dds != NULL){
  1167. if (tmp_dds->agent_id == sender_id){
  1168. fprintf(log_file,"\t\t Removed %d from DDS\n",tmp_dds->agent_id);
  1169. tmp_prev_dds->next = tmp_dds->next;
  1170. if (tmp_dds->next == NULL){
  1171. DDS_tail = tmp_prev_dds;
  1172. }
  1173. DDS_count--;
  1174. free(tmp_dds);
  1175. break;
  1176. }else{
  1177. tmp_prev_dds = tmp_dds;
  1178. tmp_dds = tmp_dds->next;
  1179. }
  1180. }
  1181. cur_time = time(NULL);
  1182. cur_t = localtime(&cur_time);
  1183. fprintf(log_file, "[%d:%d:%d]: I ended sig_REMOVE_FROM_DDS_handler with sender = %d state = %s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1184. return;
  1185. }
  1186. void sig_CONTR_TO_handler(int sender_id){
  1187. cur_time = time(NULL);
  1188. cur_t = localtime(&cur_time);
  1189. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_CONTR_TO_handler with sender=%d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1190. faulty_core = my_idag;
  1191. if (paxos_state != PAXOS_ACTIVE && paxos_state != NEW_AGENT && paxos_state != NEW_IDAG)
  1192. sig_PAXOS_INIT_handler();
  1193. cur_time = time(NULL);
  1194. cur_t = localtime(&cur_time);
  1195. fprintf(log_file, "[%d:%d:%d]: I ended sig_CONTR_TO_handler with sender = %d state = %s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1196. return;
  1197. }
  1198. void sig_HEARTBEAT_REQ_handler(int sender_id){
  1199. inter_list tmp_inter_list;
  1200. cur_time = time(NULL);
  1201. cur_t = localtime(&cur_time);
  1202. //fprintf(log_file, "[%d:%d:%d]: I entered sig_HEARTBEAT_REQ_handler with sender=%d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1203. tmp_inter_list.next = NULL;
  1204. tmp_inter_list.type = HEARTBEAT_REP;
  1205. scc_kill(sender_id,SIG_HEARTBEAT_REP,&tmp_inter_list);
  1206. cur_time = time(NULL);
  1207. cur_t = localtime(&cur_time);
  1208. return;
  1209. }
  1210. void sig_HEARTBEAT_REP_handler(int sender_id){
  1211. cur_time = time(NULL);
  1212. cur_t = localtime(&cur_time);
  1213. //fprintf(log_file, "[%d:%d:%d]: I entered sig_HEARTBEAT_REP_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1214. alive[sender_id] = 1;
  1215. suspected[sender_id] = 0;
  1216. cur_time = time(NULL);
  1217. cur_t = localtime(&cur_time);
  1218. //fprintf(log_file, "[%d:%d:%d]: I ended sig_TERMINATE_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1219. return;
  1220. }
  1221. void sig_PFD_TIMER_handler(int signo, siginfo_t *info, void *context){
  1222. int i, j, failed_core, pending_workload[2], one_core;
  1223. DDS_list *tmp_dds, *tmp_dds_prev;
  1224. inter_list tmp_inter_list;
  1225. core_list *tmp_core_list, *tmp_core_list_prev;
  1226. if (first_time == 0){
  1227. printf("%d oh yeah\n",node_id);
  1228. first_time = 1;
  1229. its.it_interval.tv_sec = 0;
  1230. its.it_interval.tv_nsec = 0;
  1231. its.it_value.tv_sec = 2;
  1232. its.it_value.tv_nsec = 0;//100000000;
  1233. if (timer_settime(pfd_timer, 0, &its, NULL) == -1){
  1234. printf("-- %d --", node_id);
  1235. fflush(stdout);
  1236. perror("paxos_signal_handlers.c : timer_settime error9");
  1237. }else {
  1238. fprintf(log_file,"Updated timer!\n");
  1239. }
  1240. return;
  1241. }
  1242. signals_disable();
  1243. cur_time = time(NULL);
  1244. cur_t = localtime(&cur_time);
  1245. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_PFD_TIMER_handler state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,id2string(state));
  1246. //printf("[%d:%d:%d]: -%d- I entered sig_PFD_TIMER_handler state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,node_id, id2string(state));
  1247. for (i = 0; i < X_max*Y_max; i++){
  1248. if (alive[i] == 0){
  1249. suspected[i]++;
  1250. }
  1251. #if defined(PFD) && defined(BASIC_PAXOS)
  1252. /* The Perfect Failure Detectors sends a SIG_HEARTBEAT_REQ each time the timer explodes and waits for a SIG_HEARTBEAT_REP*/
  1253. /* If he doesn't receive a reply until the timer reexplodes then the node is detected as faulty */
  1254. if (alive[i] != 1 && alive[i] != -1 && faulty_core == -1 && i != node_id && idag_mask[i] == idag_mask[node_id])
  1255. #elif defined(tPFD) && defined(BASIC_PAXOS) //tPFD
  1256. /* The tweaked Perfect Failure Detectors only suspects a core if he sends him a signal and doesn't receive a reply in some period of time*/
  1257. if (alive[i] == 0 && suspected[i] == 2 && i != node_id && idag_mask[i] == idag_mask[node_id])
  1258. #else
  1259. if (alive[i] == -5)
  1260. #endif
  1261. {
  1262. cur_time = time(NULL);
  1263. cur_t = localtime(&cur_time);
  1264. printf("-- %d -- I detected %d as faulty at [%d:%d:%d]!!\n", node_id, i,cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec);
  1265. fprintf(log_file, "-- %d -- I detected %d as faulty at [%d:%d:%d]!!\n", node_id, i,cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec);
  1266. fprintf(log_file, "-- %d -- I detected %d as faulty!!\n", node_id, i);
  1267. failed_core = i;
  1268. faulty_core = i;
  1269. suspected[failed_core] = -1;
  1270. alive[failed_core] = -1;
  1271. #if defined(CONTROLLER) && defined(PLAT_LINUX)
  1272. int semvalue = 0;
  1273. sem_getvalue(&flag_data_written[failed_core],&semvalue);
  1274. if (semvalue == 0){
  1275. /*I am locked*/
  1276. printf("--%d-- I unlocked semaphore for node %d\n",node_id,failed_core);
  1277. sem_post(&flag_data_written[failed_core]);
  1278. sem_getvalue(&flag_data_written[failed_core],&semvalue);
  1279. }
  1280. #endif
  1281. /*My controller timed out */
  1282. if (failed_core == my_idag){
  1283. sig_PAXOS_INIT_handler();
  1284. /* Controller in an other cluster timed out */
  1285. }else if (idag_mask[failed_core] == failed_core){
  1286. printf("--%d-- Other controller TIMED OUT\n", node_id);
  1287. for (j = 0; j < X_max*Y_max; j++){
  1288. if (idag_mask[j] == failed_core && j != failed_core){
  1289. if (state == IDLE_AGENT_WAITING_OFF) state = IDLE_AGENT;
  1290. tmp_inter_list.next = NULL;
  1291. tmp_inter_list.type = CONTR_TO;
  1292. scc_kill(j,SIG_CONTR_TO,&tmp_inter_list);
  1293. }
  1294. }
  1295. /* Worker or manager timed out */
  1296. }else{
  1297. /*I am controller.
  1298. *If failed node is inside my cluster
  1299. *i have to remove the failed node from my cores and DDS list */
  1300. if ((idag_mask[node_id] == node_id) && (idag_mask[failed_core] == node_id)){
  1301. tmp_core_list = my_cores->next;
  1302. tmp_core_list_prev = my_cores;
  1303. while (tmp_core_list != NULL){
  1304. if (tmp_core_list->core_id == failed_core){
  1305. my_cores_count--;
  1306. tmp_core_list_prev->next = tmp_core_list->next;
  1307. free(tmp_core_list);
  1308. break;
  1309. }else{
  1310. tmp_core_list_prev = tmp_core_list;
  1311. tmp_core_list = tmp_core_list->next;
  1312. }
  1313. }
  1314. tmp_dds = DDS->next;
  1315. tmp_dds_prev = DDS;
  1316. while (tmp_dds != NULL){
  1317. if (tmp_dds->agent_id == failed_core){
  1318. tmp_dds_prev->next = tmp_dds->next;
  1319. free(tmp_dds);
  1320. break;
  1321. }else{
  1322. tmp_dds_prev = tmp_dds;
  1323. tmp_dds = tmp_dds->next;
  1324. }
  1325. }
  1326. }
  1327. /*I am manager.
  1328. *I have to check if the failed node is my worker
  1329. *If yes i have to appoint work to a new node.*/
  1330. if (im_manager()){
  1331. printf("-- %d -- I am manager of an application.\n",node_id);
  1332. tmp_core_list = my_cores->next;
  1333. tmp_core_list_prev = my_cores;
  1334. while (tmp_core_list != NULL){
  1335. /* I am the manager of the failed worker. I reappoint the work to another core. */
  1336. if (tmp_core_list->core_id == failed_core){
  1337. my_cores_count--;
  1338. /* I am the manager of the failed worker so i remove him from my core list */
  1339. tmp_core_list_prev->next = tmp_core_list->next;
  1340. fprintf(log_file,"I am the manager of the failed worker %d! I removed him from my cores list\n",tmp_core_list->core_id);
  1341. printf("-- %d --I am the manager of the failed worker %d! I removed him from my cores list\n",node_id, tmp_core_list->core_id);
  1342. //one_core = tmp_core_list->core_id;
  1343. pending_workload[0] = tmp_core_list->workload[0];
  1344. pending_workload[1] = tmp_core_list->workload[1];
  1345. printf("Pending workload of faulty core: %d %d\n", pending_workload[0], pending_workload[1]);
  1346. /*else{
  1347. reappoint = FALSE;
  1348. }*/
  1349. free(tmp_core_list);
  1350. tmp_core_list = my_cores->next;
  1351. fprintf(log_file,"I am reassigning the workload!\n");
  1352. fprintf(log_file,"-------- CURRENT WORKLOADS --------\n");
  1353. printf("-------- CURRENT WORKLOADS --------\n");
  1354. while (tmp_core_list != NULL){
  1355. fprintf(log_file,"%d\t|\t%d\t|\t%d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  1356. printf("%d\t|\t%d\t|\t%d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  1357. tmp_core_list = tmp_core_list->next;
  1358. }
  1359. printf("-----------------------------------\n");
  1360. fprintf(log_file,"-----------------------------------\n");
  1361. tmp_core_list = my_cores->next;
  1362. while (tmp_core_list != NULL){
  1363. if ((tmp_core_list->workload[0] == -1) && (tmp_core_list->workload[1] == -1)){
  1364. one_core = tmp_core_list->core_id;
  1365. if (core_inter_head[one_core] == NULL){
  1366. core_inter_head[one_core] = (inter_list *) malloc(sizeof(inter_list));
  1367. core_inter_tail[one_core] = core_inter_head[one_core];
  1368. } else {
  1369. core_inter_tail[one_core]->next = (inter_list *) malloc(sizeof(inter_list));
  1370. core_inter_tail[one_core] = core_inter_tail[one_core]->next;
  1371. }
  1372. core_inter_tail[one_core]->type = APPOINT_WORK_NODE;
  1373. core_inter_tail[one_core]->data.work_bounds[0] = pending_workload[0];
  1374. core_inter_tail[one_core]->data.work_bounds[1] = pending_workload[1];
  1375. fprintf(app_log_file,"%d (%d, %d), ",one_core,core_inter_tail[one_core]->data.work_bounds[0],core_inter_tail[one_core]->data.work_bounds[1]);
  1376. core_inter_tail[one_core]->next = NULL;
  1377. if (core_inter_head[one_core]->next == NULL) {
  1378. paxos_node_stats.msg_count++;
  1379. scc_kill(one_core, SIG_APPOINT_WORK, core_inter_head[one_core]);
  1380. } else {
  1381. fprintf(log_file,"I am doing smth else with my working node %d in init inter1 = %d inter2 = %d\n",one_core,core_inter_head[one_core]->type,core_inter_head[one_core]->next->type);
  1382. }
  1383. break;
  1384. }
  1385. tmp_core_list = tmp_core_list->next;
  1386. }
  1387. break;
  1388. }else {
  1389. tmp_core_list_prev = tmp_core_list;
  1390. tmp_core_list = tmp_core_list->next;
  1391. }
  1392. }
  1393. }else{
  1394. /* My manager timed out */
  1395. if (cur_agent.my_agent == failed_core){
  1396. printf("--%d-- %d is my manager! I initiate a Paxos instance\n",node_id, failed_core);
  1397. paxos_state = PAXOS_ACTIVE;
  1398. sig_PAXOS_INIT_handler();
  1399. }
  1400. }
  1401. }
  1402. }
  1403. #ifdef PFD
  1404. else{
  1405. if (alive[i] != -1 && i != node_id && idag_mask[i] == idag_mask[node_id]){
  1406. alive[i] = 0;
  1407. tmp_inter_list.type = HEARTBEAT_REQ;
  1408. tmp_inter_list.next = NULL;
  1409. scc_kill(i,SIG_HEARTBEAT_REQ,&tmp_inter_list);
  1410. }
  1411. }
  1412. #else
  1413. else{
  1414. if (alive[i] == 0 && i != node_id && idag_mask[i] == idag_mask[node_id]){
  1415. tmp_inter_list.type = HEARTBEAT_REQ;
  1416. tmp_inter_list.next = NULL;
  1417. scc_kill(i,SIG_HEARTBEAT_REQ,&tmp_inter_list);
  1418. }
  1419. }
  1420. #endif
  1421. }
  1422. fprintf(log_file,"\t\tNodes in my cluster: ");
  1423. for (i = 0; i < X_max*Y_max; i++){
  1424. if (idag_mask[i] == idag_mask[node_id] && i != node_id) fprintf(log_file,"%d, ",i);
  1425. }
  1426. fprintf(log_file,"\n");
  1427. fprintf(log_file,"\t\tI have received a signal from: ");
  1428. for (i = 0; i < X_max*Y_max; i++){
  1429. if (alive[i] == 1) fprintf(log_file,"%d, ",i);
  1430. }
  1431. fprintf(log_file,"\n");
  1432. fprintf(log_file,"\t\tSent HEARTBEAT_REQ to:");
  1433. for (i = 0; i < X_max*Y_max; i++){
  1434. if (alive[i] == 0 && i != node_id && idag_mask[i] == idag_mask[node_id]){
  1435. fprintf(log_file,"%d, ",i);
  1436. }
  1437. alive[i] = 0;
  1438. }
  1439. its.it_interval.tv_sec = 0;
  1440. its.it_interval.tv_nsec = 0;
  1441. its.it_value.tv_sec = 2;
  1442. its.it_value.tv_nsec = 0;//100000000;
  1443. if (timer_settime(pfd_timer, 0, &its, NULL) == -1){
  1444. printf("-- %d --", node_id);
  1445. fflush(stdout);
  1446. perror("paxos_signal_handlers.c : timer_settime error9");
  1447. }else {
  1448. fprintf(log_file,"Updated timer!\n");
  1449. }
  1450. cur_time = time(NULL);
  1451. cur_t = localtime(&cur_time);
  1452. fprintf(log_file, "[%d:%d:%d]: I ended sig_PFD_TIMER_handler state = %s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,id2string(state));
  1453. signals_enable();
  1454. }
  1455. void sig_EPFD_TIMER_handler(int signo, siginfo_t *info, void *context)
  1456. {
  1457. core_list *tmp_core_list, *tmp_core_list_prev;
  1458. DDS_list *tmp_dds, *tmp_dds_prev;
  1459. int one_core,failed_core, pending_workload[2]/*,reappoint = TRUE*/;
  1460. signals_disable();
  1461. cur_time = time(NULL);
  1462. cur_t = localtime(&cur_time);
  1463. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_EPFD_TIMER_handler state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,id2string(state));
  1464. int i, j;//, disjoint = 1;
  1465. inter_list tmp_inter_list;
  1466. for (i = 0; i < X_max*Y_max; i++){
  1467. if (alive[i] == suspected[i]){
  1468. fprintf(log_file,"\t\tNew Delay: %d\n",delay);
  1469. delay *= 2;
  1470. break;
  1471. //disjoint = 0;
  1472. }
  1473. }
  1474. fprintf(log_file,"\t\t------ FAILURE DETECTION ------\n");
  1475. for (i = 0; i < X_max*Y_max; i++){
  1476. if (alive[i] == 0 && suspected[i] != -1){
  1477. suspected[i]++;
  1478. fprintf(log_file,"\t\t%d -> SUSPECTED with suspected[%d] = %d\n",i,i,suspected[i]);
  1479. if ((suspected[i] > 2) && (faulty_core != i)){
  1480. cur_time = time(NULL);
  1481. cur_t = localtime(&cur_time);
  1482. failed_core = i;
  1483. faulty_core = i;
  1484. suspected[failed_core] = -1;
  1485. alive[failed_core] = -1;
  1486. #if defined(CONTROLLER) && defined(PLAT_LINUX)
  1487. int semvalue = 0;
  1488. sem_getvalue(&flag_data_written[failed_core],&semvalue);
  1489. if (semvalue == 0){
  1490. /*I am locked*/
  1491. printf("--%d-- I unlocked semaphore for node %d\n",node_id,failed_core);
  1492. sem_post(&flag_data_written[failed_core]);
  1493. sem_getvalue(&flag_data_written[failed_core],&semvalue);
  1494. }
  1495. #endif
  1496. /*My controller timed out */
  1497. if (failed_core == my_idag){
  1498. sig_PAXOS_INIT_handler();
  1499. /* Controller in an other cluster timed out */
  1500. }else if (idag_mask[failed_core] == failed_core){
  1501. printf("--%d-- Other controller TIMED OUT\n", node_id);
  1502. for (j = 0; j < X_max*Y_max; j++){
  1503. if (idag_mask[j] == failed_core && j != failed_core){
  1504. if (state == IDLE_AGENT_WAITING_OFF) state = IDLE_AGENT;
  1505. tmp_inter_list.next = NULL;
  1506. tmp_inter_list.type = CONTR_TO;
  1507. scc_kill(j,SIG_CONTR_TO,&tmp_inter_list);
  1508. }
  1509. }
  1510. /* Worker or manager timed out */
  1511. }else{
  1512. /*I am controller.
  1513. *If failed node is inside my cluster
  1514. *i have to remove the failed node from my cores and DDS list */
  1515. if ((idag_mask[node_id] == node_id) && (idag_mask[failed_core] == node_id)){
  1516. tmp_core_list = my_cores->next;
  1517. tmp_core_list_prev = my_cores;
  1518. while (tmp_core_list != NULL){
  1519. if (tmp_core_list->core_id == failed_core){
  1520. my_cores_count--;
  1521. tmp_core_list_prev->next = tmp_core_list->next;
  1522. free(tmp_core_list);
  1523. break;
  1524. }else{
  1525. tmp_core_list_prev = tmp_core_list;
  1526. tmp_core_list = tmp_core_list->next;
  1527. }
  1528. }
  1529. tmp_dds = DDS->next;
  1530. tmp_dds_prev = DDS;
  1531. while (tmp_dds != NULL){
  1532. if (tmp_dds->agent_id == failed_core){
  1533. tmp_dds_prev->next = tmp_dds->next;
  1534. free(tmp_dds);
  1535. break;
  1536. }else{
  1537. tmp_dds_prev = tmp_dds;
  1538. tmp_dds = tmp_dds->next;
  1539. }
  1540. }
  1541. }
  1542. /*I am manager.
  1543. *I have to check if the failed node is my worker
  1544. *If yes i have to appoint work to a new node.*/
  1545. if (im_manager()){
  1546. printf("-- %d -- I am manager of an application.\n",node_id);
  1547. tmp_core_list = my_cores->next;
  1548. tmp_core_list_prev = my_cores;
  1549. while (tmp_core_list != NULL){
  1550. /* I am the manager of the failed worker. I reappoint the work to another core. */
  1551. if (tmp_core_list->core_id == failed_core){
  1552. my_cores_count--;
  1553. /* I am the manager of the failed worker so i remove him from my core list */
  1554. tmp_core_list_prev->next = tmp_core_list->next;
  1555. fprintf(log_file,"I am the manager of the failed worker %d! I removed him from my cores list\n",tmp_core_list->core_id);
  1556. printf("-- %d --I am the manager of the failed worker %d! I removed him from my cores list\n",node_id, tmp_core_list->core_id);
  1557. //one_core = tmp_core_list->core_id;
  1558. pending_workload[0] = tmp_core_list->workload[0];
  1559. pending_workload[1] = tmp_core_list->workload[1];
  1560. printf("Pending workload of faulty core: %d %d\n", pending_workload[0], pending_workload[1]);
  1561. /*else{
  1562. reappoint = FALSE;
  1563. }*/
  1564. free(tmp_core_list);
  1565. tmp_core_list = my_cores->next;
  1566. fprintf(log_file,"I am reassigning the workload!\n");
  1567. fprintf(log_file,"-------- CURRENT WORKLOADS --------\n");
  1568. printf("-------- CURRENT WORKLOADS --------\n");
  1569. while (tmp_core_list != NULL){
  1570. fprintf(log_file,"%d\t|\t%d\t|\t%d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  1571. printf("%d\t|\t%d\t|\t%d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  1572. tmp_core_list = tmp_core_list->next;
  1573. }
  1574. printf("-----------------------------------\n");
  1575. fprintf(log_file,"-----------------------------------\n");
  1576. tmp_core_list = my_cores->next;
  1577. while (tmp_core_list != NULL){
  1578. if ((tmp_core_list->workload[0] == -1) && (tmp_core_list->workload[1] == -1)){
  1579. one_core = tmp_core_list->core_id;
  1580. if (core_inter_head[one_core] == NULL){
  1581. core_inter_head[one_core] = (inter_list *) malloc(sizeof(inter_list));
  1582. core_inter_tail[one_core] = core_inter_head[one_core];
  1583. } else {
  1584. core_inter_tail[one_core]->next = (inter_list *) malloc(sizeof(inter_list));
  1585. core_inter_tail[one_core] = core_inter_tail[one_core]->next;
  1586. }
  1587. core_inter_tail[one_core]->type = APPOINT_WORK_NODE;
  1588. core_inter_tail[one_core]->data.work_bounds[0] = pending_workload[0];
  1589. core_inter_tail[one_core]->data.work_bounds[1] = pending_workload[1];
  1590. fprintf(app_log_file,"%d (%d, %d), ",one_core,core_inter_tail[one_core]->data.work_bounds[0],core_inter_tail[one_core]->data.work_bounds[1]);
  1591. core_inter_tail[one_core]->next = NULL;
  1592. if (core_inter_head[one_core]->next == NULL) {
  1593. paxos_node_stats.msg_count++;
  1594. scc_kill(one_core, SIG_APPOINT_WORK, core_inter_head[one_core]);
  1595. } else {
  1596. fprintf(log_file,"I am doing smth else with my working node %d in init inter1 = %d inter2 = %d\n",one_core,core_inter_head[one_core]->type,core_inter_head[one_core]->next->type);
  1597. }
  1598. break;
  1599. }
  1600. tmp_core_list = tmp_core_list->next;
  1601. }
  1602. break;
  1603. }else {
  1604. tmp_core_list_prev = tmp_core_list;
  1605. tmp_core_list = tmp_core_list->next;
  1606. }
  1607. }
  1608. }else{
  1609. /* My manager timed out */
  1610. if (cur_agent.my_agent == failed_core){
  1611. printf("--%d-- %d is my manager! I initiate a Paxos instance\n",node_id, failed_core);
  1612. paxos_state = PAXOS_ACTIVE;
  1613. sig_PAXOS_INIT_handler();
  1614. }
  1615. }
  1616. }
  1617. }
  1618. #ifdef tEPFD
  1619. else if (suspected[i] == 2){
  1620. tmp_inter_list.next = NULL;
  1621. tmp_inter_list.type = HEARTBEAT_REQ;
  1622. scc_kill(i,SIG_HEARTBEAT_REQ,&tmp_inter_list);
  1623. }
  1624. #else
  1625. else {
  1626. if (i == 10)
  1627. printf("suspected[%d]=%d and alive[%d]=%d\n",i,suspected[i],i,alive[i]);
  1628. tmp_inter_list.next = NULL;
  1629. tmp_inter_list.type = HEARTBEAT_REQ;
  1630. scc_kill(i,SIG_HEARTBEAT_REQ,&tmp_inter_list);
  1631. }
  1632. #endif
  1633. }else if (alive[i] == 1){
  1634. suspected[i] = 0;
  1635. //fprintf(log_file,"\t\t%d -> ALIVE\n", i);
  1636. }
  1637. alive[i] = 0;
  1638. }
  1639. its.it_interval.tv_sec = 0;
  1640. its.it_interval.tv_nsec = 0;
  1641. its.it_value.tv_sec = delay;
  1642. its.it_value.tv_nsec = 0;
  1643. if (timer_settime(epfd_timer, 0, &its, NULL) == -1){
  1644. printf("-- %d --", node_id);
  1645. fflush(stdout);
  1646. perror("paxos_signal_handlers.c : timer_settime error9");
  1647. }else {
  1648. fprintf(log_file,"Updated timer!\n");
  1649. }
  1650. cur_time = time(NULL);
  1651. cur_t = localtime(&cur_time);
  1652. fprintf(log_file, "[%d:%d:%d]: I ended sig_EPFD_TIMER_handler state = %s\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,id2string(state));
  1653. signals_enable();
  1654. return;
  1655. }
  1656. void sig_CTIMER_handler(int signo, siginfo_t *info, void *context)
  1657. {
  1658. struct tm cur_t_1;
  1659. signals_disable();
  1660. struct timeval recover_time_val;
  1661. #ifdef CONTROLLER
  1662. DDS_list *tmp_dds;
  1663. printf("--%d-- CTIMER_handler : Controller %sTimed out!%s\n",node_id,KRED,KNRM);
  1664. printf("DDS list before time out:\n");
  1665. for (tmp_dds = DDS; tmp_dds != NULL; tmp_dds = tmp_dds->next)
  1666. printf("\t\t\tAgent_id : %d | Cores in cluster : %d\n",tmp_dds->agent_id, tmp_dds->num_of_cores);
  1667. #elif WORKER
  1668. printf("--%d-- CTIMER_handler : Worker %sTimed out!%s\n",node_id,KRED,KNRM);
  1669. printf("Worker state before timeout: %s\n",id2string(state));
  1670. #elif MANAGER
  1671. core_list *tmp_core_list;
  1672. printf("--%d-- CTIMER_handler : Manager %sTimed out!%s\n",node_id,KRED,KNRM);
  1673. printf("Manager state before timeout: %s\n",id2string(state));
  1674. for (tmp_core_list = my_cores; tmp_core_list != NULL; tmp_core_list = tmp_core_list->next)
  1675. printf("\t\t\tWorker_id : %d | Workload : %d %d\n", tmp_core_list->core_id, tmp_core_list->workload[0], tmp_core_list->workload[1]);
  1676. #endif
  1677. cur_time = time(NULL);
  1678. cur_t = localtime(&cur_time);
  1679. printf("\n\nI timed out at [%d:%d:%d]\n\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec);
  1680. /* FIXED IDs */
  1681. int k;
  1682. inter_list tmp_inter_list;
  1683. for (k = 0; k < X_max*Y_max; k++){
  1684. if (k != node_id && k != faulty_core){
  1685. tmp_inter_list.type = LEARN;
  1686. tmp_inter_list.data.learn_ack_info[VALUE_W] = node_id+1;
  1687. tmp_inter_list.data.learn_ack_info[PREV_CW] = node_id;
  1688. scc_kill(k,SIG_LEARN,&tmp_inter_list);
  1689. }
  1690. }
  1691. exit(0);
  1692. paxos_state = FAILED_CORE;
  1693. gettimeofday(&time_val,NULL);
  1694. cur_t = localtime(&time_val.tv_sec);
  1695. fail_time_val = time_val;
  1696. cur_t_1 = *cur_t;
  1697. while (fail_flag == 0){
  1698. scc_pause();
  1699. scc_signals_check();
  1700. }
  1701. gettimeofday(&time_val, NULL);
  1702. cur_t = localtime(&time_val.tv_sec);
  1703. printf("\n\n\n\n\n\n\n[%d:%d:%d:%ld]: gettimeofday_1\n",cur_t_1.tm_hour,cur_t_1.tm_min,cur_t_1.tm_sec,fail_time_val.tv_usec);
  1704. printf("[%d:%d:%d:%ld]: gettimeofday_2\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,time_val.tv_usec);
  1705. printf("qcbacd = %d Difference in us is: %ld\n",sizeof(suseconds_t),time_val.tv_usec - fail_time_val.tv_usec);
  1706. long int dif = ((cur_t->tm_sec * 1000000) + time_val.tv_usec) - ((cur_t_1.tm_sec * 1000000) + fail_time_val.tv_usec);
  1707. printf("awabacd = %d Difference in us is: %ld\n\n\n\n\n\n\n",sizeof(suseconds_t),dif);
  1708. fflush(stdout);
  1709. exit(0);
  1710. signals_enable();
  1711. }
  1712. void sig_ITIMER_handler(int signo, siginfo_t *info, void *context)
  1713. {
  1714. int i;
  1715. inter_list tmp_inter_list;
  1716. signals_disable();
  1717. printf("--%d-- i have to decide what to do here!!\n", node_id);
  1718. fflush(stdout);
  1719. for (i = 0; i < X_max*Y_max; i++){
  1720. if (idag_mask[i] == 10 && i != 10){
  1721. if (state == IDLE_AGENT_WAITING_OFF) state = IDLE_AGENT;
  1722. tmp_inter_list.next = NULL;
  1723. tmp_inter_list.type = CONTR_TO;
  1724. scc_kill(i,SIG_CONTR_TO,&tmp_inter_list);
  1725. }
  1726. }
  1727. signals_enable();
  1728. }
  1729. /* END */
  /*
   * sig_FAIL_handler - creates and arms controller_timer so that SIG_CTIMER
   * is delivered after a fixed delay.
   *
   * WORKER builds arm the timer for 2 seconds, MANAGER builds for 10 seconds;
   * builds with neither macro defined do nothing. The timer is one-shot
   * (it_interval is zero).
   *
   * If timer_create() fails the error is reported and the function returns
   * without calling timer_settime() on an invalid timer. The announcement
   * message prints the delay that was actually programmed.
   */
  void sig_FAIL_handler(){
  #ifdef WORKER
    sev.sigev_notify = SIGEV_SIGNAL;
    sev.sigev_signo = SIG_CTIMER;
    sev.sigev_value.sival_ptr = &controller_timer;
    if (timer_create(CLOCK_REALTIME, &sev, &controller_timer) == -1){
      perror("paxos_signal_handlers.c : timer_create error");
      return;
    }
    printf("Worker Timer created succesfully!\n");
    its.it_interval.tv_sec = 0;
    its.it_interval.tv_nsec = 0;
    its.it_value.tv_sec = 2;
    its.it_value.tv_nsec = 0;
    if (timer_settime(controller_timer, 0, &its, NULL) == -1)
      perror("controller_core.c : timer_settime error9");
    else
      printf("%d : My timer will explode in %d seconds.\n", node_id, (int) its.it_value.tv_sec);
    return;
  #endif
  #ifdef MANAGER
    sev.sigev_notify = SIGEV_SIGNAL;
    sev.sigev_signo = SIG_CTIMER;
    sev.sigev_value.sival_ptr = &controller_timer;
    if (timer_create(CLOCK_REALTIME, &sev, &controller_timer) == -1){
      perror("paxos_signal_handlers.c : timer_create error");
      return;
    }
    printf("Manager Timer created succesfully!\n");
    its.it_interval.tv_sec = 0;
    its.it_interval.tv_nsec = 0;
    its.it_value.tv_sec = 10;
    its.it_value.tv_nsec = 0;
    if (timer_settime(controller_timer, 0, &its, NULL) == -1)
      perror("controller_core.c : timer_settime error9");
    else
      printf("%d : My timer will explode in %d seconds.\n", node_id, (int) its.it_value.tv_sec);
    return;
  #endif
  }
  1768. void sig_PAXOS_STATS_REQ_handler(int sender_id){
  1769. inter_list tmp_inter_list;
  1770. cur_time = time(NULL);
  1771. cur_t = localtime(&cur_time);
  1772. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_PAXOS_STATS_REQ_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1773. tmp_inter_list.next = NULL;
  1774. tmp_inter_list.type = PAXOS_STATS_REP;
  1775. tmp_inter_list.data.paxos_stats[0] = paxos_node_stats.msg_count;
  1776. tmp_inter_list.data.paxos_stats[1] = paxos_node_stats.fd_msg_count;
  1777. fprintf(log_file,"\t\tI send %d my paxos stats %lld , %d\n",sender_id,paxos_node_stats.msg_count,paxos_node_stats.fd_msg_count);
  1778. scc_kill(sender_id,SIG_PAXOS_STATS_REP,&tmp_inter_list);
  1779. cur_time = time(NULL);
  1780. cur_t = localtime(&cur_time);
  1781. fprintf(log_file, "\n\n[%d:%d:%d]: I ended sig_PAXOS_STATS_REQ_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1782. }
  1783. void sig_PAXOS_STATS_REP_handler(int sender_id){
  1784. long long int paxos_replied_stats = sig_read_ar[2];
  1785. long long int fd_replied_stats = sig_read_ar[3];
  1786. cur_time = time(NULL);
  1787. cur_t = localtime(&cur_time);
  1788. fprintf(log_file, "\n\n[%d:%d:%d]: I entered sig_PAXOS_STATS_REP_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1789. fprintf(log_file,"\t\t%d has replied with msg_count = %lld and fd_msg_count = %lld\n",sender_id,paxos_replied_stats,fd_replied_stats);
  1790. paxos_total_stats.msg_count += paxos_replied_stats;
  1791. paxos_total_stats.fd_msg_count += fd_replied_stats;
  1792. paxos_stats_replied++;
  1793. fprintf(log_file,"\t\tI have updated my stats. New message count = %lld\n",paxos_total_stats.msg_count);
  1794. fprintf(log_file,"\t\tCores replied: %d | My cores count: %d\n",paxos_stats_replied,my_cores_count);
  1795. cur_time = time(NULL);
  1796. cur_t = localtime(&cur_time);
  1797. fprintf(log_file, "\n\n[%d:%d:%d]: I ended sig_PAXOS_STATS_REP_handler with sender = %d state = %s\n",cur_t->tm_hour,cur_t->tm_min,cur_t->tm_sec,sender_id,id2string(state));
  1798. }