fxt_tool.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include "fxt_tool.h"
  17. #include <inttypes.h>
  18. /*
  19. * Default user options
  20. */
  21. static unsigned per_task_colour = 0;
  22. static unsigned generate_distrib = 0;
  23. static unsigned no_counter = 0;
  24. static unsigned no_bus = 0;
  25. /* TODO don't make that global ? */
  26. struct fxt_ev_64 ev;
  27. /* In case we are going to gather multiple traces (eg in the case of MPI
  28. * processes), we may need to prefix the name of the containers. */
  29. char *prefix = "";
  30. uint64_t offset = 0;
  31. int rank = -1;
  32. static uint64_t start_time = 0;
  33. static uint64_t end_time = 0;
  34. static int nworkers = 0;
  35. //static char *filename = NULL;
  36. /* XXX remove the 64 ... */
  37. unsigned ninputfiles = 0;
  38. static char *filenames[64];
  39. static uint64_t last_codelet_hash[MAXWORKERS];
  40. static double last_codelet_start[MAXWORKERS];
  41. static char last_codelet_symbol[128][MAXWORKERS];
  42. /* If more than a period of time has elapsed, we flush the profiling info,
  43. * otherwise they are accumulated everytime there is a new relevant event. */
  44. #define ACTIVITY_PERIOD 125.0
  45. static double last_activity_flush_timestamp[MAXWORKERS];
  46. static double accumulated_sleep_time[MAXWORKERS];
  47. static double accumulated_exec_time[MAXWORKERS];
  48. LIST_TYPE(symbol_name,
  49. char *name;
  50. );
  51. static symbol_name_list_t symbol_list;
  52. LIST_TYPE(communication,
  53. unsigned comid;
  54. float comm_start;
  55. float bandwidth;
  56. unsigned node;
  57. );
  58. static communication_list_t communication_list;
  59. /*
  60. * Paje trace file tools
  61. */
  62. static char *out_paje_path = "paje.trace";
  63. static FILE *out_paje_file;
  64. static char *distrib_time_path = "distrib.data";
  65. static FILE *distrib_time;
  66. static char *activity_path = "activity.data";
  67. static FILE *activity_file;
  68. static void paje_output_file_init(void)
  69. {
  70. /* create a new file */
  71. out_paje_file = fopen(out_paje_path, "w+");
  72. if (!out_paje_file)
  73. {
  74. perror("fopen");
  75. STARPU_ABORT();
  76. }
  77. write_paje_header(out_paje_file);
  78. fprintf(out_paje_file, " \n \
  79. 1 MPIP 0 \"MPI Program\" \n \
  80. 1 P MPIP \"Program\" \n \
  81. 1 Mn P \"Memory Node\" \n \
  82. 1 T Mn \"Worker\" \n \
  83. 1 Sc P \"Scheduler State\" \n \
  84. 2 event T \"event type\" \n \
  85. 3 S T \"Thread State\" \n \
  86. 3 MS Mn \"Memory Node State\" \n \
  87. 4 ntask Sc \"Number of tasks\" \n \
  88. 4 bw Mn \"Bandwidth\" \n \
  89. 6 I S Initializing \"0.0 .7 1.0\" \n \
  90. 6 D S Deinitializing \"0.0 .1 .7\" \n \
  91. 6 Fi S FetchingInput \"1.0 .1 1.0\" \n \
  92. 6 Po S PushingOutput \"0.1 1.0 1.0\" \n \
  93. 6 E S Executing \".0 .6 .4\" \n \
  94. 6 C S Callback \".0 .3 .8\" \n \
  95. 6 B S Blocked \".9 .1 .0\" \n \
  96. 6 Sl S Sleeping \".9 .1 .0\" \n \
  97. 6 P S Progressing \".4 .1 .6\" \n \
  98. 6 A MS Allocating \".4 .1 .0\" \n \
  99. 6 Ar MS AllocatingReuse \".1 .1 .8\" \n \
  100. 6 R MS Reclaiming \".0 .1 .4\" \n \
  101. 6 Co MS DriverCopy \".3 .5 .1\" \n \
  102. 6 No MS Nothing \".0 .0 .0\" \n \
  103. 5 MPIL MPIP P P MPIL\n \
  104. 5 L P Mn Mn L\n");
  105. fprintf(out_paje_file, "7 0.0 MPIroot MPIP 0 root\n");
  106. }
  107. /*
  108. * Generic tools
  109. */
  110. static float get_event_time_stamp(void)
  111. {
  112. return (float)((ev.time-offset)/1000000.0);
  113. }
  114. static int register_worker_id(unsigned long tid)
  115. {
  116. int workerid = nworkers++;
  117. /* create a new key in the htable */
  118. char *tidstr = malloc(16*sizeof(char));
  119. sprintf(tidstr, "%ld", tid);
  120. ENTRY item;
  121. item.key = tidstr;
  122. item.data = (void *)(uintptr_t)workerid;
  123. ENTRY *res;
  124. res = hsearch(item, FIND);
  125. /* only register a thread once */
  126. STARPU_ASSERT(res == NULL);
  127. res = hsearch(item, ENTER);
  128. STARPU_ASSERT(res);
  129. return workerid;
  130. }
  131. static int find_worker_id(unsigned long tid)
  132. {
  133. char tidstr[16];
  134. sprintf(tidstr, "%ld", tid);
  135. ENTRY item;
  136. item.key = tidstr;
  137. item.data = NULL;
  138. ENTRY *res;
  139. res = hsearch(item, FIND);
  140. if (!res)
  141. return -1;
  142. int id = (uintptr_t)(res->data);
  143. return id;
  144. }
  145. static void update_accumulated_time(int worker, double sleep_time, double exec_time, double current_timestamp, int forceflush)
  146. {
  147. accumulated_sleep_time[worker] += sleep_time;
  148. accumulated_exec_time[worker] += exec_time;
  149. /* If sufficient time has elapsed since the last flush, we have a new
  150. * point in our graph */
  151. double elapsed = current_timestamp - last_activity_flush_timestamp[worker];
  152. if (forceflush || (elapsed > ACTIVITY_PERIOD))
  153. {
  154. fprintf(activity_file, "%d\t%lf\t%lf\t%lf\t%lf\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]);
  155. /* reset the accumulated times */
  156. last_activity_flush_timestamp[worker] = current_timestamp;
  157. accumulated_sleep_time[worker] = 0.0;
  158. accumulated_exec_time[worker] = 0.0;
  159. }
  160. }
  161. /*
  162. * Initialization
  163. */
  164. static void handle_new_mem_node(void)
  165. {
  166. fprintf(out_paje_file, "7 %f %"PRIu64" Mn %sp %sMEMNODE%"PRIu64"\n", get_event_time_stamp(), ev.param[0], prefix, prefix, ev.param[0]);
  167. if (!no_bus)
  168. fprintf(out_paje_file, "13 %f bw %sMEMNODE%"PRIu64" 0.0\n", 0.0f, prefix, ev.param[0]);
  169. }
  170. static void handle_worker_init_start(void)
  171. {
  172. /*
  173. arg0 : type of worker (cuda, cpu ..)
  174. arg1 : memory node
  175. arg2 : thread id
  176. */
  177. fprintf(out_paje_file, "7 %f %s%"PRIu64" T %sMEMNODE%"PRIu64" %s%"PRIu64"\n",
  178. get_event_time_stamp(), prefix, ev.param[2], prefix, ev.param[1], prefix, ev.param[2]);
  179. int workerid = register_worker_id(ev.param[2]);
  180. switch (ev.param[0]) {
  181. case STARPU_FUT_APPS_KEY:
  182. set_next_other_worker_color(workerid);
  183. break;
  184. case STARPU_FUT_CPU_KEY:
  185. set_next_cpu_worker_color(workerid);
  186. break;
  187. case STARPU_FUT_CUDA_KEY:
  188. set_next_cuda_worker_color(workerid);
  189. break;
  190. case STARPU_FUT_OPENCL_KEY:
  191. set_next_opencl_worker_color(workerid);
  192. break;
  193. default:
  194. STARPU_ABORT();
  195. }
  196. /* start initialization */
  197. fprintf(out_paje_file, "10 %f S %s%"PRIu64" I\n",
  198. get_event_time_stamp(), prefix, ev.param[2]);
  199. }
  200. static void handle_worker_init_end(void)
  201. {
  202. fprintf(out_paje_file, "10 %f S %s%"PRIu64" B\n",
  203. get_event_time_stamp(), prefix, ev.param[0]);
  204. /* Initilize the accumulated time counters */
  205. int worker = find_worker_id(ev.param[0]);
  206. last_activity_flush_timestamp[worker] = get_event_time_stamp();
  207. accumulated_sleep_time[worker] = 0.0;
  208. accumulated_exec_time[worker] = 0.0;
  209. }
  210. static void handle_worker_deinit_start(void)
  211. {
  212. fprintf(out_paje_file, "10 %f S %s%"PRIu64" D\n",
  213. get_event_time_stamp(), prefix, ev.param[0]);
  214. }
  215. static void handle_worker_deinit_end(void)
  216. {
  217. fprintf(out_paje_file, "8 %f %s%"PRIu64" T\n",
  218. get_event_time_stamp(), prefix, ev.param[1]);
  219. }
  220. static void create_paje_state_if_not_found(char *name)
  221. {
  222. symbol_name_itor_t itor;
  223. for (itor = symbol_name_list_begin(symbol_list);
  224. itor != symbol_name_list_end(symbol_list);
  225. itor = symbol_name_list_next(itor))
  226. {
  227. if (!strcmp(name, itor->name))
  228. {
  229. /* we found an entry */
  230. return;
  231. }
  232. }
  233. /* it's the first time ... */
  234. symbol_name_t entry = symbol_name_new();
  235. entry->name = malloc(strlen(name));
  236. strcpy(entry->name, name);
  237. symbol_name_list_push_front(symbol_list, entry);
  238. /* choose some colour ... that's disguting yes */
  239. unsigned hash_symbol_red = get_colour_symbol_red(name);
  240. unsigned hash_symbol_green = get_colour_symbol_green(name);
  241. unsigned hash_symbol_blue = get_colour_symbol_blue(name);
  242. fprintf(stderr, "name %s hash red %d green %d blue %d \n", name, hash_symbol_red, hash_symbol_green, hash_symbol_blue);
  243. uint32_t hash_sum = hash_symbol_red + hash_symbol_green + hash_symbol_blue;
  244. float red = (1.0f * hash_symbol_red) / hash_sum;
  245. float green = (1.0f * hash_symbol_green) / hash_sum;
  246. float blue = (1.0f * hash_symbol_blue) / hash_sum;
  247. /* create the Paje state */
  248. fprintf(out_paje_file, "6 %s S %s \"%f %f %f\" \n", name, name, red, green, blue);
  249. }
  250. static void handle_start_codelet_body(void)
  251. {
  252. int worker;
  253. worker = find_worker_id(ev.param[1]);
  254. if (worker < 0) return;
  255. unsigned long has_name = ev.param[2];
  256. char *name = has_name?(char *)&ev.param[3]:"unknown";
  257. snprintf(last_codelet_symbol[worker], 128, "%s", name);
  258. /* TODO */
  259. last_codelet_hash[worker] = 0;
  260. float start_codelet_time = get_event_time_stamp();
  261. last_codelet_start[worker] = start_codelet_time;
  262. if (per_task_colour)
  263. {
  264. create_paje_state_if_not_found(name);
  265. fprintf(out_paje_file, "101 %f S %s%"PRIu64" E %s\n", start_codelet_time, prefix, ev.param[1], name);
  266. }
  267. else {
  268. fprintf(out_paje_file, "10 %f S %s%"PRIu64" E\n", start_codelet_time, prefix, ev.param[1]);
  269. }
  270. end_time = STARPU_MAX(end_time, ev.time);
  271. }
  272. static void handle_end_codelet_body(void)
  273. {
  274. int worker;
  275. worker = find_worker_id(ev.param[1]);
  276. if (worker < 0) return;
  277. float end_codelet_time = get_event_time_stamp();
  278. fprintf(out_paje_file, "10 %f S %s%"PRIu64" B\n", end_codelet_time, prefix, ev.param[1]);
  279. float codelet_length = (end_codelet_time - last_codelet_start[worker]);
  280. update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
  281. if (generate_distrib)
  282. fprintf(distrib_time, "%s\t%s%d\t%"PRIx64"\t%f\n", last_codelet_symbol[worker],
  283. prefix, worker, last_codelet_hash[worker], codelet_length);
  284. end_time = STARPU_MAX(end_time, ev.time);
  285. }
  286. static void handle_user_event(void)
  287. {
  288. int worker;
  289. unsigned code;
  290. code = ev.param[2];
  291. worker = find_worker_id(ev.param[1]);
  292. if (worker < 0)
  293. {
  294. fprintf(out_paje_file, "9 %f event %sp %d\n", get_event_time_stamp(), prefix, rank);
  295. }
  296. else {
  297. fprintf(out_paje_file, "9 %f event %s%"PRIu64" %d\n", get_event_time_stamp(), prefix, ev.param[1], code);
  298. }
  299. }
  300. static void handle_start_callback(void)
  301. {
  302. int worker;
  303. worker = find_worker_id(ev.param[1]);
  304. if (worker < 0) return;
  305. fprintf(out_paje_file, "10 %f S %s%"PRIu64" C\n", get_event_time_stamp(), prefix, ev.param[1] );
  306. }
  307. static void handle_end_callback(void)
  308. {
  309. int worker;
  310. worker = find_worker_id(ev.param[1]);
  311. if (worker < 0) return;
  312. fprintf(out_paje_file, "10 %f S %s%"PRIu64" B\n", get_event_time_stamp(), prefix, ev.param[1] );
  313. }
  314. static void handle_worker_status(const char *newstatus)
  315. {
  316. int worker;
  317. worker = find_worker_id(ev.param[1]);
  318. if (worker < 0) return;
  319. fprintf(out_paje_file, "10 %f S %s%"PRIu64" %s\n",
  320. get_event_time_stamp(), prefix, ev.param[1], newstatus);
  321. end_time = STARPU_MAX(end_time, ev.time);
  322. }
  323. static double last_sleep_start[MAXWORKERS];
  324. static void handle_start_sleep(void)
  325. {
  326. int worker;
  327. worker = find_worker_id(ev.param[0]);
  328. if (worker < 0) return;
  329. float start_sleep_time = get_event_time_stamp();
  330. last_sleep_start[worker] = start_sleep_time;
  331. fprintf(out_paje_file, "10 %f S %s%"PRIu64" Sl\n",
  332. get_event_time_stamp(), prefix, ev.param[0]);
  333. end_time = STARPU_MAX(end_time, ev.time);
  334. }
  335. static void handle_end_sleep(void)
  336. {
  337. int worker;
  338. worker = find_worker_id(ev.param[0]);
  339. if (worker < 0) return;
  340. float end_sleep_timestamp = get_event_time_stamp();
  341. fprintf(out_paje_file, "10 %f S %s%"PRIu64" B\n",
  342. end_sleep_timestamp, prefix, ev.param[0]);
  343. double sleep_length = end_sleep_timestamp - last_sleep_start[worker];
  344. update_accumulated_time(worker, sleep_length, 0.0, end_sleep_timestamp, 0);
  345. end_time = STARPU_MAX(end_time, ev.time);
  346. }
  347. static void handle_data_copy(void)
  348. {
  349. }
  350. static void handle_start_driver_copy(void)
  351. {
  352. unsigned src = ev.param[0];
  353. unsigned dst = ev.param[1];
  354. unsigned size = ev.param[2];
  355. unsigned comid = ev.param[3];
  356. if (!no_bus)
  357. {
  358. fprintf(out_paje_file, "10 %f MS %sMEMNODE%d Co\n", get_event_time_stamp(), prefix, dst);
  359. fprintf(out_paje_file, "18 %f L %sp %d %sMEMNODE%d com_%d\n", get_event_time_stamp(), prefix, size, prefix, src, comid);
  360. /* create a structure to store the start of the communication, this will be matched later */
  361. communication_t com = communication_new();
  362. com->comid = comid;
  363. com->comm_start = get_event_time_stamp();
  364. /* that's a hack: either src or dst is non null */
  365. com->node = (src + dst);
  366. communication_list_push_back(communication_list, com);
  367. }
  368. }
  369. static void handle_end_driver_copy(void)
  370. {
  371. unsigned dst = ev.param[1];
  372. unsigned size = ev.param[2];
  373. unsigned comid = ev.param[3];
  374. if (!no_bus)
  375. {
  376. fprintf(out_paje_file, "10 %f MS %sMEMNODE%d No\n", get_event_time_stamp(), prefix, dst);
  377. fprintf(out_paje_file, "19 %f L %sp %d %sMEMNODE%d com_%d\n", get_event_time_stamp(), prefix, size, prefix, dst, comid);
  378. /* look for a data transfer to match */
  379. communication_itor_t itor;
  380. for (itor = communication_list_begin(communication_list);
  381. itor != communication_list_end(communication_list);
  382. itor = communication_list_next(itor))
  383. {
  384. if (itor->comid == comid)
  385. {
  386. float comm_end = get_event_time_stamp();
  387. float bandwidth = (float)((0.001*size)/(comm_end - itor->comm_start));
  388. itor->bandwidth = bandwidth;
  389. communication_t com = communication_new();
  390. com->comid = comid;
  391. com->comm_start = get_event_time_stamp();
  392. com->bandwidth = -bandwidth;
  393. com->node = itor->node;
  394. communication_list_push_back(communication_list, com);
  395. break;
  396. }
  397. }
  398. }
  399. }
  400. static void display_bandwidth_evolution(void)
  401. {
  402. float current_bandwidth = 0.0;
  403. float current_bandwidth_per_node[32] = {0.0};
  404. communication_itor_t itor;
  405. for (itor = communication_list_begin(communication_list);
  406. itor != communication_list_end(communication_list);
  407. itor = communication_list_next(itor))
  408. {
  409. current_bandwidth += itor->bandwidth;
  410. fprintf(out_paje_file, "13 %f bw %sMEMNODE0 %f\n",
  411. itor->comm_start, prefix, current_bandwidth);
  412. current_bandwidth_per_node[itor->node] += itor->bandwidth;
  413. fprintf(out_paje_file, "13 %f bw %sMEMNODE%d %f\n",
  414. itor->comm_start, prefix, itor->node, current_bandwidth_per_node[itor->node]);
  415. }
  416. }
  417. static void handle_memnode_event(const char *eventstr)
  418. {
  419. unsigned memnode = ev.param[0];
  420. fprintf(out_paje_file, "10 %f MS %sMEMNODE%d %s\n",
  421. get_event_time_stamp(), prefix, memnode, eventstr);
  422. }
/*
 * Number of tasks submitted to the scheduler
 */
  426. static int curq_size = 0;
  427. static void handle_job_push(void)
  428. {
  429. curq_size++;
  430. fprintf(out_paje_file, "13 %f ntask %ssched %f\n", get_event_time_stamp(), prefix, (float)curq_size);
  431. }
  432. static void handle_job_pop(void)
  433. {
  434. curq_size--;
  435. fprintf(out_paje_file, "13 %f ntask %ssched %f\n", get_event_time_stamp(), prefix, (float)curq_size);
  436. }
  437. static void handle_codelet_tag_deps(void)
  438. {
  439. uint64_t child;
  440. uint64_t father;
  441. child = ev.param[0];
  442. father = ev.param[1];
  443. add_deps(child, father);
  444. }
  445. static void handle_task_deps(void)
  446. {
  447. unsigned long dep_prev = ev.param[0];
  448. unsigned long dep_succ = ev.param[1];
  449. /* There is a dependency between both job id : dep_prev -> dep_succ */
  450. add_task_deps(dep_prev, dep_succ);
  451. }
  452. static void handle_task_done(void)
  453. {
  454. unsigned long job_id;
  455. job_id = ev.param[0];
  456. unsigned long has_name = ev.param[3];
  457. char *name = has_name?(char *)&ev.param[4]:"unknown";
  458. int worker;
  459. worker = find_worker_id(ev.param[1]);
  460. const char *colour;
  461. char buffer[32];
  462. if (per_task_colour) {
  463. snprintf(buffer, 32, "#%x%x%x",
  464. get_colour_symbol_red(name)/4,
  465. get_colour_symbol_green(name)/4,
  466. get_colour_symbol_blue(name)/4);
  467. colour = &buffer[0];
  468. }
  469. else {
  470. colour= (worker < 0)?"#aaaaaa":get_worker_color(worker);
  471. }
  472. unsigned exclude_from_dag = ev.param[2];
  473. if (!exclude_from_dag)
  474. dot_set_task_done(job_id, name, colour);
  475. }
  476. static void handle_tag_done(void)
  477. {
  478. uint64_t tag_id;
  479. tag_id = ev.param[0];
  480. unsigned long has_name = ev.param[2];
  481. char *name = has_name?(char *)&ev.param[3]:"unknown";
  482. int worker;
  483. worker = find_worker_id(ev.param[1]);
  484. const char *colour;
  485. char buffer[32];
  486. if (per_task_colour) {
  487. snprintf(buffer, 32, "%.4f,%.4f,%.4f",
  488. get_colour_symbol_red(name)/1024.0,
  489. get_colour_symbol_green(name)/1024.0,
  490. get_colour_symbol_blue(name)/1024.0);
  491. colour = &buffer[0];
  492. }
  493. else {
  494. colour= (worker < 0)?"0.0,0.0,0.0":get_worker_color(worker);
  495. }
  496. dot_set_tag_done(tag_id, colour);
  497. }
  498. static void handle_mpi_barrier(void)
  499. {
  500. rank = ev.param[0];
  501. /* Add an event in the trace */
  502. fprintf(out_paje_file, "9 %f event %sp %d\n", get_event_time_stamp(), prefix, rank);
  503. }
  504. static void handle_mpi_isend(void)
  505. {
  506. int dest = ev.param[0];
  507. int mpi_tag = ev.param[1];
  508. size_t size = ev.param[2];
  509. float date = get_event_time_stamp();
  510. add_mpi_send_transfer(rank, dest, mpi_tag, size, date);
  511. }
  512. static void handle_mpi_irecv_end(void)
  513. {
  514. int src = ev.param[0];
  515. int mpi_tag = ev.param[1];
  516. float date = get_event_time_stamp();
  517. add_mpi_recv_transfer(src, rank, mpi_tag, date);
  518. }
  519. static void parse_args(int argc, char **argv)
  520. {
  521. /* We want to support arguments such as "fxt_tool -i trace_*" */
  522. unsigned reading_input_filenames = 0;
  523. int i;
  524. for (i = 1; i < argc; i++) {
  525. if (strcmp(argv[i], "-c") == 0) {
  526. per_task_colour = 1;
  527. reading_input_filenames = 0;
  528. continue;
  529. }
  530. if (strcmp(argv[i], "-o") == 0) {
  531. out_paje_path = argv[++i];
  532. reading_input_filenames = 0;
  533. continue;
  534. }
  535. if (strcmp(argv[i], "-i") == 0) {
  536. filenames[ninputfiles++] = argv[++i];
  537. reading_input_filenames = 1;
  538. continue;
  539. }
  540. if (strcmp(argv[i], "-no-counter") == 0) {
  541. no_counter = 1;
  542. reading_input_filenames = 0;
  543. continue;
  544. }
  545. if (strcmp(argv[i], "-no-bus") == 0) {
  546. no_bus = 1;
  547. reading_input_filenames = 0;
  548. continue;
  549. }
  550. if (strcmp(argv[i], "-d") == 0) {
  551. generate_distrib = 1;
  552. reading_input_filenames = 0;
  553. continue;
  554. }
  555. if (strcmp(argv[i], "-h") == 0) {
  556. fprintf(stderr, "Usage : %s [-c] [-no-counter] [-no-bus] [-i input_filename] [-o output_filename]\n", argv[0]);
  557. fprintf(stderr, "\t-c: use a different colour for every type of task.\n");
  558. exit(-1);
  559. }
  560. /* That's pretty dirty: if the reading_input_filenames flag is
  561. * set, and that the argument does not match an option, we
  562. * assume this may be another filename */
  563. if (reading_input_filenames)
  564. {
  565. filenames[ninputfiles++] = argv[i];
  566. continue;
  567. }
  568. }
  569. }
  570. void parse_new_file(char *filename_in, char *file_prefix, uint64_t file_offset)
  571. {
  572. prefix = file_prefix;
  573. offset = file_offset;
  574. /* Open the trace file */
  575. int fd_in;
  576. fd_in = open(filename_in, O_RDONLY);
  577. if (fd_in < 0) {
  578. perror("open failed :");
  579. exit(-1);
  580. }
  581. static fxt_t fut;
  582. fut = fxt_fdopen(fd_in);
  583. if (!fut) {
  584. perror("fxt_fdopen :");
  585. exit(-1);
  586. }
  587. fxt_blockev_t block;
  588. block = fxt_blockev_enter(fut);
  589. /* create a htable to identify each worker(tid) */
  590. hcreate(MAXWORKERS);
  591. symbol_list = symbol_name_list_new();
  592. communication_list = communication_list_new();
  593. /* TODO starttime ...*/
  594. /* create the "program" container */
  595. fprintf(out_paje_file, "7 0.0 %sp P MPIroot program%s \n", prefix, prefix);
  596. /* create a variable with the number of tasks */
  597. if (!no_counter)
  598. {
  599. fprintf(out_paje_file, "7 %f %ssched Sc %sp scheduler \n", 0.0, prefix, prefix);
  600. fprintf(out_paje_file, "13 0.0 ntask %ssched 0.0\n", prefix);
  601. }
  602. unsigned first_event = 1;
  603. while(1) {
  604. int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
  605. if (ret != FXT_EV_OK) {
  606. fprintf(stderr, "no more block ...\n");
  607. break;
  608. }
  609. __attribute__ ((unused)) int nbparam = ev.nb_params;
  610. if (first_event)
  611. {
  612. first_event = 0;
  613. start_time = ev.time;
  614. }
  615. switch (ev.code) {
  616. case STARPU_FUT_WORKER_INIT_START:
  617. handle_worker_init_start();
  618. break;
  619. case STARPU_FUT_WORKER_INIT_END:
  620. handle_worker_init_end();
  621. break;
  622. case STARPU_FUT_NEW_MEM_NODE:
  623. handle_new_mem_node();
  624. break;
  625. /* detect when the workers were idling or not */
  626. case STARPU_FUT_START_CODELET_BODY:
  627. handle_start_codelet_body();
  628. break;
  629. case STARPU_FUT_END_CODELET_BODY:
  630. handle_end_codelet_body();
  631. break;
  632. case STARPU_FUT_START_CALLBACK:
  633. handle_start_callback();
  634. break;
  635. case STARPU_FUT_END_CALLBACK:
  636. handle_end_callback();
  637. break;
  638. /* monitor stack size */
  639. case STARPU_FUT_JOB_PUSH:
  640. if (!no_counter)
  641. handle_job_push();
  642. break;
  643. case STARPU_FUT_JOB_POP:
  644. if (!no_counter)
  645. handle_job_pop();
  646. break;
  647. /* check the memory transfer overhead */
  648. case STARPU_FUT_START_FETCH_INPUT:
  649. handle_worker_status("Fi");
  650. break;
  651. case STARPU_FUT_START_PUSH_OUTPUT:
  652. handle_worker_status("Po");
  653. break;
  654. case STARPU_FUT_START_PROGRESS:
  655. handle_worker_status("P");
  656. break;
  657. case STARPU_FUT_END_FETCH_INPUT:
  658. case STARPU_FUT_END_PROGRESS:
  659. case STARPU_FUT_END_PUSH_OUTPUT:
  660. handle_worker_status("B");
  661. break;
  662. case STARPU_FUT_WORKER_SLEEP_START:
  663. handle_start_sleep();
  664. break;
  665. case STARPU_FUT_WORKER_SLEEP_END:
  666. handle_end_sleep();
  667. break;
  668. case STARPU_FUT_CODELET_TAG:
  669. /* XXX */
  670. break;
  671. case STARPU_FUT_CODELET_TAG_DEPS:
  672. handle_codelet_tag_deps();
  673. break;
  674. case STARPU_FUT_TASK_DEPS:
  675. handle_task_deps();
  676. break;
  677. case STARPU_FUT_TASK_DONE:
  678. handle_task_done();
  679. break;
  680. case STARPU_FUT_TAG_DONE:
  681. handle_tag_done();
  682. break;
  683. case STARPU_FUT_DATA_COPY:
  684. if (!no_bus)
  685. handle_data_copy();
  686. break;
  687. case STARPU_FUT_START_DRIVER_COPY:
  688. if (!no_bus)
  689. handle_start_driver_copy();
  690. break;
  691. case STARPU_FUT_END_DRIVER_COPY:
  692. if (!no_bus)
  693. handle_end_driver_copy();
  694. break;
  695. case STARPU_FUT_WORK_STEALING:
  696. /* XXX */
  697. break;
  698. case STARPU_FUT_WORKER_DEINIT_START:
  699. handle_worker_deinit_start();
  700. break;
  701. case STARPU_FUT_WORKER_DEINIT_END:
  702. handle_worker_deinit_end();
  703. break;
  704. case STARPU_FUT_START_ALLOC:
  705. if (!no_bus)
  706. handle_memnode_event("A");
  707. break;
  708. case STARPU_FUT_START_ALLOC_REUSE:
  709. if (!no_bus)
  710. handle_memnode_event("Ar");
  711. break;
  712. case STARPU_FUT_START_MEMRECLAIM:
  713. handle_memnode_event("R");
  714. break;
  715. case STARPU_FUT_END_ALLOC:
  716. case STARPU_FUT_END_ALLOC_REUSE:
  717. case STARPU_FUT_END_MEMRECLAIM:
  718. if (!no_bus)
  719. handle_memnode_event("No");
  720. break;
  721. case STARPU_FUT_USER_EVENT:
  722. handle_user_event();
  723. break;
  724. case FUT_MPI_BARRIER:
  725. handle_mpi_barrier();
  726. break;
  727. case FUT_MPI_ISEND:
  728. handle_mpi_isend();
  729. break;
  730. case FUT_MPI_IRECV_END:
  731. handle_mpi_irecv_end();
  732. break;
  733. default:
  734. fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",
  735. (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time-offset));
  736. break;
  737. }
  738. }
  739. hdestroy();
  740. /* Close the trace file */
  741. if (close(fd_in))
  742. {
  743. perror("close failed :");
  744. exit(-1);
  745. }
  746. }
  747. /*
  748. * This program should be used to parse the log generated by FxT
  749. */
  750. int main(int argc, char **argv)
  751. {
  752. int fd_out;
  753. parse_args(argc, argv);
  754. init_dag_dot();
  755. if (generate_distrib)
  756. distrib_time = fopen(distrib_time_path, "w+");
  757. activity_file = fopen(activity_path, "w+");
  758. paje_output_file_init();
  759. if (ninputfiles == 1)
  760. {
  761. /* we usually only have a single trace */
  762. uint64_t file_start_time = find_start_time(filenames[0]);
  763. parse_new_file(filenames[0], "", file_start_time);
  764. }
  765. else {
  766. unsigned inputfile;
  767. uint64_t offsets[64];
  768. uint64_t found_offsets[64];
  769. uint64_t start_times[64];
  770. uint64_t max = 0;
  771. /*
  772. * Find the trace offsets:
  773. * - If there is no sync point
  774. * psi_k(x) = x - start_k
  775. * - If there is a sync point sync_k
  776. * psi_k(x) = x - sync_k + M
  777. * where M = max { sync_i - start_i | there exists sync_i}
  778. * More generally:
  779. * - psi_k(x) = x - offset_k
  780. */
  781. int unique_keys[64];
  782. int rank_k[64];
  783. uint64_t start_k[64];
  784. uint64_t sync_k[64];
  785. unsigned sync_k_exists[64];
  786. uint64_t M = 0;
  787. unsigned found_one_sync_point = 0;
  788. int key;
  789. unsigned display_mpi = 0;
  790. /* Compute all start_k */
  791. for (inputfile = 0; inputfile < ninputfiles; inputfile++)
  792. {
  793. uint64_t file_start = find_start_time(filenames[inputfile]);
  794. start_k[inputfile] = file_start;
  795. }
  796. /* Compute all sync_k if they exist */
  797. for (inputfile = 0; inputfile < ninputfiles; inputfile++)
  798. {
  799. int ret = find_sync_point(filenames[inputfile],
  800. &sync_k[inputfile],
  801. &unique_keys[inputfile],
  802. &rank_k[inputfile]);
  803. if (ret == -1)
  804. {
  805. /* There was no sync point, we assume there is no offset */
  806. sync_k_exists[inputfile] = 0;
  807. }
  808. else {
  809. if (!found_one_sync_point)
  810. {
  811. key = unique_keys[inputfile];
  812. display_mpi = 1;
  813. found_one_sync_point = 1;
  814. }
  815. else {
  816. if (key != unique_keys[inputfile])
  817. {
  818. fprintf(stderr, "Warning: traces are coming from different run so we will not try to display MPI communications.\n");
  819. display_mpi = 0;
  820. }
  821. }
  822. STARPU_ASSERT(sync_k[inputfile] >= start_k[inputfile]);
  823. sync_k_exists[inputfile] = 1;
  824. uint64_t diff = sync_k[inputfile] - start_k[inputfile];
  825. if (diff > M)
  826. M = diff;
  827. }
  828. }
  829. /* Compute the offset */
  830. for (inputfile = 0; inputfile < ninputfiles; inputfile++)
  831. {
  832. offsets[inputfile] = sync_k_exists[inputfile]?
  833. (sync_k[inputfile]-M):start_k[inputfile];
  834. }
  835. /* generate the Paje trace for the different files */
  836. for (inputfile = 0; inputfile < ninputfiles; inputfile++)
  837. {
  838. int filerank = rank_k[inputfile];
  839. fprintf(stderr, "Handle file %s (rank %d)\n", filenames[inputfile], filerank);
  840. char file_prefix[32];
  841. snprintf(file_prefix, 32, "mpi_%d_", filerank);
  842. parse_new_file(filenames[inputfile], file_prefix, offsets[inputfile]);
  843. }
  844. /* display the MPI transfers if possible */
  845. if (display_mpi)
  846. for (inputfile = 0; inputfile < ninputfiles; inputfile++)
  847. {
  848. int filerank = rank_k[inputfile];
  849. display_all_transfers_from_trace(out_paje_file, filerank);
  850. }
  851. }
  852. display_bandwidth_evolution();
  853. /* close the different files */
  854. fclose(out_paje_file);
  855. fclose(activity_file);
  856. if (generate_distrib)
  857. fclose(distrib_time);
  858. terminate_dat_dot();
  859. return 0;
  860. }