starpu_trace_state_stats.py

#!/usr/bin/env python3
# coding=utf-8
#
# StarPU --- Runtime system for heterogeneous multicore architectures.
#
# Copyright (C) 2016-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
#
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
#
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU Lesser General Public License in COPYING.LGPL for more details.
#
##
# This script parses the generated trace.rec file and reports statistics about
# the number of different events/tasks and their durations. The report is
# similar to the one produced by the starpu_paje_state_stats.in script, except
# that this one needs neither R nor pj_dump (from the pajeng repository), and
# it is also much faster.
##
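##
# Sketch of the input this script expects (illustrative only: the field values
# below are made up, and real trace.rec records may carry additional fields):
# records in trace.rec are separated by blank lines, with one "X: value" field
# per line, and only the fields listed here are read.
#
#   E: SetState       <- EventType: PushState, PopState or SetState
#   N: Executing      <- Name of the state
#   C: Task           <- Category: Runtime, Task, Other or Program
#   W: 0              <- WorkerId
#   S: 1234.567890    <- StartTime
#
# Records with category "Program" (e.g. start_profiling/stop_profiling) are
# global and are not attributed to a worker.
##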
import getopt
import os
import sys

class Event():
    def __init__(self, type, name, category, start_time):
        self._type = type
        self._name = name
        self._category = category
        self._start_time = start_time

class EventStats():
    def __init__(self, name, duration_time, category, count = 1):
        self._name = name
        self._duration_time = duration_time
        self._category = category
        self._count = count

    def aggregate(self, duration_time):
        self._duration_time += duration_time
        self._count += 1

    def show(self):
        if not self._name == None and not self._category == None:
            print("\"" + self._name + "\"," + str(self._count) + ",\"" + self._category + "\"," + str(round(self._duration_time, 6)))

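# An EventStats is printed by show() as one CSV row of the form
# "Name",Count,"Category",Duration; for example (values made up for
# illustration): "Scheduling",120,"Runtime",0.512345
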
class Worker():
    def __init__(self, id):
        self._id = id
        self._events = []
        self._stats = []
        self._stack = []
        self._current_state = None

    def get_event_stats(self, name):
        for stat in self._stats:
            if stat._name == name:
                return stat
        return None

    def add_event(self, type, name, category, start_time):
        self._events.append(Event(type, name, category, start_time))

    def add_event_to_stats(self, curr_event):
        if curr_event._type == "PushState":
            self._stack.append(curr_event)
            return # Will look later to find a PopState event.
        elif curr_event._type == "PopState":
            if len(self._stack) == 0:
                print("warning: PopState without a PushState, probably a trace with start/stop profiling")
                self._current_state = None
                return
            next_event = curr_event
            curr_event = self._stack.pop()
        elif curr_event._type == "SetState":
            if self._current_state == None:
                # First SetState event found
                self._current_state = curr_event
                return
            saved_state = curr_event
            next_event = curr_event
            curr_event = self._current_state
            self._current_state = saved_state
        else:
            sys.exit("ERROR: Invalid event type!")

        # Compute duration with the next event.
        a = curr_event._start_time
        b = next_event._start_time

        # Add the event to the list of stats.
        for i in range(len(self._stats)):
            if self._stats[i]._name == curr_event._name:
                self._stats[i].aggregate(b - a)
                return
        self._stats.append(EventStats(curr_event._name, b - a,
                                      curr_event._category))
    def calc_stats(self, start_profiling_times, stop_profiling_times):
        num_events = len(self._events)
        use_start_stop = len(start_profiling_times) != 0
        for i in range(0, num_events):
            event = self._events[i]
            if i > 0 and self._events[i-1]._name == "Deinitializing":
                # Drop all events after the Deinitializing event is found
                # because they do not make sense.
                break

            if not use_start_stop:
                self.add_event_to_stats(event)
                continue

            # Check if the event is in between start/stop profiling events.
            for t in range(len(start_profiling_times)):
                if (event._start_time > start_profiling_times[t] and
                    event._start_time < stop_profiling_times[t]):
                    self.add_event_to_stats(event)
                    break

        if not use_start_stop:
            return

        # Special case for SetState events which need a next one for computing
        # the duration.
        curr_event = self._events[-1]
        if curr_event._type == "SetState":
            for i in range(len(start_profiling_times)):
                if (curr_event._start_time > start_profiling_times[i] and
                    curr_event._start_time < stop_profiling_times[i]):
                    curr_event = Event(curr_event._type, curr_event._name,
                                       curr_event._category,
                                       stop_profiling_times[i])
                    self.add_event_to_stats(curr_event)

def read_blocks(input_file):
    empty_lines = 0
    first_line = 1
    blocks = []
    for line in open(input_file):
        if first_line:
            blocks.append([])
            blocks[-1].append(line)
            first_line = 0

        # Check for empty lines
        if not line or line[0] == '\n':
            # If 1st one: new block
            if empty_lines == 0:
                blocks.append([])
            empty_lines += 1
        else:
            # Non empty line: add line in current(last) block
            empty_lines = 0
            blocks[-1].append(line)
    return blocks

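# read_field() below strips the two-character key, the separator that follows
# it, and the trailing newline; for example (illustrative),
# read_field("N: some_task\n", 2) returns "some_task".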
def read_field(field, index):
    return field[index+1:-1]

def insert_worker_event(workers, prog_events, block):
    worker_id = -1
    name = None
    start_time = 0.0
    category = None
    for line in block:
        key = line[:2]
        value = read_field(line, 2)
        if key == "E:": # EventType
            event_type = value
        elif key == "C:": # Category
            category = value
        elif key == "W:": # WorkerId
            worker_id = int(value)
        elif key == "N:": # Name
            name = value
        elif key == "S:": # StartTime
            start_time = float(value)

    # Program events don't belong to workers, they are globals.
    if category == "Program":
        prog_events.append(Event(event_type, name, category, start_time))
        return

    for worker in workers:
        if worker._id == worker_id:
            worker.add_event(event_type, name, category, start_time)
            return
    worker = Worker(worker_id)
    worker.add_event(event_type, name, category, start_time)
    workers.append(worker)

def calc_times(stats):
    tr = 0.0 # Runtime
    tt = 0.0 # Task
    ti = 0.0 # Idle
    ts = 0.0 # Scheduling
    for stat in stats:
        if stat._category == None:
            continue

        if stat._category == "Runtime":
            if stat._name == "Scheduling":
                # Scheduling time is part of runtime but we want to have
                # it separately.
                ts += stat._duration_time
            else:
                tr += stat._duration_time
        elif stat._category == "Task":
            tt += stat._duration_time
        elif stat._category == "Other":
            ti += stat._duration_time
        else:
            print("WARNING: Unknown category '" + stat._category + "'!")
    return ti, tr, tt, ts

def save_times(ti, tr, tt, ts):
    f = open("times.csv", "w+")
    f.write("\"Time\",\"Duration\"\n")
    f.write("\"Runtime\"," + str(tr) + "\n")
    f.write("\"Task\"," + str(tt) + "\n")
    f.write("\"Idle\"," + str(ti) + "\n")
    f.write("\"Scheduling\"," + str(ts) + "\n")
    f.close()

def calc_et(tt_1, tt_p):
    """ Compute the task efficiency (et). This measures the exploitation of
    data locality. """
    return tt_1 / tt_p

def calc_es(tt_p, ts_p):
    """ Compute the scheduling efficiency (es). This measures time spent in
    the runtime scheduler. """
    return tt_p / (tt_p + ts_p)

def calc_er(tt_p, tr_p, ts_p):
    """ Compute the runtime efficiency (er). This measures how the runtime
    overhead affects performance. """
    return (tt_p + ts_p) / (tt_p + tr_p + ts_p)

def calc_ep(tt_p, tr_p, ti_p, ts_p):
    """ Compute the pipeline efficiency (ep). This measures how much
    concurrency is available and how well it's exploited. """
    return (tt_p + tr_p + ts_p) / (tt_p + tr_p + ti_p + ts_p)

def calc_e(et, er, ep, es):
    """ Compute the parallel efficiency. """
    return et * er * ep * es
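# The overall parallel efficiency is the product of the four factors above.
# With made-up example values et = 0.95, er = 0.90, ep = 0.80 and es = 0.98,
# calc_e() returns 0.95 * 0.90 * 0.80 * 0.98 = 0.67032, i.e. about 67%.
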
def save_efficiencies(e, ep, er, et, es):
    f = open("efficiencies.csv", "w+")
    f.write("\"Efficiency\",\"Value\"\n")
    f.write("\"Parallel\"," + str(e) + "\n")
    f.write("\"Task\"," + str(et) + "\n")
    f.write("\"Runtime\"," + str(er) + "\n")
    f.write("\"Scheduling\"," + str(es) + "\n")
    f.write("\"Pipeline\"," + str(ep) + "\n")
    f.close()

def usage():
    print("USAGE:")
    print("starpu_trace_state_stats.py [ -te -s=<time> ] <trace.rec>")
    print("")
    print("OPTIONS:")
    print(" -t or --time            Compute and dump times to times.csv")
    print("")
    print(" -e or --efficiency      Compute and dump efficiencies to efficiencies.csv")
    print("")
    print(" -s or --seq_task_time   Used to compute task efficiency between sequential and parallel times")
    print("                         (if not set, task efficiency will be 1.0)")
    print("")
    print("EXAMPLES:")
    print("# Compute event statistics and report them to stdout:")
    print("starpu_trace_state_stats.py trace.rec")
    print("")
    print("# Compute event stats, times and efficiencies:")
    print("starpu_trace_state_stats.py -te trace.rec")
    print("")
    print("# Compute correct task efficiency with the sequential task time:")
    print("starpu_trace_state_stats.py -s=60093.950614 trace.rec")

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hets:",
                                   ["help", "time", "efficiency", "seq_task_time="])
    except getopt.GetoptError as err:
        usage()
        sys.exit(1)

    dump_time = False
    dump_efficiency = False
    tt_1 = 0.0

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-t", "--time"):
            dump_time = True
        elif o in ("-e", "--efficiency"):
            dump_efficiency = True
        elif o in ("-s", "--seq_task_time"):
            tt_1 = float(a)

    if len(args) < 1:
        usage()
        sys.exit()
    recfile = args[0]

    if not os.path.isfile(recfile):
        sys.exit("File does not exist!")
    # Declare a list for all workers.
    workers = []

    # Declare a list for program events.
    prog_events = []

    # Read the recutils file block by block.
    blocks = read_blocks(recfile)
    for block in blocks:
        if not len(block) == 0:
            first_line = block[0]
            if first_line[:2] == "E:":
                insert_worker_event(workers, prog_events, block)

    # Find the allowed time ranges between start/stop profiling events.
    start_profiling_times = []
    stop_profiling_times = []
    for prog_event in prog_events:
        if prog_event._name == "start_profiling":
            start_profiling_times.append(prog_event._start_time)
        if prog_event._name == "stop_profiling":
            stop_profiling_times.append(prog_event._start_time)

    if len(start_profiling_times) != len(stop_profiling_times):
        sys.exit("Mismatched number of start/stop profiling events!")
    # Compute worker statistics.
    stats = []
    for worker in workers:
        worker.calc_stats(start_profiling_times, stop_profiling_times)
        for stat in worker._stats:
            found = False
            for s in stats:
                if stat._name == s._name:
                    found = True
                    break
            if not found == True:
                stats.append(EventStats(stat._name, 0.0, stat._category, 0))

    # Compute global statistics for all workers.
    for i in range(0, len(workers)):
        for stat in stats:
            s = workers[i].get_event_stats(stat._name)
            if not s == None:
                # A task might not be executed on all workers.
                stat._duration_time += s._duration_time
                stat._count += s._count

    # Output statistics.
    print("\"Name\",\"Count\",\"Type\",\"Duration\"")
    for stat in stats:
        stat.show()
    # Compute runtime, task, idle, scheduling times and dump them to times.csv.
    ti_p = tr_p = tt_p = ts_p = 0.0
    if dump_time == True:
        ti_p, tr_p, tt_p, ts_p = calc_times(stats)
        save_times(ti_p, tr_p, tt_p, ts_p)

    # Compute runtime, task, idle efficiencies and dump them to
    # efficiencies.csv.
    if dump_efficiency == True or not tt_1 == 0.0:
        if dump_time == False:
            ti_p, tr_p, tt_p, ts_p = calc_times(stats)

        if tt_1 == 0.0:
            sys.stderr.write("WARNING: Task efficiency will be 1.0 because -s is not set!\n")
            tt_1 = tt_p

        # Compute efficiencies.
        et = round(calc_et(tt_1, tt_p), 6)
        es = round(calc_es(tt_p, ts_p), 6)
        er = round(calc_er(tt_p, tr_p, ts_p), 6)
        ep = round(calc_ep(tt_p, tr_p, ti_p, ts_p), 6)
        e = round(calc_e(et, er, ep, es), 6)

        save_efficiencies(e, ep, er, et, es)

if __name__ == "__main__":
    main()