Просмотр исходного кода

tools: update starpu_trace_state_stats to compute efficiencies

This script can now be used to compute the different times (runtime,
task and other) and the efficiencies (parallel, runtime, task and
pipeline).
Samuel Pitoiset лет назад: 9
Родитель
Сommit
9c70704c28
1 измененных файлов с 137 добавлено и 14 удалено
  1. 137 14
      tools/starpu_trace_state_stats.py

+ 137 - 14
tools/starpu_trace_state_stats.py

@@ -30,15 +30,17 @@ import os
 import sys
 
 class Event():
-    def __init__(self, type, name, start_time):
+    def __init__(self, type, name, category, start_time):
         self._type = type
         self._name = name
+        self._category = category
         self._start_time = start_time
 
 class EventStats():
-    def __init__(self, name, duration_time, count = 1):
+    def __init__(self, name, duration_time, category, count = 1):
         self._name = name
         self._duration_time = duration_time
+        self._category = category
         self._count = count
 
     def aggregate(self, duration_time):
@@ -47,7 +49,7 @@ class EventStats():
 
     def show(self):
         if not self._name == None:
-            print "\"" + self._name + "\"," + str(self._count) + "," + str(round(self._duration_time, 6))
+            print "\"" + self._name + "\"," + str(self._count) + ",\"" + self._category + "\"," + str(round(self._duration_time, 6))
 
 class Worker():
     def __init__(self, id):
@@ -62,8 +64,8 @@ class Worker():
                 return stat
         return None
 
-    def add_event(self, type, name, start_time):
-        self._events.append(Event(type, name, start_time))
+    def add_event(self, type, name, category, start_time):
+        self._events.append(Event(type, name, category, start_time))
 
     def calc_stats(self):
         num_events = len(self._events) - 1
@@ -91,7 +93,7 @@ class Worker():
                     found = True
                     break
             if not found == True:
-                self._stats.append(EventStats(curr_event._name, b - a))
+                self._stats.append(EventStats(curr_event._name, b - a, curr_event._category))
 
 def read_blocks(input_file):
     empty_lines = 0
@@ -122,10 +124,13 @@ def insert_worker_event(workers, block):
     worker_id = -1
     name = None
     start_time = 0.0
+    category = None
 
     for line in block:
         if line[:2] == "E:": # EventType
             event_type = read_field(line, 2)
+        elif line[:2] == "C:": # Category
+            category = read_field(line, 2)
         elif line[:2] == "W:": # WorkerId
             worker_id = int(read_field(line, 2))
         elif line[:2] == "N:": # Name
@@ -135,17 +140,114 @@ def insert_worker_event(workers, block):
 
     for worker in workers:
         if worker._id == worker_id:
-            worker.add_event(event_type, name, start_time)
+            worker.add_event(event_type, name, category, start_time)
             return
     worker = Worker(worker_id)
-    worker.add_event(event_type, name, start_time)
+    worker.add_event(event_type, name, category, start_time)
     workers.append(worker)
 
+def calc_times(stats):
+    tr = 0.0 # Runtime
+    tt = 0.0 # Task
+    ti = 0.0 # Idle (likely Other)
+    for stat in stats:
+        if stat._category == None:
+            continue
+        if stat._category == "Runtime":
+            tr += stat._duration_time
+        elif stat._category == "Task":
+            tt += stat._duration_time
+        elif stat._category == "Other":
+            ti += stat._duration_time
+        else:
+            sys.exit("Unknown category '" + stat._category + "'!")
+    return (ti, tr, tt)
+
+def save_times(ti, tr, tt):
+    f = open("times.csv", "w+")
+    f.write("\"Time\",\"Duration\"\n")
+    f.write("\"Runtime\"," + str(tr) + "\n")
+    f.write("\"Task\"," + str(tt) + "\n")
+    f.write("\"Other\"," + str(ti) + "\n")
+    f.close()
+
+def calc_et(tt_1, tt_p):
+    """ Compute the task efficiency (et). This measures the exploitation of
+    data locality. """
+    return tt_1 / tt_p
+
+def calc_er(tt_p, tr_p):
+    """ Compute the runtime efficiency (er). This measures how the runtime
+    overhead affects performance."""
+    return tt_p / (tt_p + tr_p)
+
+def calc_ep(tt_p, tr_p, ti_p):
+    """ Compute the pipeline efficiency (et). This measures how much
+    concurrency is available and how well it's exploited. """
+    return (tt_p + tr_p) / (tt_p + tr_p + ti_p)
+
+def calc_e(et, er, ep):
+    """ Compute the parallel efficiency. """
+    return et * er * ep
+
+def save_efficiencies(e, ep, er, et):
+    f = open("efficiencies.csv", "w+")
+    f.write("\"Efficiency\",\"Value\"\n")
+    f.write("\"Parallel\"," + str(e) + "\n")
+    f.write("\"Task\"," + str(et) + "\n")
+    f.write("\"Runtime\"," + str(er) + "\n")
+    f.write("\"Pipeline\"," + str(ep) + "\n")
+    f.close()
+
+def usage():
+    print "USAGE:"
+    print "starpu_trace_state_stats.py [ -te -s=<time> ] <trace.rec>"
+    print
+    print "OPTIONS:"
+    print " -t or --time            Compute and dump times to times.csv"
+    print
+    print " -e or --efficiency      Compute and dump efficiencies to efficiencies.csv"
+    print
+    print " -s or --seq_task_time   Used to compute task efficiency between sequential and parallel times"
+    print "                         (if not set, task efficiency will be 1.0)"
+    print
+    print "EXAMPLES:"
+    print "# Compute event statistics and report them to stdout:"
+    print "python starpu_trace_state_stats.py trace.rec"
+    print
+    print "# Compute event stats, times and efficiencies:"
+    print "python starpu_trace_state_stats.py -te trace.rec"
+    print
+    print "# Compute correct task efficiency with the sequential task time:"
+    print "python starpu_trace_state_stats.py -s=60093.950614 trace.rec"
+
 def main():
-    if len(sys.argv) != 2:
-        print sys.argv[0] + " <trace.rec>"
-	sys.exit(1)
-    recfile = sys.argv[1]
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hets:",
+                                   ["help", "time", "efficiency", "seq_task_time="])
+    except getopt.GetoptError as err:
+        usage()
+        sys.exit(1)
+
+    dump_time = False
+    dump_efficiency = False
+    tt_1 = 0.0
+
+    for o, a in opts:
+        if o in ("-h", "--help"):
+            usage()
+            sys.exit()
+        elif o in ("-t", "--time"):
+            dump_time = True
+        elif o in ("-e", "--efficiency"):
+            dump_efficiency = True
+        elif o in ("-s", "--seq_task_time"):
+            tt_1 = float(a)
+
+    if len(args) < 1:
+        usage()
+        sys.exit()
+    recfile = args[0]
 
     if not os.path.isfile(recfile):
         sys.exit("File does not exist!")
@@ -172,7 +274,7 @@ def main():
                     found = True
                     break
             if not found == True:
-                stats.append(EventStats(stat._name, 0.0, 0))
+                stats.append(EventStats(stat._name, 0.0, stat._category, 0))
 
     # Compute global statistics for all workers.
     for i in xrange(0, len(workers)):
@@ -184,9 +286,30 @@ def main():
                 stat._count += s._count
 
     # Output statistics.
-    print "\"Value\",\"Events\",\"Duration\""
+    print "\"Name\",\"Count\",\"Type\",\"Duration\""
     for stat in stats:
         stat.show()
 
+    # Compute runtime, task, idle times and dump them to times.csv
+    ti_p = tr_p = tt_p = 0.0
+    if dump_time == True:
+        ti_p, tr_p, tt_p = calc_times(stats)
+        save_times(ti_p, tr_p, tt_p)
+
+    # Compute runtime, task, idle efficiencies and dump them to
+    # efficiencies.csv.
+    if dump_efficiency == True or not tt_1 == 0.0:
+        if dump_time == False:
+            ti_p, tr_p, tt_p = calc_times(stats)
+        if tt_1 == 0.0:
+            sys.stderr.write("WARNING: Task efficiency will be 1.0 because -s is not set!\n")
+            tt_1 = tt_p
+
+        # Compute efficiencies.
+        ep = round(calc_ep(tt_p, tr_p, ti_p), 6)
+        er = round(calc_er(tt_p, tr_p), 6)
+        et = round(calc_et(tt_1, tt_p), 6)
+        e  = round(calc_e(et, er, ep), 6)
+        save_efficiencies(e, ep, er, et)
 if __name__ == "__main__":
     main()