7 yıl önce · 6fee0d788e
--- a/ChangeLog
+++ b/ChangeLog
@@ -123,7 +123,8 @@ Small features:
 
																   * Add starpu_task_declare_deps()
															
 
																   * New function starpu_data_unpartition_submit_sequential_consistency_cb()
															
 
																     to specify a callback for the task submitting the unpartitioning
															
 
																-
															
 
																+  * New tool starpu_mpi_comm_matrix.py to draw heatmaps of MPI
															
 
																+    communications
															
 
																 Changes:
															
 
																   * Vastly improve simgrid simulation time.
															
--- a/configure.ac
+++ b/configure.ac
@@ -3594,6 +3594,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 
																   chmod +x gcc-plugin/tests/run-test
															
 
																   chmod +x tools/starpu_codelet_profile
															
 
																   chmod +x tools/starpu_codelet_histo_profile
															
 
																+  chmod +x tools/starpu_mpi_comm_matrix.py
															
 
																   chmod +x tools/starpu_workers_activity
															
 
																   chmod +x tools/starpu_paje_draw_histogram
															
 
																   chmod +x tools/starpu_paje_state_stats
															
@@ -3659,6 +3660,7 @@ AC_OUTPUT([
 
																 	tools/Makefile
															
 
																 	tools/starpu_codelet_profile
															
 
																 	tools/starpu_codelet_histo_profile
															
 
																+	tools/starpu_mpi_comm_matrix.py
															
 
																 	tools/starpu_workers_activity
															
 
																 	tools/starpu_paje_draw_histogram
															
 
																 	tools/starpu_paje_state_stats
															
--- a/doc/doxygen/chapters/370_online_performance_tools.doxy
+++ b/doc/doxygen/chapters/370_online_performance_tools.doxy
@@ -176,12 +176,15 @@ obtained by setting the environment variable \ref STARPU_COMM_STATS to <c>1</c>;
 
																 a summary will then be displayed at program termination:
															
 
																 \verbatim
															
 
																-[starpu_comm_stats][0] TOTAL:	4.000000 GB	4.000000 GB
															
 
																-[starpu_comm_stats][0->1]	4.000000 GB	4.000000 GB
															
 
																-[starpu_comm_stats][1] TOTAL:	8.000000 GB	8.000000 GB
															
 
																-[starpu_comm_stats][1->0]	8.000000 GB	8.000000 GB
															
 
																+[starpu_comm_stats][1] TOTAL:	456.000000 B	0.000435 MB	 0.000188 B/s	 0.000000 MB/s
															
 
																+[starpu_comm_stats][1:0]	456.000000 B	0.000435 MB	 0.000188 B/s	 0.000000 MB/s
															
 
																+
															
 
																+[starpu_comm_stats][0] TOTAL:	456.000000 B	0.000435 MB	 0.000188 B/s	 0.000000 MB/s
															
 
																+[starpu_comm_stats][0:1]	456.000000 B	0.000435 MB	 0.000188 B/s	 0.000000 MB/s
															
 
																 \endverbatim
															
 
																+These statistics can be plotted as heatmaps using the StarPU tool <c>starpu_mpi_comm_matrix.py</c>.
															
 
																+
															
 
																 \subsection StarPU-TopInterface StarPU-Top Interface
															
 
																 StarPU-Top is an interface which remotely displays the on-line state of a StarPU
															
--- a/doc/doxygen/chapters/410_mpi_support.doxy
+++ b/doc/doxygen/chapters/410_mpi_support.doxy
@@ -897,7 +897,7 @@ from the received communication cache.
 
																 When the environment variable \ref STARPU_COMM_STATS is set to \c 1,
															
 
																 StarPU will display at the end of the execution for each node the
															
 
																-volume and the bandwidth of data sent to each other nodes.
															
 
																+volume and the bandwidth of data sent to all the other nodes.
															
 
																 \section MPIExamples More MPI examples
															
--- a/mpi/src/starpu_mpi_stats.c
+++ b/mpi/src/starpu_mpi_stats.c
@@ -95,9 +95,10 @@ void _starpu_mpi_comm_amounts_display(FILE *stream, int node)
 
																 	for (dst = 0; dst < world_size; dst++)
															
 
																 	{
															
 
																-		fprintf(stream, "[starpu_comm_stats][%d->%d]\t%f B\t%f MB\t %f B/s\t %f MB/s\n",
															
 
																-			node, dst, (float)comm_amount[dst], ((float)comm_amount[dst])/(1024*1024),
															
 
																-			(float)comm_amount[dst]/(float)time, ((float)comm_amount[dst])/(1024*1024)/(float)time);
															
 
																+		if (comm_amount[dst])
															
 
																+			fprintf(stream, "[starpu_comm_stats][%d:%d]\t%f B\t%f MB\t %f B/s\t %f MB/s\n",
															
 
																+				node, dst, (float)comm_amount[dst], ((float)comm_amount[dst])/(1024*1024),
															
 
																+				(float)comm_amount[dst]/(float)time, ((float)comm_amount[dst])/(1024*1024)/(float)time);
															
 
																 	}
															
 
																 }
															
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -281,6 +281,7 @@ dist_bin_SCRIPTS +=			\
 
																 	starpu_workers_activity		\
															
 
																 	starpu_codelet_histo_profile	\
															
 
																 	starpu_codelet_profile		\
															
 
																+	starpu_mpi_comm_matrix.py	\
															
 
																 	starpu_paje_draw_histogram	\
															
 
																 	starpu_paje_draw_histogram.R	\
															
 
																 	starpu_paje_summary		\
															
@@ -330,6 +331,9 @@ starpu_codelet_profile.1: starpu_codelet_profile
 
																 starpu_codelet_histo_profile.1: starpu_codelet_histo_profile
															
 
																 	chmod +x $<
															
 
																 	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																+starpu_mpi_comm_matrix.1: starpu_mpi_comm_matrix.py
															
 
																+	chmod +x $<
															
 
																+	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																 starpu_paje_draw_histogram.1: starpu_paje_draw_histogram
															
 
																 	chmod +x $<
															
 
																 	help2man --no-discard-stderr -N --output=$@ ./$<
															
@@ -356,6 +360,7 @@ dist_man1_MANS =\
 
																 	starpu_workers_activity.1 \
															
 
																 	starpu_codelet_profile.1 \
															
 
																 	starpu_codelet_histo_profile.1 \
															
 
																+	starpu_mpi_comm_matrix.1 \
															
 
																 	starpu_paje_draw_histogram.1 \
															
 
																 	starpu_paje_state_stats.1
															
--- a/tools/starpu_mpi_comm_matrix.py.in
+++ b/tools/starpu_mpi_comm_matrix.py.in
@@ -0,0 +1,106 @@
 
																+#!/usr/bin/python3
															
 
																+# -*- coding: utf-8 -*-
															
 
																+# StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+#
															
 
																+# Copyright (C) 2019                                      CNRS
															
 
																+#
															
 
																+# StarPU is free software; you can redistribute it and/or modify
															
 
																+# it under the terms of the GNU Lesser General Public License as published by
															
 
																+# the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+# your option) any later version.
															
 
																+#
															
 
																+# StarPU is distributed in the hope that it will be useful, but
															
 
																+# WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+#
															
 
																+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+#
															
 
																+
															
 
																+import sys
															
 
																+import re
															
 
																+import os
															
 
																+
															
 
																+PROGNAME=sys.argv[0]
															
 
																+
															
 
																+def usage():
															
 
																+    print("Offline tool to draw a communication matrix")
															
 
																+    print("")
															
 
																+    print("Usage: %s <output_execution>" % PROGNAME)
															
 
																+    print("")
															
 
																+    print("Options:")
															
 
																+    print( "	-h, --help          display this help and exit")
															
 
																+    print("	-v, --version       output version information and exit")
															
 
																+    print("")
															
 
																+    print("Report bugs to <@PACKAGE_BUGREPORT@>")
															
 
																+    sys.exit(1)
															
 
																+
															
 
																+if len(sys.argv) >= 2:
															
 
																+    if sys.argv[1] == '-v' or sys.argv[1] == '--version':
															
 
																+        print("%s (@PACKAGE_NAME@) @PACKAGE_VERSION@" % PROGNAME)
															
 
																+        sys.exit(0)
															
 
																+    if sys.argv[1] == '-h' or sys.argv[1] == '--help':
															
 
																+        usage()
															
 
																+if (len(sys.argv) == 1):
															
 
																+    usage()
															
 
																+
															
 
																+outputfile=sys.argv[1]
															
 
																+
															
 
																+# find the number of nodes
															
 
																+nodes=0
															
 
																+file = open(outputfile, "r")
															
 
																+for line in file.readlines():
															
 
																+    match = re.search('\TOTAL', line)
															
 
																+    if match:
															
 
																+        (node,stuff)=line.split(sep="[")[2].split("]")
															
 
																+        if (int(node) > nodes):
															
 
																+            nodes=int(node)
															
 
																+file.close()
															
 
																+nodes=nodes+1
															
 
																+
															
 
																+# extract volume of comm and bandwidth between all pair of nodes
															
 
																+volumes = [[0 for x in range(nodes)] for y in range(nodes)]
															
 
																+bandwidth = [[0 for x in range(nodes)] for y in range(nodes)]
															
 
																+file = open(outputfile, "r")
															
 
																+for line in file.readlines():
															
 
																+    match = re.search('\[starpu_comm_stats]', line)
															
 
																+    if match:
															
 
																+        match = re.search('TOTAL', line)
															
 
																+        if not match:
															
 
																+            (head,volB,B,volMB,MB,bwB,B,bwMB,MB) = line.split()
															
 
																+            (src,dst)=head.split(sep="[")[2].split(sep="]")[0].split(sep=":")
															
 
																+            volumes[int(src)][int(dst)] = float(volB)
															
 
																+            bandwidth[int(src)][int(dst)] = float(bwB)
															
 
																+file.close()
															
 
																+
															
 
																+def writeData(filename, nodes, data):
															
 
																+    ofile=open(filename, "w")
															
 
																+    for src in range(nodes):
															
 
																+        for dst in range(nodes):
															
 
																+            ofile.write("%f "% data[src][dst])
															
 
																+        ofile.write("\n")
															
 
																+    ofile.close()
															
 
																+
															
 
																+def generateGnuplotScript(filename, datafilename, outputfile, nodes):
															
 
																+    ofile=open(filename, "w")
															
 
																+    srctics=""
															
 
																+    dsttics=""
															
 
																+    for node in range(nodes-1):
															
 
																+        srctics += "\"src%d\" %d, " % (node, node)
															
 
																+        dsttics += "\"dst%d\" %d, " % (node, node)
															
 
																+    ofile.write("set term pdf color\n")
															
 
																+    ofile.write("set output \"%s\"\n" % outputfile)
															
 
																+    ofile.write("set view map scale 1\nset style data lines\nset palette rgbformulae 22,13,-31\n")
															
 
																+    ofile.write("set xtics (%s\"src%d\" %d)\n" % (srctics, nodes-1, nodes-1))
															
 
																+    ofile.write("set ytics (%s\"dst%d\" %d)\n" % (dsttics, nodes-1, nodes-1))
															
 
																+    ofile.write("plot '%s' matrix with image\n" % datafilename)
															
 
																+    ofile.close()
															
 
																+
															
 
																+# generate gnuplot volume data and script file
															
 
																+writeData(outputfile+"_volume.data", nodes, volumes)
															
 
																+generateGnuplotScript(outputfile+"_volume.gp", outputfile+"_volume.data", outputfile+"_volume_heatmap.pdf", nodes)
															
 
																+os.system("gnuplot " + outputfile+"_volume.gp")
															
 
																+
															
 
																+# generate gnuplot bandwidth data and script file
															
 
																+writeData(outputfile+"_bw.data", nodes, bandwidth)
															
 
																+generateGnuplotScript(outputfile+"_bw.gp", outputfile+"_bw.data", outputfile+"_bw_heatmap.pdf", nodes)
															
 
																+os.system("gnuplot " + outputfile+"_bw.gp")