Просмотр исходного кода

Add starpu_codelet_profile, a tool that draws the profile of a codelet.

Samuel Thibault лет назад: 12
Родитель
Коммит
a8df5c71bc
5 измененных файлов с 63 добавлено и 22 удалено
  1. 1 0
      ChangeLog
  2. 38 4
      doc/chapters/perf-feedback.texi
  3. 3 3
      tests/perfmodels/regression_based.c
  4. 5 1
      tools/Makefile.am
  5. 16 14
      tools/sampling.sh

+ 1 - 0
ChangeLog

@@ -120,6 +120,7 @@ Small features:
   traces from starpu_init.
   * Add trace_buffer_size configuration field to permit to specify the tracing
   buffer size.
+  * Add starpu_codelet_profile, a tool that draws the profile of a codelet.
 
 Small changes:
   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is

+ 38 - 4
doc/chapters/perf-feedback.texi

@@ -383,7 +383,7 @@ file: <starpu_slu_lu_model_12.hannibal>
 @end example
 
 Here, the codelets of the lu example are available. We can examine the
-performance of the 22 kernel (in micro-seconds):
+performance of the 22 kernel (in micro-seconds), which is history-based:
 
 @example
 $ starpu_perfmodel_display -s starpu_slu_lu_model_22
@@ -406,15 +406,49 @@ execution, the GPUs are about 20 times faster than the CPUs (numbers are in
 us). The standard deviation is extremely low for the GPUs, and less than 10% for
 CPUs.
 
-The @code{starpu_regression_display} tool does the same for regression-based
-performance models. It also writes a @code{.gp} file in the current directory,
-to be run in the @code{gnuplot} tool, which shows the corresponding curve.
+This tool can also be used for regression-based performance models. It will then
+display the regression formula, and in the case of non-linear regression, the
+same performance log as for history-based performance models:
+
+@example
+$ starpu_perfmodel_display -s non_linear_memset_regression_based.type
+performance model for cpu_impl_0
+	Regression : #sample = 1400
+	Linear: y = alpha size ^ beta
+		alpha = 1.335973e-03
+		beta = 8.024020e-01
+	Non-Linear: y = a size ^b + c
+		a = 5.429195e-04
+		b = 8.654899e-01
+		c = 9.009313e-01
+# hash		size		mean		stddev		n
+a3d3725e	4096           	4.763200e+00   	7.650928e-01   	100
+870a30aa	8192           	1.827970e+00   	2.037181e-01   	100
+48e988e9	16384          	2.652800e+00   	1.876459e-01   	100
+961e65d2	32768          	4.255530e+00   	3.518025e-01   	100
+...
+@end example
+
+The @code{starpu_perfmodel_plot} tool can be used to draw performance models.
+It writes a @code{.gp} file in the current directory, to be run in the
+@code{gnuplot} tool, which shows the corresponding curve.
 
 The same can also be achieved by using StarPU's library API, see
 @ref{Performance Model API} and notably the @code{starpu_perfmodel_load_symbol}
 function. The source code of the @code{starpu_perfmodel_display} tool can be a
 useful example.
 
+When the FxT trace file @code{filename} has been generated, it is possible to
+get a profile of each codelet by running:
+@example
+$ starpu_fxt_tool -i filename
+$ starpu_codelet_profile distrib.data codelet_name
+@end example
+
+This will create profiling data files and a @code{.gp} file in the current
+directory; run in @code{gnuplot}, it plots the codelet execution time observed
+during the application run against the input data size.
+
 @node Theoretical lower bound on execution time API
 @section Theoretical lower bound on execution time
 

+ 3 - 3
tests/perfmodels/regression_based.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2012  Université de Bordeaux 1
+ * Copyright (C) 2011-2013  Université de Bordeaux 1
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2012 inria
  *
@@ -121,8 +121,8 @@ static void show_task_perfs(int size, struct starpu_task *task)
 		unsigned nimpl;
 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		{
-			FPRINTF(stdout, "Expected time for %d on %s:\t%f\n",
-				size, name, starpu_task_expected_length(task, starpu_worker_get_perf_archtype(workerid), nimpl));
+			FPRINTF(stdout, "Expected time for %d on %s (impl %d):\t%f\n",
+				size, name, nimpl, starpu_task_expected_length(task, starpu_worker_get_perf_archtype(workerid), nimpl));
 		}
 	}
 }

+ 5 - 1
tools/Makefile.am

@@ -97,7 +97,8 @@ STARPU_TOOLS	+=			\
 noinst_PROGRAMS =	cbc2paje lp2paje
 
 dist_bin_SCRIPTS +=			\
-	starpu_workers_activity
+	starpu_workers_activity		\
+	starpu_codelet_profile
 
 
 if STARPU_HAVE_HELP2MAN
@@ -112,6 +113,9 @@ starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
 starpu_workers_activity.1: starpu_workers_activity$(EXEEXT)
 	chmod +x starpu_workers_activity$(EXEEXT)
 	help2man --no-discard-stderr -N --output=$@ ./$<
+starpu_codelet_profile.1: starpu_codelet_profile$(EXEEXT)
+	chmod +x starpu_codelet_profile$(EXEEXT)
+	help2man --no-discard-stderr -N --output=$@ ./$<
 
 if STARPU_USE_FXT
 starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT)

+ 16 - 14
tools/sampling.sh

@@ -2,7 +2,7 @@
 
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # 
-# Copyright (C) 2008, 2009, 2010  Université de Bordeaux 1
+# Copyright (C) 2008, 2009, 2010, 2013  Université de Bordeaux 1
 # Copyright (C) 2010  Centre National de la Recherche Scientifique
 # 
 # StarPU is free software; you can redistribute it and/or modify
@@ -16,20 +16,24 @@
 # 
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
+if [ "$#" -lt 2 -o "$1" = --help -o "$1" = -h ]
+then
+	echo "Offline tool to display codelet profile over a traced execution"
+	echo ""
+	echo "Usage: $0 distrib.data codelet_name"
+	exit 1
+fi
 
 inputfile=$1
+codelet_name=$2
 
-archlist=`cut -f 1 $inputfile | sort | uniq | xargs` 
-hashlist=`cut -f 2 $inputfile | sort | uniq | xargs` 
+archlist=`< $inputfile grep "^$codelet_name	" | cut -f 2 | sort | uniq | xargs` 
 
 # extract subfiles from the history file
 for arch in $archlist
 do
-	for h in $hashlist
-	do
-		echo "pouet $arch - $h "
-		grep "^$arch	$h" $inputfile > $inputfile.$arch.$h
-	done
+		echo "Arch $arch"
+		grep "^$codelet_name	$arch" $inputfile > $inputfile.$arch
 done
 
 # create the gnuplot file
@@ -41,6 +45,9 @@ echo "set term postscript eps enhanced color" 	>> $gpfile
 echo "set logscale x"				>> $gpfile 
 echo "set logscale y"				>> $gpfile 
 echo "set output \"$inputfile.eps\""		>> $gpfile
+echo "set key top left"				>> $gpfile
+echo "set xlabel \"Total data size\""		>> $gpfile
+echo "set ylabel \"Execution time (ms)\""	>> $gpfile
 
 echo -n "plot	" 				>> $gpfile
 
@@ -48,8 +55,6 @@ first=1
 
 for arch in $archlist
 do
-	for h in $hashlist
-	do
 		if [ $first = 0 ] 
 		then
 			echo -n "  , " >> $gpfile
@@ -57,8 +62,5 @@ do
 			first=0
 		fi
 
-		echo -n " \"$inputfile.$arch.$h\" using 3:4  title \"arch $arch hash $h\" " >> $gpfile
-	done
+		echo -n " \"$inputfile.$arch\" using 3:5  title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile
 done
-
-gnuplot $gpfile