12 years ago · a8df5c71bc
--- a/ChangeLog
+++ b/ChangeLog
@@ -120,6 +120,7 @@ Small features:
 
																   traces from starpu_init.
															
 
																   * Add trace_buffer_size configuration field to allow specifying the tracing
															
 
																   buffer size.
															
 
																+  * Add starpu_codelet_profile, a tool that draws the profile of a codelet.
															
 
																 Small changes:
															
 
																   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is
															
--- a/doc/chapters/perf-feedback.texi
+++ b/doc/chapters/perf-feedback.texi
@@ -383,7 +383,7 @@ file: <starpu_slu_lu_model_12.hannibal>
 
																 @end example
															
 
																 Here, the codelets of the lu example are available. We can examine the
															
 
																-performance of the 22 kernel (in micro-seconds):
															
 
																+performance of the 22 kernel (in micro-seconds), which is history-based:
															
 
																 @example
															
 
																 $ starpu_perfmodel_display -s starpu_slu_lu_model_22
															
@@ -406,15 +406,49 @@ execution, the GPUs are about 20 times faster than the CPUs (numbers are in
 
																 us). The standard deviation is extremely low for the GPUs, and less than 10% for
															
 
																 CPUs.
															
 
																-The @code{starpu_regression_display} tool does the same for regression-based
															
 
																-performance models. It also writes a @code{.gp} file in the current directory,
															
 
																-to be run in the @code{gnuplot} tool, which shows the corresponding curve.
															
 
																+This tool can also be used for regression-based performance models. It will then
															
 
																+display the regression formula, and in the case of non-linear regression, the
															
 
																+same performance log as for history-based performance models:
															
 
																+
															
 
																+@example
															
 
																+$ starpu_perfmodel_display -s non_linear_memset_regression_based.type
															
 
																+performance model for cpu_impl_0
															
 
																+	Regression : #sample = 1400
															
 
																+	Linear: y = alpha size ^ beta
															
 
																+		alpha = 1.335973e-03
															
 
																+		beta = 8.024020e-01
															
 
																+	Non-Linear: y = a size ^b + c
															
 
																+		a = 5.429195e-04
															
 
																+		b = 8.654899e-01
															
 
																+		c = 9.009313e-01
															
 
																+# hash		size		mean		stddev		n
															
 
																+a3d3725e	4096           	4.763200e+00   	7.650928e-01   	100
															
 
																+870a30aa	8192           	1.827970e+00   	2.037181e-01   	100
															
 
																+48e988e9	16384          	2.652800e+00   	1.876459e-01   	100
															
 
																+961e65d2	32768          	4.255530e+00   	3.518025e-01   	100
															
 
																+...
															
 
																+@end example
															
 
																+
															
 
																+The @code{starpu_perfmodel_plot} tool can be used to draw performance models.
															
 
																+It writes a @code{.gp} file in the current directory, to be run in the
															
 
																+@code{gnuplot} tool, which shows the corresponding curve.
															
 
																 The same can also be achieved by using StarPU's library API, see
															
 
																 @ref{Performance Model API} and notably the @code{starpu_perfmodel_load_symbol}
															
 
																 function. The source code of the @code{starpu_perfmodel_display} tool can be a
															
 
																 useful example.
															
 
																+When the FxT trace file @code{filename} has been generated, it is possible to
															
 
																+get a profile of each codelet by running:
															
 
																+@example
															
 
																+$ starpu_fxt_tool -i filename
															
 
																+$ starpu_codelet_profile distrib.data codelet_name
															
 
																+@end example
															
 
																+
															
 
																+This will create profiling data files, and a @code{.gp} file in the current
															
 
																+directory, which draws the distribution of codelet time over the application
															
 
																+execution, as a function of input data size.
															
 
																+
															
 
																 @node Theoretical lower bound on execution time API
															
 
																 @section Theoretical lower bound on execution time
															
--- a/tests/perfmodels/regression_based.c
+++ b/tests/perfmodels/regression_based.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2011-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2011-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  * Copyright (C) 2012 inria
															
 
																  *
															
@@ -121,8 +121,8 @@ static void show_task_perfs(int size, struct starpu_task *task)
 
																 		unsigned nimpl;
															
 
																 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																 		{
															
 
																-			FPRINTF(stdout, "Expected time for %d on %s:\t%f\n",
															
 
																-				size, name, starpu_task_expected_length(task, starpu_worker_get_perf_archtype(workerid), nimpl));
															
 
																+			FPRINTF(stdout, "Expected time for %d on %s (impl %d):\t%f\n",
															
 
																+				size, name, nimpl, starpu_task_expected_length(task, starpu_worker_get_perf_archtype(workerid), nimpl));
															
 
																 		}
															
 
																 	}
															
 
																 }
															
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -97,7 +97,8 @@ STARPU_TOOLS	+=			\
 
																 noinst_PROGRAMS =	cbc2paje lp2paje
															
 
																 dist_bin_SCRIPTS +=			\
															
 
																-	starpu_workers_activity
															
 
																+	starpu_workers_activity		\
															
 
																+	starpu_codelet_profile
															
 
																 if STARPU_HAVE_HELP2MAN
															
@@ -112,6 +113,9 @@ starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
 
																 starpu_workers_activity.1: starpu_workers_activity$(EXEEXT)
															
 
																 	chmod +x starpu_workers_activity$(EXEEXT)
															
 
																 	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																+starpu_codelet_profile.1: starpu_codelet_profile$(EXEEXT)
															
 
																+	chmod +x starpu_codelet_profile$(EXEEXT)
															
 
																+	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																 if STARPU_USE_FXT
															
 
																 starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT)
															
--- a/tools/sampling.sh
+++ b/tools/sampling.sh
@@ -2,7 +2,7 @@
 
																 # StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																 # 
															
 
																-# Copyright (C) 2008, 2009, 2010  Université de Bordeaux 1
															
 
																+# Copyright (C) 2008, 2009, 2010, 2013  Université de Bordeaux 1
															
 
																 # Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																 # 
															
 
																 # StarPU is free software; you can redistribute it and/or modify
															
@@ -16,20 +16,24 @@
 
																 # 
															
 
																 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+if [ "$#" -lt 2 -o "$1" = --help -o "$1" = -h ]
															
 
																+then
															
 
																+	echo "Offline tool to display codelet profile over a traced execution"
															
 
																+	echo ""
															
 
																+	echo "Usage: $0 distrib.data codelet_name"
															
 
																+	exit 1
															
 
																+fi
															
 
																 inputfile=$1
															
 
																+codelet_name=$2
															
 
																-archlist=`cut -f 1 $inputfile | sort | uniq | xargs` 
															
 
																-hashlist=`cut -f 2 $inputfile | sort | uniq | xargs` 
															
 
																+archlist=`< $inputfile grep "^$codelet_name	" | cut -f 2 | sort | uniq | xargs` 
															
 
																 # extract subfiles from the history file
															
 
																 for arch in $archlist
															
 
																 do
															
 
																-	for h in $hashlist
															
 
																-	do
															
 
																-		echo "pouet $arch - $h "
															
 
																-		grep "^$arch	$h" $inputfile > $inputfile.$arch.$h
															
 
																-	done
															
 
																+		echo "Arch $arch"
															
 
																+		grep "^$codelet_name	$arch" $inputfile > $inputfile.$arch
															
 
																 done
															
 
																 # create the gnuplot file
															
@@ -41,6 +45,9 @@ echo "set term postscript eps enhanced color" 	>> $gpfile
 
																 echo "set logscale x"				>> $gpfile 
															
 
																 echo "set logscale y"				>> $gpfile 
															
 
																 echo "set output \"$inputfile.eps\""		>> $gpfile
															
 
																+echo "set key top left"				>> $gpfile
															
 
																+echo "set xlabel \"Total data size\""		>> $gpfile
															
 
																+echo "set ylabel \"Execution time (ms)\""	>> $gpfile
															
 
																 echo -n "plot	" 				>> $gpfile
															
@@ -48,8 +55,6 @@ first=1
 
																 for arch in $archlist
															
 
																 do
															
 
																-	for h in $hashlist
															
 
																-	do
															
 
																 		if [ $first = 0 ] 
															
 
																 		then
															
 
																 			echo -n "  , " >> $gpfile
															
@@ -57,8 +62,5 @@ do
 
																 			first=0
															
 
																 		fi
															
 
																-		echo -n " \"$inputfile.$arch.$h\" using 3:4  title \"arch $arch hash $h\" " >> $gpfile
															
 
																-	done
															
 
																+		echo -n " \"$inputfile.$arch\" using 3:5  title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile
															
 
																 done
															
 
																-
															
 
																-gnuplot $gpfile