Bladeren bron

FxT is now automatically enabled at build-time, but no longer enabled at run-time by default: STARPU_FXT_TRACE needs to be explicitly set to 1 to enable FxT trace recording.

Nathalie Furmento 4 jaren geleden
bovenliggende
commit
e7a3f08c3f

+ 3 - 0
ChangeLog

@@ -57,6 +57,9 @@ New features:
 Small features:
   * New configure option --with-check-cflags to define flags for C,
     CXX and Fortran compilers
+  * FxT is now automatically enabled at build-time, but no longer enabled at
+    run-time by default: STARPU_FXT_TRACE needs to be explicitly set to 1 to
+    enable FxT trace recording.
 
 Small changes:
   * Add a synthetic energy efficiency testcase.

+ 80 - 70
configure.ac

@@ -1836,94 +1836,104 @@ AC_MSG_RESULT($enable_coverity)
 AM_CONDITIONAL(STARPU_COVERITY, test x$enable_coverity = xyes)
 
 # shall we use FxT to generate trace of the execution ?
-AC_MSG_CHECKING(whether FxT traces should be generated)
-AC_ARG_WITH(fxt, [AS_HELP_STRING([--with-fxt[[=<dir>]]], [generate fxt traces])],
+AC_ARG_ENABLE(fxt, [AS_HELP_STRING([--disable-fxt],
+		[disable FxT trace mechanisms])],, [enable_fxt=maybe])
+AC_ARG_WITH(fxt-dir,
+	[AS_HELP_STRING([--with-fxt-dir=<path>],
+	[specify FxT installation directory])],
 	[
-		if test x$withval != xno; then
-			use_fxt=yes
-			if test x$withval = xyes; then
-				AC_MSG_RESULT(yes)
-				use_fxt_from_system=yes
-			else
-				# use specified path
-				# TODO check if the dir is actually containing FxT
-				use_fxt_from_system=no
-				fxtdir=$withval
-				AC_MSG_RESULT(yes using $fxtdir)
-				AC_SUBST(FXTDIR, $fxtdir)
-			fi
-		else
-			use_fxt=no
-			AC_MSG_RESULT(no)
-		fi
+		fxt_dir="$withval"
+		use_fxt_from_system=no
+		# in case this was not explicit yet
+		enable_fxt=yes
+		AC_SUBST(FXTDIR, $fxt_dir)
 	],
 	[
-		use_fxt=no
-		AC_MSG_RESULT(no)
+		use_fxt_from_system=yes
+		fxt_dir=""
 	])
-AC_SUBST(STARPU_USE_FXT, $use_fxt)
-AM_CONDITIONAL(STARPU_USE_FXT, test x$use_fxt = xyes)
-
-if test x$use_fxt = xyes; then
-	AC_DEFINE(STARPU_USE_FXT, [1], [enable FxT traces])
-	AC_DEFINE(CONFIG_FUT, [1], [enable FUT traces])
 
+if test x$enable_fxt != xno; then
 	if test x$use_fxt_from_system = xno; then
 	    save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH"
-	    PKG_CONFIG_PATH="$fxtdir/lib/pkgconfig:$PKG_CONFIG_PATH"
-	    PKG_CHECK_MODULES([FXT],  [fxt], , [
+	    PKG_CONFIG_PATH="$fxt_dir/lib/pkgconfig:$PKG_CONFIG_PATH"
+	    PKG_CHECK_MODULES([FXT],  [fxt], [have_valid_fxt=yes], [
+	    	have_valid_fxt=yes
 		AC_MSG_WARN([Old FxT without fxt.pc file, hoping link will succeed])
-		FXT_CFLAGS="-I$fxtdir/include/ "
-		FXT_LDFLAGS="-L$fxtdir/lib/"
+		FXT_CFLAGS="-I$fxt_dir/include/ "
+		FXT_LDFLAGS="-L$fxt_dir/lib/"
 		AC_ARG_VAR(FXT_LDFLAGS)
 		FXT_LIBS="-lfxt"
 	    ])
 	    PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH"
 	else
-	    PKG_CHECK_MODULES([FXT],  [fxt])
-	fi
-	save_LIBS="$LIBS"
-	LIBS="$LIBS $FXT_LIBS"
-	save_LDFLAGS="$LDFLAGS"
-	LDFLAGS="$LDFLAGS $FXT_LDFLAGS"
-	AC_CHECK_FUNCS([fxt_close])
-	AC_CHECK_FUNCS([fxt_blockev_leave])
-	AC_CHECK_FUNCS([enable_fut_flush])
-	AC_CHECK_FUNCS([fut_set_filename])
-	AC_CHECK_FUNCS([fut_setup_flush_callback])
-	LDFLAGS="$save_LDFLAGS"
-	LIBS="$save_LIBS"
-	save_CFLAGS="$CFLAGS"
-	CFLAGS="$CFLAGS $FXT_CFLAGS"
-	AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fut.h>]])
-	AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fut.h>]])
-	AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include <fut.h>]])
-	CFLAGS="$save_CFLAGS"
-
-	if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then
-                # simgrid's SMPI needs fxt to be linked in statically for
-                # variable privatization to work
-		FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed"
+	    PKG_CHECK_MODULES([FXT],  [fxt], [have_valid_fxt=yes], [have_valid_fxt=no])
 	fi
-
-	##########################################
-	# Poti is a library to generate paje trace files
-	##########################################
-	PKG_CHECK_MODULES([POTI], [poti], [have_valid_poti=yes], [have_valid_poti=no])
-	AC_ARG_ENABLE(poti, [AS_HELP_STRING([--enable-poti],
-				[Enable the use of the POTI library to generate Paje traces])],
-				enable_poti=$enableval, enable_poti=no)
-	if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then
-		AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used])
+	if test x$have_valid_fxt = xyes ; then
+		enable_fxt=yes
 		save_LIBS="$LIBS"
-		LIBS="$LIBS $POTI_LIBS"
-		AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent])
+		LIBS="$LIBS $FXT_LIBS"
+			save_LDFLAGS="$LDFLAGS"
+		LDFLAGS="$LDFLAGS $FXT_LDFLAGS"
+		AC_CHECK_FUNCS([fxt_close])
+		AC_CHECK_FUNCS([fxt_blockev_leave])
+		AC_CHECK_FUNCS([enable_fut_flush])
+		AC_CHECK_FUNCS([fut_set_filename])
+		AC_CHECK_FUNCS([fut_setup_flush_callback])
+		LDFLAGS="$save_LDFLAGS"
 		LIBS="$save_LIBS"
-		FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS"
-		FXT_LIBS="$FXT_LIBS $POTI_LIBS"
+		save_CFLAGS="$CFLAGS"
+		CFLAGS="$CFLAGS $FXT_CFLAGS"
+		AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fut.h>]])
+		AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fut.h>]])
+		AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include <fut.h>]])
+		CFLAGS="$save_CFLAGS"
+
+		if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then
+	                # simgrid's SMPI needs fxt to be linked in statically for
+	                # variable privatization to work
+			FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed"
+		fi
+
+		AC_CHECK_LIB([papi], [PAPI_library_init],
+			     [AC_DEFINE([STARPU_PAPI], [1], [Define to 1 if you have the libpapi library])
+			      PAPI_LIBS=-lpapi])
+
+		##########################################
+		# Poti is a library to generate paje trace files
+		##########################################
+		PKG_CHECK_MODULES([POTI], [poti], [have_valid_poti=yes], [have_valid_poti=no])
+		AC_ARG_ENABLE(poti, [AS_HELP_STRING([--enable-poti],
+					[Enable the use of the POTI library to generate Paje traces])],
+					enable_poti=$enableval, enable_poti=no)
+		if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then
+			AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used])
+			save_LIBS="$LIBS"
+			LIBS="$LIBS $POTI_LIBS"
+			AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent])
+			LIBS="$save_LIBS"
+			FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS"
+			FXT_LIBS="$FXT_LIBS $POTI_LIBS"
+		fi
+	else
+		if test x$enable_fxt = xyes ; then
+			AC_MSG_ERROR([FxT is required but not available])
+		fi
+		enable_fxt=no
 	fi
 fi
 
+AC_MSG_CHECKING(whether FxT traces should be generated)
+AC_MSG_RESULT($enable_fxt)
+
+if test x$enable_fxt = xyes; then
+	AC_DEFINE(STARPU_USE_FXT, [1], [enable FxT traces])
+	AC_DEFINE(CONFIG_FUT, [1], [enable FUT traces])
+fi
+
+AC_SUBST(STARPU_USE_FXT, $enable_fxt)
+AM_CONDITIONAL(STARPU_USE_FXT, test x$enable_fxt = xyes)
+
 AC_MSG_CHECKING(whether additional locking systems FxT traces should be enabled)
 AC_ARG_ENABLE(fxt-lock, [AS_HELP_STRING([--enable-fxt-lock],
 			[enable additional locking systems FxT traces])],
@@ -3549,7 +3559,7 @@ AC_MSG_NOTICE([
 	Magma enabled:     $have_magma
 	BLAS library:      $blas_lib
 	hwloc:             $have_valid_hwloc
-	FxT trace enabled: $use_fxt
+	FxT trace enabled: $enable_fxt
 
         Documentation HTML:  $enable_build_doc
         Documentation PDF:   $enable_build_doc_pdf

+ 10 - 7
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -70,7 +70,14 @@ $ make
 $ make install
 \endverbatim
 
-In order to have StarPU to generate traces, StarPU should be configured with
+In order to have StarPU generate traces, StarPU needs to be configured again
+after installing FxT, and the configuration summary should show:
+
+\verbatim
+FxT trace enabled: yes
+\endverbatim
+
+If <c>configure</c> does not find FxT automatically, its location can be specified by hand with
 the option \ref with-fxt "--with-fxt" :
 
 \verbatim
@@ -78,10 +85,9 @@ $ ./configure --with-fxt=$FXTDIR
 \endverbatim
 
 Or you can simply point the <c>PKG_CONFIG_PATH</c> to
-<c>$FXTDIR/lib/pkgconfig</c> and pass
-\ref with-fxt "--with-fxt" to <c>configure</c>
+<c>$FXTDIR/lib/pkgconfig</c>.
 
-When FxT is enabled, a trace is generated when StarPU is terminated by calling
+When \ref STARPU_FXT_TRACE is set to 1, a trace is generated when StarPU is terminated by calling
 starpu_shutdown(). The trace is a binary file whose name has the form
 <c>prof_file_XXX_YYY</c> where <c>XXX</c> is the user name, and
 <c>YYY</c> is the MPI id of the process that used StarPU (or 0 when running a sequential program).
@@ -97,9 +103,6 @@ be used to generate trace events which describes the locks behaviour during
 the execution. It is however very heavy and should not be used unless debugging
 StarPU's internal locking.
 
-The environment variable \ref STARPU_FXT_TRACE can be set to 0 to disable the
-generation of the <c>prof_file_XXX_YYY</c> file.
-
 When the FxT trace file <c>prof_file_something</c> has been generated,
 it is possible to generate different trace formats by calling:
 

+ 1 - 1
doc/doxygen/chapters/501_environment_variables.doxy

@@ -1016,7 +1016,7 @@ Specify in which file to save the generated trace if FxT is enabled.
 <dd>
 \anchor STARPU_FXT_TRACE
 \addindex __env__STARPU_FXT_TRACE
-Specify whether to generate (1) or not (0) the FxT trace in /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). The default is 1 (generate it)
+Specify whether to generate (1) or not (0) the FxT trace in /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). The default is 0 (do not generate it).
 </dd>
 
 <dt>STARPU_LIMIT_CUDA_devid_MEM</dt>

+ 3 - 0
examples/Makefile.am

@@ -81,6 +81,9 @@ EXTRA_DIST = 					\
 
 CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90
 
+clean-local:
+	-rm -rf mult/sgemm.traces lu/lu.traces
+
 if STARPU_USE_CUDA
 
 if STARPU_COVERITY

+ 1 - 0
examples/lu/lu.sh

@@ -23,6 +23,7 @@ rm -rf $PREFIX/lu.traces
 mkdir -p $PREFIX/lu.traces
 
 export STARPU_FXT_PREFIX=$PREFIX/lu.traces
+export STARPU_FXT_TRACE=1
 
 $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
 $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride

+ 1 - 1
examples/mult/sgemm.sh

@@ -30,7 +30,7 @@ mkdir -p $PREFIX/sgemm.traces
 
 export STARPU_FXT_PREFIX=$PREFIX/sgemm.traces
 
-STARPU_SCHED=dmdas $PREFIX/sgemm -check
+STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $PREFIX/sgemm -check
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];
 then
 	$STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s starpu_sgemm_gemm -i $STARPU_FXT_PREFIX/prof_file_${USER}_0

+ 1 - 1
src/common/fxt.c

@@ -151,7 +151,7 @@ void starpu_fxt_stop_profiling()
 
 int starpu_fxt_is_enabled()
 {
-	return starpu_get_env_number_default("STARPU_FXT_TRACE", 1);
+	return starpu_get_env_number_default("STARPU_FXT_TRACE", 0);
 }
 
 #ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK

+ 34 - 21
src/common/fxt.h

@@ -330,11 +330,12 @@ void _starpu_fxt_dump_file(void);
 #define _STARPU_FUT_COMMIT(size) do { } while (0)
 #endif
 
-#ifdef FUT_ALWAYS_PROBE1STR
+#ifdef FUT_RAW_ALWAYS_PROBE1STR
 #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str)
 #else
 #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str)	\
 do {									\
+    if( STARPU_UNLIKELY(fut_active) ) { \
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\
@@ -347,7 +348,7 @@ do {									\
 	snprintf((char *)futargs, len, "%s", str);			\
 	((char *)futargs)[len - 1] = '\0';				\
 	_STARPU_FUT_COMMIT(total_len);					\
-} while (0)
+    }} while (0)
 #endif
 
 #ifdef FUT_FULL_PROBE1STR
@@ -358,7 +359,7 @@ do {									\
  * by a string. */
 #define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str);		\
     }									\
 } while (0)
@@ -390,7 +391,7 @@ do {									\
 #else
 #define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str);		\
     }									\
 } while (0)
@@ -423,7 +424,7 @@ do {									\
 #else
 #define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str);	\
     }									\
 } while (0)
@@ -457,7 +458,7 @@ do {									\
 #else
 #define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str);	\
     }									\
 } while (0)
@@ -492,7 +493,7 @@ do {									\
 #else
 #define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str);	\
     }									\
 } while (0)
@@ -528,7 +529,7 @@ do {									\
 #else
 #define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str);	\
     }									\
 } while (0)
@@ -565,7 +566,7 @@ do {									\
 #else
 #define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str)		\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str);	\
     }									\
 } while (0)
@@ -573,7 +574,7 @@ do {									\
 
 #ifndef FUT_RAW_PROBE7
 #define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do {		\
-		if(fut_active) {					\
+		if(STARPU_UNLIKELY(fut_active)) {			\
 			unsigned long *__args __attribute__((unused))=	\
 				fut_getstampedbuffer(CODE,		\
 						     FUT_SIZE(7)); \
@@ -677,7 +678,7 @@ do {									\
 /* full probes */
 #ifndef FUT_FULL_PROBE0
 #define FUT_FULL_PROBE0(KEYMASK,CODE) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \
         } \
 } while(0)
@@ -685,7 +686,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE1
 #define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \
         } \
 } while(0)
@@ -693,7 +694,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE2
 #define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \
         } \
 } while(0)
@@ -701,7 +702,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE3
 #define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \
         } \
 } while(0)
@@ -709,7 +710,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE4
 #define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \
         } \
 } while(0)
@@ -717,7 +718,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE5
 #define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \
         } \
 } while(0)
@@ -725,7 +726,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE6
 #define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \
         } \
 } while(0)
@@ -733,7 +734,7 @@ do {									\
 
 #ifndef FUT_FULL_PROBE7
 #define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \
         } \
 } while(0)
@@ -769,6 +770,7 @@ do {									\
 
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 do {									\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA) & fut_active) ) { \
 	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \
 	{								\
 		if ((job)->task->cl)					\
@@ -792,15 +794,18 @@ do {									\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 		FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \
 	}								\
+    } \
 } while(0)
 
 #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid)			\
 do {									\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 	char _archname[32]=""; \
 	starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0);	\
 	_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \
+    } \
 } while(0)
 
 #define _STARPU_TRACE_START_EXECUTING()				\
@@ -867,7 +872,8 @@ do {									\
 } while(0)
 
 #define _STARPU_TRACE_TASK_NAME(job)					\
-	do {								\
+do {								\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
         const char *model_name = _starpu_job_get_model_name((job));		\
 	const char *name = _starpu_job_get_task_name((job));			\
 	if (name)					                        \
@@ -879,14 +885,17 @@ do {									\
 	}									\
 	if (model_name)					\
 		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \
+    } \
 } while(0)
 
 #define _STARPU_TRACE_TASK_COLOR(job)						\
 do { \
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
 	if ((job)->task->color != 0) \
 		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \
 	else if ((job)->task->cl && (job)->task->cl->color != 0) \
 		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \
+    } \
 } while(0)
 
 #define _STARPU_TRACE_TASK_DONE(job)						\
@@ -894,6 +903,7 @@ do { \
 
 #define _STARPU_TRACE_TAG_DONE(tag)						\
 do {										\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
         struct _starpu_job *job = (tag)->job;                                  \
         const char *model_name = _starpu_job_get_task_name((job));                       \
 	if (model_name)                                                         \
@@ -903,6 +913,7 @@ do {										\
 	else {									\
 		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
 	}									\
+    } \
 } while(0)
 
 #define _STARPU_TRACE_DATA_NAME(handle, name) \
@@ -1288,10 +1299,10 @@ do {										\
 	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_NEW(component)		\
-	_STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name);
+	if (STARPU_UNLIKELY(fut_active)) _STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child)		\
-	FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child);
+	if (STARPU_UNLIKELY(fut_active)) FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio)		\
 	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio);
@@ -1300,6 +1311,7 @@ do {										\
 	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority);
 
 #define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle)	do {	\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_META) & fut_active) ) { \
 	const size_t __data_size = handle->ops->get_size(handle); \
 	const starpu_ssize_t __max_data_size = _starpu_data_get_max_size(handle); \
 	char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; \
@@ -1309,6 +1321,7 @@ do {										\
 	else \
 		__buf[0] = 0; \
 	_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \
+    } \
 } while (0)
 
 #define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle)	\

+ 8 - 2
src/core/jobs.c

@@ -86,9 +86,15 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 
 	job->task = task;
 
-#if !defined(STARPU_USE_FXT) && !defined(STARPU_DEBUG)
-	if (_starpu_bound_recording || _starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1 || STARPU_AYU_EVENT)
+	if (
+#if defined(STARPU_DEBUG)
+	    1
+#elif defined(STARPU_USE_FXT)
+	    fut_active
+#else
+	    _starpu_bound_recording || _starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1 || STARPU_AYU_EVENT
 #endif
+	   )
 	{
 		job->job_id = _starpu_fxt_get_job_id();
 		STARPU_AYU_ADDTASK(job->job_id, task);

+ 17 - 6
src/core/workers.c

@@ -953,7 +953,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 		 * before starting another one, to make sure they appear in
 		 * order in the trace.
 		 */
-		if ((!workerarg->set || workerarg->set->workers == workerarg)
+		if (fut_active && (!workerarg->set || workerarg->set->workers == workerarg)
 			&& workerarg->run_by_starpu == 1 && workerarg->arch != STARPU_MPI_MS_WORKER)
 		{
 			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
@@ -979,10 +979,13 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
                 /* We use the first worker to know if everything are finished */
 #ifdef STARPU_USE_FXT
-                STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
-                while (!worker_zero->worker_is_running)
-                        STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
-                STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
+		if (fut_active)
+		{
+			STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
+			while (!worker_zero->worker_is_running)
+				STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
+		}
 #endif
 
                 STARPU_PTHREAD_MUTEX_LOCK(&worker_set_zero->mutex);
@@ -1455,7 +1458,8 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	_STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
 #endif
 #ifdef STARPU_USE_FXT
-	_STARPU_DISP("Warning: StarPU was configured with --with-fxt, which slows down a bit, limits scalability and makes worker initialization sequential\n");
+	if (starpu_fxt_is_enabled())
+		_STARPU_DISP("Warning: FxT is enabled, which slows down a bit, limits scalability and makes worker initialization sequential\n");
 #endif
 #ifdef STARPU_FXT_LOCK_TRACES
 	_STARPU_DISP("Warning: StarPU was configured with --enable-fxt-lock, which slows down things a huge lot, and is really only meant for StarPU insides debugging. Did you really want to enable that?\n");
@@ -1477,6 +1481,13 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 #endif
 #endif
 
+#ifndef STARPU_USE_FXT
+	if (starpu_get_env_number("STARPU_FXT_TRACE") > 0)
+	{
+		_STARPU_DISP("Warning: FxT trace is requested but StarPU was configured without FxT support\n");
+	}
+#endif
+
 	if (starpu_getenv("STARPU_ENABLE_STATS"))
 	{
 		_STARPU_DISP("Warning: STARPU_ENABLE_STATS is enabled, which slows down a bit\n");

+ 2 - 1
src/datawizard/coherency.c

@@ -1245,7 +1245,8 @@ void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job
 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
 
 #ifdef STARPU_USE_FXT
-		total_size += _starpu_data_get_size(handle);
+		if (fut_active)
+			total_size += _starpu_data_get_size(handle);
 #endif
 	}
 	_STARPU_TRACE_DATA_LOAD(workerid,total_size);

+ 6 - 3
src/datawizard/copy_driver.c

@@ -235,10 +235,13 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d
 		_starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size);
 
 #ifdef STARPU_USE_FXT
-		com_id = STARPU_ATOMIC_ADDL(&communication_cnt, 1);
+		if (fut_active)
+		{
+			com_id = STARPU_ATOMIC_ADDL(&communication_cnt, 1);
 
-		if (req)
-			req->com_id = com_id;
+			if (req)
+				req->com_id = com_id;
+		}
 #endif
 
 		dst_replicate->initialized = 1;

+ 3 - 3
src/datawizard/data_request.c

@@ -444,15 +444,15 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 #endif
 	}
 
-	if (r->canceled < 2 && r->com_id > 0)
-	{
 #ifdef STARPU_USE_FXT
+	if (fut_active && r->canceled < 2 && r->com_id > 0)
+	{
 		unsigned src_node = src_replicate->memory_node;
 		unsigned dst_node = dst_replicate->memory_node;
 		size_t size = _starpu_data_get_size(handle);
 		_STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, r->com_id, r->prefetch);
-#endif
 	}
+#endif
 
 	/* Once the request has been fulfilled, we may submit the requests that
 	 * were chained to that request. */

+ 20 - 14
src/drivers/cuda/driver_cuda.c

@@ -677,13 +677,16 @@ static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker
 				STARPU_CUDA_REPORT_ERROR(cures);
 #endif
 #ifdef STARPU_USE_FXT
-			int k;
-			for (k = 0; k < (int) worker->set->nworkers; k++)
-				if (worker->set->workers[k].ntasks == worker->set->workers[k].pipeline_length)
-					break;
-			if (k == (int) worker->set->nworkers)
-				/* Everybody busy */
-				_STARPU_TRACE_START_EXECUTING();
+			if (fut_active)
+			{
+				int k;
+				for (k = 0; k < (int) worker->set->nworkers; k++)
+					if (worker->set->workers[k].ntasks == worker->set->workers[k].pipeline_length)
+						break;
+				if (k == (int) worker->set->nworkers)
+					/* Everybody busy */
+					_STARPU_TRACE_START_EXECUTING();
+			}
 #endif
 		}
 	}
@@ -934,13 +937,16 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 					_STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid);
 			}
 #ifdef STARPU_USE_FXT
-			int k;
-			for (k = 0; k < (int) worker_set->nworkers; k++)
-				if (worker_set->workers[k].ntasks)
-					break;
-			if (k == (int) worker_set->nworkers)
-				/* Everybody busy */
-				_STARPU_TRACE_END_EXECUTING()
+			if (fut_active)
+			{
+				int k;
+				for (k = 0; k < (int) worker_set->nworkers; k++)
+					if (worker_set->workers[k].ntasks)
+						break;
+				if (k == (int) worker_set->nworkers)
+					/* Everybody busy */
+					_STARPU_TRACE_END_EXECUTING()
+			}
 #endif
 			_STARPU_TRACE_START_PROGRESS(memnode);
 		}

+ 4 - 0
src/sched_policies/component_prio.c

@@ -23,17 +23,21 @@
 
 #ifdef STARPU_USE_FXT
 #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do {                                 \
+    if (fut_active) { \
 	int workerid = STARPU_NMAXWORKERS + 1;									\
 	if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \
 		workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \
 	_STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len); \
+    } \
 } while (0)
 
 #define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do {                                 \
+    if (fut_active) { \
 	int workerid = STARPU_NMAXWORKERS + 1;									\
 	if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \
 		workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \
 	_STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len); \
+    } \
 } while (0)
 #else
 #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { } while (0)

+ 3 - 0
tests/Makefile.am

@@ -76,6 +76,9 @@ EXTRA_DIST =					\
 CLEANFILES = 					\
 	*.gcno *.gcda *.linkinfo core starpu_idle_microsec.log *.mod *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 bandwidth-*.dat bandwidth.gp bandwidth.eps bandwidth.svg
 
+clean-local:
+	-rm -rf overlap/overlap.traces datawizard/locality.traces
+
 BUILT_SOURCES =
 SUBDIRS =
 

+ 1 - 1
tests/datawizard/locality.sh

@@ -29,7 +29,7 @@ mkdir -p $PREFIX/locality.traces
 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
 
 export STARPU_FXT_PREFIX=$PREFIX/locality.traces
-STARPU_SCHED=modular-eager $STARPU_LAUNCH $PREFIX/locality
+STARPU_FXT_TRACE=1 STARPU_SCHED=modular-eager $STARPU_LAUNCH $PREFIX/locality
 $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0
 
 # Check that they are approved by Grenoble :)

+ 1 - 1
tests/overlap/overlap.sh

@@ -30,7 +30,7 @@ mkdir -p $PREFIX/overlap.traces
 
 export STARPU_FXT_PREFIX=$PREFIX/overlap.traces
 
-STARPU_SCHED=dmdas $PREFIX/overlap
+STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $PREFIX/overlap
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];
 then
 	$STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s overlap_sleep_1024_24 -i $STARPU_FXT_PREFIX/prof_file_${USER}_0