Browse Source

FxT is now automatically enabled at build-time, but is no longer enabled at run-time by default; STARPU_FXT_TRACE needs to be explicitly set to 1 to enable FxT trace recording.

Nathalie Furmento 4 years ago
parent
commit
e7a3f08c3f

+ 3 - 0
ChangeLog

@@ -57,6 +57,9 @@ New features:
 Small features:
 Small features:
   * New configure option --with-check-cflags to define flags for C,
   * New configure option --with-check-cflags to define flags for C,
     CXX and Fortran compilers
     CXX and Fortran compilers
+  * FxT is now automatically enabled at build-time, but is no longer enabled
+    at run-time by default; STARPU_FXT_TRACE needs to be explicitly set to
+    1 to enable FxT trace recording.
 
 
 Small changes:
 Small changes:
   * Add a synthetic energy efficiency testcase.
   * Add a synthetic energy efficiency testcase.

+ 80 - 70
configure.ac

@@ -1836,94 +1836,104 @@ AC_MSG_RESULT($enable_coverity)
 AM_CONDITIONAL(STARPU_COVERITY, test x$enable_coverity = xyes)
 AM_CONDITIONAL(STARPU_COVERITY, test x$enable_coverity = xyes)
 
 
 # shall we use FxT to generate trace of the execution ?
 # shall we use FxT to generate trace of the execution ?
-AC_MSG_CHECKING(whether FxT traces should be generated)
-AC_ARG_WITH(fxt, [AS_HELP_STRING([--with-fxt[[=<dir>]]], [generate fxt traces])],
+AC_ARG_ENABLE(fxt, [AS_HELP_STRING([--disable-fxt],
+		[disable FxT trace mechanisms])],, [enable_fxt=maybe])
+AC_ARG_WITH(fxt-dir,
+	[AS_HELP_STRING([--with-fxt-dir=<path>],
+	[specify FxT installation directory])],
 	[
 	[
-		if test x$withval != xno; then
-			use_fxt=yes
-			if test x$withval = xyes; then
-				AC_MSG_RESULT(yes)
-				use_fxt_from_system=yes
-			else
-				# use specified path
-				# TODO check if the dir is actually containing FxT
-				use_fxt_from_system=no
-				fxtdir=$withval
-				AC_MSG_RESULT(yes using $fxtdir)
-				AC_SUBST(FXTDIR, $fxtdir)
-			fi
-		else
-			use_fxt=no
-			AC_MSG_RESULT(no)
-		fi
+		fxt_dir="$withval"
+		use_fxt_from_system=no
+		# in case this was not explicit yet
+		enable_fxt=yes
+		AC_SUBST(FXTDIR, $fxt_dir)
 	],
 	],
 	[
 	[
-		use_fxt=no
-		AC_MSG_RESULT(no)
+		use_fxt_from_system=yes
+		fxt_dir=""
 	])
 	])
-AC_SUBST(STARPU_USE_FXT, $use_fxt)
-AM_CONDITIONAL(STARPU_USE_FXT, test x$use_fxt = xyes)
-
-if test x$use_fxt = xyes; then
-	AC_DEFINE(STARPU_USE_FXT, [1], [enable FxT traces])
-	AC_DEFINE(CONFIG_FUT, [1], [enable FUT traces])
 
 
+if test x$enable_fxt != xno; then
 	if test x$use_fxt_from_system = xno; then
 	if test x$use_fxt_from_system = xno; then
 	    save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH"
 	    save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH"
-	    PKG_CONFIG_PATH="$fxtdir/lib/pkgconfig:$PKG_CONFIG_PATH"
-	    PKG_CHECK_MODULES([FXT],  [fxt], , [
+	    PKG_CONFIG_PATH="$fxt_dir/lib/pkgconfig:$PKG_CONFIG_PATH"
+	    PKG_CHECK_MODULES([FXT],  [fxt], [have_valid_fxt=yes], [
+	    	have_valid_fxt=yes
 		AC_MSG_WARN([Old FxT without fxt.pc file, hoping link will succeed])
 		AC_MSG_WARN([Old FxT without fxt.pc file, hoping link will succeed])
-		FXT_CFLAGS="-I$fxtdir/include/ "
-		FXT_LDFLAGS="-L$fxtdir/lib/"
+		FXT_CFLAGS="-I$fxt_dir/include/ "
+		FXT_LDFLAGS="-L$fxt_dir/lib/"
 		AC_ARG_VAR(FXT_LDFLAGS)
 		AC_ARG_VAR(FXT_LDFLAGS)
 		FXT_LIBS="-lfxt"
 		FXT_LIBS="-lfxt"
 	    ])
 	    ])
 	    PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH"
 	    PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH"
 	else
 	else
-	    PKG_CHECK_MODULES([FXT],  [fxt])
-	fi
-	save_LIBS="$LIBS"
-	LIBS="$LIBS $FXT_LIBS"
-	save_LDFLAGS="$LDFLAGS"
-	LDFLAGS="$LDFLAGS $FXT_LDFLAGS"
-	AC_CHECK_FUNCS([fxt_close])
-	AC_CHECK_FUNCS([fxt_blockev_leave])
-	AC_CHECK_FUNCS([enable_fut_flush])
-	AC_CHECK_FUNCS([fut_set_filename])
-	AC_CHECK_FUNCS([fut_setup_flush_callback])
-	LDFLAGS="$save_LDFLAGS"
-	LIBS="$save_LIBS"
-	save_CFLAGS="$CFLAGS"
-	CFLAGS="$CFLAGS $FXT_CFLAGS"
-	AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fut.h>]])
-	AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fut.h>]])
-	AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include <fut.h>]])
-	CFLAGS="$save_CFLAGS"
-
-	if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then
-                # simgrid's SMPI needs fxt to be linked in statically for
-                # variable privatization to work
-		FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed"
+	    PKG_CHECK_MODULES([FXT],  [fxt], [have_valid_fxt=yes], [have_valid_fxt=no])
 	fi
 	fi
-
-	##########################################
-	# Poti is a library to generate paje trace files
-	##########################################
-	PKG_CHECK_MODULES([POTI], [poti], [have_valid_poti=yes], [have_valid_poti=no])
-	AC_ARG_ENABLE(poti, [AS_HELP_STRING([--enable-poti],
-				[Enable the use of the POTI library to generate Paje traces])],
-				enable_poti=$enableval, enable_poti=no)
-	if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then
-		AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used])
+	if test x$have_valid_fxt = xyes ; then
+		enable_fxt=yes
 		save_LIBS="$LIBS"
 		save_LIBS="$LIBS"
-		LIBS="$LIBS $POTI_LIBS"
-		AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent])
+		LIBS="$LIBS $FXT_LIBS"
+			save_LDFLAGS="$LDFLAGS"
+		LDFLAGS="$LDFLAGS $FXT_LDFLAGS"
+		AC_CHECK_FUNCS([fxt_close])
+		AC_CHECK_FUNCS([fxt_blockev_leave])
+		AC_CHECK_FUNCS([enable_fut_flush])
+		AC_CHECK_FUNCS([fut_set_filename])
+		AC_CHECK_FUNCS([fut_setup_flush_callback])
+		LDFLAGS="$save_LDFLAGS"
 		LIBS="$save_LIBS"
 		LIBS="$save_LIBS"
-		FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS"
-		FXT_LIBS="$FXT_LIBS $POTI_LIBS"
+		save_CFLAGS="$CFLAGS"
+		CFLAGS="$CFLAGS $FXT_CFLAGS"
+		AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fut.h>]])
+		AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fut.h>]])
+		AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include <fut.h>]])
+		CFLAGS="$save_CFLAGS"
+
+		if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then
+	                # simgrid's SMPI needs fxt to be linked in statically for
+	                # variable privatization to work
+			FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed"
+		fi
+
+		AC_CHECK_LIB([papi], [PAPI_library_init],
+			     [AC_DEFINE([STARPU_PAPI], [1], [Define to 1 if you have the libpapi library])
+			      PAPI_LIBS=-lpapi])
+
+		##########################################
+		# Poti is a library to generate paje trace files
+		##########################################
+		PKG_CHECK_MODULES([POTI], [poti], [have_valid_poti=yes], [have_valid_poti=no])
+		AC_ARG_ENABLE(poti, [AS_HELP_STRING([--enable-poti],
+					[Enable the use of the POTI library to generate Paje traces])],
+					enable_poti=$enableval, enable_poti=no)
+		if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then
+			AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used])
+			save_LIBS="$LIBS"
+			LIBS="$LIBS $POTI_LIBS"
+			AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent])
+			LIBS="$save_LIBS"
+			FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS"
+			FXT_LIBS="$FXT_LIBS $POTI_LIBS"
+		fi
+	else
+		if test x$enable_fxt = xyes ; then
+			AC_MSG_ERROR([FxT is required but not available])
+		fi
+		enable_fxt=no
 	fi
 	fi
 fi
 fi
 
 
+AC_MSG_CHECKING(whether FxT traces should be generated)
+AC_MSG_RESULT($enable_fxt)
+
+if test x$enable_fxt = xyes; then
+	AC_DEFINE(STARPU_USE_FXT, [1], [enable FxT traces])
+	AC_DEFINE(CONFIG_FUT, [1], [enable FUT traces])
+fi
+
+AC_SUBST(STARPU_USE_FXT, $enable_fxt)
+AM_CONDITIONAL(STARPU_USE_FXT, test x$enable_fxt = xyes)
+
 AC_MSG_CHECKING(whether additional locking systems FxT traces should be enabled)
 AC_MSG_CHECKING(whether additional locking systems FxT traces should be enabled)
 AC_ARG_ENABLE(fxt-lock, [AS_HELP_STRING([--enable-fxt-lock],
 AC_ARG_ENABLE(fxt-lock, [AS_HELP_STRING([--enable-fxt-lock],
 			[enable additional locking systems FxT traces])],
 			[enable additional locking systems FxT traces])],
@@ -3549,7 +3559,7 @@ AC_MSG_NOTICE([
 	Magma enabled:     $have_magma
 	Magma enabled:     $have_magma
 	BLAS library:      $blas_lib
 	BLAS library:      $blas_lib
 	hwloc:             $have_valid_hwloc
 	hwloc:             $have_valid_hwloc
-	FxT trace enabled: $use_fxt
+	FxT trace enabled: $enable_fxt
 
 
         Documentation HTML:  $enable_build_doc
         Documentation HTML:  $enable_build_doc
         Documentation PDF:   $enable_build_doc_pdf
         Documentation PDF:   $enable_build_doc_pdf

+ 10 - 7
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -70,7 +70,14 @@ $ make
 $ make install
 $ make install
 \endverbatim
 \endverbatim
 
 
-In order to have StarPU to generate traces, StarPU should be configured with
+In order to have StarPU generate traces, StarPU needs to be configured again
+after installing FxT, and the configuration summary should show:
+
+\verbatim
+FxT trace enabled: yes
+\endverbatim
+
+If <c>configure</c> does not find FxT automatically, its location can be specified by hand with
 the option \ref with-fxt "--with-fxt" :
 the option \ref with-fxt "--with-fxt" :
 
 
 \verbatim
 \verbatim
@@ -78,10 +85,9 @@ $ ./configure --with-fxt=$FXTDIR
 \endverbatim
 \endverbatim
 
 
 Or you can simply point the <c>PKG_CONFIG_PATH</c> to
 Or you can simply point the <c>PKG_CONFIG_PATH</c> to
-<c>$FXTDIR/lib/pkgconfig</c> and pass
-\ref with-fxt "--with-fxt" to <c>configure</c>
+<c>$FXTDIR/lib/pkgconfig</c>
 
 
-When FxT is enabled, a trace is generated when StarPU is terminated by calling
+When \ref STARPU_FXT_TRACE is set to 1, a trace is generated when StarPU is terminated by calling
 starpu_shutdown(). The trace is a binary file whose name has the form
 starpu_shutdown(). The trace is a binary file whose name has the form
 <c>prof_file_XXX_YYY</c> where <c>XXX</c> is the user name, and
 <c>prof_file_XXX_YYY</c> where <c>XXX</c> is the user name, and
 <c>YYY</c> is the MPI id of the process that used StarPU (or 0 when running a sequential program).
 <c>YYY</c> is the MPI id of the process that used StarPU (or 0 when running a sequential program).
@@ -97,9 +103,6 @@ be used to generate trace events which describes the locks behaviour during
 the execution. It is however very heavy and should not be used unless debugging
 the execution. It is however very heavy and should not be used unless debugging
 StarPU's internal locking.
 StarPU's internal locking.
 
 
-The environment variable \ref STARPU_FXT_TRACE can be set to 0 to disable the
-generation of the <c>prof_file_XXX_YYY</c> file.
-
 When the FxT trace file <c>prof_file_something</c> has been generated,
 When the FxT trace file <c>prof_file_something</c> has been generated,
 it is possible to generate different trace formats by calling:
 it is possible to generate different trace formats by calling:
 
 

+ 1 - 1
doc/doxygen/chapters/501_environment_variables.doxy

@@ -1016,7 +1016,7 @@ Specify in which file to save the generated trace if FxT is enabled.
 <dd>
 <dd>
 \anchor STARPU_FXT_TRACE
 \anchor STARPU_FXT_TRACE
 \addindex __env__STARPU_FXT_TRACE
 \addindex __env__STARPU_FXT_TRACE
-Specify whether to generate (1) or not (0) the FxT trace in /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). The default is 1 (generate it)
+Specify whether to generate (1) or not (0) the FxT trace in /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). The default is 0 (do not generate it).
 </dd>
 </dd>
 
 
 <dt>STARPU_LIMIT_CUDA_devid_MEM</dt>
 <dt>STARPU_LIMIT_CUDA_devid_MEM</dt>

+ 3 - 0
examples/Makefile.am

@@ -81,6 +81,9 @@ EXTRA_DIST = 					\
 
 
 CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90
 CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90
 
 
+clean-local:
+	-rm -rf mult/sgemm.traces lu/lu.traces
+
 if STARPU_USE_CUDA
 if STARPU_USE_CUDA
 
 
 if STARPU_COVERITY
 if STARPU_COVERITY

+ 1 - 0
examples/lu/lu.sh

@@ -23,6 +23,7 @@ rm -rf $PREFIX/lu.traces
 mkdir -p $PREFIX/lu.traces
 mkdir -p $PREFIX/lu.traces
 
 
 export STARPU_FXT_PREFIX=$PREFIX/lu.traces
 export STARPU_FXT_PREFIX=$PREFIX/lu.traces
+export STARPU_FXT_TRACE=1
 
 
 $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
 $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -piv
 $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride
 $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $((160 * 4)) -nblocks 4 -no-stride

+ 1 - 1
examples/mult/sgemm.sh

@@ -30,7 +30,7 @@ mkdir -p $PREFIX/sgemm.traces
 
 
 export STARPU_FXT_PREFIX=$PREFIX/sgemm.traces
 export STARPU_FXT_PREFIX=$PREFIX/sgemm.traces
 
 
-STARPU_SCHED=dmdas $PREFIX/sgemm -check
+STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $PREFIX/sgemm -check
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];
 then
 then
 	$STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s starpu_sgemm_gemm -i $STARPU_FXT_PREFIX/prof_file_${USER}_0
 	$STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s starpu_sgemm_gemm -i $STARPU_FXT_PREFIX/prof_file_${USER}_0

+ 1 - 1
src/common/fxt.c

@@ -151,7 +151,7 @@ void starpu_fxt_stop_profiling()
 
 
 int starpu_fxt_is_enabled()
 int starpu_fxt_is_enabled()
 {
 {
-	return starpu_get_env_number_default("STARPU_FXT_TRACE", 1);
+	return starpu_get_env_number_default("STARPU_FXT_TRACE", 0);
 }
 }
 
 
 #ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK
 #ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK

+ 34 - 21
src/common/fxt.h

@@ -330,11 +330,12 @@ void _starpu_fxt_dump_file(void);
 #define _STARPU_FUT_COMMIT(size) do { } while (0)
 #define _STARPU_FUT_COMMIT(size) do { } while (0)
 #endif
 #endif
 
 
-#ifdef FUT_ALWAYS_PROBE1STR
+#ifdef FUT_RAW_ALWAYS_PROBE1STR
 #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str)
 #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str)
 #else
 #else
 #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str)	\
 #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str)	\
 do {									\
 do {									\
+    if( STARPU_UNLIKELY(fut_active) ) { \
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\
@@ -347,7 +348,7 @@ do {									\
 	snprintf((char *)futargs, len, "%s", str);			\
 	snprintf((char *)futargs, len, "%s", str);			\
 	((char *)futargs)[len - 1] = '\0';				\
 	((char *)futargs)[len - 1] = '\0';				\
 	_STARPU_FUT_COMMIT(total_len);					\
 	_STARPU_FUT_COMMIT(total_len);					\
-} while (0)
+    }} while (0)
 #endif
 #endif
 
 
 #ifdef FUT_FULL_PROBE1STR
 #ifdef FUT_FULL_PROBE1STR
@@ -358,7 +359,7 @@ do {									\
  * by a string. */
  * by a string. */
 #define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str)		\
 #define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str);		\
 	_STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str);		\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -390,7 +391,7 @@ do {									\
 #else
 #else
 #define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str)		\
 #define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str);		\
 	_STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str);		\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -423,7 +424,7 @@ do {									\
 #else
 #else
 #define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str)		\
 #define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str);	\
 	_STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str);	\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -457,7 +458,7 @@ do {									\
 #else
 #else
 #define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str)		\
 #define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str);	\
 	_STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str);	\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -492,7 +493,7 @@ do {									\
 #else
 #else
 #define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str)		\
 #define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str);	\
 	_STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str);	\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -528,7 +529,7 @@ do {									\
 #else
 #else
 #define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str)		\
 #define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str);	\
 	_STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str);	\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -565,7 +566,7 @@ do {									\
 #else
 #else
 #define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str)		\
 #define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str)		\
 do {									\
 do {									\
-    if(KEYMASK & fut_active) {						\
+    if (STARPU_UNLIKELY(KEYMASK & fut_active)) {			\
 	_STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str);	\
 	_STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str);	\
     }									\
     }									\
 } while (0)
 } while (0)
@@ -573,7 +574,7 @@ do {									\
 
 
 #ifndef FUT_RAW_PROBE7
 #ifndef FUT_RAW_PROBE7
 #define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do {		\
 #define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do {		\
-		if(fut_active) {					\
+		if(STARPU_UNLIKELY(fut_active)) {			\
 			unsigned long *__args __attribute__((unused))=	\
 			unsigned long *__args __attribute__((unused))=	\
 				fut_getstampedbuffer(CODE,		\
 				fut_getstampedbuffer(CODE,		\
 						     FUT_SIZE(7)); \
 						     FUT_SIZE(7)); \
@@ -677,7 +678,7 @@ do {									\
 /* full probes */
 /* full probes */
 #ifndef FUT_FULL_PROBE0
 #ifndef FUT_FULL_PROBE0
 #define FUT_FULL_PROBE0(KEYMASK,CODE) do { \
 #define FUT_FULL_PROBE0(KEYMASK,CODE) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \
                 FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \
         } \
         } \
 } while(0)
 } while(0)
@@ -685,7 +686,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE1
 #ifndef FUT_FULL_PROBE1
 #define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \
 #define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \
                 FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \
         } \
         } \
 } while(0)
 } while(0)
@@ -693,7 +694,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE2
 #ifndef FUT_FULL_PROBE2
 #define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \
 #define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \
                 FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \
         } \
         } \
 } while(0)
 } while(0)
@@ -701,7 +702,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE3
 #ifndef FUT_FULL_PROBE3
 #define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \
 #define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \
                 FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \
         } \
         } \
 } while(0)
 } while(0)
@@ -709,7 +710,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE4
 #ifndef FUT_FULL_PROBE4
 #define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \
 #define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \
                 FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \
         } \
         } \
 } while(0)
 } while(0)
@@ -717,7 +718,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE5
 #ifndef FUT_FULL_PROBE5
 #define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \
 #define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \
                 FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \
         } \
         } \
 } while(0)
 } while(0)
@@ -725,7 +726,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE6
 #ifndef FUT_FULL_PROBE6
 #define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \
 #define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \
                 FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \
         } \
         } \
 } while(0)
 } while(0)
@@ -733,7 +734,7 @@ do {									\
 
 
 #ifndef FUT_FULL_PROBE7
 #ifndef FUT_FULL_PROBE7
 #define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \
 #define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \
-        if( KEYMASK & fut_active ) { \
+        if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \
                 FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \
                 FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \
         } \
         } \
 } while(0)
 } while(0)
@@ -769,6 +770,7 @@ do {									\
 
 
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 do {									\
 do {									\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA) & fut_active) ) { \
 	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \
 	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \
 	{								\
 	{								\
 		if ((job)->task->cl)					\
 		if ((job)->task->cl)					\
@@ -792,15 +794,18 @@ do {									\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 		FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \
 		FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \
 	}								\
 	}								\
+    } \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid)			\
 #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid)			\
 do {									\
 do {									\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
 	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 	char _archname[32]=""; \
 	char _archname[32]=""; \
 	starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0);	\
 	starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0);	\
 	_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \
 	_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \
+    } \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_START_EXECUTING()				\
 #define _STARPU_TRACE_START_EXECUTING()				\
@@ -867,7 +872,8 @@ do {									\
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_TASK_NAME(job)					\
 #define _STARPU_TRACE_TASK_NAME(job)					\
-	do {								\
+do {								\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
         const char *model_name = _starpu_job_get_model_name((job));		\
         const char *model_name = _starpu_job_get_model_name((job));		\
 	const char *name = _starpu_job_get_task_name((job));			\
 	const char *name = _starpu_job_get_task_name((job));			\
 	if (name)					                        \
 	if (name)					                        \
@@ -879,14 +885,17 @@ do {									\
 	}									\
 	}									\
 	if (model_name)					\
 	if (model_name)					\
 		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \
 		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \
+    } \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_TASK_COLOR(job)						\
 #define _STARPU_TRACE_TASK_COLOR(job)						\
 do { \
 do { \
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
 	if ((job)->task->color != 0) \
 	if ((job)->task->color != 0) \
 		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \
 		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \
 	else if ((job)->task->cl && (job)->task->cl->color != 0) \
 	else if ((job)->task->cl && (job)->task->cl->color != 0) \
 		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \
 		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \
+    } \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_TASK_DONE(job)						\
 #define _STARPU_TRACE_TASK_DONE(job)						\
@@ -894,6 +903,7 @@ do { \
 
 
 #define _STARPU_TRACE_TAG_DONE(tag)						\
 #define _STARPU_TRACE_TAG_DONE(tag)						\
 do {										\
 do {										\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active) ) { \
         struct _starpu_job *job = (tag)->job;                                  \
         struct _starpu_job *job = (tag)->job;                                  \
         const char *model_name = _starpu_job_get_task_name((job));                       \
         const char *model_name = _starpu_job_get_task_name((job));                       \
 	if (model_name)                                                         \
 	if (model_name)                                                         \
@@ -903,6 +913,7 @@ do {										\
 	else {									\
 	else {									\
 		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
 		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
 	}									\
 	}									\
+    } \
 } while(0)
 } while(0)
 
 
 #define _STARPU_TRACE_DATA_NAME(handle, name) \
 #define _STARPU_TRACE_DATA_NAME(handle, name) \
@@ -1288,10 +1299,10 @@ do {										\
 	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
 	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
 
 
 #define _STARPU_TRACE_SCHED_COMPONENT_NEW(component)		\
 #define _STARPU_TRACE_SCHED_COMPONENT_NEW(component)		\
-	_STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name);
+	if (STARPU_UNLIKELY(fut_active)) _STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name);
 
 
 #define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child)		\
 #define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child)		\
-	FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child);
+	if (STARPU_UNLIKELY(fut_active)) FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child);
 
 
 #define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio)		\
 #define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio)		\
 	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio);
 	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio);
@@ -1300,6 +1311,7 @@ do {										\
 	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority);
 	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority);
 
 
 #define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle)	do {	\
 #define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle)	do {	\
+    if( STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_META) & fut_active) ) { \
 	const size_t __data_size = handle->ops->get_size(handle); \
 	const size_t __data_size = handle->ops->get_size(handle); \
 	const starpu_ssize_t __max_data_size = _starpu_data_get_max_size(handle); \
 	const starpu_ssize_t __max_data_size = _starpu_data_get_max_size(handle); \
 	char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; \
 	char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; \
@@ -1309,6 +1321,7 @@ do {										\
 	else \
 	else \
 		__buf[0] = 0; \
 		__buf[0] = 0; \
 	_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \
 	_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \
+    } \
 } while (0)
 } while (0)
 
 
 #define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle)	\
 #define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle)	\

+ 8 - 2
src/core/jobs.c

@@ -86,9 +86,15 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 
 
 	job->task = task;
 	job->task = task;
 
 
-#if !defined(STARPU_USE_FXT) && !defined(STARPU_DEBUG)
-	if (_starpu_bound_recording || _starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1 || STARPU_AYU_EVENT)
+	if (
+#if defined(STARPU_DEBUG)
+	    1
+#elif defined(STARPU_USE_FXT)
+	    fut_active
+#else
+	    _starpu_bound_recording || _starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1 || STARPU_AYU_EVENT
 #endif
 #endif
+	   )
 	{
 	{
 		job->job_id = _starpu_fxt_get_job_id();
 		job->job_id = _starpu_fxt_get_job_id();
 		STARPU_AYU_ADDTASK(job->job_id, task);
 		STARPU_AYU_ADDTASK(job->job_id, task);

+ 17 - 6
src/core/workers.c

@@ -953,7 +953,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 		 * before starting another one, to make sure they appear in
 		 * before starting another one, to make sure they appear in
 		 * order in the trace.
 		 * order in the trace.
 		 */
 		 */
-		if ((!workerarg->set || workerarg->set->workers == workerarg)
+		if (fut_active && (!workerarg->set || workerarg->set->workers == workerarg)
 			&& workerarg->run_by_starpu == 1 && workerarg->arch != STARPU_MPI_MS_WORKER)
 			&& workerarg->run_by_starpu == 1 && workerarg->arch != STARPU_MPI_MS_WORKER)
 		{
 		{
 			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
@@ -979,10 +979,13 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
 
                 /* We use the first worker to know if everything is finished */
                 /* We use the first worker to know if everything is finished */
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-                STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
-                while (!worker_zero->worker_is_running)
-                        STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
-                STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
+		if (fut_active)
+		{
+			STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
+			while (!worker_zero->worker_is_running)
+				STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
+		}
 #endif
 #endif
 
 
                 STARPU_PTHREAD_MUTEX_LOCK(&worker_set_zero->mutex);
                 STARPU_PTHREAD_MUTEX_LOCK(&worker_set_zero->mutex);
@@ -1455,7 +1458,8 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	_STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
 	_STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
 #endif
 #endif
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-	_STARPU_DISP("Warning: StarPU was configured with --with-fxt, which slows down a bit, limits scalability and makes worker initialization sequential\n");
+	if (starpu_fxt_is_enabled())
+		_STARPU_DISP("Warning: FxT is enabled, which slows down a bit, limits scalability and makes worker initialization sequential\n");
 #endif
 #endif
 #ifdef STARPU_FXT_LOCK_TRACES
 #ifdef STARPU_FXT_LOCK_TRACES
 	_STARPU_DISP("Warning: StarPU was configured with --enable-fxt-lock, which slows down things a huge lot, and is really only meant for StarPU insides debugging. Did you really want to enable that?\n");
 	_STARPU_DISP("Warning: StarPU was configured with --enable-fxt-lock, which slows down things a huge lot, and is really only meant for StarPU insides debugging. Did you really want to enable that?\n");
@@ -1477,6 +1481,13 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 #endif
 #endif
 #endif
 #endif
 
 
+#ifndef STARPU_USE_FXT
+	if (starpu_get_env_number("STARPU_FXT_TRACE") > 0)
+	{
+		_STARPU_DISP("Warning: FxT trace is requested but StarPU was configured without FxT support\n");
+	}
+#endif
+
 	if (starpu_getenv("STARPU_ENABLE_STATS"))
 	if (starpu_getenv("STARPU_ENABLE_STATS"))
 	{
 	{
 		_STARPU_DISP("Warning: STARPU_ENABLE_STATS is enabled, which slows down a bit\n");
 		_STARPU_DISP("Warning: STARPU_ENABLE_STATS is enabled, which slows down a bit\n");

+ 2 - 1
src/datawizard/coherency.c

@@ -1245,7 +1245,8 @@ void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job
 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
 			_starpu_redux_init_data_replicate(handle, local_replicate, workerid);
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-		total_size += _starpu_data_get_size(handle);
+		if (fut_active)
+			total_size += _starpu_data_get_size(handle);
 #endif
 #endif
 	}
 	}
 	_STARPU_TRACE_DATA_LOAD(workerid,total_size);
 	_STARPU_TRACE_DATA_LOAD(workerid,total_size);

+ 6 - 3
src/datawizard/copy_driver.c

@@ -235,10 +235,13 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d
 		_starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size);
 		_starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size);
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-		com_id = STARPU_ATOMIC_ADDL(&communication_cnt, 1);
+		if (fut_active)
+		{
+			com_id = STARPU_ATOMIC_ADDL(&communication_cnt, 1);
 
 
-		if (req)
-			req->com_id = com_id;
+			if (req)
+				req->com_id = com_id;
+		}
 #endif
 #endif
 
 
 		dst_replicate->initialized = 1;
 		dst_replicate->initialized = 1;

+ 3 - 3
src/datawizard/data_request.c

@@ -444,15 +444,15 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 #endif
 #endif
 	}
 	}
 
 
-	if (r->canceled < 2 && r->com_id > 0)
-	{
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
+	if (fut_active && r->canceled < 2 && r->com_id > 0)
+	{
 		unsigned src_node = src_replicate->memory_node;
 		unsigned src_node = src_replicate->memory_node;
 		unsigned dst_node = dst_replicate->memory_node;
 		unsigned dst_node = dst_replicate->memory_node;
 		size_t size = _starpu_data_get_size(handle);
 		size_t size = _starpu_data_get_size(handle);
 		_STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, r->com_id, r->prefetch);
 		_STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, r->com_id, r->prefetch);
-#endif
 	}
 	}
+#endif
 
 
 	/* Once the request has been fulfilled, we may submit the requests that
 	/* Once the request has been fulfilled, we may submit the requests that
 	 * were chained to that request. */
 	 * were chained to that request. */

+ 20 - 14
src/drivers/cuda/driver_cuda.c

@@ -677,13 +677,16 @@ static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker
 				STARPU_CUDA_REPORT_ERROR(cures);
 				STARPU_CUDA_REPORT_ERROR(cures);
 #endif
 #endif
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-			int k;
-			for (k = 0; k < (int) worker->set->nworkers; k++)
-				if (worker->set->workers[k].ntasks == worker->set->workers[k].pipeline_length)
-					break;
-			if (k == (int) worker->set->nworkers)
-				/* Everybody busy */
-				_STARPU_TRACE_START_EXECUTING();
+			if (fut_active)
+			{
+				int k;
+				for (k = 0; k < (int) worker->set->nworkers; k++)
+					if (worker->set->workers[k].ntasks == worker->set->workers[k].pipeline_length)
+						break;
+				if (k == (int) worker->set->nworkers)
+					/* Everybody busy */
+					_STARPU_TRACE_START_EXECUTING();
+			}
 #endif
 #endif
 		}
 		}
 	}
 	}
@@ -934,13 +937,16 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 					_STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid);
 					_STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid);
 			}
 			}
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
-			int k;
-			for (k = 0; k < (int) worker_set->nworkers; k++)
-				if (worker_set->workers[k].ntasks)
-					break;
-			if (k == (int) worker_set->nworkers)
-				/* Nobody busy */
-				_STARPU_TRACE_END_EXECUTING()
+			if (fut_active)
+			{
+				int k;
+				for (k = 0; k < (int) worker_set->nworkers; k++)
+					if (worker_set->workers[k].ntasks)
+						break;
+				if (k == (int) worker_set->nworkers)
+					/* Nobody busy */
+					_STARPU_TRACE_END_EXECUTING()
+			}
 #endif
 #endif
 			_STARPU_TRACE_START_PROGRESS(memnode);
 			_STARPU_TRACE_START_PROGRESS(memnode);
 		}
 		}

+ 4 - 0
src/sched_policies/component_prio.c

@@ -23,17 +23,21 @@
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do {                                 \
 #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do {                                 \
+    if (fut_active) { \
 	int workerid = STARPU_NMAXWORKERS + 1;									\
 	int workerid = STARPU_NMAXWORKERS + 1;									\
 	if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \
 	if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \
 		workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \
 		workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \
 	_STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len); \
 	_STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len); \
+    } \
 } while (0)
 } while (0)
 
 
 #define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do {                                 \
 #define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do {                                 \
+    if (fut_active) { \
 	int workerid = STARPU_NMAXWORKERS + 1;									\
 	int workerid = STARPU_NMAXWORKERS + 1;									\
 	if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \
 	if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \
 		workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \
 		workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \
 	_STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len); \
 	_STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len); \
+    } \
 } while (0)
 } while (0)
 #else
 #else
 #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { } while (0)
 #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { } while (0)

+ 3 - 0
tests/Makefile.am

@@ -76,6 +76,9 @@ EXTRA_DIST =					\
 CLEANFILES = 					\
 CLEANFILES = 					\
 	*.gcno *.gcda *.linkinfo core starpu_idle_microsec.log *.mod *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 bandwidth-*.dat bandwidth.gp bandwidth.eps bandwidth.svg
 	*.gcno *.gcda *.linkinfo core starpu_idle_microsec.log *.mod *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 bandwidth-*.dat bandwidth.gp bandwidth.eps bandwidth.svg
 
 
+clean-local:
+	-rm -rf overlap/overlap.traces datawizard/locality.traces
+
 BUILT_SOURCES =
 BUILT_SOURCES =
 SUBDIRS =
 SUBDIRS =
 
 

+ 1 - 1
tests/datawizard/locality.sh

@@ -29,7 +29,7 @@ mkdir -p $PREFIX/locality.traces
 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
 
 
 export STARPU_FXT_PREFIX=$PREFIX/locality.traces
 export STARPU_FXT_PREFIX=$PREFIX/locality.traces
-STARPU_SCHED=modular-eager $STARPU_LAUNCH $PREFIX/locality
+STARPU_FXT_TRACE=1 STARPU_SCHED=modular-eager $STARPU_LAUNCH $PREFIX/locality
 $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0
 $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0
 
 
 # Check that they are approved by Grenoble :)
 # Check that they are approved by Grenoble :)

+ 1 - 1
tests/overlap/overlap.sh

@@ -30,7 +30,7 @@ mkdir -p $PREFIX/overlap.traces
 
 
 export STARPU_FXT_PREFIX=$PREFIX/overlap.traces
 export STARPU_FXT_PREFIX=$PREFIX/overlap.traces
 
 
-STARPU_SCHED=dmdas $PREFIX/overlap
+STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $PREFIX/overlap
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];
 then
 then
 	$STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s overlap_sleep_1024_24 -i $STARPU_FXT_PREFIX/prof_file_${USER}_0
 	$STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s overlap_sleep_1024_24 -i $STARPU_FXT_PREFIX/prof_file_${USER}_0