浏览代码

rework FUT PROBEs use within StarPU

Olivier Aumage 5 年之前
父节点
当前提交
43f291ec25
共有 4 个文件被更改,包括 288 次插入191 次删除
  1. 2 6
      mpi/src/starpu_mpi_fxt.h
  2. 4 9
      src/common/fxt.c
  3. 281 175
      src/common/fxt.h
  4. 1 1
      src/core/workers.c

+ 2 - 6
mpi/src/starpu_mpi_fxt.h

@@ -63,9 +63,9 @@ extern "C"
 #ifdef STARPU_USE_FXT
 
 #define _STARPU_MPI_TRACE_START(rank, worldsize)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid());
+	FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid());
 #define _STARPU_MPI_TRACE_STOP(rank, worldsize)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid());
+	FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid());
 #define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key)	do {\
 	if (_starpu_fxt_started) \
 	FUT_DO_ALWAYS_PROBE4(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), _starpu_gettid()); \
@@ -143,10 +143,6 @@ extern "C"
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_DRIVER_RUN_BEGIN,  _starpu_gettid());
 #define _STARPU_MPI_TRACE_DRIVER_RUN_END()	\
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid());
-#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_DRIVER_RUN_BEGIN,  _starpu_gettid());
-#define _STARPU_MPI_TRACE_DRIVER_RUN_END()	\
-	FUT_DO_PROBE1(_STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid());
 #define TRACE
 #else
 #define _STARPU_MPI_TRACE_START(a, b)				do {} while(0);

+ 4 - 9
src/common/fxt.c

@@ -123,21 +123,21 @@ void starpu_fxt_autostart_profiling(int autostart)
 	if (autostart)
 		initial_key_mask = FUT_KEYMASKALL;
 	else
-		initial_key_mask = FUT_KEYMASK0;
+		initial_key_mask = _STARPU_FUT_KEYMASK_META;
 }
 
 void starpu_fxt_start_profiling()
 {
 	unsigned threadid = _starpu_gettid();
 	fut_keychange(FUT_ENABLE, FUT_KEYMASKALL, threadid);
-	_STARPU_TRACE_EVENT("start_profiling");
+	_STARPU_TRACE_META("start_profiling");
 }
 
 void starpu_fxt_stop_profiling()
 {
 	unsigned threadid = _starpu_gettid();
-	_STARPU_TRACE_EVENT("stop_profiling");
-	fut_keychange(FUT_DISABLE, FUT_KEYMASKALL, threadid);
+	_STARPU_TRACE_META("stop_profiling");
+	fut_keychange(FUT_SETMASK, _STARPU_FUT_KEYMASK_META, threadid);
 }
 
 void _starpu_fxt_init_profiling(unsigned trace_buffer_size)
@@ -299,11 +299,6 @@ void _starpu_stop_fxt_profiling(void)
 	}
 }
 
-void _starpu_fxt_register_thread(unsigned cpuid)
-{
-	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid());
-}
-
 #else // STARPU_USE_FXT
 
 void starpu_fxt_autostart_profiling(int autostart STARPU_ATTRIBUTE_UNUSED)

+ 281 - 175
src/common/fxt.h

@@ -235,6 +235,29 @@
 
 #define _STARPU_FUT_DATA_REQUEST_CREATED   0x5185
 
+
+/* Predefined FUT key masks: each StarPU event category is mapped to its own
+ * FUT_KEYMASKn constant. Apart from META and USER, every category also has a
+ * separate <CATEGORY>_VERBOSE mask. */
+#define _STARPU_FUT_KEYMASK_META           FUT_KEYMASK0
+#define _STARPU_FUT_KEYMASK_USER           FUT_KEYMASK1
+#define _STARPU_FUT_KEYMASK_TASK           FUT_KEYMASK2
+#define _STARPU_FUT_KEYMASK_TASK_VERBOSE   FUT_KEYMASK3
+#define _STARPU_FUT_KEYMASK_DATA           FUT_KEYMASK4
+#define _STARPU_FUT_KEYMASK_DATA_VERBOSE   FUT_KEYMASK5
+#define _STARPU_FUT_KEYMASK_WORKER         FUT_KEYMASK6
+#define _STARPU_FUT_KEYMASK_WORKER_VERBOSE FUT_KEYMASK7
+#define _STARPU_FUT_KEYMASK_DSM            FUT_KEYMASK8
+#define _STARPU_FUT_KEYMASK_DSM_VERBOSE    FUT_KEYMASK9
+#define _STARPU_FUT_KEYMASK_SCHED          FUT_KEYMASK10
+#define _STARPU_FUT_KEYMASK_SCHED_VERBOSE  FUT_KEYMASK11
+#define _STARPU_FUT_KEYMASK_LOCK           FUT_KEYMASK12
+#define _STARPU_FUT_KEYMASK_LOCK_VERBOSE   FUT_KEYMASK13
+/* Former spelling without the underscore, kept so existing users still compile. */
+#define _STARPU_FUT_KEYMASK_LOCKVERBOSE    _STARPU_FUT_KEYMASK_LOCK_VERBOSE
+#define _STARPU_FUT_KEYMASK_EVENT          FUT_KEYMASK14
+#define _STARPU_FUT_KEYMASK_EVENT_VERBOSE  FUT_KEYMASK15
+#define _STARPU_FUT_KEYMASK_MPI            FUT_KEYMASK16
+#define _STARPU_FUT_KEYMASK_MPI_VERBOSE    FUT_KEYMASK17
+#define _STARPU_FUT_KEYMASK_HYP            FUT_KEYMASK18
+#define _STARPU_FUT_KEYMASK_HYP_VERBOSE    FUT_KEYMASK19
+
 extern unsigned long _starpu_job_cnt;
 
 static inline unsigned long _starpu_fxt_get_job_id(void)
@@ -296,25 +319,21 @@ void _starpu_stop_fxt_profiling(void);
 /* Generate the trace file. Used when catching signals SIGINT and SIGSEGV */
 void _starpu_fxt_dump_file(void);
 
-/* Associate the current processing unit to the identifier of the LWP that runs
- * the worker. */
-void _starpu_fxt_register_thread(unsigned);
-
 #ifdef FUT_NEEDS_COMMIT
 #define _STARPU_FUT_COMMIT(size) fut_commitstampedbuffer(size)
 #else
 #define _STARPU_FUT_COMMIT(size) do { } while (0)
 #endif
 
-#ifdef FUT_DO_PROBE1STR
-#define _STARPU_FUT_DO_PROBE1STR(CODE, P1, str) FUT_DO_PROBE1STR(CODE, P1, str)
+#ifdef FUT_FULL_PROBE1STR
+#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) FUT_FULL_PROBE1STR(CODE, P1, str)
 #else
 /* Sometimes we need something a little more specific than the wrappers from
  * FxT: this macro adds an event made of one numeric parameter followed
  * by a string. */
-#define _STARPU_FUT_DO_PROBE1STR(CODE, P1, str)			\
+#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str)			\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\
@@ -331,15 +350,15 @@ do {									\
 } while (0);
 #endif
 
-#ifdef FUT_DO_PROBE2STR
-#define _STARPU_FUT_DO_PROBE2STR(CODE, P1, P2, str) FUT_DO_PROBE2STR(CODE, P1, P2, str)
+#ifdef FUT_FULL_PROBE2STR
+#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) FUT_FULL_PROBE2STR(CODE, P1, P2, str)
 #else
 /* Sometimes we need something a little more specific than the wrappers from
  * FxT: this macro adds an event made of two numeric parameters followed
  * by a string. */
-#define _STARPU_FUT_DO_PROBE2STR(CODE, P1, P2, str)			\
+#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str)			\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 2)*sizeof(unsigned long));\
@@ -357,12 +376,12 @@ do {									\
 } while (0);
 #endif
 
-#ifdef FUT_DO_PROBE3STR
-#define _STARPU_FUT_DO_PROBE3STR(CODE, P1, P2, P3, str) FUT_DO_PROBE3STR(CODE, P1, P2, P3, str)
+#ifdef FUT_FULL_PROBE3STR
+#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) FUT_FULL_PROBE3STR(CODE, P1, P2, P3, str)
 #else
-#define _STARPU_FUT_DO_PROBE3STR(CODE, P1, P2, P3, str)			\
+#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str)			\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 3)*sizeof(unsigned long));\
@@ -381,12 +400,12 @@ do {									\
 } while (0);
 #endif
 
-#ifdef FUT_DO_PROBE4STR
-#define _STARPU_FUT_DO_PROBE4STR(CODE, P1, P2, P3, P4, str) FUT_DO_PROBE4STR(CODE, P1, P2, P3, P4, str)
+#ifdef FUT_FULL_PROBE4STR
+#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) FUT_FULL_PROBE4STR(CODE, P1, P2, P3, P4, str)
 #else
-#define _STARPU_FUT_DO_PROBE4STR(CODE, P1, P2, P3, P4, str)		\
+#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str)		\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long));\
@@ -406,12 +425,12 @@ do {									\
 } while (0);
 #endif
 
-#ifdef FUT_DO_PROBE5STR
-#define _STARPU_FUT_DO_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) FUT_DO_PROBE5STR(CODE, P1, P2, P3, P4, P5, str)
+#ifdef FUT_FULL_PROBE5STR
+#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) FUT_FULL_PROBE5STR(CODE, P1, P2, P3, P4, P5, str)
 #else
-#define _STARPU_FUT_DO_PROBE5STR(CODE, P1, P2, P3, P4, P5, str)		\
+#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str)		\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 5)*sizeof(unsigned long));\
@@ -432,12 +451,12 @@ do {									\
 } while (0);
 #endif
 
-#ifdef FUT_DO_PROBE6STR
-#define _STARPU_FUT_DO_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) FUT_DO_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str)
+#ifdef FUT_FULL_PROBE6STR
+#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) FUT_FULL_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str)
 #else
-#define _STARPU_FUT_DO_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str)	\
+#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str)	\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 6)*sizeof(unsigned long));\
@@ -459,12 +478,12 @@ do {									\
 } while (0);
 #endif
 
-#ifdef FUT_DO_PROBE7STR
-#define _STARPU_FUT_DO_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_DO_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str)
+#ifdef FUT_FULL_PROBE7STR
+#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_FULL_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str)
 #else
-#define _STARPU_FUT_DO_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str)	\
+#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str)	\
 do {									\
-    if(fut_active) {							\
+    if(KEYMASK & fut_active) {							\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
 	size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 7)*sizeof(unsigned long));\
@@ -499,12 +518,6 @@ do {									\
 	} while (0)
 #endif
 
-#ifndef FUT_DO_PROBE7
-#define FUT_DO_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \
-        FUT_RAW_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \
-} while (0)
-#endif
-
 #ifndef FUT_RAW_ALWAYS_PROBE2
 #define FUT_RAW_ALWAYS_PROBE2(CODE,P1,P2) do {	\
 		unsigned long *__args __attribute__((unused))=	\
@@ -584,18 +597,92 @@ do {									\
 } while (0)
 
 
+/* Full probes: the event is recorded only when at least one bit of KEYMASK
+ * is set in fut_active. KEYMASK is parenthesized so that a composite mask
+ * expression (e.g. A | B) is tested as a whole instead of being split by
+ * operator precedence. Each definition is skipped when a macro of the same
+ * name already exists. */
+#ifndef FUT_FULL_PROBE0
+#define FUT_FULL_PROBE0(KEYMASK,CODE) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE1
+#define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE2
+#define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE3
+#define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE4
+#define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE5
+#define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE6
+#define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \
+        } \
+} while(0)
+#endif
+
+#ifndef FUT_FULL_PROBE7
+#define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \
+        if( (KEYMASK) & fut_active ) { \
+                FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \
+        } \
+} while(0)
+#endif
+
 
-/* workerkind = _STARPU_FUT_CPU_KEY for instance */
 #define _STARPU_TRACE_NEW_MEM_NODE(nodeid)			do {\
 	if (_starpu_fxt_started) \
 		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid()); \
 } while (0)
 
-#define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync)	\
-	FUT_DO_PROBE7(_STARPU_FUT_WORKER_INIT_START, workerkind, workerid, devid, memnode, bindid, sync, _starpu_gettid());
+#define _STARPU_TRACE_REGISTER_THREAD(cpuid)			do {\
+	if (_starpu_fxt_started) \
+		FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \
+} while (0)
+
+/* workerkind = _STARPU_FUT_CPU_KEY for instance */
+#define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync)	do {\
+	if (_starpu_fxt_started) \
+		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, workerkind, workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
+} while (0)
 
-#define _STARPU_TRACE_WORKER_INIT_END(__workerid)				\
-	FUT_DO_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (__workerid));
+#define _STARPU_TRACE_WORKER_INIT_END(__workerid)		do {\
+	if (_starpu_fxt_started) \
+		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (__workerid)); \
+} while (0)
 
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid)				\
 do {									\
@@ -603,12 +690,12 @@ do {									\
 	if (name)                                                 \
 	{								\
 		/* we include the task name */			\
-		_STARPU_FUT_DO_PROBE5STR(_STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 1, name); \
+		_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 1, name); \
 		if (model_name && strcmp(model_name, name))				\
-			_STARPU_FUT_DO_PROBE1STR(_STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
+			_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
 	}								\
 	else {                                                          \
-		FUT_DO_PROBE5(_STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 0); \
+		FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 0); \
 	}								\
 	{								\
 		if ((job)->task->cl)					\
@@ -623,14 +710,14 @@ do {									\
 				if (__handle->ops->describe)		\
 				{					\
 					__handle->ops->describe(__interface, __buf, sizeof(__buf));	\
-					_STARPU_FUT_DO_PROBE1STR(_STARPU_FUT_CODELET_DATA, workerid, __buf);	\
+					_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_CODELET_DATA, workerid, __buf);	\
 				}					\
-				FUT_DO_PROBE4(_STARPU_FUT_CODELET_DATA_HANDLE, (job)->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE((job)->task, __i));	\
+				FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_CODELET_DATA_HANDLE, (job)->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE((job)->task, __i));	\
 			}						\
 		}							\
 		const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));	\
 		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
-		FUT_DO_PROBE7(_STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \
+		FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \
 	}								\
 } while(0);
 
@@ -640,59 +727,59 @@ do {									\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\
 	char _archname[32]=""; \
 	starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0);	\
-	_STARPU_FUT_DO_PROBE5STR(_STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \
+	_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \
 } while(0);
 
 #define _STARPU_TRACE_START_EXECUTING()				\
-	FUT_DO_PROBE1(_STARPU_FUT_START_EXECUTING, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_EXECUTING, _starpu_gettid());
 
 #define _STARPU_TRACE_END_EXECUTING()				\
-	FUT_DO_PROBE1(_STARPU_FUT_END_EXECUTING, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_EXECUTING, _starpu_gettid());
 
 #define _STARPU_TRACE_START_CALLBACK(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_CALLBACK, job, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_CALLBACK, job, _starpu_gettid());
 
 #define _STARPU_TRACE_END_CALLBACK(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_CALLBACK, job, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_CALLBACK, job, _starpu_gettid());
 
 #define _STARPU_TRACE_JOB_PUSH(task, prio)	\
-	FUT_DO_PROBE3(_STARPU_FUT_JOB_PUSH, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid());
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_PUSH, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid());
 
 #define _STARPU_TRACE_JOB_POP(task, prio)	\
-	FUT_DO_PROBE3(_STARPU_FUT_JOB_POP, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid());
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_POP, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid());
 
 #define _STARPU_TRACE_UPDATE_TASK_CNT(counter)	\
-	FUT_DO_PROBE2(_STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid())
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid())
 
 #define _STARPU_TRACE_START_FETCH_INPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT_ON_TID, job, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_FETCH_INPUT_ON_TID, job, _starpu_gettid());
 
 #define _STARPU_TRACE_END_FETCH_INPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT_ON_TID, job, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_FETCH_INPUT_ON_TID, job, _starpu_gettid());
 
 #define _STARPU_TRACE_START_PUSH_OUTPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_PUSH_OUTPUT_ON_TID, job, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_PUSH_OUTPUT_ON_TID, job, _starpu_gettid());
 
 #define _STARPU_TRACE_END_PUSH_OUTPUT(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_PUSH_OUTPUT_ON_TID, job, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_PUSH_OUTPUT_ON_TID, job, _starpu_gettid());
 
 #define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT, job, id);
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_FETCH_INPUT, job, id);
 
 #define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT, job, id);
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_FETCH_INPUT, job, id);
 
 #define _STARPU_TRACE_TAG(tag, job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_TAG, tag, (job)->job_id)
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG, tag, (job)->job_id)
 
 #define _STARPU_TRACE_TAG_DEPS(tag_child, tag_father)	\
-	FUT_DO_PROBE2(_STARPU_FUT_TAG_DEPS, tag_child, tag_father)
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DEPS, tag_child, tag_father)
 
 #define _STARPU_TRACE_TASK_DEPS(job_prev, job_succ)	\
-	_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id, (job_succ)->task->type, 1, "task")
+	_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id, (job_succ)->task->type, 1, "task")
 
 #define _STARPU_TRACE_GHOST_TASK_DEPS(ghost_prev_id, job_succ)		\
-	_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "ghost")
+	_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "ghost")
 
 #define _STARPU_TRACE_TASK_NAME(job)						\
 do {										\
@@ -700,23 +787,23 @@ do {										\
         const char *model_name = _starpu_job_get_task_name((job));                       \
 	if (model_name)					                        \
 	{									\
-		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 1, model_name);\
+		_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 1, model_name);\
 	}									\
 	else {									\
-		FUT_DO_PROBE4(_STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 0);\
+		FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), (long unsigned)exclude_from_dag, 0);\
 	}									\
 } while(0);
 
 #define _STARPU_TRACE_TASK_COLOR(job)						\
 do { \
 	if ((job)->task->color != 0) \
-		FUT_DO_PROBE3(_STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color, _starpu_gettid()); \
+		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color, _starpu_gettid()); \
 	else if ((job)->task->cl && (job)->task->cl->color != 0) \
-		FUT_DO_PROBE3(_STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color, _starpu_gettid()); \
+		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color, _starpu_gettid()); \
 } while(0)
 
 #define _STARPU_TRACE_TASK_DONE(job)						\
-	FUT_DO_PROBE2(_STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid())
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid())
 
 #define _STARPU_TRACE_TAG_DONE(tag)						\
 do {										\
@@ -724,266 +811,282 @@ do {										\
         const char *model_name = _starpu_job_get_task_name((job));                       \
 	if (model_name)                                                         \
 	{									\
-          _STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \
+          _STARPU_FUT_FULL_PROBE3STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \
 	}									\
 	else {									\
-		FUT_DO_PROBE3(_STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
+		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\
 	}									\
 } while(0);
 
 #define _STARPU_TRACE_DATA_NAME(handle, name) \
-	_STARPU_FUT_DO_PROBE1STR(_STARPU_FUT_DATA_NAME, handle, name)
+	_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_DATA_NAME, handle, name)
 
 #define _STARPU_TRACE_DATA_COORDINATES(handle, dim, v) do {\
 	if (_starpu_fxt_started) \
 	switch (dim) { \
-	case 1: FUT_DO_ALWAYS_PROBE3(_STARPU_FUT_DATA_COORDINATES, handle, dim, v[0]); break; \
-	case 2: FUT_DO_ALWAYS_PROBE4(_STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1]); break; \
-	case 3: FUT_DO_ALWAYS_PROBE5(_STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2]); break; \
-	case 4: FUT_DO_ALWAYS_PROBE6(_STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3]); break; \
-	default: FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3], v[4]); break; \
+	case 1: FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DATA_VERBOSE, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0]); break; \
+	case 2: FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DATA_VERBOSE, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1]); break; \
+	case 3: FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DATA_VERBOSE, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2]); break; \
+	case 4: FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DATA_VERBOSE, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3]); break; \
+	default: FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_DATA_VERBOSE, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3], v[4]); break; \
 	} \
 } while (0)
 
 #define _STARPU_TRACE_DATA_COPY(src_node, dst_node, size)	\
-	FUT_DO_PROBE3(_STARPU_FUT_DATA_COPY, src_node, dst_node, size)
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_COPY, src_node, dst_node, size)
 
 #define _STARPU_TRACE_DATA_WONT_USE(handle)						\
-	FUT_DO_PROBE4(_STARPU_FUT_DATA_WONT_USE, handle, _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id(), _starpu_gettid())
+	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_DATA_WONT_USE, handle, _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id(), _starpu_gettid())
 
 #define _STARPU_TRACE_DATA_DOING_WONT_USE(handle)						\
-	FUT_DO_PROBE1(_STARPU_FUT_DATA_DOING_WONT_USE, handle)
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_DOING_WONT_USE, handle)
 
 #define _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle) \
-	FUT_DO_PROBE6(_STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch, handle)
+	FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch, handle)
 
 #define _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch)	\
-	FUT_DO_PROBE5(_STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch)
+	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch)
 
 #define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node)	\
-	FUT_DO_PROBE2(_STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node)
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node)
 
 #define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node)	\
-	FUT_DO_PROBE2(_STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node)
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node)
 
 #define _STARPU_TRACE_WORK_STEALING(empty_q, victim_q)		\
-	FUT_DO_PROBE2(_STARPU_FUT_WORK_STEALING, empty_q, victim_q)
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_SCHED_VERBOSE, _STARPU_FUT_WORK_STEALING, empty_q, victim_q)
 
-#define _STARPU_TRACE_WORKER_DEINIT_START			\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid());
+#define _STARPU_TRACE_WORKER_DEINIT_START			do {\
+	if (_starpu_fxt_started) \
+		FUT_DO_ALWAYS_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid()); \
+} while(0)
 
-#define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		\
-	FUT_DO_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid());
+#define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		do {\
+	if (_starpu_fxt_started) \
+		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid()); \
+} while(0)
 
 #define _STARPU_TRACE_WORKER_SCHEDULING_START	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SCHEDULING_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_START, _starpu_gettid());
 
 #define _STARPU_TRACE_WORKER_SCHEDULING_END	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SCHEDULING_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_END, _starpu_gettid());
 
 #define _STARPU_TRACE_WORKER_SCHEDULING_PUSH	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SCHEDULING_PUSH, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_PUSH, _starpu_gettid());
 
 #define _STARPU_TRACE_WORKER_SCHEDULING_POP	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SCHEDULING_POP, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_POP, _starpu_gettid());
 
 #define _STARPU_TRACE_WORKER_SLEEP_START	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid());
 
 #define _STARPU_TRACE_WORKER_SLEEP_END	\
-	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_SUBMIT(job, iter, subiter)	\
-	FUT_DO_PROBE7(_STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, (job)->task->no_submitorder?0:_starpu_fxt_get_submit_order(), (job)->task->priority, (job)->task->type, _starpu_gettid());
+	FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, (job)->task->no_submitorder?0:_starpu_fxt_get_submit_order(), (job)->task->priority, (job)->task->type, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_SUBMIT_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_SUBMIT_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_SUBMIT_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_THROTTLE_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_THROTTLE_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_THROTTLE_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_THROTTLE_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_BUILD_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_BUILD_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_BUILD_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_BUILD_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_MPI_DECODE_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_MPI_DECODE_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_MPI_DECODE_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_MPI_DECODE_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_MPI_PRE_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_MPI_PRE_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_MPI_PRE_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_MPI_PRE_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_MPI_POST_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_MPI_POST_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_MPI_POST_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_MPI_POST_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_WAIT_START(job)	\
-	FUT_DO_PROBE2(_STARPU_FUT_TASK_WAIT_START, (job)->job_id, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_START, (job)->job_id, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_WAIT_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_WAIT_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_END, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_WAIT_FOR_ALL_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_START, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END()	\
-	FUT_DO_PROBE1(_STARPU_FUT_TASK_WAIT_FOR_ALL_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_END, _starpu_gettid());
 
 #define _STARPU_TRACE_USER_DEFINED_START	\
-	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_START, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_DEFINED_START, _starpu_gettid());
 
 #define _STARPU_TRACE_USER_DEFINED_END		\
-	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_DEFINED_END, _starpu_gettid());
 
 #define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch)               \
-       FUT_DO_PROBE5(_STARPU_FUT_START_ALLOC, memnode, _starpu_gettid(), size, handle, is_prefetch);
+       FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_ALLOC, memnode, _starpu_gettid(), size, handle, is_prefetch);
 
 #define _STARPU_TRACE_END_ALLOC(memnode, handle, r)            \
-       FUT_DO_PROBE4(_STARPU_FUT_END_ALLOC, memnode, _starpu_gettid(), handle, r);
+       FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_ALLOC, memnode, _starpu_gettid(), handle, r);
 
 #define _STARPU_TRACE_START_ALLOC_REUSE(memnode, size, handle, is_prefetch)         \
-       FUT_DO_PROBE5(_STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid(), size, handle, is_prefetch);
+       FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid(), size, handle, is_prefetch);
 
 #define _STARPU_TRACE_END_ALLOC_REUSE(memnode, handle, r)              \
-       FUT_DO_PROBE4(_STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid(), handle, r);
+       FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid(), handle, r);
 
 #define _STARPU_TRACE_START_FREE(memnode, size, handle)                \
-       FUT_DO_PROBE4(_STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size, handle);
+       FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size, handle);
 
 #define _STARPU_TRACE_END_FREE(memnode, handle)                \
-       FUT_DO_PROBE3(_STARPU_FUT_END_FREE, memnode, _starpu_gettid(), handle);
+       FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_FREE, memnode, _starpu_gettid(), handle);
 
 #define _STARPU_TRACE_START_WRITEBACK(memnode, handle)         \
-       FUT_DO_PROBE3(_STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid(), handle);
+       FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid(), handle);
 
 #define _STARPU_TRACE_END_WRITEBACK(memnode, handle)           \
-       FUT_DO_PROBE3(_STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid(), handle);
+       FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid(), handle);
 
 #define _STARPU_TRACE_USED_MEM(memnode,used)		\
-	FUT_DO_PROBE3(_STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid());
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid());
 
 #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch)		\
-	FUT_DO_PROBE3(_STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 
 #define _STARPU_TRACE_END_MEMRECLAIM(memnode, is_prefetch)		\
-	FUT_DO_PROBE3(_STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid());
 
 #define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_START_WRITEBACK_ASYNC, memnode, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK_ASYNC, memnode, _starpu_gettid());
 
 #define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_END_WRITEBACK_ASYNC, memnode, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK_ASYNC, memnode, _starpu_gettid());
 
 /* We skip these events because they are called so often that they cause FxT to
  * fail and make the overall trace unreadable anyway. */
 #define _STARPU_TRACE_START_PROGRESS(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_START_PROGRESS_ON_TID, memnode, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PROGRESS_ON_TID, memnode, _starpu_gettid());
 
 #define _STARPU_TRACE_END_PROGRESS(memnode)		\
-	FUT_DO_PROBE2(_STARPU_FUT_END_PROGRESS_ON_TID, memnode, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PROGRESS_ON_TID, memnode, _starpu_gettid());
 
 #define _STARPU_TRACE_USER_EVENT(code)			\
-	FUT_DO_PROBE2(_STARPU_FUT_USER_EVENT, code, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_EVENT, code, _starpu_gettid());
+
+#define _STARPU_TRACE_META(S)			\
+	FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_EVENT,S)
 
 #define _STARPU_TRACE_SET_PROFILING(status)		\
-	FUT_DO_PROBE2(_STARPU_FUT_SET_PROFILING, status, _starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_SET_PROFILING, status, _starpu_gettid());
 
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
-	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
+	FUT_FULL_PROBE0(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_WAIT_FOR_ALL)
+
+/* Emit a _STARPU_FUT_EVENT string probe for S when _starpu_fxt_started is set.
+ * No keymask argument is passed, unlike the FUT_FULL_PROBESTR-based variants.
+ * The probe call is explicitly terminated with ';' so that the expansion is a
+ * complete statement however the FxT probe macro itself is defined. */
+#define _STARPU_TRACE_EVENT_ALWAYS(S)			do {\
+	if (_starpu_fxt_started) \
+		FUT_DO_ALWAYS_PROBESTR(_STARPU_FUT_EVENT,S); \
+} while(0)
 
 #define _STARPU_TRACE_EVENT(S)			\
-	FUT_DO_PROBESTR(_STARPU_FUT_EVENT,S)
+	FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT, _STARPU_FUT_EVENT,S)
+
+#define _STARPU_TRACE_EVENT_VERBOSE(S)			\
+	FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT_VERBOSE, _STARPU_FUT_EVENT,S)
+
 
 #define _STARPU_TRACE_THREAD_EVENT(S)			\
-	_STARPU_FUT_DO_PROBE1STR(_STARPU_FUT_THREAD_EVENT, _starpu_gettid(), S)
+	_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_THREAD_EVENT, _starpu_gettid(), S)
 
 #define _STARPU_TRACE_HYPERVISOR_BEGIN()  \
-	FUT_DO_PROBE1(_STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid());
 
 #define _STARPU_TRACE_HYPERVISOR_END() \
-	FUT_DO_PROBE1(_STARPU_FUT_HYPERVISOR_END, _starpu_gettid());
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_END, _starpu_gettid());
 
 #ifdef STARPU_FXT_LOCK_TRACES
 
 /* Emit a _STARPU_FUT_LOCKING_MUTEX probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_LOCKING_MUTEX()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_LOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_LOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \
 } while (0)
 
 /* Emit a _STARPU_FUT_MUTEX_LOCKED probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_MUTEX_LOCKED()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_MUTEX_LOCKED,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_LOCKED,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_UNLOCKING_MUTEX probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_UNLOCKING_MUTEX()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_MUTEX_UNLOCKED probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_MUTEX_UNLOCKED()	do {\
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_MUTEX_UNLOCKED,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_UNLOCKED,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_TRYLOCK_MUTEX probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_TRYLOCK_MUTEX()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_TRYLOCK_MUTEX,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_TRYLOCK_MUTEX,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_RDLOCKING_RWLOCK probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_RDLOCKING_RWLOCK()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RDLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_RDLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_RWLOCK_RDLOCKED probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_RWLOCK_RDLOCKED()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RWLOCK_RDLOCKED,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_RDLOCKED,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_WRLOCKING_RWLOCK probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_WRLOCKING_RWLOCK()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_WRLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_WRLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_RWLOCK_WRLOCKED probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_RWLOCK_WRLOCKED()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RWLOCK_WRLOCKED,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_WRLOCKED,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_UNLOCKING_RWLOCK probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_UNLOCKING_RWLOCK()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_RWLOCK_UNLOCKED probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_RWLOCK_UNLOCKED()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_RWLOCK_UNLOCKED,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_UNLOCKED,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 #define STARPU_TRACE_SPINLOCK_CONDITITION (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER)
@@ -992,7 +1095,7 @@ do {										\
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
 		const char *xfile; \
 		xfile = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
@@ -1000,7 +1103,7 @@ do {										\
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
 		const char *xfile; \
 		xfile = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); \
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
@@ -1008,7 +1111,7 @@ do {										\
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
 		const char *xfile; \
 		xfile = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
@@ -1016,7 +1119,7 @@ do {										\
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
 		const char *xfile; \
 		xfile = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); \
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
@@ -1024,32 +1127,32 @@ do {										\
 	if (STARPU_TRACE_SPINLOCK_CONDITITION) { \
 		const char *xfile; \
 		xfile = strrchr(file,'/') + 1; \
-		_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); \
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); \
 	} \
 } while(0)
 
 /* Emit a _STARPU_FUT_COND_WAIT_BEGIN probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_COND_WAIT_BEGIN()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_COND_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_COND_WAIT_END probe carrying __LINE__, the thread id and
  * the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_COND_WAIT_END()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_COND_WAIT_END,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_END,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_BARRIER_WAIT_BEGIN probe carrying __LINE__, the thread id
  * and the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_BARRIER_WAIT_BEGIN()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_BARRIER_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 /* Emit a _STARPU_FUT_BARRIER_WAIT_END probe carrying __LINE__, the thread id
  * and the part of __FILE__ after its last '/'. When __FILE__ contains no '/',
  * strrchr() returns NULL, so the whole __FILE__ is used instead. */
 #define _STARPU_TRACE_BARRIER_WAIT_END()	do { \
 	const char *file; \
 	file = strrchr(__FILE__,'/'); \
 	file = file ? file + 1 : __FILE__; \
-	_STARPU_FUT_DO_PROBE2STR(_STARPU_FUT_BARRIER_WAIT_END,__LINE__,_starpu_gettid(),file); \
+	_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_END,__LINE__,_starpu_gettid(),file); \
 } while(0)
 
 #else // !STARPU_FXT_LOCK_TRACES
@@ -1078,34 +1181,34 @@ do {										\
 #endif // STARPU_FXT_LOCK_TRACES
 
 #define _STARPU_TRACE_MEMORY_FULL(size)	\
-	FUT_DO_PROBE2(_STARPU_FUT_MEMORY_FULL,size,_starpu_gettid());
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_MEMORY_FULL,size,_starpu_gettid());
 
 #define _STARPU_TRACE_DATA_LOAD(workerid,size)	\
-	FUT_DO_PROBE2(_STARPU_FUT_DATA_LOAD, workerid, size);
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_LOAD, workerid, size);
 
 #define _STARPU_TRACE_START_UNPARTITION(handle, memnode)		\
-	FUT_DO_PROBE3(_STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle);
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle);
 
 #define _STARPU_TRACE_END_UNPARTITION(handle, memnode)		\
-	FUT_DO_PROBE3(_STARPU_FUT_END_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle);
+	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len)		\
-	FUT_DO_PROBE4(_STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
+	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len)		\
-	FUT_DO_PROBE4(_STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
+	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_NEW(component)		\
-	_STARPU_FUT_DO_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name);
+	_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child)		\
-	FUT_DO_PROBE2(_STARPU_FUT_SCHED_COMPONENT_CONNECT, parent, child);
+	FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_CONNECT, parent, child);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task)		\
-	FUT_DO_PROBE5(_STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, (task)->priority);
+	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, (task)->priority);
 
 #define _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task)		\
-	FUT_DO_PROBE5(_STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority);
+	FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority);
 
 #define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle)	do {	\
 	const size_t __data_size = handle->ops->get_size(handle); \
@@ -1116,30 +1219,31 @@ do {										\
 		handle->ops->describe(__interface, __buf, sizeof(__buf)); \
 	else \
 		__buf[0] = 0; \
-	FUT_DO_PROBE4STR(_STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \
+	_STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \
 } while (0)
 
 #define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle)	\
-	FUT_DO_PROBE1(_STARPU_FUT_HANDLE_DATA_UNREGISTER, handle)
+	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_HANDLE_DATA_UNREGISTER, handle)
 
 //Coherency Data Traces
 #define _STARPU_TRACE_DATA_STATE_INVALID(handle, node)      \
-       FUT_DO_PROBE2(_STARPU_FUT_DATA_STATE_INVALID, handle, node)
+       FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_INVALID, handle, node)
 
 #define _STARPU_TRACE_DATA_STATE_OWNER(handle, node)           \
-       FUT_DO_PROBE2(_STARPU_FUT_DATA_STATE_OWNER, handle, node)
+       FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_OWNER, handle, node)
 
 #define _STARPU_TRACE_DATA_STATE_SHARED(handle, node)          \
-       FUT_DO_PROBE2(_STARPU_FUT_DATA_STATE_SHARED, handle, node)
+       FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_SHARED, handle, node)
 
 #define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre)          \
-       FUT_DO_PROBE5(_STARPU_FUT_DATA_REQUEST_CREATED, orig, dest, prio, handle, is_pre)
+       FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_REQUEST_CREATED, orig, dest, prio, handle, is_pre)
 
 
 #else // !STARPU_USE_FXT
 
 /* Dummy macros in case FxT is disabled */
 #define _STARPU_TRACE_NEW_MEM_NODE(nodeid)		do {(void)(nodeid);} while(0)
+#define _STARPU_TRACE_REGISTER_THREAD(cpuid)		do {(void)(cpuid);} while(0)
 #define _STARPU_TRACE_WORKER_INIT_START(a,b,c,d,e,f)	do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0)
 #define _STARPU_TRACE_WORKER_INIT_END(workerid)		do {(void)(workerid);} while(0)
 #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid) 	do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid);} while(0)
@@ -1218,7 +1322,9 @@ do {										\
 #define _STARPU_TRACE_USER_EVENT(code)			do {(void)(code);} while(0)
 #define _STARPU_TRACE_SET_PROFILING(status)		do {(void)(status);} while(0)
 #define _STARPU_TRACE_TASK_WAIT_FOR_ALL()		do {} while(0)
+#define _STARPU_TRACE_EVENT_ALWAYS(S)				do {(void)(S);} while(0)
 #define _STARPU_TRACE_EVENT(S)				do {(void)(S);} while(0)
+#define _STARPU_TRACE_EVENT_VERBOSE(S)				do {(void)(S);} while(0)
 #define _STARPU_TRACE_THREAD_EVENT(S)			do {(void)(S);} while(0)
 #define _STARPU_TRACE_LOCKING_MUTEX()			do {} while(0)
 #define _STARPU_TRACE_MUTEX_LOCKED()			do {} while(0)

+ 1 - 1
src/core/workers.c

@@ -769,7 +769,7 @@ void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsig
 	(void) devid;
 
 #ifdef STARPU_USE_FXT
-	_starpu_fxt_register_thread(worker->bindid);
+	_STARPU_TRACE_REGISTER_THREAD(worker->bindid);
 	_starpu_worker_start(worker, fut_key, sync);
 #endif
 	_starpu_set_local_worker_key(worker);