Browse Source

merge from trunk

Olivier Aumage 8 years ago
parent
commit
d4656fcb50

+ 1 - 0
ChangeLog

@@ -273,6 +273,7 @@ Small features:
     allows to copy in a new buffer values which have not been unpacked by
     allows to copy in a new buffer values which have not been unpacked by
     the current call
     the current call
   * Add STARPU_CODELET_SIMGRID_EXECUTE flag.
   * Add STARPU_CODELET_SIMGRID_EXECUTE flag.
+  * Add STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT flag.
   * Add STARPU_CL_ARGS flag to starpu_task_insert() and
   * Add STARPU_CL_ARGS flag to starpu_task_insert() and
     starpu_mpi_task_insert() functions call
     starpu_mpi_task_insert() functions call
 
 

+ 3 - 2
doc/doxygen/chapters/320_scheduling.doxy

@@ -285,8 +285,9 @@ be used to get information about how well the execution proceeded, and thus the
 overall quality of the execution.
 overall quality of the execution.
 
 
 Precise debugging can also be performed by using the
 Precise debugging can also be performed by using the
-\ref STARPU_TASK_BREAK_ON_SCHED, \ref STARPU_TASK_BREAK_ON_PUSH, and
-\ref STARPU_TASK_BREAK_ON_POP environment variables. By setting the job_id of a task
+\ref STARPU_TASK_BREAK_ON_PUSH, \ref STARPU_TASK_BREAK_ON_SCHED,
+\ref STARPU_TASK_BREAK_ON_POP, and \ref STARPU_TASK_BREAK_ON_EXEC environment variables.
+By setting the job_id of a task
 in these environment variables, StarPU will raise <c>SIGTRAP</c> when the task is being
 in these environment variables, StarPU will raise <c>SIGTRAP</c> when the task is being
 scheduled, pushed, or popped by the scheduler. That means that when one notices
 scheduled, pushed, or popped by the scheduler. That means that when one notices
 that a task is being scheduled in a seemingly odd way, one can just reexecute
 that a task is being scheduled in a seemingly odd way, one can just reexecute

+ 2 - 1
doc/doxygen/chapters/470_simgrid.doxy

@@ -36,7 +36,8 @@ To be able to run the application with e.g. CUDA simulation on a system which
 does not have CUDA installed, one can fill the cuda_funcs with (void*)1, to
 does not have CUDA installed, one can fill the cuda_funcs with (void*)1, to
 express that there is a CUDA implementation, even if one does not actually
 express that there is a CUDA implementation, even if one does not actually
 provide it. StarPU will not actually run it in Simgrid mode anyway by default
 provide it. StarPU will not actually run it in Simgrid mode anyway by default
-(unless the ::STARPU_CODELET_SIMGRID_EXECUTE flag is set in the codelet)
+(unless the ::STARPU_CODELET_SIMGRID_EXECUTE or ::STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT
+flags are set in the codelet)
 
 
 \snippet simgrid.c To be included. You should update doxygen if you see this text.
 \snippet simgrid.c To be included. You should update doxygen if you see this text.
 
 

+ 20 - 10
doc/doxygen/chapters/501_environment_variables.doxy

@@ -642,9 +642,10 @@ especially regarding data transfers.
 <dd>
 <dd>
 \anchor STARPU_SIMGRID_SCHED_COST
 \anchor STARPU_SIMGRID_SCHED_COST
 \addindex __env__STARPU_SIMGRID_SCHED_COST
 \addindex __env__STARPU_SIMGRID_SCHED_COST
-When set to 1 (which is the default), scheduling costs are taken into
+When set to 1 (0 is the default), scheduling costs are taken into
 account in simgrid mode. This provides more accurate simgrid predictions,
 account in simgrid mode. This provides more accurate simgrid predictions,
-and allows studying scheduling overhead of the runtime system.
+and allows studying scheduling overhead of the runtime system. However,
+it also makes simulation non-deterministic.
 </dd>
 </dd>
 
 
 </dl>
 </dl>
@@ -1013,6 +1014,15 @@ dog is reached, thus allowing to catch the situation in gdb, etc
 (see \ref DetectionStuckConditions)
 (see \ref DetectionStuckConditions)
 </dd>
 </dd>
 
 
+<dt>STARPU_TASK_BREAK_ON_PUSH</dt>
+<dd>
+\anchor STARPU_TASK_BREAK_ON_PUSH
+\addindex __env__STARPU_TASK_BREAK_ON_PUSH
+When this variable contains a job id, StarPU will raise SIGTRAP when the task
+with that job id is being pushed to the scheduler, which will be nicely catched by debuggers
+(see \ref DebuggingScheduling)
+</dd>
+
 <dt>STARPU_TASK_BREAK_ON_SCHED</dt>
 <dt>STARPU_TASK_BREAK_ON_SCHED</dt>
 <dd>
 <dd>
 \anchor STARPU_TASK_BREAK_ON_SCHED
 \anchor STARPU_TASK_BREAK_ON_SCHED
@@ -1024,21 +1034,21 @@ This only works for schedulers which have such a scheduling point defined
 (see \ref DebuggingScheduling)
 (see \ref DebuggingScheduling)
 </dd>
 </dd>
 
 
-<dt>STARPU_TASK_BREAK_ON_PUSH</dt>
+<dt>STARPU_TASK_BREAK_ON_POP</dt>
 <dd>
 <dd>
-\anchor STARPU_TASK_BREAK_ON_PUSH
-\addindex __env__STARPU_TASK_BREAK_ON_PUSH
+\anchor STARPU_TASK_BREAK_ON_POP
+\addindex __env__STARPU_TASK_BREAK_ON_POP
 When this variable contains a job id, StarPU will raise SIGTRAP when the task
 When this variable contains a job id, StarPU will raise SIGTRAP when the task
-with that job id is being pushed to the scheduler, which will be nicely catched by debuggers
+with that job id is being popped from the scheduler, which will be nicely catched by debuggers
 (see \ref DebuggingScheduling)
 (see \ref DebuggingScheduling)
 </dd>
 </dd>
 
 
-<dt>STARPU_TASK_BREAK_ON_POP</dt>
+<dt>STARPU_TASK_BREAK_ON_EXEC</dt>
 <dd>
 <dd>
-\anchor STARPU_TASK_BREAK_ON_POP
-\addindex __env__STARPU_TASK_BREAK_ON_POP
+\anchor STARPU_TASK_BREAK_ON_EXEC
+\addindex __env__STARPU_TASK_BREAK_ON_EXEC
 When this variable contains a job id, StarPU will raise SIGTRAP when the task
 When this variable contains a job id, StarPU will raise SIGTRAP when the task
-with that job id is being popped from the scheduler, which will be nicely catched by debuggers
+with that job id is being executed, which will be nicely catched by debuggers
 (see \ref DebuggingScheduling)
 (see \ref DebuggingScheduling)
 </dd>
 </dd>
 
 

+ 5 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -135,6 +135,11 @@ Value to be set in starpu_codelet::opencl_flags to allow asynchronous OpenCL ker
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
 Value to be set in starpu_codelet::flags to execute the codelet functions even in simgrid mode.
 Value to be set in starpu_codelet::flags to execute the codelet functions even in simgrid mode.
 
 
+\def STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT
+\ingroup API_Codelet_And_Tasks
+Value to be set in starpu_codelet::flags to execute the codelet functions even in simgrid mode,
+and later inject the measured timing inside the simulation.
+
 \typedef starpu_cpu_func_t
 \typedef starpu_cpu_func_t
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
 CPU implementation of a codelet.
 CPU implementation of a codelet.

+ 4 - 1
include/fstarpu_mod.f90

@@ -82,6 +82,7 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_SCC
         type(c_ptr), bind(C) :: FSTARPU_SCC
 
 
         type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE
         type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE
+        type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT
         type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC
         type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC
         type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC
         type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC
 
 
@@ -2331,7 +2332,9 @@ module fstarpu_mod
                             fstarpu_get_constant(C_CHAR_"FSTARPU_SCC"//C_NULL_CHAR)
                             fstarpu_get_constant(C_CHAR_"FSTARPU_SCC"//C_NULL_CHAR)
 
 
                         FSTARPU_CODELET_SIMGRID_EXECUTE = &
                         FSTARPU_CODELET_SIMGRID_EXECUTE = &
-                            fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR)
+                             fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR)
+                        FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = &
+                             fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR)
                         FSTARPU_CUDA_ASYNC = &
                         FSTARPU_CUDA_ASYNC = &
                             fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR)
                             fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR)
                         FSTARPU_OPENCL_ASYNC = &
                         FSTARPU_OPENCL_ASYNC = &

+ 1 - 0
include/starpu_task.h

@@ -46,6 +46,7 @@ extern "C"
 #define STARPU_MPI_MS	((1ULL)<<9)
 #define STARPU_MPI_MS	((1ULL)<<9)
 
 
 #define STARPU_CODELET_SIMGRID_EXECUTE	(1<<0)
 #define STARPU_CODELET_SIMGRID_EXECUTE	(1<<0)
+#define STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT	(1<<1)
 #define STARPU_CUDA_ASYNC	(1<<0)
 #define STARPU_CUDA_ASYNC	(1<<0)
 #define STARPU_OPENCL_ASYNC	(1<<0)
 #define STARPU_OPENCL_ASYNC	(1<<0)
 
 

+ 9 - 9
include/starpu_thread.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2012-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2012-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -53,7 +53,6 @@ typedef int starpu_pthread_attr_t;
 int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2);
 int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2);
 starpu_pthread_t starpu_pthread_self(void);
 starpu_pthread_t starpu_pthread_self(void);
 int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, msg_host_t host);
 int starpu_pthread_create_on(char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, msg_host_t host);
-#define starpu_pthread_setname(name)
 int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
 int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
 int starpu_pthread_join(starpu_pthread_t thread, void **retval);
 int starpu_pthread_join(starpu_pthread_t thread, void **retval);
 int starpu_pthread_exit(void *retval) STARPU_ATTRIBUTE_NORETURN;
 int starpu_pthread_exit(void *retval) STARPU_ATTRIBUTE_NORETURN;
@@ -70,6 +69,14 @@ typedef pthread_attr_t starpu_pthread_attr_t;
 #define starpu_pthread_self pthread_self
 #define starpu_pthread_self pthread_self
 #define starpu_pthread_create pthread_create
 #define starpu_pthread_create pthread_create
 #define starpu_pthread_create_on(name, thread, attr, routine, arg, where) starpu_pthread_create(thread, attr, routine, arg)
 #define starpu_pthread_create_on(name, thread, attr, routine, arg, where) starpu_pthread_create(thread, attr, routine, arg)
+#define starpu_pthread_join pthread_join
+#define starpu_pthread_exit pthread_exit
+#define starpu_pthread_attr_init pthread_attr_init
+#define starpu_pthread_attr_destroy pthread_attr_destroy
+#define starpu_pthread_attr_setdetachstate pthread_attr_setdetachstate
+
+#endif /* STARPU_SIMGRID, _MSC_VER */
+
 #ifdef STARPU_HAVE_PTHREAD_SETNAME_NP
 #ifdef STARPU_HAVE_PTHREAD_SETNAME_NP
 #ifdef STARPU_HAVE_DARWIN
 #ifdef STARPU_HAVE_DARWIN
 #define starpu_pthread_setname(name) pthread_setname_np(name)
 #define starpu_pthread_setname(name) pthread_setname_np(name)
@@ -79,13 +86,6 @@ typedef pthread_attr_t starpu_pthread_attr_t;
 #else
 #else
 #define starpu_pthread_setname(name)
 #define starpu_pthread_setname(name)
 #endif
 #endif
-#define starpu_pthread_join pthread_join
-#define starpu_pthread_exit pthread_exit
-#define starpu_pthread_attr_init pthread_attr_init
-#define starpu_pthread_attr_destroy pthread_attr_destroy
-#define starpu_pthread_attr_setdetachstate pthread_attr_setdetachstate
-
-#endif /* STARPU_SIMGRID, _MSC_VER */
 
 
 /*
 /*
  * Encapsulation of the pthread_mutex_* functions.
  * Encapsulation of the pthread_mutex_* functions.

+ 2 - 2
src/core/jobs.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011, 2014, 2016  INRIA
  * Copyright (C) 2011, 2014, 2016  INRIA
@@ -88,7 +88,7 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas
 
 
 #ifndef STARPU_USE_FXT
 #ifndef STARPU_USE_FXT
 	if (_starpu_bound_recording || _starpu_top_status_get() ||
 	if (_starpu_bound_recording || _starpu_top_status_get() ||
-		_starpu_task_break_on_push != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_sched != -1
+		_starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1
 		|| STARPU_AYU_EVENT)
 		|| STARPU_AYU_EVENT)
 #endif
 #endif
 	{
 	{

+ 4 - 2
src/core/sched_policy.c

@@ -31,15 +31,17 @@ static double idle[STARPU_NMAXWORKERS];
 static double idle_start[STARPU_NMAXWORKERS];
 static double idle_start[STARPU_NMAXWORKERS];
 
 
 long _starpu_task_break_on_push = -1;
 long _starpu_task_break_on_push = -1;
-long _starpu_task_break_on_pop = -1;
 long _starpu_task_break_on_sched = -1;
 long _starpu_task_break_on_sched = -1;
+long _starpu_task_break_on_pop = -1;
+long _starpu_task_break_on_exec = -1;
 static const char *starpu_idle_file;
 static const char *starpu_idle_file;
 
 
 void _starpu_sched_init(void)
 void _starpu_sched_init(void)
 {
 {
 	_starpu_task_break_on_push = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_PUSH", -1);
 	_starpu_task_break_on_push = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_PUSH", -1);
-	_starpu_task_break_on_pop = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_POP", -1);
 	_starpu_task_break_on_sched = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_SCHED", -1);
 	_starpu_task_break_on_sched = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_SCHED", -1);
+	_starpu_task_break_on_pop = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_POP", -1);
+	_starpu_task_break_on_exec = starpu_get_env_number_default("STARPU_TASK_BREAK_ON_EXEC", -1);
 	starpu_idle_file = starpu_getenv("STARPU_IDLE_FILE");
 	starpu_idle_file = starpu_getenv("STARPU_IDLE_FILE");
 }
 }
 
 

+ 3 - 2
src/core/sched_policy.h

@@ -28,7 +28,7 @@
 
 
 #define _STARPU_SCHED_BEGIN \
 #define _STARPU_SCHED_BEGIN \
 	_STARPU_TRACE_WORKER_SCHEDULING_PUSH;	\
 	_STARPU_TRACE_WORKER_SCHEDULING_PUSH;	\
-	_SIMGRID_TIMER_BEGIN
+	_SIMGRID_TIMER_BEGIN(_starpu_simgrid_sched_cost())
 #define _STARPU_SCHED_END \
 #define _STARPU_SCHED_END \
 	_SIMGRID_TIMER_END;			\
 	_SIMGRID_TIMER_END;			\
 	_STARPU_TRACE_WORKER_SCHEDULING_POP
 	_STARPU_TRACE_WORKER_SCHEDULING_POP
@@ -103,8 +103,9 @@ extern struct starpu_sched_policy _starpu_sched_modular_heft2_policy;
 extern struct starpu_sched_policy _starpu_sched_graph_test_policy;
 extern struct starpu_sched_policy _starpu_sched_graph_test_policy;
 
 
 extern long _starpu_task_break_on_push;
 extern long _starpu_task_break_on_push;
-extern long _starpu_task_break_on_pop;
 extern long _starpu_task_break_on_sched;
 extern long _starpu_task_break_on_sched;
+extern long _starpu_task_break_on_pop;
+extern long _starpu_task_break_on_exec;
 
 
 #ifdef SIGTRAP
 #ifdef SIGTRAP
 #define _STARPU_TASK_BREAK_ON(task, what) do { \
 #define _STARPU_TASK_BREAK_ON(task, what) do { \

+ 4 - 4
src/core/simgrid.h

@@ -69,7 +69,7 @@ starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS];
 #define _starpu_simgrid_queue_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_QUEUE_MALLOC_COST", 1)
 #define _starpu_simgrid_queue_malloc_cost() starpu_get_env_number_default("STARPU_SIMGRID_QUEUE_MALLOC_COST", 1)
 #define _starpu_simgrid_task_submit_cost() starpu_get_env_number_default("STARPU_SIMGRID_TASK_SUBMIT_COST", 1)
 #define _starpu_simgrid_task_submit_cost() starpu_get_env_number_default("STARPU_SIMGRID_TASK_SUBMIT_COST", 1)
 #define _starpu_simgrid_fetching_input_cost() starpu_get_env_number_default("STARPU_SIMGRID_FETCHING_INPUT_COST", 1)
 #define _starpu_simgrid_fetching_input_cost() starpu_get_env_number_default("STARPU_SIMGRID_FETCHING_INPUT_COST", 1)
-#define _starpu_simgrid_sched_cost() starpu_get_env_number_default("STARPU_SIMGRID_SCHED_COST", 1)
+#define _starpu_simgrid_sched_cost() starpu_get_env_number_default("STARPU_SIMGRID_SCHED_COST", 0)
 
 
 /* Called at initialization to count how many GPUs are interfering with each
 /* Called at initialization to count how many GPUs are interfering with each
  * bus */
  * bus */
@@ -78,10 +78,10 @@ void _starpu_simgrid_count_ngpus(void);
 void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code,
 void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code,
 				       void *param);
 				       void *param);
 
 
-#define _SIMGRID_TIMER_BEGIN		\
+#define _SIMGRID_TIMER_BEGIN(cond)			\
 	{		\
 	{		\
 		xbt_os_timer_t __timer = NULL;		\
 		xbt_os_timer_t __timer = NULL;		\
-		if (_starpu_simgrid_sched_cost()) {		\
+		if (cond) {		\
 		  __timer = xbt_os_timer_new();		\
 		  __timer = xbt_os_timer_new();		\
 		  xbt_os_threadtimer_start(__timer);	\
 		  xbt_os_threadtimer_start(__timer);	\
 		}
 		}
@@ -94,7 +94,7 @@ void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code,
 	}
 	}
 
 
 #else // !STARPU_SIMGRID
 #else // !STARPU_SIMGRID
-#define _SIMGRID_TIMER_BEGIN {
+#define _SIMGRID_TIMER_BEGIN(cond) {
 #define _SIMGRID_TIMER_END }
 #define _SIMGRID_TIMER_END }
 #endif
 #endif
 
 

+ 6 - 6
src/debug/traces/starpu_paje.c

@@ -193,7 +193,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 	poti_DefineEntityValue("E", "S", "Executing", ".0 .6 .5");
 	poti_DefineEntityValue("E", "S", "Executing", ".0 .6 .5");
 	poti_DefineEntityValue("Sc", "S", "Scheduling", ".7 .36 .0");
 	poti_DefineEntityValue("Sc", "S", "Scheduling", ".7 .36 .0");
 	poti_DefineEntityValue("Sl", "S", "Sleeping", ".9 .1 .0");
 	poti_DefineEntityValue("Sl", "S", "Sleeping", ".9 .1 .0");
-	poti_DefineEntityValue("P", "S", "Progressing", ".0 .0 .4");
+	poti_DefineEntityValue("P", "S", "Progressing", ".1 .3 .1");
 	poti_DefineEntityValue("U", "S", "Unpartitioning", ".0 .0 1.0");
 	poti_DefineEntityValue("U", "S", "Unpartitioning", ".0 .0 1.0");
 	poti_DefineEntityValue("H", "S", "Hypervisor", ".5 .18 .0");
 	poti_DefineEntityValue("H", "S", "Hypervisor", ".5 .18 .0");
 	poti_DefineEntityValue("Bu", "S", "Building task", ".5 .18 .0");
 	poti_DefineEntityValue("Bu", "S", "Building task", ".5 .18 .0");
@@ -213,7 +213,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 	poti_DefineEntityValue("E", "WS", "Executing", ".0 .6 .5");
 	poti_DefineEntityValue("E", "WS", "Executing", ".0 .6 .5");
 	poti_DefineEntityValue("Sc", "WS", "Scheduling", ".7 .36 .0");
 	poti_DefineEntityValue("Sc", "WS", "Scheduling", ".7 .36 .0");
 	poti_DefineEntityValue("Sl", "WS", "Sleeping", ".9 .1 .0");
 	poti_DefineEntityValue("Sl", "WS", "Sleeping", ".9 .1 .0");
-	poti_DefineEntityValue("P", "WS", "Progressing", ".0 .0 .4");
+	poti_DefineEntityValue("P", "WS", "Progressing", ".1 .3 .1");
 	poti_DefineEntityValue("U", "WS", "Unpartitioning", ".0 .0 1.0");
 	poti_DefineEntityValue("U", "WS", "Unpartitioning", ".0 .0 1.0");
 	poti_DefineEntityValue("H", "WS", "Hypervisor", ".5 .18 .0");
 	poti_DefineEntityValue("H", "WS", "Hypervisor", ".5 .18 .0");
 	poti_DefineEntityValue("Bu", "WS", "Building task", ".5 .18 .0");
 	poti_DefineEntityValue("Bu", "WS", "Building task", ".5 .18 .0");
@@ -268,7 +268,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 		poti_DefineEntityValue("E", ctx, "Executing", ".0 .6 .5");
 		poti_DefineEntityValue("E", ctx, "Executing", ".0 .6 .5");
 		poti_DefineEntityValue("Sc", ctx, "Scheduling", ".7 .36 .0");
 		poti_DefineEntityValue("Sc", ctx, "Scheduling", ".7 .36 .0");
 		poti_DefineEntityValue("Sl", ctx, "Sleeping", ".9 .1 .0");
 		poti_DefineEntityValue("Sl", ctx, "Sleeping", ".9 .1 .0");
-		poti_DefineEntityValue("P", ctx, "Progressing", ".0 .0 .4");
+		poti_DefineEntityValue("P", ctx, "Progressing", ".1 .3 .1");
 		poti_DefineEntityValue("U", ctx, "Unpartitioning", ".0 .0 1.0");
 		poti_DefineEntityValue("U", ctx, "Unpartitioning", ".0 .0 1.0");
 		poti_DefineEntityValue("H", ctx, "Hypervisor", ".5 .18 .0");
 		poti_DefineEntityValue("H", ctx, "Hypervisor", ".5 .18 .0");
 	}
 	}
@@ -331,7 +331,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 6       E       S       Executing         \".0 .6 .5\"		\n\
 6       E       S       Executing         \".0 .6 .5\"		\n\
 6       Sc       S      Scheduling         \".7 .36 .0\"		\n\
 6       Sc       S      Scheduling         \".7 .36 .0\"		\n\
 6       Sl       S      Sleeping         \".9 .1 .0\"		\n\
 6       Sl       S      Sleeping         \".9 .1 .0\"		\n\
-6       P       S       Progressing         \".0 .0 .4\"		\n\
+6       P       S       Progressing         \".1 .3 .1\"		\n\
 6       U       S       Unpartitioning      \".0 .0 1.0\"		\n\
 6       U       S       Unpartitioning      \".0 .0 1.0\"		\n\
 6       H       S       Hypervisor      \".5 .18 .0\"		\n\
 6       H       S       Hypervisor      \".5 .18 .0\"		\n\
 6       Bu      S       \"Building task\"   \".5 .18 .0\"		\n\
 6       Bu      S       \"Building task\"   \".5 .18 .0\"		\n\
@@ -351,7 +351,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 6       E       WS       Executing         \".0 .6 .5\"		\n\
 6       E       WS       Executing         \".0 .6 .5\"		\n\
 6       Sc       WS      Scheduling         \".7 .36 .0\"		\n\
 6       Sc       WS      Scheduling         \".7 .36 .0\"		\n\
 6       Sl       WS      Sleeping         \".9 .1 .0\"		\n\
 6       Sl       WS      Sleeping         \".9 .1 .0\"		\n\
-6       P       WS       Progressing         \".0 .0 .4\"		\n\
+6       P       WS       Progressing         \".1 .3 .1\"		\n\
 6       U       WS       Unpartitioning      \".0 .0 1.0\"		\n\
 6       U       WS       Unpartitioning      \".0 .0 1.0\"		\n\
 6       H       WS       Hypervisor      \".5 .18 .0\"		\n\
 6       H       WS       Hypervisor      \".5 .18 .0\"		\n\
 6       Bu      WS       \"Building task\"   \".5 .18 .0\"		\n\
 6       Bu      WS       \"Building task\"   \".5 .18 .0\"		\n\
@@ -394,7 +394,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 6       E       Ctx%u       Executing         \".0 .6 .5\"		\n\
 6       E       Ctx%u       Executing         \".0 .6 .5\"		\n\
 6       Sc       Ctx%u      Scheduling         \".7 .36 .0\"		\n\
 6       Sc       Ctx%u      Scheduling         \".7 .36 .0\"		\n\
 6       Sl       Ctx%u      Sleeping         \".9 .1 .0\"		\n\
 6       Sl       Ctx%u      Sleeping         \".9 .1 .0\"		\n\
-6       P       Ctx%u       Progressing         \".0 .0 .4\"		\n\
+6       P       Ctx%u       Progressing         \".1 .3 .1\"		\n\
 6       U       Ctx%u       Unpartitioning         \".0 .0 1.0\"	\n\
 6       U       Ctx%u       Unpartitioning         \".0 .0 1.0\"	\n\
 6       H       Ctx%u       Hypervisor         \".5 .18 .0\"		\n",
 6       H       Ctx%u       Hypervisor         \".5 .18 .0\"		\n",
 		i, i, i, i, i, i, i, i, i, i, i, i, i);
 		i, i, i, i, i, i, i, i, i, i, i, i, i);

+ 6 - 0
src/drivers/cpu/driver_cpu.c

@@ -89,6 +89,12 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 			if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE)
 			if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE)
 				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+			else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT)
+			{
+				_SIMGRID_TIMER_BEGIN(1);
+				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+				_SIMGRID_TIMER_END;
+			}
 			else
 			else
 				_starpu_simgrid_submit_job(cpu_args->workerid, j, perf_arch, NAN, NULL);
 				_starpu_simgrid_submit_job(cpu_args->workerid, j, perf_arch, NAN, NULL);
 #else
 #else

+ 10 - 4
src/drivers/cuda/driver_cuda.c

@@ -507,6 +507,12 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worke
 		unsigned workerid = worker->workerid;
 		unsigned workerid = worker->workerid;
 		if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE && !async)
 		if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE && !async)
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 			func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+		else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT && !async)
+			{
+				_SIMGRID_TIMER_BEGIN(1);
+				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+				_SIMGRID_TIMER_END;
+			}
 		else
 		else
 			_starpu_simgrid_submit_job(workerid, j, &worker->perf_arch, NAN,
 			_starpu_simgrid_submit_job(workerid, j, &worker->perf_arch, NAN,
 				async ? &task_finished[workerid][pipeline_idx] : NULL);
 				async ? &task_finished[workerid][pipeline_idx] : NULL);
@@ -763,6 +769,7 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 		task = worker->task_transferring;
 		task = worker->task_transferring;
 		if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer)
 		if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer)
 		{
 		{
+			_STARPU_TRACE_END_PROGRESS(memnode);
 			j = _starpu_get_job_associated_to_task(task);
 			j = _starpu_get_job_associated_to_task(task);
 
 
 			_starpu_set_local_worker_key(worker);
 			_starpu_set_local_worker_key(worker);
@@ -779,10 +786,9 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 			}
 			}
 			else
 			else
 			{
 			{
-				_STARPU_TRACE_END_PROGRESS(memnode);
 				execute_job_on_cuda(task, worker);
 				execute_job_on_cuda(task, worker);
-				_STARPU_TRACE_START_PROGRESS(memnode);
 			}
 			}
+			_STARPU_TRACE_START_PROGRESS(memnode);
 		}
 		}
 
 
 		/* Then test for termination of queued tasks */
 		/* Then test for termination of queued tasks */
@@ -811,6 +817,7 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 		else
 		else
 #endif /* !STARPU_SIMGRID */
 #endif /* !STARPU_SIMGRID */
 		{
 		{
+			_STARPU_TRACE_END_PROGRESS(memnode);
 			/* Asynchronous task completed! */
 			/* Asynchronous task completed! */
 			_starpu_set_local_worker_key(worker);
 			_starpu_set_local_worker_key(worker);
 			finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker);
 			finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker);
@@ -831,11 +838,9 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 					 * flushing the pipeline, we can now at
 					 * flushing the pipeline, we can now at
 					 * last execute it.  */
 					 * last execute it.  */
 
 
-					_STARPU_TRACE_END_PROGRESS(memnode);
 					_STARPU_TRACE_EVENT("sync_task");
 					_STARPU_TRACE_EVENT("sync_task");
 					execute_job_on_cuda(task, worker);
 					execute_job_on_cuda(task, worker);
 					_STARPU_TRACE_EVENT("end_sync_task");
 					_STARPU_TRACE_EVENT("end_sync_task");
-					_STARPU_TRACE_START_PROGRESS(memnode);
 					worker->pipeline_stuck = 0;
 					worker->pipeline_stuck = 0;
 				}
 				}
 			}
 			}
@@ -848,6 +853,7 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 				/* Everybody busy */
 				/* Everybody busy */
 				_STARPU_TRACE_END_EXECUTING()
 				_STARPU_TRACE_END_EXECUTING()
 #endif
 #endif
+			_STARPU_TRACE_START_PROGRESS(memnode);
 		}
 		}
 
 
 		if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length)
 		if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length)

+ 1 - 0
src/drivers/driver_common/driver_common.c

@@ -101,6 +101,7 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job
 	}
 	}
 	else
 	else
 		_STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch, workerid);
 		_STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch, workerid);
+	_STARPU_TASK_BREAK_ON(task, exec);
 }
 }
 
 
 void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)
 void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)

+ 8 - 0
src/drivers/opencl/driver_opencl.c

@@ -954,6 +954,14 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker
 			simulate = 1;
 			simulate = 1;
 		#endif
 		#endif
 		}
 		}
+		else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT && !async)
+			{
+				_SIMGRID_TIMER_BEGIN(1);
+				func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+				_SIMGRID_TIMER_END;
+				simulate=0;
+			}
+
 		if (simulate)
 		if (simulate)
 			_starpu_simgrid_submit_job(worker->workerid, j, &worker->perf_arch, length,
 			_starpu_simgrid_submit_job(worker->workerid, j, &worker->perf_arch, length,
 						   async ? &task_finished[worker->devid][pipeline_idx] : NULL);
 						   async ? &task_finished[worker->devid][pipeline_idx] : NULL);

+ 2 - 0
src/util/fstarpu.c

@@ -85,6 +85,7 @@ static const intptr_t fstarpu_starpu_mic	= STARPU_MIC;
 static const intptr_t fstarpu_starpu_scc	= STARPU_SCC;
 static const intptr_t fstarpu_starpu_scc	= STARPU_SCC;
 
 
 static const intptr_t fstarpu_starpu_codelet_simgrid_execute	= STARPU_CODELET_SIMGRID_EXECUTE;
 static const intptr_t fstarpu_starpu_codelet_simgrid_execute	= STARPU_CODELET_SIMGRID_EXECUTE;
+static const intptr_t fstarpu_starpu_codelet_simgrid_execute_and_inject	= STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT;
 static const intptr_t fstarpu_starpu_cuda_async	= STARPU_CUDA_ASYNC;
 static const intptr_t fstarpu_starpu_cuda_async	= STARPU_CUDA_ASYNC;
 static const intptr_t fstarpu_starpu_opencl_async	= STARPU_OPENCL_ASYNC;
 static const intptr_t fstarpu_starpu_opencl_async	= STARPU_OPENCL_ASYNC;
 
 
@@ -153,6 +154,7 @@ intptr_t fstarpu_get_constant(char *s)
 	else if (!strcmp(s, "FSTARPU_SCC"))	{ return fstarpu_starpu_scc; }
 	else if (!strcmp(s, "FSTARPU_SCC"))	{ return fstarpu_starpu_scc; }
 
 
 	else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE"))	{ return fstarpu_starpu_codelet_simgrid_execute; }
 	else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE"))	{ return fstarpu_starpu_codelet_simgrid_execute; }
+	else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"))	{ return fstarpu_starpu_codelet_simgrid_execute_and_inject; }
 	else if (!strcmp(s, "FSTARPU_CUDA_ASYNC"))	{ return fstarpu_starpu_cuda_async; }
 	else if (!strcmp(s, "FSTARPU_CUDA_ASYNC"))	{ return fstarpu_starpu_cuda_async; }
 	else if (!strcmp(s, "FSTARPU_OPENCL_ASYNC"))	{ return fstarpu_starpu_opencl_async; }
 	else if (!strcmp(s, "FSTARPU_OPENCL_ASYNC"))	{ return fstarpu_starpu_opencl_async; }