Browse Source

Add starpu_set_iteration and starpu_set_subiteration to describe tasks, for better offline traces analysis

Samuel Thibault 8 years ago
parent
commit
7d765d4d6f

+ 3 - 1
ChangeLog

@@ -62,7 +62,9 @@ New features:
   * Add modular-heft-prio scheduler.
   * Add modular-heft-prio scheduler.
   * Add starpu_cublas_get_local_handle helper.
   * Add starpu_cublas_get_local_handle helper.
   * Add starpu_data_set_name, starpu_data_set_coordinates_array, and
   * Add starpu_data_set_name, starpu_data_set_coordinates_array, and
-    starpu_data_set_coordinates to describe data.
+    starpu_data_set_coordinates to describe data, and starpu_set_iteration and
+    starpu_set_subiteration to describe tasks, for better offline trace
+    analysis.
 
 
 Changes:
 Changes:
   * Fix performance regression of lws for small tasks.
   * Fix performance regression of lws for small tasks.

+ 4 - 0
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -143,6 +143,10 @@ It can also set the starpu_task::name field of the task (or use \ref STARPU_NAME
 when using starpu_task_insert()), to replace in traces the name of the codelet
 when using starpu_task_insert()), to replace in traces the name of the codelet
 with an arbitrarily chosen name.
 with an arbitrarily chosen name.
 
 
+It can also set the iteration number, by calling starpu_set_iteration() at the
+beginning of each iteration of the outermost task submission loop. This number
+will then show up in traces for all tasks submitted from that point on.
+
 Coordinates can also be given to data with the starpu_data_set_coordinates() or
 Coordinates can also be given to data with the starpu_data_set_coordinates() or
 starpu_data_set_coordinates_array() function. In the trace, tasks will then be
 starpu_data_set_coordinates_array() function. In the trace, tasks will then be
 assigned the coordinates of the first data they write to.
 assigned the coordinates of the first data they write to.

+ 12 - 0
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -951,6 +951,18 @@ codelet implementation to be executed when executing \p task.
 Return the codelet implementation to be executed
 Return the codelet implementation to be executed
 when executing \p task.
 when executing \p task.
 
 
+\fn void starpu_set_iteration(unsigned long iteration)
+\ingroup API_Codelet_And_Tasks
+Set the iteration number for all the tasks to be submitted after this call.
+This is typically called at the beginning of each iteration of the main task
+submission loop. This number will then show up in tracing tools.
+
+\fn void starpu_set_subiteration(unsigned long subiteration)
+\ingroup API_Codelet_And_Tasks
+Set the subiteration number for all the tasks to be submitted after this call.
+This is typically called at the beginning of each iteration of the second-nested
+task submission loop. This number will then show up in tracing tools.
+
 \fn void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg)
 \fn void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg)
 \ingroup API_Codelet_And_Tasks
 \ingroup API_Codelet_And_Tasks
 Create (and submit) an empty task that unlocks a tag once all its dependencies are fulfilled.
 Create (and submit) an empty task that unlocks a tag once all its dependencies are fulfilled.

+ 3 - 1
examples/cg/cg.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010-2012, 2014-2017  Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -308,6 +308,8 @@ static int cg(void)
 		double delta_old;
 		double delta_old;
 		double alpha, beta;
 		double alpha, beta;
 
 
+		starpu_set_iteration(i);
+
 		/* q <- A d */
 		/* q <- A d */
 		gemv_kernel(q_handle, A_handle, d_handle, 0.0, 1.0, nblocks, use_reduction);
 		gemv_kernel(q_handle, A_handle, d_handle, 0.0, 1.0, nblocks, use_reduction);
 
 

+ 2 - 1
examples/cholesky/cholesky_grain_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2017  CNRS
  *
  *
@@ -190,6 +190,7 @@ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned
 
 
 	for (k = 0; k < nbigblocks; k++)
 	for (k = 0; k < nbigblocks; k++)
 	{
 	{
+		starpu_set_iteration(k);
 		struct starpu_task *task = create_task_11(dataA, k, reclevel);
 		struct starpu_task *task = create_task_11(dataA, k, reclevel);
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
 		if (k == 0)
 		if (k == 0)

+ 1 - 0
examples/cholesky/cholesky_implicit.c

@@ -59,6 +59,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
 	{
 	{
 		int ret;
 		int ret;
+		starpu_set_iteration(k);
                 starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k);
                 starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k);
 
 
                 ret = starpu_task_insert(&cl11,
                 ret = starpu_task_insert(&cl11,

+ 2 - 1
examples/cholesky/cholesky_tag.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  *
  *
@@ -171,6 +171,7 @@ static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
 	{
 	{
+		starpu_set_iteration(k);
 		struct starpu_task *task = create_task_11(dataA, k);
 		struct starpu_task *task = create_task_11(dataA, k);
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
 		if (k == 0)
 		if (k == 0)

+ 1 - 0
examples/cholesky/cholesky_tile_tag.c

@@ -167,6 +167,7 @@ static int cholesky_no_stride(void)
 
 
 	for (k = 0; k < nblocks_p; k++)
 	for (k = 0; k < nblocks_p; k++)
 	{
 	{
+		starpu_set_iteration(k);
 		struct starpu_task *task = create_task_11(k, nblocks_p);
 		struct starpu_task *task = create_task_11(k, nblocks_p);
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */
 		if (k == 0)
 		if (k == 0)

+ 2 - 1
examples/lu/xlu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010-2011, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2011, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  CNRS
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
  *
@@ -184,6 +184,7 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
 	{
 	{
+		starpu_set_iteration(k);
 		struct starpu_task *task = create_task_11(dataA, k);
 		struct starpu_task *task = create_task_11(dataA, k);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */

+ 3 - 1
examples/lu/xlu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010-2011, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2015, 2016  CNRS
  * Copyright (C) 2010, 2011, 2012, 2015, 2016  CNRS
  *
  *
@@ -127,6 +127,8 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	{
 	{
 		int ret;
 		int ret;
 
 
+		starpu_set_iteration(k);
+
 		ret = create_task_11(dataA, k);
 		ret = create_task_11(dataA, k);
 		if (ret == -ENODEV) return ret;
 		if (ret == -ENODEV) return ret;
 
 

+ 3 - 1
examples/lu/xlu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010-2012, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2016  CNRS
  * Copyright (C) 2010, 2011, 2012, 2016  CNRS
  *
  *
@@ -173,6 +173,8 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 	{
 	{
 		int ret;
 		int ret;
 
 
+		starpu_set_iteration(k);
+
 		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 		ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 		if (ret == -ENODEV) return ret;
 		if (ret == -ENODEV) return ret;
 
 

+ 2 - 1
examples/lu/xlu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2012, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009-2012, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012  CNRS
  * Copyright (C) 2010, 2011, 2012  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -247,6 +247,7 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
 	{
 	{
+		starpu_set_iteration(k);
 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
 
 
 		/* we defer the launch of the first task */
 		/* we defer the launch of the first task */

+ 3 - 1
examples/mandelbrot/mandelbrot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2011, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -544,6 +544,8 @@ int main(int argc, char **argv)
 		 * parallel task. */
 		 * parallel task. */
 		int per_block_cnt[nblocks_p];
 		int per_block_cnt[nblocks_p];
 
 
+		starpu_set_iteration(niter_p);
+
 		for (iby = 0; iby < nblocks_p; iby++)
 		for (iby = 0; iby < nblocks_p; iby++)
 		{
 		{
 			per_block_cnt[iby] = 0;
 			per_block_cnt[iby] = 0;

+ 2 - 1
examples/ppm_downscaler/yuv_downscaler.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011, 2013-2015  Université de Bordeaux
+ * Copyright (C) 2010-2011, 2013-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  *
@@ -218,6 +218,7 @@ int main(int argc, char **argv)
 	/* do the computation */
 	/* do the computation */
 	for (frame = 0; frame < nframes; frame++)
 	for (frame = 0; frame < nframes; frame++)
 	{
 	{
+		starpu_set_iteration(frame);
 		unsigned blocky;
 		unsigned blocky;
 		for (blocky = 0; blocky < nblocks_y; blocky++)
 		for (blocky = 0; blocky < nblocks_y; blocky++)
 		{
 		{

+ 2 - 1
examples/stencil/stencil-tasks.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2013-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2013-2015, 2017  Université de Bordeaux
  * Copyright (C) 2012, 2013, 2015, 2017  CNRS
  * Copyright (C) 2012, 2013, 2015, 2017  CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -296,6 +296,7 @@ void create_tasks(int rank)
 
 
 	for (iter = 0; iter <= niter; iter++)
 	for (iter = 0; iter <= niter; iter++)
 	{
 	{
+	     starpu_set_iteration(iter);
 	     for (bz = 0; bz < nbz; bz++)
 	     for (bz = 0; bz < nbz; bz++)
 	     {
 	     {
 		  if ((iter > 0) && (get_block_mpi_node(bz) == rank))
 		  if ((iter > 0) && (get_block_mpi_node(bz) == rank))

+ 3 - 0
include/starpu_task.h

@@ -317,6 +317,9 @@ int starpu_task_wait_for_no_ready(void);
 int starpu_task_nready(void);
 int starpu_task_nready(void);
 int starpu_task_nsubmitted(void);
 int starpu_task_nsubmitted(void);
 
 
+void starpu_set_iteration(unsigned long iteration);
+void starpu_set_subiteration(unsigned long subiteration);
+
 void starpu_do_schedule(void);
 void starpu_do_schedule(void);
 
 
 void starpu_codelet_init(struct starpu_codelet *cl);
 void starpu_codelet_init(struct starpu_codelet *cl);

+ 3 - 3
src/common/fxt.h

@@ -650,8 +650,8 @@ do {										\
 #define _STARPU_TRACE_WORKER_SLEEP_END	\
 #define _STARPU_TRACE_WORKER_SLEEP_END	\
 	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid());
 	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid());
 
 
-#define _STARPU_TRACE_TASK_SUBMIT(job)	\
+#define _STARPU_TRACE_TASK_SUBMIT(job, iter, subiter)	\
-	FUT_DO_PROBE2(_STARPU_FUT_TASK_SUBMIT, (job)->job_id, _starpu_gettid());
+	FUT_DO_PROBE4(_STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, _starpu_gettid());
 
 
 #define _STARPU_TRACE_TASK_SUBMIT_START()	\
 #define _STARPU_TRACE_TASK_SUBMIT_START()	\
 	FUT_DO_PROBE1(_STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid());
 	FUT_DO_PROBE1(_STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid());
@@ -1009,7 +1009,7 @@ do {										\
 #define _STARPU_TRACE_WORKER_SCHEDULING_POP		do {} while(0)
 #define _STARPU_TRACE_WORKER_SCHEDULING_POP		do {} while(0)
 #define _STARPU_TRACE_WORKER_SLEEP_START		do {} while(0)
 #define _STARPU_TRACE_WORKER_SLEEP_START		do {} while(0)
 #define _STARPU_TRACE_WORKER_SLEEP_END			do {} while(0)
 #define _STARPU_TRACE_WORKER_SLEEP_END			do {} while(0)
-#define _STARPU_TRACE_TASK_SUBMIT(job)			do {(void)(job);} while(0)
+#define _STARPU_TRACE_TASK_SUBMIT(job, a, b)			do {(void)(job); (void)(a);(void)(b);} while(0)
 #define _STARPU_TRACE_TASK_SUBMIT_START()		do {} while(0)
 #define _STARPU_TRACE_TASK_SUBMIT_START()		do {} while(0)
 #define _STARPU_TRACE_TASK_SUBMIT_END()			do {} while(0)
 #define _STARPU_TRACE_TASK_SUBMIT_END()			do {} while(0)
 #define _STARPU_TRACE_TASK_BUILD_START()		do {} while(0)
 #define _STARPU_TRACE_TASK_BUILD_START()		do {} while(0)

+ 2 - 0
src/core/sched_ctx.c

@@ -501,6 +501,8 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	_starpu_barrier_counter_init(&sched_ctx->ready_tasks_barrier, 0);
 	_starpu_barrier_counter_init(&sched_ctx->ready_tasks_barrier, 0);
 
 
 	sched_ctx->ready_flops = 0.0;
 	sched_ctx->ready_flops = 0.0;
+	sched_ctx->iteration = 0;
+	sched_ctx->subiteration = 0;
 	sched_ctx->main_master = -1;
 	sched_ctx->main_master = -1;
 	sched_ctx->perf_arch.devices = NULL;
 	sched_ctx->perf_arch.devices = NULL;
 	sched_ctx->perf_arch.ndevices = 0;
 	sched_ctx->perf_arch.ndevices = 0;

+ 3 - 0
src/core/sched_ctx.h

@@ -69,6 +69,9 @@ struct _starpu_sched_ctx
 	/* amount of ready flops in a context */
 	/* amount of ready flops in a context */
 	double ready_flops;
 	double ready_flops;
 
 
+	/* Iteration number, as advertised by application */
+	unsigned long iteration, subiteration;
+
 	/* cond to block push when there are no workers in the ctx */
 	/* cond to block push when there are no workers in the ctx */
 	starpu_pthread_cond_t no_workers_cond;
 	starpu_pthread_cond_t no_workers_cond;
 
 

+ 13 - 1
src/core/task.c

@@ -651,7 +651,9 @@ int starpu_task_submit(struct starpu_task *task)
 	}
 	}
 
 
 	if (!j->internal && !continuation)
 	if (!j->internal && !continuation)
-		_STARPU_TRACE_TASK_SUBMIT(j);
+		_STARPU_TRACE_TASK_SUBMIT(j,
+			_starpu_get_sched_ctx_struct(task->sched_ctx)->iteration,
+			_starpu_get_sched_ctx_struct(task->sched_ctx)->subiteration);
 
 
 	/* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */
 	/* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */
 	if (task->cl && !continuation)
 	if (task->cl && !continuation)
@@ -989,6 +991,16 @@ int starpu_task_wait_for_no_ready(void)
 	return 0;
 	return 0;
 }
 }
 
 
+void starpu_set_iteration(unsigned long iteration)
+{
+	_starpu_get_sched_ctx_struct(_starpu_sched_ctx_get_current_context())->iteration = iteration;
+}
+
+void starpu_set_subiteration(unsigned long subiteration)
+{
+	_starpu_get_sched_ctx_struct(_starpu_sched_ctx_get_current_context())->subiteration = subiteration;
+}
+
 void starpu_do_schedule(void)
 void starpu_do_schedule(void)
 {
 {
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	struct _starpu_machine_config *config = _starpu_get_machine_config();

+ 18 - 6
src/debug/traces/starpu_fxt.c

@@ -101,6 +101,8 @@ struct task_info {
 	double end_time;
 	double end_time;
 	unsigned long footprint;
 	unsigned long footprint;
 	unsigned long kflops;
 	unsigned long kflops;
+	long iteration;
+	long subiteration;
 	char *parameters;
 	char *parameters;
 	unsigned int ndeps;
 	unsigned int ndeps;
 	unsigned long *dependencies;
 	unsigned long *dependencies;
@@ -131,6 +133,8 @@ static struct task_info *get_task(unsigned long job_id, int mpi_rank)
 		task->end_time = 0.;
 		task->end_time = 0.;
 		task->footprint = 0;
 		task->footprint = 0;
 		task->kflops = 0.;
 		task->kflops = 0.;
+		task->iteration = -1;
+		task->subiteration = -1;
 		task->parameters = NULL;
 		task->parameters = NULL;
 		task->ndeps = 0;
 		task->ndeps = 0;
 		task->dependencies = NULL;
 		task->dependencies = NULL;
@@ -187,6 +191,10 @@ static void task_dump(unsigned long job_id, int mpi_rank)
 	fprintf(tasks_file, "Footprint: %lx\n", task->footprint);
 	fprintf(tasks_file, "Footprint: %lx\n", task->footprint);
 	if (task->kflops != 0)
 	if (task->kflops != 0)
 		fprintf(tasks_file, "GFlop: %f\n", ((double) task->kflops) / 1000000);
 		fprintf(tasks_file, "GFlop: %f\n", ((double) task->kflops) / 1000000);
+	if (task->iteration != -1)
+		fprintf(tasks_file, "Iteration: %ld\n", task->iteration);
+	if (task->subiteration != -1)
+		fprintf(tasks_file, "Subiteration: %ld\n", task->subiteration);
 	if (task->parameters)
 	if (task->parameters)
 	{
 	{
 		fprintf(tasks_file, "Parameters: %s\n", task->parameters);
 		fprintf(tasks_file, "Parameters: %s\n", task->parameters);
@@ -781,7 +789,7 @@ static void thread_pop_state(double time, const char *prefix, long unsigned int
 #endif
 #endif
 }
 }
 
 
-static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, const char *parameters, unsigned long footprint, unsigned long long tag, unsigned long job_id, double gflop, unsigned X, unsigned Y, unsigned Z)
+static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, const char *parameters, unsigned long footprint, unsigned long long tag, unsigned long job_id, double gflop, unsigned X, unsigned Y, unsigned Z, long iteration, long subiteration)
 {
 {
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
 	char container[STARPU_POTI_STR_LEN];
@@ -789,7 +797,7 @@ static void worker_set_detailed_state(double time, const char *prefix, long unsi
 	/* TODO: set detailed state */
 	/* TODO: set detailed state */
 	poti_SetState(time, container, "WS", name);
 	poti_SetState(time, container, "WS", name);
 #else
 #else
-	fprintf(out_paje_file, "20	%.9f	%sw%lu	WS	%s	%lu	%s	%08lx	%016llx	%lu	%f	%u	%u	%u\n", time, prefix, workerid, name, size, parameters, footprint, tag, job_id, gflop, X, Y, Z);
+	fprintf(out_paje_file, "20	%.9f	%sw%lu	WS	%s	%lu	%s	%08lx	%016llx	%lu	%f	%u	%u	%u	%ld	%ld\n", time, prefix, workerid, name, size, parameters, footprint, tag, job_id, gflop, X, Y, Z, iteration, subiteration);
 #endif
 #endif
 }
 }
 
 
@@ -1444,7 +1452,7 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 		char *prefix = options->file_prefix;
 		char *prefix = options->file_prefix;
 		unsigned sched_ctx = ev->param[0];
 		unsigned sched_ctx = ev->param[0];
 
 
-		worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], job_id, ((double) task->kflops) / 1000000, X, Y, Z);
+		worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], job_id, ((double) task->kflops) / 1000000, X, Y, Z, task->iteration, task->subiteration);
 		if (sched_ctx != 0)
 		if (sched_ctx != 0)
 		{
 		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
@@ -2296,10 +2304,14 @@ static void handle_task_deps(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 
 
 static void handle_task_submit(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 static void handle_task_submit(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 {
-	unsigned long job_id;
+	unsigned long job_id = ev->param[0];
-	job_id = ev->param[0];
+	unsigned long iteration = ev->param[1];
+	unsigned long subiteration = ev->param[2];
 
 
-	get_task(job_id, options->file_rank)->submit_time = get_event_time_stamp(ev, options);
+	struct task_info *task = get_task(job_id, options->file_rank);
+	task->submit_time = get_event_time_stamp(ev, options);
+	task->iteration = iteration;
+	task->subiteration = subiteration;
 }
 }
 
 
 static void handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 static void handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)

+ 2 - 0
src/debug/traces/starpu_paje.c

@@ -144,6 +144,8 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 	fprintf(file, "%%	X	string\n");
 	fprintf(file, "%%	X	string\n");
 	fprintf(file, "%%	Y	string\n");
 	fprintf(file, "%%	Y	string\n");
 	fprintf(file, "%%	Z	string\n");
 	fprintf(file, "%%	Z	string\n");
+	fprintf(file, "%%	Iteration	string\n");
+	fprintf(file, "%%	Subiteration	string\n");
 	fprintf(file, "%%EndEventDef\n");
 	fprintf(file, "%%EndEventDef\n");
 #endif
 #endif