Browse Source

* Add codelet size, footprint and tag id in the paje trace.

Samuel Thibault 11 years ago
parent
commit
778b4c444a

+ 1 - 0
ChangeLog

@@ -67,6 +67,7 @@ Small features:
     information belonging to a given scheduler context
     information belonging to a given scheduler context
   * The option --enable-verbose can be called with
   * The option --enable-verbose can be called with
     --enable-verbose=extra to increase the verbosity
     --enable-verbose=extra to increase the verbosity
+  * Add codelet size, footprint and tag id in the paje trace.
 
 
 Changes:
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define
   * Data interfaces (variable, vector, matrix and block) now define

+ 6 - 0
configure.ac

@@ -1482,6 +1482,12 @@ if test x$use_fxt = xyes; then
 	AC_CHECK_DECLS([fut_set_filename])
 	AC_CHECK_DECLS([fut_set_filename])
 	CFLAGS="$save_CFLAGS"
 	CFLAGS="$save_CFLAGS"
 
 
+        AC_ARG_ENABLE(paje-codelet-details, [AS_HELP_STRING([--enable-paje-codelet-details],
+			[enable details about codelets in the paje trace])],
+			enable_paje_codelet_details=$enableval, enable_paje_codelet_details=no)
+        if  test x$enable_paje_codelet_details = xyes; then
+        	AC_DEFINE(STARPU_ENABLE_PAJE_CODELET_DETAILS, [1], [enable details about codelets in the paje trace])
+        fi
 	##########################################
 	##########################################
 	# Poti is a library to generate paje trace files
 	# Poti is a library to generate paje trace files
 	##########################################
 	##########################################

+ 3 - 0
doc/doxygen/chapters/13offline_performance_tools.doxy

@@ -118,6 +118,9 @@ $ vite paje.trace
 
 
 To get names of tasks instead of "unknown", fill the optional
 To get names of tasks instead of "unknown", fill the optional
 starpu_codelet::name, or use a performance model for them.
 starpu_codelet::name, or use a performance model for them.
+Details of the codelet execution (data size, footprint and tag id) can be
+obtained by passing <c>--enable-paje-codelet-details</c> to
+<c>./configure</c> and using a recent enough version of ViTE (at least r1430).
 
 
 In the MPI execution case, collect the trace files from the MPI nodes, and
 In the MPI execution case, collect the trace files from the MPI nodes, and
 specify them all on the command <c>starpu_fxt_tool</c>, for instance:
 specify them all on the command <c>starpu_fxt_tool</c>, for instance:

+ 8 - 0
doc/doxygen/chapters/41configure_options.doxy

@@ -372,6 +372,14 @@ Enable performance debugging through gprof.
 Enable performance model debugging.
 Enable performance model debugging.
 </dd>
 </dd>
 
 
+<dt>--enable-paje-codelet-details</dt>
+<dd>
+\anchor enable-paje-codelet-details
+\addindex __configure__--enable-paje-codelet-details
+Enable details about codelets in the paje trace. This requires a recent enough
+version of ViTE (at least r1430).
+</dd>
+
 <dt>--enable-fxt-lock</dt>
 <dt>--enable-fxt-lock</dt>
 <dd>
 <dd>
 \anchor enable-fxt-lock
 \anchor enable-fxt-lock

+ 8 - 1
src/common/fxt.h

@@ -108,6 +108,8 @@
 #define _STARPU_FUT_EVENT	0x513c
 #define _STARPU_FUT_EVENT	0x513c
 #define _STARPU_FUT_THREAD_EVENT	0x513d
 #define _STARPU_FUT_THREAD_EVENT	0x513d
 
 
+#define	_STARPU_FUT_CODELET_DETAILS	0x513e
+
 #define _STARPU_FUT_LOCKING_MUTEX	0x5140	
 #define _STARPU_FUT_LOCKING_MUTEX	0x5140	
 #define _STARPU_FUT_MUTEX_LOCKED	0x5141	
 #define _STARPU_FUT_MUTEX_LOCKED	0x5141	
 
 
@@ -404,7 +406,7 @@ do {									\
 #define _STARPU_TRACE_WORKER_INIT_END(workerid)				\
 #define _STARPU_TRACE_WORKER_INIT_END(workerid)				\
 	FUT_DO_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (workerid));
 	FUT_DO_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (workerid));
 
 
-#define _STARPU_TRACE_START_CODELET_BODY(job)				\
+#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, archtype)				\
 do {									\
 do {									\
         const char *model_name = _starpu_job_get_model_name((job));         \
         const char *model_name = _starpu_job_get_model_name((job));         \
 	if (model_name)                                                 \
 	if (model_name)                                                 \
@@ -415,6 +417,11 @@ do {									\
 	else {                                                          \
 	else {                                                          \
 		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, _starpu_gettid(), 0); \
 		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, _starpu_gettid(), 0); \
 	}								\
 	}								\
+	{								\
+		const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
+		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
+		FUT_DO_PROBE6(_STARPU_FUT_CODELET_DETAILS, (job), ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->tag_id, _starpu_gettid());	\
+	}								\
 } while(0);
 } while(0);
 
 
 #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, archtype)			\
 #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, archtype)			\

+ 49 - 3
src/debug/traces/starpu_fxt.c

@@ -275,6 +275,17 @@ static void worker_set_state(double time, const char *prefix, long unsigned int
 #endif
 #endif
 }
 }
 
 
+/*
+ * Set the paje "S" state of a worker thread, along with codelet details.
+ *
+ * With poti, only the state name is handed to poti_SetState(); size,
+ * footprint and tag are not used on that path.  Otherwise a "20" event line
+ * is written to out_paje_file, with the size in decimal and the footprint
+ * and tag in zero-padded hexadecimal.
+ */
+static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, unsigned long footprint, unsigned long long tag)
+{
+#ifdef STARPU_HAVE_POTI
+	char thread_alias[STARPU_POTI_STR_LEN];
+
+	thread_container_alias(thread_alias, STARPU_POTI_STR_LEN, prefix, workerid);
+	poti_SetState(time, thread_alias, "S", name);
+#else
+	fprintf(out_paje_file,
+		"20	%.9f	%st%lu	S	%s	%lu	%08lx	%016llx\n",
+		time, prefix, workerid, name,
+		size, footprint, tag);
+#endif
+}
+
 static void worker_push_state(double time, const char *prefix, long unsigned int workerid, const char *name)
 static void worker_push_state(double time, const char *prefix, long unsigned int workerid, const char *name)
 {
 {
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
@@ -631,11 +642,8 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 	int worker;
 	int worker;
 	worker = find_worker_id(ev->param[2]);
 	worker = find_worker_id(ev->param[2]);
 
 
-	unsigned sched_ctx = ev->param[1];
 	if (worker < 0) return;
 	if (worker < 0) return;
 
 
-	char *prefix = options->file_prefix;
-
 	unsigned long has_name = ev->param[3];
 	unsigned long has_name = ev->param[3];
 	char *name = has_name?(char *)&ev->param[4]:"unknown";
 	char *name = has_name?(char *)&ev->param[4]:"unknown";
 
 
@@ -646,8 +654,12 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 
 
 	create_paje_state_if_not_found(name, options);
 	create_paje_state_if_not_found(name, options);
 
 
+#ifndef STARPU_ENABLE_PAJE_CODELET_DETAILS
 	if (out_paje_file)
 	if (out_paje_file)
 	{
 	{
+		char *prefix = options->file_prefix;
+		unsigned sched_ctx = ev->param[1];
+
 		worker_set_state(start_codelet_time, prefix, ev->param[2], name);
 		worker_set_state(start_codelet_time, prefix, ev->param[2], name);
 		if (sched_ctx != 0)
 		if (sched_ctx != 0)
 		{
 		{
@@ -662,7 +674,38 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 #endif
 #endif
 		}
 		}
 	}
 	}
+#endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
+
+}
+
+/*
+ * Handle a _STARPU_FUT_CODELET_DETAILS event.
+ *
+ * Event parameters, in the order they are recorded by
+ * _STARPU_TRACE_START_CODELET_BODY:
+ *   param[0]: job, param[1]: scheduling context, param[2]: data size,
+ *   param[3]: buffers footprint, param[4]: tag id, param[5]: thread id.
+ *
+ * The state is emitted at the start time and with the symbol recorded for
+ * this worker in last_codelet_start[] / last_codelet_symbol[].  Nothing is
+ * done unless STARPU_ENABLE_PAJE_CODELET_DETAILS is defined.
+ */
+static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+#ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
+	int worker;
+	worker = find_worker_id(ev->param[5]);
 
 
+	unsigned sched_ctx = ev->param[1];
+	if (worker < 0) return;
+
+	char *prefix = options->file_prefix;
+
+	if (out_paje_file)
+	{
+		worker_set_detailed_state(last_codelet_start[worker], prefix, ev->param[5], last_codelet_symbol[worker], ev->param[2], ev->param[3], ev->param[4]);
+		if (sched_ctx != 0)
+		{
+#ifdef STARPU_HAVE_POTI
+			char container[STARPU_POTI_STR_LEN];
+			char ctx[6];
+			snprintf(ctx, sizeof(ctx), "Ctx%d", sched_ctx);
+			thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[5]);
+			poti_SetState(last_codelet_start[worker], container, ctx, last_codelet_symbol[worker]);
+#else
+			/* The container is named after the thread id (param[5]), as in
+			 * the "S" state above; size is printed in decimal and footprint
+			 * in hexadecimal, matching the Size/Footprint/Tag field order. */
+			fprintf(out_paje_file, "20	%.9f	%st%"PRIu64"	Ctx%d	%s	%lu	%08lx	%016llx\n", last_codelet_start[worker], prefix, ev->param[5], sched_ctx, last_codelet_symbol[worker], (unsigned long) ev->param[2], (unsigned long) ev->param[3], (unsigned long long) ev->param[4]);
+#endif
+		}
+	}
+#endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
 }
 }
 
 
 static long dumped_codelets_count;
 static long dumped_codelets_count;
@@ -1524,6 +1567,9 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 			case _STARPU_FUT_START_CODELET_BODY:
 			case _STARPU_FUT_START_CODELET_BODY:
 				handle_start_codelet_body(&ev, options);
 				handle_start_codelet_body(&ev, options);
 				break;
 				break;
+			case _STARPU_FUT_CODELET_DETAILS:
+				handle_codelet_details(&ev, options);
+				break;
 			case _STARPU_FUT_END_CODELET_BODY:
 			case _STARPU_FUT_END_CODELET_BODY:
 				handle_end_codelet_body(&ev, options);
 				handle_end_codelet_body(&ev, options);
 				break;
 				break;

+ 11 - 0
src/debug/traces/starpu_paje.c

@@ -130,6 +130,17 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	fprintf(file, "%%	DestContainer	string\n");
 	fprintf(file, "%%	DestContainer	string\n");
 	fprintf(file, "%%	Key	string\n");
 	fprintf(file, "%%	Key	string\n");
 	fprintf(file, "%%EndEventDef\n");
 	fprintf(file, "%%EndEventDef\n");
+#ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
+	fprintf(file, "%%EventDef PajeSetState 20\n");
+	fprintf(file, "%%	Time	date\n");
+	fprintf(file, "%%	Container	string\n");
+	fprintf(file, "%%	Type	string\n");
+	fprintf(file, "%%	Value	string\n");
+	fprintf(file, "%%	Size	string\n");
+	fprintf(file, "%%	Footprint	string\n");
+	fprintf(file, "%%	Tag	string\n");
+	fprintf(file, "%%EndEventDef\n");
+#endif
 #endif
 #endif
 
 
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -81,7 +81,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 	}
 	}
 
 
 	/* Give profiling variable */
 	/* Give profiling variable */
-	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
+	_starpu_driver_start_job(cpu_args, j, perf_arch, &codelet_start, rank, profiling);
 
 
 	/* In case this is a Fork-join parallel task, the worker does not
 	/* In case this is a Fork-join parallel task, the worker does not
 	 * execute the kernel at all. */
 	 * execute the kernel at all. */

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -396,7 +396,7 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
 		return -EAGAIN;
 		return -EAGAIN;
 	}
 	}
 
 
-	_starpu_driver_start_job(args, j, &j->cl_start, 0, profiling);
+	_starpu_driver_start_job(args, j, &args->perf_arch, &j->cl_start, 0, profiling);
 
 
 #if defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
 #if defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
 	/* We make sure we do manipulate the proper device */
 	/* We make sure we do manipulate the proper device */

+ 2 - 2
src/drivers/driver_common/driver_common.c

@@ -33,7 +33,7 @@
 #define BACKOFF_MAX 32  /* TODO : use parameter to define them */
 #define BACKOFF_MAX 32  /* TODO : use parameter to define them */
 #define BACKOFF_MIN 1
 #define BACKOFF_MIN 1
 
 
-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank, int profiling)
+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, struct timespec *codelet_start, int rank, int profiling)
 {
 {
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_codelet *cl = task->cl;
@@ -73,7 +73,7 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 	if (starpu_top)
 	if (starpu_top)
 		_starpu_top_task_started(task,workerid,codelet_start);
 		_starpu_top_task_started(task,workerid,codelet_start);
 
 
-	_STARPU_TRACE_START_CODELET_BODY(j);
+	_STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch);
 }
 }
 
 
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)

+ 2 - 2
src/drivers/driver_common/driver_common.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -23,7 +23,7 @@
 #include <core/jobs.h>
 #include <core/jobs.h>
 #include <common/utils.h>
 #include <common/utils.h>
 
 
-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 			      struct timespec *codelet_start, int rank, int profiling);
 			      struct timespec *codelet_start, int rank, int profiling);
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 			    struct timespec *codelet_end, int rank, int profiling);
 			    struct timespec *codelet_end, int rank, int profiling);

+ 1 - 1
src/drivers/mp_common/source_common.c

@@ -421,7 +421,7 @@ static int _starpu_src_common_execute(struct _starpu_job *j,
 
 
 	void (*kernel)(void)  = node->get_kernel_from_job(node,j);
 	void (*kernel)(void)  = node->get_kernel_from_job(node,j);
 
 
-	_starpu_driver_start_job(worker, j, &j->cl_start, 0, profiling);
+	_starpu_driver_start_job(worker, j, &worker->perf_arch, &j->cl_start, 0, profiling);
 
 
 
 
 	//_STARPU_DEBUG("\nworkerid:%d, rank:%d, type:%d,	cb_workerid:%d, task_size:%d\n\n",worker->devid,worker->current_rank,task->cl->type,j->combined_workerid,j->task_size);
 	//_STARPU_DEBUG("\nworkerid:%d, rank:%d, type:%d,	cb_workerid:%d, task_size:%d\n\n",worker->devid,worker->current_rank,task->cl->type,j->combined_workerid,j->task_size);

+ 1 - 1
src/drivers/opencl/driver_opencl.c

@@ -825,7 +825,7 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker
 		return -EAGAIN;
 		return -EAGAIN;
 	}
 	}
 
 
-	_starpu_driver_start_job(args, j, &j->cl_start, 0, profiling);
+	_starpu_driver_start_job(args, j, &args->perf_arch, &j->cl_start, 0, profiling);
 
 
 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
 	STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined");
 	STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined");