Przeglądaj źródła

* Add codelet size, footprint and tag id in the paje trace.

Samuel Thibault 11 lat temu
rodzic
commit
778b4c444a

+ 1 - 0
ChangeLog

@@ -67,6 +67,7 @@ Small features:
     information belonging to a given scheduler context
   * The option --enable-verbose can be called with
     --enable-verbose=extra to increase the verbosity
+  * Add codelet size, footprint and tag id in the paje trace.
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define

+ 6 - 0
configure.ac

@@ -1482,6 +1482,12 @@ if test x$use_fxt = xyes; then
 	AC_CHECK_DECLS([fut_set_filename])
 	CFLAGS="$save_CFLAGS"
 
+        AC_ARG_ENABLE(paje-codelet-details, [AS_HELP_STRING([--enable-paje-codelet-details],
+			[enable details about codelets in the paje trace])],
+			enable_paje_codelet_details=$enableval, enable_paje_codelet_details=no)
+        if  test x$enable_paje_codelet_details = xyes; then
+        	AC_DEFINE(STARPU_ENABLE_PAJE_CODELET_DETAILS, [1], [enable details about codelets in the paje trace])
+        fi
 	##########################################
 	# Poti is a library to generate paje trace files
 	##########################################

+ 3 - 0
doc/doxygen/chapters/13offline_performance_tools.doxy

@@ -118,6 +118,9 @@ $ vite paje.trace
 
 To get names of tasks instead of "unknown", fill the optional
 starpu_codelet::name, or use a performance model for them.
+Details of the codelet execution can be obtained by passing
+<c>--enable-paje-codelet-details</c> and using a recent enough version of ViTE
+(at least r1430).
 
 In the MPI execution case, collect the trace files from the MPI nodes, and
 specify them all on the command <c>starpu_fxt_tool</c>, for instance:

+ 8 - 0
doc/doxygen/chapters/41configure_options.doxy

@@ -372,6 +372,14 @@ Enable performance debugging through gprof.
 Enable performance model debugging.
 </dd>
 
+<dt>--enable-paje-codelet-details</dt>
+<dd>
+\anchor enable-paje-codelet-details
+\addindex __configure__--enable-paje-codelet-details
+Enable details about codelets in the paje trace. This requires a recent enough
+version of ViTE (at least r1430).
+</dd>
+
 <dt>--enable-fxt-lock</dt>
 <dd>
 \anchor enable-fxt-lock

+ 8 - 1
src/common/fxt.h

@@ -108,6 +108,8 @@
 #define _STARPU_FUT_EVENT	0x513c
 #define _STARPU_FUT_THREAD_EVENT	0x513d
 
+#define	_STARPU_FUT_CODELET_DETAILS	0x513e
+
 #define _STARPU_FUT_LOCKING_MUTEX	0x5140	
 #define _STARPU_FUT_MUTEX_LOCKED	0x5141	
 
@@ -404,7 +406,7 @@ do {									\
 #define _STARPU_TRACE_WORKER_INIT_END(workerid)				\
 	FUT_DO_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (workerid));
 
-#define _STARPU_TRACE_START_CODELET_BODY(job)				\
+#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, archtype)				\
 do {									\
         const char *model_name = _starpu_job_get_model_name((job));         \
 	if (model_name)                                                 \
@@ -415,6 +417,11 @@ do {									\
 	else {                                                          \
 		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, _starpu_gettid(), 0); \
 	}								\
+	{								\
+		const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
+		const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
+		FUT_DO_PROBE6(_STARPU_FUT_CODELET_DETAILS, (job), ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->tag_id, _starpu_gettid());	\
+	}								\
 } while(0);
 
 #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, archtype)			\

+ 49 - 3
src/debug/traces/starpu_fxt.c

@@ -275,6 +275,17 @@ static void worker_set_state(double time, const char *prefix, long unsigned int
 #endif
 }
 
+static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, unsigned long footprint, unsigned long long tag)
+{
+#ifdef STARPU_HAVE_POTI
+	char container[STARPU_POTI_STR_LEN];
+	thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid);
+	poti_SetState(time, container, "S", name);
+#else
+	fprintf(out_paje_file, "20	%.9f	%st%lu	S	%s	%lu	%08lx	%016llx\n", time, prefix, workerid, name, size, footprint, tag);
+#endif
+}
+
 static void worker_push_state(double time, const char *prefix, long unsigned int workerid, const char *name)
 {
 #ifdef STARPU_HAVE_POTI
@@ -631,11 +642,8 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 	int worker;
 	worker = find_worker_id(ev->param[2]);
 
-	unsigned sched_ctx = ev->param[1];
 	if (worker < 0) return;
 
-	char *prefix = options->file_prefix;
-
 	unsigned long has_name = ev->param[3];
 	char *name = has_name?(char *)&ev->param[4]:"unknown";
 
@@ -646,8 +654,12 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 
 	create_paje_state_if_not_found(name, options);
 
+#ifndef STARPU_ENABLE_PAJE_CODELET_DETAILS
 	if (out_paje_file)
 	{
+		char *prefix = options->file_prefix;
+		unsigned sched_ctx = ev->param[1];
+
 		worker_set_state(start_codelet_time, prefix, ev->param[2], name);
 		if (sched_ctx != 0)
 		{
@@ -662,7 +674,38 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 #endif
 		}
 	}
+#endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
+
+}
+
+static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+#ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
+	int worker;
+	worker = find_worker_id(ev->param[5]);
 
+	unsigned sched_ctx = ev->param[1];
+	if (worker < 0) return;
+
+	char *prefix = options->file_prefix;
+
+	if (out_paje_file)
+	{
+		worker_set_detailed_state(last_codelet_start[worker], prefix, ev->param[5], last_codelet_symbol[worker], ev->param[2], ev->param[3], ev->param[4]);
+		if (sched_ctx != 0)
+		{
+#ifdef STARPU_HAVE_POTI
+			char container[STARPU_POTI_STR_LEN];
+			char ctx[6];
+			snprintf(ctx, sizeof(ctx), "Ctx%d", sched_ctx);
+			thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[5]);
+			poti_SetState(last_codelet_start[worker], container, ctx, last_codelet_symbol[worker]);
+#else
+			fprintf(out_paje_file, "20	%.9f	%st%"PRIu64"	Ctx%d	%s	%08lx	%lu	%016llx\n", last_codelet_start[worker], prefix, ev->param[2], sched_ctx, last_codelet_symbol[worker], (unsigned long) ev->param[2], (unsigned long) ev->param[3], (unsigned long long) ev->param[4]);
+#endif
+		}
+	}
+#endif /* STARPU_ENABLE_PAJE_CODELET_DETAILS */
 }
 
 static long dumped_codelets_count;
@@ -1524,6 +1567,9 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 			case _STARPU_FUT_START_CODELET_BODY:
 				handle_start_codelet_body(&ev, options);
 				break;
+			case _STARPU_FUT_CODELET_DETAILS:
+				handle_codelet_details(&ev, options);
+				break;
 			case _STARPU_FUT_END_CODELET_BODY:
 				handle_end_codelet_body(&ev, options);
 				break;

+ 11 - 0
src/debug/traces/starpu_paje.c

@@ -130,6 +130,17 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	fprintf(file, "%%	DestContainer	string\n");
 	fprintf(file, "%%	Key	string\n");
 	fprintf(file, "%%EndEventDef\n");
+#ifdef STARPU_ENABLE_PAJE_CODELET_DETAILS
+	fprintf(file, "%%EventDef PajeSetState 20\n");
+	fprintf(file, "%%	Time	date\n");
+	fprintf(file, "%%	Container	string\n");
+	fprintf(file, "%%	Type	string\n");
+	fprintf(file, "%%	Value	string\n");
+	fprintf(file, "%%	Size	string\n");
+	fprintf(file, "%%	Footprint	string\n");
+	fprintf(file, "%%	Tag	string\n");
+	fprintf(file, "%%EndEventDef\n");
+#endif
 #endif
 
 #ifdef STARPU_HAVE_POTI

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -81,7 +81,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 	}
 
 	/* Give profiling variable */
-	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
+	_starpu_driver_start_job(cpu_args, j, perf_arch, &codelet_start, rank, profiling);
 
 	/* In case this is a Fork-join parallel task, the worker does not
 	 * execute the kernel at all. */

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -396,7 +396,7 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *args)
 		return -EAGAIN;
 	}
 
-	_starpu_driver_start_job(args, j, &j->cl_start, 0, profiling);
+	_starpu_driver_start_job(args, j, &args->perf_arch, &j->cl_start, 0, profiling);
 
 #if defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
 	/* We make sure we do manipulate the proper device */

+ 2 - 2
src/drivers/driver_common/driver_common.c

@@ -33,7 +33,7 @@
 #define BACKOFF_MAX 32  /* TODO : use parameter to define them */
 #define BACKOFF_MIN 1
 
-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank, int profiling)
+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, struct timespec *codelet_start, int rank, int profiling)
 {
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
@@ -73,7 +73,7 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 	if (starpu_top)
 		_starpu_top_task_started(task,workerid,codelet_start);
 
-	_STARPU_TRACE_START_CODELET_BODY(j);
+	_STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch);
 }
 
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)

+ 2 - 2
src/drivers/driver_common/driver_common.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -23,7 +23,7 @@
 #include <core/jobs.h>
 #include <common/utils.h>
 
-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 			      struct timespec *codelet_start, int rank, int profiling);
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 			    struct timespec *codelet_end, int rank, int profiling);

+ 1 - 1
src/drivers/mp_common/source_common.c

@@ -421,7 +421,7 @@ static int _starpu_src_common_execute(struct _starpu_job *j,
 
 	void (*kernel)(void)  = node->get_kernel_from_job(node,j);
 
-	_starpu_driver_start_job(worker, j, &j->cl_start, 0, profiling);
+	_starpu_driver_start_job(worker, j, &worker->perf_arch, &j->cl_start, 0, profiling);
 
 
 	//_STARPU_DEBUG("\nworkerid:%d, rank:%d, type:%d,	cb_workerid:%d, task_size:%d\n\n",worker->devid,worker->current_rank,task->cl->type,j->combined_workerid,j->task_size);

+ 1 - 1
src/drivers/opencl/driver_opencl.c

@@ -825,7 +825,7 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker
 		return -EAGAIN;
 	}
 
-	_starpu_driver_start_job(args, j, &j->cl_start, 0, profiling);
+	_starpu_driver_start_job(args, j, &args->perf_arch, &j->cl_start, 0, profiling);
 
 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
 	STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined");