Przeglądaj źródła

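When a CPU worker executes a parallel task, the relevant archtype is not that of
the CPU worker itself but a combined CPU archtype. Record the archtype actually
used in the END_CODELET_BODY trace event instead of deriving it from the worker id.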

Cédric Augonnet 14 lat temu
rodzic
commit
bb0dc7df61

+ 1 - 0
include/starpu_util.h

@@ -166,6 +166,7 @@ void starpu_trace_user_event(unsigned long code);
 struct starpu_fxt_codelet_event {
 	char symbol[256]; /* name of the codelet */
 	int workerid;
+	enum starpu_perf_archtype archtype;
 	uint32_t hash;
 	size_t size;
 	float time;

+ 4 - 4
src/common/fxt.h

@@ -196,11 +196,11 @@ do {									\
 	}								\
 } while(0);
 
-#define STARPU_TRACE_END_CODELET_BODY(job)	\
-do {						\
+#define STARPU_TRACE_END_CODELET_BODY(job, archtype)			\
+do {									\
 	const size_t job_size = _starpu_job_get_data_size((job));	\
 	const uint32_t job_hash = _starpu_compute_buffers_footprint(job);\
-	FUT_DO_PROBE4(STARPU_FUT_END_CODELET_BODY, job, (job_size), (job_hash), syscall(SYS_gettid));	\
+	FUT_DO_PROBE5(STARPU_FUT_END_CODELET_BODY, job, (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
 } while(0);
 
 #define STARPU_TRACE_START_CALLBACK(job)	\
@@ -343,7 +343,7 @@ do {										\
 #define STARPU_TRACE_WORKER_INIT_START(a,b,c)	do {} while(0);
 #define STARPU_TRACE_WORKER_INIT_END		do {} while(0);
 #define STARPU_TRACE_START_CODELET_BODY(job)	do {} while(0);
-#define STARPU_TRACE_END_CODELET_BODY(job)	do {} while(0);
+#define STARPU_TRACE_END_CODELET_BODY(job, a)	do {} while(0);
 #define STARPU_TRACE_START_CALLBACK(job)	do {} while(0);
 #define STARPU_TRACE_END_CALLBACK(job)		do {} while(0);
 #define STARPU_TRACE_JOB_PUSH(task, prio)	do {} while(0);

+ 5 - 1
src/debug/traces/starpu_fxt.c

@@ -367,7 +367,7 @@ static struct starpu_fxt_codelet_event *dumped_codelets;
 static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	int worker;
-	worker = find_worker_id(ev->param[3]);
+	worker = find_worker_id(ev->param[4]);
 	if (worker < 0) return;
 
 	char *prefix = options->file_prefix;
@@ -390,11 +390,15 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 
 	if (options->dumped_codelets)
 	{
+		enum starpu_perf_archtype archtype = ev->param[3];
+
 		dumped_codelets_count++;
 		dumped_codelets = realloc(dumped_codelets, dumped_codelets_count*sizeof(struct starpu_fxt_codelet_event));
 
 		snprintf(dumped_codelets[dumped_codelets_count - 1].symbol, 256, "%s", last_codelet_symbol[worker]);
 		dumped_codelets[dumped_codelets_count - 1].workerid = worker;
+		dumped_codelets[dumped_codelets_count - 1].archtype = archtype;
+
 		dumped_codelets[dumped_codelets_count - 1].size = codelet_size;
 		dumped_codelets[dumped_codelets_count - 1].hash = codelet_hash;
 		dumped_codelets[dumped_codelets_count - 1].time = codelet_length;

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -87,7 +87,7 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args,
 	if (is_parallel_task)
 		PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
 
-	STARPU_TRACE_END_CODELET_BODY(j);
+	STARPU_TRACE_END_CODELET_BODY(j, perf_arch);
 
 	cpu_args->status = STATUS_UNKNOWN;
 

+ 3 - 2
src/drivers/cuda/driver_cuda.c

@@ -219,12 +219,13 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	if ((profiling && profiling_info) || calibrate_model)
 		starpu_clock_gettime(&codelet_end);
 
-	STARPU_TRACE_END_CODELET_BODY(j);	
+	enum starpu_perf_archtype archtype = args->perf_arch;
+	STARPU_TRACE_END_CODELET_BODY(j, archtype);
 	args->status = STATUS_UNKNOWN;
 
 	_starpu_push_task_output(task, mask);
 
-	_starpu_driver_update_job_feedback(j, args, profiling_info, args->perf_arch,
+	_starpu_driver_update_job_feedback(j, args, profiling_info, archtype,
 			&codelet_start, &codelet_end);
 
 	return 0;

+ 3 - 2
src/drivers/opencl/driver_opencl.c

@@ -546,12 +546,13 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 	if ((profiling && profiling_info) || calibrate_model)
 		starpu_clock_gettime(&codelet_end);
 
-	STARPU_TRACE_END_CODELET_BODY(j);
+	enum starpu_perf_archtype archtype = args->perf_arch;
+	STARPU_TRACE_END_CODELET_BODY(j, archtype);
 	args->status = STATUS_UNKNOWN;
 
 	_starpu_push_task_output(task, mask);
 
-	_starpu_driver_update_job_feedback(j, args, profiling_info, args->perf_arch,
+	_starpu_driver_update_job_feedback(j, args, profiling_info, archtype,
 							&codelet_start, &codelet_end);
 
 	return EXIT_SUCCESS;

+ 1 - 3
tools/starpu_regression_display.c

@@ -43,7 +43,6 @@ static struct starpu_fxt_options options;
 #endif
 
 static int archtype_is_found[STARPU_NARCH_VARIATIONS];
-static long dumped_per_archtype_count[STARPU_NARCH_VARIATIONS];
 
 static char data_file_name[256];
 static char gnuplot_file_name[256];
@@ -189,8 +188,7 @@ static void dump_data_file(FILE *data_file)
 	{
 		/* Dump only if the symbol matches user's request */
 		if (strcmp(dumped_codelets[i].symbol, symbol) == 0) {
-			int workerid = dumped_codelets[i].workerid;
-			enum starpu_perf_archtype archtype = options.worker_archtypes[workerid];
+			enum starpu_perf_archtype archtype = dumped_codelets[i].archtype;
 			archtype_is_found[archtype] = 1;
 
 			size_t size = dumped_codelets[i].size;