Explorar o código

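Factor out reading the profiling flag: each driver now reads it once per job and passes the same value to _starpu_driver_start_job, _starpu_driver_end_job and _starpu_driver_update_job_feedback. This avoids the case where profiling gets enabled (the flag turns to 1) in between those calls, so that job_feedback believes codelet_start/codelet_end were filled in while they were not.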

Samuel Thibault hai 13 anos
pai
achega
11e1a38f70

+ 6 - 3
src/drivers/cpu/driver_cpu.c

@@ -19,6 +19,7 @@
 
 #include <math.h>
 #include <starpu.h>
+#include <starpu_profiling.h>
 #include <drivers/driver_common/driver_common.h>
 #include <common/utils.h>
 #include <core/debug.h>
@@ -28,6 +29,7 @@
 static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_args, int is_parallel_task, int rank, enum starpu_perf_archtype perf_arch)
 {
 	int ret;
+	int profiling = starpu_profiling_status_get();
 	struct timespec codelet_start, codelet_end;
 
 	struct starpu_task *task = j->task;
@@ -49,7 +51,8 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 	if (is_parallel_task)
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
 
-	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank);
+	/* Give profiling variable */
+	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
 
 	/* In case this is a Fork-join parallel task, the worker does not
 	 * execute the kernel at all. */
@@ -66,7 +69,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid);
 	}
 
-	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank);
+	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank, profiling);
 
 	if (is_parallel_task)
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
@@ -74,7 +77,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 	if (rank == 0)
 	{
 		_starpu_driver_update_job_feedback(j, cpu_args,
-				perf_arch, &codelet_start, &codelet_end);
+				perf_arch, &codelet_start, &codelet_end, profiling);
 		_starpu_push_task_output(j, 0);
 	}
 

+ 5 - 3
src/drivers/cuda/driver_cuda.c

@@ -19,6 +19,7 @@
 
 #include <starpu.h>
 #include <starpu_cuda.h>
+#include <starpu_profiling.h>
 #include <common/utils.h>
 #include <common/config.h>
 #include <core/debug.h>
@@ -196,6 +197,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
 	struct timespec codelet_start, codelet_end;
 
+	int profiling = starpu_profiling_status_get();
 	unsigned calibrate_model = 0;
 
 	STARPU_ASSERT(task);
@@ -221,7 +223,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 			STARPU_CUDA_REPORT_ERROR(cures);
 	}
 
-	_starpu_driver_start_job(args, j, &codelet_start, 0);
+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
 
 #ifdef HAVE_CUDA_MEMCPY_PEER
 	/* We make sure we do manipulate the proper device */
@@ -234,9 +236,9 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 	STARPU_ASSERT(func);
 	func(task->interfaces, task->cl_arg);
 
-	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 
-	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
+	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end, profiling);
 
 	_starpu_push_task_output(j, mask);
 

+ 3 - 6
src/drivers/driver_common/driver_common.c

@@ -27,12 +27,11 @@
 #include <core/sched_policy.h>
 #include <top/starpu_top_core.h>
 
-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank)
+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank, int profiling)
 {
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_task_profiling_info *profiling_info;
-	int profiling = starpu_profiling_status_get();
 	int starpu_top=_starpu_top_status_get();
 	int workerid = args->workerid;
 	unsigned calibrate_model = 0;
@@ -65,12 +64,11 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 	_STARPU_TRACE_START_CODELET_BODY(j);
 }
 
-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank)
+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)
 {
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_task_profiling_info *profiling_info = task->profiling_info;
-	int profiling = starpu_profiling_status_get();
 	int starpu_top=_starpu_top_status_get();
 	int workerid = args->workerid;
 	unsigned calibrate_model = 0;
@@ -93,7 +91,7 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 }
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 					enum starpu_perf_archtype perf_arch,
-					struct timespec *codelet_start, struct timespec *codelet_end)
+					struct timespec *codelet_start, struct timespec *codelet_end, int profiling)
 {
 	struct starpu_task_profiling_info *profiling_info = j->task->profiling_info;
 	struct timespec measured_ts;
@@ -101,7 +99,6 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	int workerid = worker_args->workerid;
 	struct starpu_codelet *cl = j->task->cl;
 	int calibrate_model = 0;
-	int profiling = starpu_profiling_status_get();
 	int updated = 0;
 
 	if (cl->model && cl->model->benchmarking)

+ 4 - 4
src/drivers/driver_common/driver_common.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,12 +24,12 @@
 #include <common/utils.h>
 
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
-			      struct timespec *codelet_start, int rank);
+			      struct timespec *codelet_start, int rank, int profiling);
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch,
-			    struct timespec *codelet_end, int rank);
+			    struct timespec *codelet_end, int rank, int profiling);
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 					enum starpu_perf_archtype perf_arch,
-					struct timespec *codelet_start, struct timespec *codelet_end);
+					struct timespec *codelet_start, struct timespec *codelet_end, int profiling);
 
 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
 

+ 5 - 3
src/drivers/opencl/driver_opencl.c

@@ -19,6 +19,7 @@
 
 #include <math.h>
 #include <starpu.h>
+#include <starpu_profiling.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <core/debug.h>
@@ -578,6 +579,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 	STARPU_ASSERT(j);
 	struct starpu_task *task = j->task;
 
+	int profiling = starpu_profiling_status_get();
 	struct timespec codelet_start, codelet_end;
 
 	STARPU_ASSERT(task);
@@ -593,16 +595,16 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 		return -EAGAIN;
 	}
 
-	_starpu_driver_start_job(args, j, &codelet_start, 0);
+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
 
 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
 	STARPU_ASSERT(func);
 	func(task->interfaces, task->cl_arg);
 
-	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
-							&codelet_start, &codelet_end);
+					   &codelet_start, &codelet_end, profiling);
 
 	_starpu_push_task_output(j, mask);