Explorar o código

Fix paje trace when pipelining tasks and transfers

Samuel Thibault hai 8 anos
pai
achega
bfc1b8ff4e

+ 8 - 4
src/datawizard/coherency.c

@@ -1,5 +1,5 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures. *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2014  INRIA
  *
@@ -961,7 +961,8 @@ int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, in
 	{
 		worker->task_transferring = task;
 		worker->nb_buffers_transferred = 0;
-		_STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid);
+		if (worker->ntasks <= 1)
+			_STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid);
 	}
 	else
 		_STARPU_TRACE_START_FETCH_INPUT(NULL);
@@ -1093,10 +1094,13 @@ enomem:
 }
 
 /* This is to be called after having called _starpu_fetch_task_input with async=1 and getting the cb called as many times as there are buffers.  */
-int _starpu_release_fetch_task_input_async(struct _starpu_job *j, int workerid, int nbtransfers)
+int _starpu_release_fetch_task_input_async(struct _starpu_job *j, struct _starpu_worker *worker)
 {
+	unsigned workerid = worker->workerid;
+	unsigned nbtransfers = worker->nb_buffers_totransfer;
 	STARPU_RMB();
-	_STARPU_TRACE_WORKER_END_FETCH_INPUT(NULL, workerid);
+	if (worker->ntasks <= 1)
+		_STARPU_TRACE_WORKER_END_FETCH_INPUT(NULL, workerid);
 	struct starpu_task *task = j->task;
 
 	struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);

+ 3 - 2
src/datawizard/coherency.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2017  CNRS
  * Copyright (C) 2014-2016  Inria
  *
@@ -300,9 +300,10 @@ void _starpu_push_task_output(struct _starpu_job *j);
 
 void _starpu_release_nowhere_task_output(struct _starpu_job *j);
 
+struct _starpu_worker;
 STARPU_ATTRIBUTE_WARN_UNUSED_RESULT
 int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, int async);
-int _starpu_release_fetch_task_input_async(struct _starpu_job *j, int workerid, int nbtransfers);
+int _starpu_release_fetch_task_input_async(struct _starpu_job *j, struct _starpu_worker *worker);
 void _starpu_fetch_nowhere_task_input(struct _starpu_job *j);
 
 unsigned _starpu_is_data_present_or_requested(struct _starpu_data_state *state, unsigned node);

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -328,7 +328,7 @@ int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker)
 	{
 		j = _starpu_get_job_associated_to_task(pending_task);
 
-		_starpu_release_fetch_task_input_async(j, workerid, cpu_worker->nb_buffers_totransfer);
+		_starpu_release_fetch_task_input_async(j, cpu_worker);
 		/* Reset it */
 		cpu_worker->task_transferring = NULL;
 

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -760,7 +760,7 @@ int _starpu_cuda_driver_run_once(struct _starpu_worker_set *worker_set)
 		{
 			j = _starpu_get_job_associated_to_task(task);
 
-			_starpu_release_fetch_task_input_async(j, workerid, worker->nb_buffers_totransfer);
+			_starpu_release_fetch_task_input_async(j, worker);
 			/* Reset it */
 			worker->task_transferring = NULL;
 

+ 1 - 1
src/drivers/mp_common/source_common.c

@@ -956,7 +956,7 @@ static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set *
 			struct _starpu_job * j = _starpu_get_job_associated_to_task(task);
 
 			_starpu_set_local_worker_key(&worker_set->workers[i]);
-			_starpu_release_fetch_task_input_async(j, worker_set->workers[i].workerid, worker_set->workers[i].nb_buffers_totransfer);
+			_starpu_release_fetch_task_input_async(j, &worker_set->workers[i]);
 
 			/* Execute the task */
 			res =  _starpu_src_common_execute(j, &worker_set->workers[i], mp_node);

+ 1 - 1
src/drivers/opencl/driver_opencl.c

@@ -699,7 +699,7 @@ int _starpu_opencl_driver_run_once(struct _starpu_worker *worker)
 	{
 		j = _starpu_get_job_associated_to_task(task);
 
-		_starpu_release_fetch_task_input_async(j, workerid, worker->nb_buffers_totransfer);
+		_starpu_release_fetch_task_input_async(j, worker);
 		/* Reset it */
 		worker->task_transferring = NULL;