Explorar o código

- Rework the profiling API to use "struct timespec" rather than int64_t
counters.
- Factorize some code common to the different drivers into the
src/drivers/driver_common/ directory.
- Reduce the number of calls to the timing facilities
- Fix the implementation of starpu_worker_get_profiling_info so that we
properly handle the case of worker that are already sleeping when profiling
is enabled, or if the worker is still sleeping when the profiling info is
retrieved by the application.

Cédric Augonnet %!s(int64=15) %!d(string=hai) anos
pai
achega
ec7ac2c525

+ 17 - 14
examples/profiling/profiling.c

@@ -75,8 +75,8 @@ int main(int argc, char **argv)
 
 	starpu_task_wait_for_all();
 
-	int64_t delay_sum = 0;
-	int64_t length_sum = 0;
+	double delay_sum = 0.0;
+	double length_sum = 0.0;
 
 	for (i = 0; i < niter; i++)
 	{
@@ -84,12 +84,10 @@ int main(int argc, char **argv)
 		struct starpu_task_profiling_info *info = task->profiling_info;
 
 		/* How much time did it take before the task started ? */
-		int64_t delay = (info->start_time - info->submit_time);
-		delay_sum += delay;
+		delay_sum += starpu_timing_timespec_delay_us(&info->submit_time, &info->start_time);
 
 		/* How long was the task execution ? */
-		int64_t length = (info->end_time - info->start_time);
-		length_sum += length;
+		length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time);
 
 		/* We don't need the task structure anymore */
 		starpu_task_destroy(task);
@@ -97,25 +95,30 @@ int main(int argc, char **argv)
 
 	free(tasks);
 
-	fprintf(stderr, "Avg. delay : %2.2f us\n", ((double)delay_sum)/niter);
-	fprintf(stderr, "Avg. length : %2.2f us\n", ((double)length_sum)/niter);
+	fprintf(stderr, "Avg. delay : %2.2lf us\n", (delay_sum)/niter);
+	fprintf(stderr, "Avg. length : %2.2lf us\n", (length_sum)/niter);
 
 	/* Display the occupancy of all workers during the test */
 	int worker;
 	for (worker = 0; worker < starpu_worker_get_count(); worker++)
 	{
 		struct starpu_worker_profiling_info worker_info;
-		starpu_worker_get_profiling_info(worker, &worker_info);
+		int ret = starpu_worker_get_profiling_info(worker, &worker_info);
+		STARPU_ASSERT(!ret);
 
-		float executing_ratio = ((100.0*worker_info.executing_time)/worker_info.total_time);
-		float sleeping_ratio = ((100.0*worker_info.sleeping_time)/worker_info.total_time);
+		double total_time = starpu_timing_timespec_to_us(&worker_info.total_time);
+		double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time);
+		double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time);
+
+		float executing_ratio = 100.0*executing_time/total_time;
+		float sleeping_ratio = 100.0*sleeping_time/total_time;
 
 		char workername[128];
 		starpu_worker_get_name(worker, workername, 128);
 		fprintf(stderr, "Worker %s:\n", workername);
-		fprintf(stderr, "\ttotal time : %ld us\n", worker_info.total_time);
-		fprintf(stderr, "\texec time  : %ld us (%.2f %%)\n", worker_info.executing_time, executing_ratio);
-		fprintf(stderr, "\tblocked time  : %ld us (%.2f %%)\n", worker_info.sleeping_time, sleeping_ratio);
+		fprintf(stderr, "\ttotal time : %.2lf ms\n", total_time*1e-3);
+		fprintf(stderr, "\texec time  : %.2lf ms (%.2f %%)\n", executing_time*1e-3, executing_ratio);
+		fprintf(stderr, "\tblocked time  : %.2lf ms (%.2f %%)\n", sleeping_time*1e-3, sleeping_ratio);
 	}
 
 	starpu_shutdown();

+ 67 - 7
include/starpu_profiling.h

@@ -18,6 +18,7 @@
 #define __STARPU_PROFILING_H__
 
 #include <errno.h>
+#include <sys/time.h>
 #include <starpu.h>
 
 #define STARPU_PROFILING_DISABLE	0
@@ -26,19 +27,19 @@
 /* -ENOSYS is returned in case the info is not available. Timing are shown in
  * microseconds. */
 struct starpu_task_profiling_info {
-	int64_t submit_time;
-	int64_t start_time;
-	int64_t end_time;
+	struct timespec submit_time;
+	struct timespec start_time;
+	struct timespec end_time;
 	/* TODO add expected length, expected start/end ? */
 	int workerid;
 };
 
 /* The timing is provided since the previous call to starpu_worker_get_profiling_info */
 struct starpu_worker_profiling_info {
-	int64_t start_time;
-	int64_t total_time;
-	int64_t executing_time;
-	int64_t sleeping_time;
+	struct timespec start_time;
+	struct timespec total_time;
+	struct timespec executing_time;
+	struct timespec sleeping_time;
 	int executed_tasks;
 };
 
@@ -57,4 +58,63 @@ int starpu_profiling_status_get(void);
  * measurements. If worker_info is NULL, we only reset the counters. */
 int starpu_worker_get_profiling_info(int workerid, struct starpu_worker_profiling_info *worker_info);
 
+/* Some helper functions to manipulate profiling API output */
+/* Reset timespec */
+static inline void starpu_timespec_clear(struct timespec *tsp)
+{
+	tsp->tv_sec = 0;
+	tsp->tv_nsec = 0;
+}
+
+/* Computes result = a + b */
+static inline void starpu_timespec_add(struct timespec *a,
+					struct timespec *b,
+					struct timespec *result)
+{
+	result->tv_sec = a->tv_sec + b->tv_sec;
+	result->tv_nsec = a->tv_nsec + b->tv_nsec;
+
+	if (result->tv_nsec >= 1000000000)
+	{
+		++(result)->tv_sec;
+		result->tv_nsec -= 1000000000;
+	}
+}
+
+/* Computes res += b */
+static inline void starpu_timespec_accumulate(struct timespec *result,
+						struct timespec *a)
+{
+	result->tv_sec += a->tv_sec;
+	result->tv_nsec += a->tv_nsec;
+
+	if (result->tv_nsec >= 1000000000)
+	{
+		++(result)->tv_sec;
+		result->tv_nsec -= 1000000000;
+	}
+}
+
+/* Computes result = a - b */
+static inline void starpu_timespec_sub(struct timespec *a,
+					struct timespec *b,
+					struct timespec *result)
+{
+	result->tv_sec = a->tv_sec - b->tv_sec;
+	result->tv_nsec = a->tv_nsec - b->tv_nsec;
+
+	if ((result)->tv_nsec < 0)
+	{
+		--(result)->tv_sec;
+		result->tv_nsec += 1000000000;
+	}
+}
+
+#define starpu_timespec_cmp(a, b, CMP)                          \
+	(((a)->tv_sec == (b)->tv_sec) ? ((a)->tv_nsec CMP (b)->tv_nsec) : ((a)->tv_sec CMP (b)->tv_sec))
+
+/* Returns the time elapsed between start and end in microseconds */
+double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end);
+double starpu_timing_timespec_to_us(struct timespec *ts);
+
 #endif // __STARPU_PROFILING_H__

+ 2 - 0
src/Makefile.am

@@ -99,6 +99,7 @@ noinst_HEADERS = 						\
 	common/starpu_spinlock.h				\
 	common/fxt.h						\
 	common/utils.h						\
+	drivers/driver_common/driver_common.h			\
 	drivers/cpu/driver_cpu.h				\
 	drivers/gordon/driver_gordon.h				\
 	drivers/gordon/gordon_interface.h			\
@@ -145,6 +146,7 @@ libstarpu_la_SOURCES = 						\
 	core/policies/deque_modeling_policy.c			\
 	core/policies/random_policy.c				\
 	core/policies/deque_modeling_policy_data_aware.c	\
+	drivers/driver_common/driver_common.c			\
 	datawizard/memory_nodes.c				\
 	datawizard/write_back.c					\
 	datawizard/coherency.c					\

+ 10 - 6
src/common/timing.c

@@ -16,6 +16,10 @@
 
 #include "timing.h"
 
+#include <starpu.h>
+#include <common/config.h>
+#include <profiling/profiling.h>
+
 #ifdef HAVE_CLOCK_GETTIME
 #include <time.h>
 #ifndef _POSIX_C_SOURCE
@@ -70,7 +74,7 @@ void starpu_clock_gettime(struct timespec *ts)
 	__starpu_clock_gettime(&absolute_ts);
 
 	/* Compute the relative time since initialization */
-	_starpu_timespec_sub(&absolute_ts, &reference_start_time_ts, ts);
+	starpu_timespec_sub(&absolute_ts, &reference_start_time_ts, ts);
 }
 
 #else // !HAVE_CLOCK_GETTIME
@@ -165,20 +169,20 @@ void starpu_clock_gettime(struct timespec *ts)
 #endif // HAVE_CLOCK_GETTIME
 
 /* Returns the time elapsed between start and end in microseconds */
-double _starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
+double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
 {
 	struct timespec diff;
 	
-	_starpu_timespec_sub(end, start, &diff);
+	starpu_timespec_sub(end, start, &diff);
 
 	double us = (diff.tv_sec*1e6) + (diff.tv_nsec*1e-3);
 
 	return us;
 }
 
-double _starpu_timing_timespec_to_us(struct timespec *ts)
+double starpu_timing_timespec_to_us(struct timespec *ts)
 {
-	return (ts->tv_sec*1e6) + (ts->tv_nsec*1e-3);
+	return (1000000.0*ts->tv_sec) + (0.001*ts->tv_nsec);
 }
 
 double _starpu_timing_now(void)
@@ -186,5 +190,5 @@ double _starpu_timing_now(void)
 	struct timespec now;
 	starpu_clock_gettime(&now);
 
-	return _starpu_timing_timespec_to_us(&now);
+	return starpu_timing_timespec_to_us(&now);
 }

+ 0 - 28
src/common/timing.h

@@ -28,39 +28,11 @@
 #include <common/config.h>
 #include <starpu.h>
 
-/* Computes result = a + b */
-#define _starpu_timespec_add(a, b, result)				\
-	do {								\
-		(result)->tv_sec = (a)->tv_sec + (b)->tv_sec;		\
-   		(result)->tv_nsec = (a)->tv_nsec + (b)->tv_nsec; 	\
-		if ((result)->tv_nsec >= 1000000000)			\
-		{							\
-			++(result)->tv_sec;				\
-			(result)->tv_nsec -= 1000000000;		\
-		}							\
-	} while (0)
-
-
-/* Computes result = a - b */
-#define _starpu_timespec_sub(a, b, result)				\
-	do {								\
-		(result)->tv_sec = (a)->tv_sec - (b)->tv_sec;		\
-   		(result)->tv_nsec = (a)->tv_nsec - (b)->tv_nsec; 	\
-		if ((result)->tv_nsec < 0)				\
-		{							\
-			--(result)->tv_sec;				\
-			(result)->tv_nsec += 1000000000;		\
-		}							\
-	} while (0)
 
 void _starpu_timing_init(void);
 void starpu_clock_gettime(struct timespec *ts);
 double _starpu_timing_now(void);
 
-/* Returns the time elapsed between start and end in microseconds */
-double _starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end);
-double _starpu_timing_timespec_to_us(struct timespec *ts);
-
 #endif /* TIMING_H */
 
 

+ 3 - 1
src/core/errorcheck.h

@@ -30,7 +30,9 @@ typedef enum {
 	/* during the execution of a codelet */
 	STATUS_EXECUTING,
 	/* during the execution of the callback */
-	STATUS_CALLBACK
+	STATUS_CALLBACK,
+	/* while sleeping because there is nothing to do */
+	STATUS_SLEEPING
 } starpu_worker_status;
 
 void _starpu_set_local_worker_status(starpu_worker_status st);

+ 1 - 1
src/core/task.c

@@ -225,7 +225,7 @@ int starpu_task_submit(struct starpu_task *task)
 	task->status = STARPU_TASK_BLOCKED;
 	
 	if (info)
-		info->submit_time = (int64_t)_starpu_timing_now();
+		starpu_clock_gettime(&info->submit_time);
 
 	/* internally, StarPU manipulates a starpu_job_t which is a wrapper around a
 	* task structure, it is possible that this job structure was already

+ 23 - 8
src/core/workers.c

@@ -609,25 +609,40 @@ void starpu_worker_get_name(int id, char *dst, size_t maxlen)
 	snprintf(dst, maxlen, "%s", name);
 }
 
+starpu_worker_status _starpu_worker_get_status(int workerid)
+{
+	return config.workers[workerid].status;
+}
+
+void _starpu_worker_set_status(int workerid, starpu_worker_status status)
+{
+	config.workers[workerid].status = status;
+}
+
 /* TODO move in some driver/common/ directory */
 /* Workers may block when there is no work to do at all. We assume that the
  * mutex is hold when that function is called. */
 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex)
 {
-	int profiling;
-	
-	profiling = starpu_profiling_status_get();
-
-	int64_t start_time, end_time;
-	start_time = (int64_t)_starpu_timing_now();
+	struct timespec start_time, end_time;
 
 	STARPU_TRACE_WORKER_SLEEP_START
+	config.workers[workerid].status = STATUS_SLEEPING;
+
+	starpu_clock_gettime(&start_time);
+	_starpu_worker_register_sleeping_start_date(workerid, &start_time);
 
 	PTHREAD_COND_WAIT(cond, mutex);
-	end_time = (int64_t)_starpu_timing_now();
 
+	config.workers[workerid].status = STATUS_UNKNOWN;
 	STARPU_TRACE_WORKER_SLEEP_END
+	starpu_clock_gettime(&end_time);
 
+	int profiling = starpu_profiling_status_get();
 	if (profiling)
-		_starpu_worker_update_profiling_info(workerid, 0, end_time - start_time, 0);
+	{
+		struct timespec sleeping_time;
+		starpu_timespec_sub(&end_time, &start_time, &sleeping_time);
+		_starpu_worker_update_profiling_info_sleeping(workerid, &start_time, &end_time);
+	}
 }

+ 3 - 0
src/core/workers.h

@@ -166,6 +166,9 @@ struct starpu_worker_s *_starpu_get_worker_struct(unsigned id);
 
 struct starpu_machine_config_s *_starpu_get_machine_config(void);
 
+starpu_worker_status _starpu_worker_get_status(int workerid);
+void _starpu_worker_set_status(int workerid, starpu_worker_status status);
+
 /* TODO move */
 unsigned _starpu_execute_registered_progression_hooks(void);
 

+ 14 - 46
src/drivers/cpu/driver_cpu.c

@@ -1,6 +1,6 @@
 /*
  * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +18,7 @@
 #include <starpu.h>
 #include <starpu_profiling.h>
 #include <profiling/profiling.h>
-
+#include <drivers/driver_common/driver_common.h>
 #include <common/utils.h>
 #include <core/debug.h>
 #include "driver_cpu.h"
@@ -29,8 +29,6 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	int ret;
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start_comm, codelet_end_comm;
-	int64_t start_time;
-	int64_t end_time;
 
 	unsigned calibrate_model = 0;
 	int workerid = cpu_args->workerid;
@@ -43,12 +41,12 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
+	if (STARPU_BENCHMARK_COMM)
 		starpu_clock_gettime(&codelet_start_comm);
 
 	ret = _starpu_fetch_task_input(task, 0);
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
+	if (STARPU_BENCHMARK_COMM)
 		starpu_clock_gettime(&codelet_end_comm);
 
 	if (ret != 0) {
@@ -59,13 +57,14 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
 	STARPU_TRACE_START_CODELET_BODY(j);
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
-		starpu_clock_gettime(&codelet_start);
-
-	int profiling_status = starpu_profiling_status_get();
+	struct starpu_task_profiling_info *profiling_info;
+	profiling_info = task->profiling_info;
 
-	if (profiling_status)
-		start_time = (int64_t)_starpu_timing_now();
+	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	{
+		starpu_clock_gettime(&codelet_start);
+		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
+	}
 
 	cpu_args->status = STATUS_EXECUTING;
 	task->status = STARPU_TASK_RUNNING;	
@@ -75,47 +74,16 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 
 	cl->per_worker_stats[workerid]++;
 	
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
 		starpu_clock_gettime(&codelet_end);
 
-	if (profiling_status)
-		end_time = (int64_t)_starpu_timing_now();
-
-	struct starpu_task_profiling_info *profiling_info;
-	profiling_info = task->profiling_info;
-
-	if (profiling_info)
-	{
-		profiling_info->start_time = start_time;
-		profiling_info->end_time = end_time;
-		profiling_info->workerid = workerid;
-	}
-
-	if (profiling_status)
-		_starpu_worker_update_profiling_info(workerid, end_time - start_time, 0, 1);
-
 	STARPU_TRACE_END_CODELET_BODY(j);
 	cpu_args->status = STATUS_UNKNOWN;
 
 	_starpu_push_task_output(task, 0);
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
-	{
-		double measured = _starpu_timing_timespec_delay_us(&codelet_start, &codelet_end);
-		double measured_comm = _starpu_timing_timespec_delay_us(&codelet_start_comm, &codelet_end_comm);
-
-//		fprintf(stderr, "%d\t%d\n", (int)j->penality, (int)measured_comm);
-		cpu_args->jobq->total_computation_time += measured;
-		cpu_args->jobq->total_communication_time += measured_comm;
-
-		double error;
-		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0)); 
-//		fprintf(stderr, "Error -> %le, predicted -> %le measured ->%le\n", error, j->predicted, measured);
-		cpu_args->jobq->total_computation_time_error += error;
-
-		if (calibrate_model)
-			_starpu_update_perfmodel_history(j, cpu_args->arch, cpu_args->devid, measured);
-	}
+	_starpu_driver_update_job_feedback(j, cpu_args, profiling_info, calibrate_model,
+			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
 
 	(void)STARPU_ATOMIC_ADD(&cpu_args->jobq->total_job_performed, 1);
 

+ 14 - 43
src/drivers/cuda/driver_cuda.c

@@ -19,6 +19,7 @@
 #include <common/utils.h>
 #include <common/config.h>
 #include <core/debug.h>
+#include <drivers/driver_common/driver_common.h>
 #include "driver_cuda.h"
 #include <core/policies/sched_policy.h>
 #include <profiling/profiling.h>
@@ -92,8 +93,6 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	cudaError_t cures;
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start_comm, codelet_end_comm;
-	int64_t start_time;
-	int64_t end_time;
 
 	unsigned calibrate_model = 0;
 	int workerid = args->workerid;
@@ -122,7 +121,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 		return -EAGAIN;
 	}
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
+	if (STARPU_BENCHMARK_COMM)
 	{
 		cures = cudaThreadSynchronize();
 		if (STARPU_UNLIKELY(cures))
@@ -132,65 +131,37 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
 	STARPU_TRACE_START_CODELET_BODY(j);
 
-	int profiling_status = starpu_profiling_status_get();
+	struct starpu_task_profiling_info *profiling_info;
+	profiling_info = task->profiling_info;
 
-	if (profiling_status)
-		start_time = (int64_t)_starpu_timing_now();
+	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	{
+		starpu_clock_gettime(&codelet_start);
+		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
+	}
 
 	args->status = STATUS_EXECUTING;
 	task->status = STARPU_TASK_RUNNING;	
 
 	cl_func func = cl->cuda_func;
 	STARPU_ASSERT(func);
-	starpu_clock_gettime(&codelet_start);
 	func(task->interface, task->cl_arg);
 
 	cl->per_worker_stats[workerid]++;
 
-
-	if (profiling_status)
-		end_time = (int64_t)_starpu_timing_now();
-
-	struct starpu_task_profiling_info *profiling_info;
-	profiling_info = task->profiling_info;
-
-	if (profiling_info)
-	{
-		profiling_info->start_time = start_time;
-		profiling_info->end_time = end_time;
-		profiling_info->workerid = workerid;
-	}
-
-	if (profiling_status)
-		_starpu_worker_update_profiling_info(workerid, end_time - start_time, 0, 1);
-
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
+	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
 		starpu_clock_gettime(&codelet_end);
 
-	args->status = STATUS_UNKNOWN;
-
 	STARPU_TRACE_END_CODELET_BODY(j);	
+	args->status = STATUS_UNKNOWN;
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
-	{
-		double measured = _starpu_timing_timespec_delay_us(&codelet_start, &codelet_end);
-		double measured_comm = _starpu_timing_timespec_delay_us(&codelet_start_comm, &codelet_end_comm);
-
-		args->jobq->total_computation_time += measured;
-		args->jobq->total_communication_time += measured_comm;
-
-		double error;
-		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0)); 
-		args->jobq->total_computation_time_error += error;
+	_starpu_push_task_output(task, mask);
 
-		if (calibrate_model)
-			_starpu_update_perfmodel_history(j, args->perf_arch, (unsigned)args->devid, measured);
-	}
+	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
+			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
 
 	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
 
-	_starpu_push_task_output(task, mask);
-
 	return 0;
 }
 

+ 70 - 0
src/drivers/driver_common/driver_common.c

@@ -0,0 +1,70 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <math.h>
+#include <starpu.h>
+#include <starpu_profiling.h>
+#include <profiling/profiling.h>
+#include <common/utils.h>
+#include <core/debug.h>
+
+void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
+					struct starpu_task_profiling_info *profiling_info,
+					unsigned calibrate_model,
+					struct timespec *codelet_start, struct timespec *codelet_end,
+					struct timespec *codelet_start_comm, struct timespec *codelet_end_comm)
+{
+	struct timespec measured_ts;
+	struct timespec measured_comm_ts;
+	double measured;
+	double measured_comm;
+
+	if (profiling_info || calibrate_model)
+	{
+		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
+		measured = starpu_timing_timespec_to_us(&measured_ts);
+
+		worker_args->jobq->total_computation_time += measured;
+
+		double error;
+		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0)); 
+		worker_args->jobq->total_computation_time_error += error;
+
+		if (profiling_info)
+		{
+			memcpy(&profiling_info->start_time, codelet_start, sizeof(struct timespec));
+			memcpy(&profiling_info->end_time, codelet_end, sizeof(struct timespec));
+
+			int workerid = worker_args->workerid;
+			profiling_info->workerid = workerid;
+			
+			_starpu_worker_update_profiling_info_executing(workerid, &measured_ts, 1);
+		}
+
+		if (calibrate_model)
+			_starpu_update_perfmodel_history(j, worker_args->arch, worker_args->devid, measured);
+	}
+
+	if (STARPU_BENCHMARK_COMM)
+	{
+		starpu_timespec_sub(codelet_end_comm, codelet_start_comm, &measured_comm_ts);
+		measured_comm = starpu_timing_timespec_to_us(&measured_comm_ts);
+
+		worker_args->jobq->total_communication_time += measured_comm;
+	}
+
+	(void)STARPU_ATOMIC_ADD(&worker_args->jobq->total_job_performed, 1);
+}

+ 33 - 0
src/drivers/driver_common/driver_common.h

@@ -0,0 +1,33 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __DRIVER_COMMON_H__
+#define __DRIVER_COMMON_H__
+
+#include <sys/time.h>
+#include <starpu.h>
+#include <starpu_profiling.h>
+#include <core/jobs.h>
+#include <profiling/profiling.h>
+#include <common/utils.h>
+
+void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *worker_args,
+		struct starpu_task_profiling_info *profiling_info,
+		unsigned calibrate_model,
+		struct timespec *codelet_start, struct timespec *codelet_end,
+		struct timespec *codelet_start_comm, struct timespec *codelet_end_comm);
+
+#endif // __DRIVER_COMMON_H__

+ 16 - 42
src/drivers/opencl/driver_opencl.c

@@ -22,6 +22,7 @@
 #include <common/utils.h>
 #include <core/debug.h>
 #include <starpu_opencl.h>
+#include <drivers/driver_common/driver_common.h>
 #include "driver_opencl.h"
 #include "driver_opencl_utils.h"
 #include <common/utils.h>
@@ -132,7 +133,7 @@ int _starpu_opencl_copy_from_opencl(cl_mem buffer, void *ptr, size_t size, size_
         return EXIT_SUCCESS;
 }
 
-void _starpu_opencl_init()
+void _starpu_opencl_init(void)
 {
         if (!init_done) {
                 cl_platform_id platform_id[STARPU_OPENCL_PLATFORM_MAX];
@@ -351,8 +352,6 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start_comm, codelet_end_comm;
-	int64_t start_time;
-	int64_t end_time;
 
 	unsigned calibrate_model = 0;
 	int workerid = args->workerid;
@@ -379,7 +378,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 		return -EAGAIN;
 	}
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
+	if (STARPU_BENCHMARK_COMM)
 	{
                 //barrier(CLK_GLOBAL_MEM_FENCE);
 		starpu_clock_gettime(&codelet_end_comm);
@@ -387,62 +386,37 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
 	STARPU_TRACE_START_CODELET_BODY(j);
 
-	int profiling_status = starpu_profiling_status_get();
+	struct starpu_task_profiling_info *profiling_info;
+	profiling_info = task->profiling_info;
 
-	if (profiling_status)
-		start_time = (int64_t)_starpu_timing_now();
+	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+	{
+		starpu_clock_gettime(&codelet_start);
+		_starpu_worker_register_executing_start_date(workerid, &codelet_start);
+	}
 
 	args->status = STATUS_EXECUTING;
 	task->status = STARPU_TASK_RUNNING;	
 
 	cl_func func = cl->opencl_func;
 	STARPU_ASSERT(func);
-	starpu_clock_gettime(&codelet_start);
 	func(task->interface, task->cl_arg);
 
 	cl->per_worker_stats[workerid]++;
 
-	if (profiling_status)
-		end_time = (int64_t)_starpu_timing_now();
-
-	struct starpu_task_profiling_info *profiling_info;
-	profiling_info = task->profiling_info;
-
-	if (profiling_info)
-	{
-		profiling_info->start_time = start_time;
-		profiling_info->end_time = end_time;
-		profiling_info->workerid = workerid;
-	}
-
-	if (profiling_status)
-		_starpu_worker_update_profiling_info(workerid, end_time - start_time, 0, 1);
-
-	starpu_clock_gettime(&codelet_end);
-
-	args->status = STATUS_UNKNOWN;
+	if (profiling_info || calibrate_model || STARPU_BENCHMARK_COMM)
+		starpu_clock_gettime(&codelet_end);
 
 	STARPU_TRACE_END_CODELET_BODY(j);
+	args->status = STATUS_UNKNOWN;
 
-	if (calibrate_model || STARPU_BENCHMARK_COMM)
-	{
-		double measured = _starpu_timing_timespec_delay_us(&codelet_start, &codelet_end);
-		double measured_comm = _starpu_timing_timespec_delay_us(&codelet_start_comm, &codelet_end_comm);
-
-		args->jobq->total_computation_time += measured;
-		args->jobq->total_communication_time += measured_comm;
+	_starpu_push_task_output(task, mask);
 
-		double error;
-		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0));
-		args->jobq->total_computation_time_error += error;
-
-		if (calibrate_model)
-			_starpu_update_perfmodel_history(j, args->perf_arch, (unsigned)args->devid, measured);
-	}
+	_starpu_driver_update_job_feedback(j, args, profiling_info, calibrate_model,
+			&codelet_start, &codelet_end, &codelet_start_comm, &codelet_end_comm);
 
 	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
 
-	_starpu_push_task_output(task, mask);
 
 	return EXIT_SUCCESS;
 }

+ 116 - 20
src/profiling/profiling.c

@@ -17,6 +17,7 @@
 #include <starpu.h>
 #include <starpu_profiling.h>
 #include <profiling/profiling.h>
+#include <core/workers.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/timing.h>
@@ -26,6 +27,14 @@
 static struct starpu_worker_profiling_info worker_info[STARPU_NMAXWORKERS];
 static pthread_mutex_t worker_info_mutex[STARPU_NMAXWORKERS];
 
+/* In case the worker is still sleeping when the user request profiling info,
+ * we need to account for the time elasped while sleeping. */
+static unsigned worker_registered_sleeping_start[STARPU_NMAXWORKERS];
+static struct timespec sleeping_start_date[STARPU_NMAXWORKERS];
+
+static unsigned worker_registered_executing_start[STARPU_NMAXWORKERS];
+static struct timespec executing_start_date[STARPU_NMAXWORKERS];
+
 /*
  *	Global control of profiling
  */
@@ -46,12 +55,6 @@ int starpu_profiling_status_set(int status)
 		int worker;
 		for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
 			_starpu_worker_reset_profiling_info(worker);
-
-		/* In case there are blocked workers, we wake them up so that
-		 * the sleeping is measured from the time we activate
-		 * profiling, and not when the worker is awoken for the first
-		 * time later on. */
-		starpu_wake_all_blocked_workers();
 	}
 
 	return prev_value;
@@ -90,9 +93,9 @@ struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(voi
 		info = calloc(1, sizeof(struct starpu_task_profiling_info));
 		STARPU_ASSERT(info);
 
-		info->submit_time = -ENOSYS;
-		info->start_time = -ENOSYS;
-		info->end_time = -ENOSYS;
+		starpu_timespec_clear(&info->submit_time);
+		starpu_timespec_clear(&info->start_time);
+		starpu_timespec_clear(&info->end_time);
 	}
 
 	return info;
@@ -104,15 +107,38 @@ struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(voi
 
 static void _do_starpu_worker_reset_profiling_info(int workerid)
 {
-	worker_info[workerid].start_time = (int64_t)_starpu_timing_now();
+	starpu_clock_gettime(&worker_info[workerid].start_time);
 
 	/* This is computed in a lazy fashion when the application queries
 	 * profiling info. */
-	worker_info[workerid].total_time = -ENOSYS;
+	starpu_timespec_clear(&worker_info[workerid].total_time);
+
+	starpu_timespec_clear(&worker_info[workerid].executing_time);
+	starpu_timespec_clear(&worker_info[workerid].sleeping_time);
 
-	worker_info[workerid].executing_time = 0;
-	worker_info[workerid].sleeping_time = 0;
 	worker_info[workerid].executed_tasks = 0;
+	
+	/* We detect if the worker is already sleeping or doing some
+	 * computation */
+	starpu_worker_status status = _starpu_worker_get_status(workerid);
+
+	if (status == STATUS_SLEEPING)
+	{
+		worker_registered_sleeping_start[workerid] = 1;
+		starpu_clock_gettime(&sleeping_start_date[workerid]);
+	}
+	else {
+		worker_registered_sleeping_start[workerid] = 0;
+	}
+
+	if (status == STATUS_EXECUTING)
+	{
+		worker_registered_executing_start[workerid] = 1;
+		starpu_clock_gettime(&executing_start_date[workerid]);
+	}
+	else {
+		worker_registered_executing_start[workerid] = 0;
+	}
 }
 
 void _starpu_worker_reset_profiling_info(int workerid)
@@ -122,15 +148,65 @@ void _starpu_worker_reset_profiling_info(int workerid)
 	PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 }
 
-void _starpu_worker_update_profiling_info(int workerid, int64_t executing_time,
-					int64_t sleeping_time, int executed_tasks)
+void _starpu_worker_register_sleeping_start_date(int workerid, struct timespec *sleeping_start)
 {
 	if (profiling)
 	{
 		PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
-	
-		worker_info[workerid].executing_time += executing_time;
-		worker_info[workerid].sleeping_time += sleeping_time;
+		worker_registered_sleeping_start[workerid] = 1;	
+		memcpy(&sleeping_start_date[workerid], sleeping_start, sizeof(struct timespec));
+		PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+	}
+}
+
+void _starpu_worker_register_executing_start_date(int workerid, struct timespec *executing_start)
+{
+	if (profiling)
+	{
+		PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+		worker_registered_executing_start[workerid] = 1;	
+		memcpy(&executing_start_date[workerid], executing_start, sizeof(struct timespec));
+		PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+	}
+}
+
+void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec *sleeping_start, struct timespec *sleeping_end)
+{
+	if (profiling)
+	{
+		PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+
+                /* Perhaps that profiling was enabled while the worker was
+                 * already blocked, so we don't measure (end - start), but 
+                 * (end - max(start,worker_start)) where worker_start is the
+                 * date of the previous profiling info reset on the worker */
+		struct timespec *worker_start = &worker_info[workerid].start_time;
+		if (starpu_timespec_cmp(sleeping_start, worker_start, <))
+		{
+			/* sleeping_start < worker_start */
+			sleeping_start = worker_start;
+		}
+
+		struct timespec sleeping_time;
+		starpu_timespec_sub(sleeping_end, sleeping_start, &sleeping_time);
+
+		starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
+
+		worker_registered_sleeping_start[workerid] = 0;	
+
+		PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+	}
+}
+
+
+void _starpu_worker_update_profiling_info_executing(int workerid, struct timespec *executing_time, int executed_tasks)
+{
+	if (profiling)
+	{
+		PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+
+		starpu_timespec_accumulate(&worker_info[workerid].executing_time, executing_time);
+
 		worker_info[workerid].executed_tasks += executed_tasks;
 	
 		PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
@@ -147,8 +223,28 @@ int starpu_worker_get_profiling_info(int workerid, struct starpu_worker_profilin
 	if (info)
 	{
 		/* The total time is computed in a lazy fashion */
-		int64_t total_time = ((int64_t)_starpu_timing_now()) - worker_info[workerid].start_time;
-		worker_info[workerid].total_time = total_time;
+		struct timespec now;
+		starpu_clock_gettime(&now);
+
+		/* In case some worker is currently sleeping, we take into
+		 * account the time spent since it registered. */
+		if (worker_registered_sleeping_start[workerid])
+		{
+			struct timespec sleeping_time;
+			starpu_timespec_sub(&now, &sleeping_start_date[workerid], &sleeping_time);
+			starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
+		}
+
+		if (worker_registered_executing_start[workerid])
+		{
+			struct timespec executing_time;
+			starpu_timespec_sub(&now, &executing_start_date[workerid], &executing_time);
+			starpu_timespec_accumulate(&worker_info[workerid].executing_time, &executing_time);
+		}
+
+		/* total_time = now - start_time */
+		starpu_timespec_sub(&now, &worker_info[workerid].start_time,
+					&worker_info[workerid].total_time);
 
 		memcpy(info, &worker_info[workerid], sizeof(struct starpu_worker_profiling_info));
 	}

+ 5 - 1
src/profiling/profiling.h

@@ -17,12 +17,16 @@
 #ifndef __PROFILING_H__
 #define __PROFILING_H__
 
+#include <sys/time.h>
 #include <starpu.h>
 #include <starpu_profiling.h>
 #include <common/config.h>
 
 struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(void);
 void _starpu_worker_reset_profiling_info(int workerid);
-void _starpu_worker_update_profiling_info(int workerid, int64_t executing_time, int64_t sleeping_time, int executed_tasks);
+void _starpu_worker_update_profiling_info_executing(int workerid, struct timespec *executing_time, int executed_tasks);
+void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec *sleeping_start, struct timespec *sleeping_end);
+void _starpu_worker_register_sleeping_start_date(int workerid, struct timespec *sleeping_start);
+void _starpu_worker_register_executing_start_date(int workerid, struct timespec *executing_start);
 
 #endif // __PROFILING_H__