Quellcode durchsuchen

Add profiling capabilities to the workers. It is now possible to query a worker
to know the time it has spent doing computation or idling for instance.
The starpu_profiling_status_set now permits to enable or disable profiling and
replace starpu_profiling_{enable, disable}.

Cédric Augonnet vor 15 Jahren
Ursprung
Commit
cc2b13c2db

+ 19 - 2
examples/profiling/profiling.c

@@ -18,7 +18,7 @@
 #include <starpu_profiling.h>
 #include <assert.h>
 
-static unsigned niter = 50;
+static unsigned niter = 500;
 
 void sleep_codelet(__attribute__ ((unused)) void *descr[],
 			__attribute__ ((unused)) void *_args)
@@ -34,7 +34,7 @@ int main(int argc, char **argv)
 	starpu_init(NULL);
 
 	/* Enable profiling */
-	starpu_enable_profiling();
+	starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
 
 	starpu_codelet cl =
 	{
@@ -96,6 +96,23 @@ int main(int argc, char **argv)
 	fprintf(stderr, "Avg. delay : %2.2f us\n", ((double)delay_sum)/niter);
 	fprintf(stderr, "Avg. length : %2.2f us\n", ((double)length_sum)/niter);
 
+	/* Display the occupancy of all workers during the test */
+	int worker;
+	for (worker = 0; worker < starpu_worker_get_count(); worker++)
+	{
+		struct starpu_worker_profiling_info worker_info;
+		starpu_worker_get_profiling_info(worker, &worker_info);
+
+		char workername[128];
+		starpu_worker_get_name(worker, workername, 128);
+		fprintf(stderr, "Worker %s:\n", workername);
+		fprintf(stderr, "\ttotal time : %ld us\n", worker_info.total_time);
+		fprintf(stderr, "\texec time  : %ld us\n", worker_info.executing_time);
+
+		float overhead = 100.0 - ((100.0*worker_info.executing_time)/worker_info.total_time);
+		fprintf(stderr, "\toverhead : %.2f %%\n", overhead);
+	}
+
 	starpu_shutdown();
 
 	return 0;

+ 28 - 7
include/starpu_profiling.h

@@ -20,20 +20,41 @@
 #include <errno.h>
 #include <starpu.h>
 
-/* -ENOSYS is returned in case the info is not available */
+#define STARPU_PROFILING_DISABLE	0
+#define STARPU_PROFILING_ENABLE		1
+
+/* -ENOSYS is returned in case the info is not available. Timing are shown in
+ * microseconds. */
 struct starpu_task_profiling_info {
 	int64_t submit_time;
 	int64_t start_time;
 	int64_t end_time;
+	/* TODO add expected length, expected start/end ? */
 	int workerid;
 };
 
-/* Enable the profiling and return the previous profiling status (0 if
- * disabled, 1 if enabled). */
-unsigned starpu_enable_profiling(void);
+/* The timing is provided since the previous call to starpu_worker_get_profiling_info */
+struct starpu_worker_profiling_info {
+	int64_t start_time;
+	int64_t total_time;
+	int64_t executing_time;
+	int64_t sleeping_time;
+	int executed_tasks;
+};
+
+/* This function sets the profiling status:
+ * - enable with STARPU_PROFILING_ENABLE
+ * - disable with STARPU_PROFILING_DISABLE 
+ * Negative return values indicate an error, otherwise the previous status is
+ * returned. */
+int starpu_profiling_status_set(int status);
+
+/* Return the current profiling status or a negative value in case there was an
+ * error. */
+int starpu_profiling_status_get(void);
 
-/* Disable the profiling and return the previous profiling status (0 if
- * disabled, 1 if enabled). */
-unsigned starpu_disable_profiling(void);
+/* Get the profiling info associated to a worker, and reset the profiling
+ * measurements. If worker_info is NULL, we only reset the counters. */
+int starpu_worker_get_profiling_info(int workerid, struct starpu_worker_profiling_info *worker_info);
 
 #endif // __STARPU_PROFILING_H__

+ 1 - 8
src/core/perfmodel/perfmodel.c

@@ -36,14 +36,7 @@ void _starpu_set_calibrate_flag(unsigned val)
 {
 	calibrate_flag = val;
 
-	if (calibrate_flag > 0)
-	{
-		starpu_enable_profiling();
-	}
-	else
-	{
-		starpu_disable_profiling();
-	}
+	starpu_profiling_status_set((calibrate_flag > 0)?STARPU_PROFILING_ENABLE:STARPU_PROFILING_DISABLE);
 }
 
 unsigned _starpu_get_calibrate_flag(void)

+ 0 - 4
src/core/task.c

@@ -209,11 +209,7 @@ int starpu_task_submit(struct starpu_task *task)
 	task->profiling_info = info;
 	
 	if (info)
-	{
 		info->submit_time = (int64_t)_starpu_timing_now();
-		info->start_time = -ENOSYS;
-		info->end_time = -ENOSYS;
-	}
 
 	/* internally, StarPU manipulates a starpu_job_t which is a wrapper around a
 	* task structure, it is possible that this job structure was already

+ 21 - 10
src/drivers/cpu/driver_cpu.c

@@ -17,6 +17,7 @@
 #include <math.h>
 #include <starpu.h>
 #include <starpu_profiling.h>
+#include <profiling/profiling.h>
 
 #include <common/utils.h>
 #include <core/debug.h>
@@ -28,8 +29,11 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	int ret;
 	starpu_tick_t codelet_start, codelet_end;
 	starpu_tick_t codelet_start_comm, codelet_end_comm;
+	int64_t start_time;
+	int64_t end_time;
 
 	unsigned calibrate_model = 0;
+	int workerid = cpu_args->workerid;
 	struct starpu_task *task = j->task;
 	struct starpu_codelet_t *cl = task->cl;
 
@@ -58,33 +62,41 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 	if (calibrate_model || STARPU_BENCHMARK_COMM)
 		STARPU_GET_TICK(codelet_start);
 
-	struct starpu_task_profiling_info *profiling_info;
-	profiling_info = task->profiling_info;
+	int profiling_status = starpu_profiling_status_get();
 
-	if (profiling_info)
-		profiling_info->start_time = (int64_t)_starpu_timing_now();
+	if (profiling_status)
+		start_time = (int64_t)_starpu_timing_now();
 
 	cpu_args->status = STATUS_EXECUTING;
 	cl_func func = cl->cpu_func;
 	func(task->interface, task->cl_arg);
 
-	cl->per_worker_stats[cpu_args->workerid]++;
+	cl->per_worker_stats[workerid]++;
 	
 	if (calibrate_model || STARPU_BENCHMARK_COMM)
 		STARPU_GET_TICK(codelet_end);
 
+	if (profiling_status)
+		end_time = (int64_t)_starpu_timing_now();
+
+	struct starpu_task_profiling_info *profiling_info;
+	profiling_info = task->profiling_info;
+
 	if (profiling_info)
 	{
-		profiling_info->end_time = (int64_t)_starpu_timing_now();
-		profiling_info->workerid = cpu_args->workerid;
+		profiling_info->start_time = start_time;
+		profiling_info->end_time = end_time;
+		profiling_info->workerid = workerid;
 	}
 
+	if (profiling_status)
+		_starpu_worker_update_profiling_info(workerid, end_time - start_time, 0, 1);
+
 	STARPU_TRACE_END_CODELET_BODY(j);
 	cpu_args->status = STATUS_UNKNOWN;
 
 	_starpu_push_task_output(task, 0);
 
-//#ifdef STARPU_MODEL_DEBUG
 	if (calibrate_model || STARPU_BENCHMARK_COMM)
 	{
 		double measured = _starpu_timing_delay(&codelet_start, &codelet_end);
@@ -102,9 +114,8 @@ static int execute_job_on_cpu(starpu_job_t j, struct starpu_worker_s *cpu_args)
 		if (calibrate_model)
 			_starpu_update_perfmodel_history(j, cpu_args->arch, cpu_args->devid, measured);
 	}
-//#endif
 
-	STARPU_ATOMIC_ADD(&cpu_args->jobq->total_job_performed, 1);
+	(void)STARPU_ATOMIC_ADD(&cpu_args->jobq->total_job_performed, 1);
 
 	return 0;
 }

+ 25 - 11
src/drivers/cuda/driver_cuda.c

@@ -21,6 +21,7 @@
 #include <core/debug.h>
 #include "driver_cuda.h"
 #include <core/policies/sched_policy.h>
+#include <profiling/profiling.h>
 
 /* the number of CUDA devices */
 static int ncudagpus;
@@ -83,7 +84,6 @@ void _starpu_init_cuda(void)
 static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 {
 	int ret;
-//	uint32_t mask = (1<<0);
 	uint32_t mask = 0;
 
 	STARPU_ASSERT(j);
@@ -92,8 +92,11 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	cudaError_t cures;
 	starpu_tick_t codelet_start, codelet_end;
 	starpu_tick_t codelet_start_comm, codelet_end_comm;
-	
+	int64_t start_time;
+	int64_t end_time;
+
 	unsigned calibrate_model = 0;
+	int workerid = args->workerid;
 
 	STARPU_ASSERT(task);
 	struct starpu_codelet_t *cl = task->cl;
@@ -129,11 +132,10 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 
 	STARPU_TRACE_START_CODELET_BODY(j);
 
-	struct starpu_task_profiling_info *profiling_info;
-	profiling_info = task->profiling_info;
+	int profiling_status = starpu_profiling_status_get();
 
-	if (profiling_info)
-		profiling_info->start_time = (int64_t)_starpu_timing_now();
+	if (profiling_status)
+		start_time = (int64_t)_starpu_timing_now();
 
 	args->status = STATUS_EXECUTING;
 	cl_func func = cl->cuda_func;
@@ -141,15 +143,27 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 	STARPU_GET_TICK(codelet_start);
 	func(task->interface, task->cl_arg);
 
-	cl->per_worker_stats[args->workerid]++;
+	cl->per_worker_stats[workerid]++;
+
+
+	if (profiling_status)
+		end_time = (int64_t)_starpu_timing_now();
+
+	struct starpu_task_profiling_info *profiling_info;
+	profiling_info = task->profiling_info;
 
 	if (profiling_info)
 	{
-		profiling_info->end_time = (int64_t)_starpu_timing_now();
-		profiling_info->workerid = args->workerid;
+		profiling_info->start_time = start_time;
+		profiling_info->end_time = end_time;
+		profiling_info->workerid = workerid;
 	}
 
-	STARPU_GET_TICK(codelet_end);
+	if (profiling_status)
+		_starpu_worker_update_profiling_info(workerid, end_time - start_time, 0, 1);
+
+	if (calibrate_model || STARPU_BENCHMARK_COMM)
+		STARPU_GET_TICK(codelet_end);
 
 	args->status = STATUS_UNKNOWN;
 
@@ -171,7 +185,7 @@ static int execute_job_on_cuda(starpu_job_t j, struct starpu_worker_s *args)
 			_starpu_update_perfmodel_history(j, args->perf_arch, (unsigned)args->devid, measured);
 	}
 
-	STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
+	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
 
 	_starpu_push_task_output(task, mask);
 

+ 21 - 9
src/drivers/opencl/driver_opencl.c

@@ -24,6 +24,7 @@
 #include "driver_opencl.h"
 #include "driver_opencl_utils.h"
 #include <common/utils.h>
+#include <profiling/profiling.h>
 
 static cl_context contexts[STARPU_MAXOPENCLDEVS];
 static cl_device_id devices[STARPU_MAXOPENCLDEVS];
@@ -336,7 +337,6 @@ unsigned _starpu_opencl_get_device_count(void)
 static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *args)
 {
 	int ret;
-//	uint32_t mask = (1<<0);
 	uint32_t mask = 0;
 
 	STARPU_ASSERT(j);
@@ -344,8 +344,11 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
 	starpu_tick_t codelet_start, codelet_end;
 	starpu_tick_t codelet_start_comm, codelet_end_comm;
+	int64_t start_time;
+	int64_t end_time;
 
 	unsigned calibrate_model = 0;
+	int workerid = args->workerid;
 
 	STARPU_ASSERT(task);
 	struct starpu_codelet_t *cl = task->cl;
@@ -377,11 +380,10 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 
 	STARPU_TRACE_START_CODELET_BODY(j);
 
-	struct starpu_task_profiling_info *profiling_info;
-	profiling_info = task->profiling_info;
+	int profiling_status = starpu_profiling_status_get();
 
-	if (profiling_info)
-		profiling_info->start_time = (int64_t)_starpu_timing_now();
+	if (profiling_status)
+		start_time = (int64_t)_starpu_timing_now();
 
 	args->status = STATUS_EXECUTING;
 	cl_func func = cl->opencl_func;
@@ -389,14 +391,24 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 	STARPU_GET_TICK(codelet_start);
 	func(task->interface, task->cl_arg);
 
-	cl->per_worker_stats[args->workerid]++;
+	cl->per_worker_stats[workerid]++;
+
+	if (profiling_status)
+		end_time = (int64_t)_starpu_timing_now();
+
+	struct starpu_task_profiling_info *profiling_info;
+	profiling_info = task->profiling_info;
 
 	if (profiling_info)
 	{
-		profiling_info->end_time = (int64_t)_starpu_timing_now();
-		profiling_info->workerid = args->workerid;
+		profiling_info->start_time = start_time;
+		profiling_info->end_time = end_time;
+		profiling_info->workerid = workerid;
 	}
 
+	if (profiling_status)
+		_starpu_worker_update_profiling_info(workerid, end_time - start_time, 0, 1);
+
 	STARPU_GET_TICK(codelet_end);
 
 	args->status = STATUS_UNKNOWN;
@@ -419,7 +431,7 @@ static int _starpu_opencl_execute_job(starpu_job_t j, struct starpu_worker_s *ar
 			_starpu_update_perfmodel_history(j, args->perf_arch, (unsigned)args->devid, measured);
 	}
 
-	STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
+	(void)STARPU_ATOMIC_ADD(&args->jobq->total_job_performed, 1);
 
 	_starpu_push_task_output(task, mask);
 

+ 113 - 8
src/profiling/profiling.c

@@ -16,27 +16,66 @@
 
 #include <starpu.h>
 #include <starpu_profiling.h>
+#include <profiling/profiling.h>
 #include <common/config.h>
+#include <common/utils.h>
+#include <common/timing.h>
+#include <errno.h>
+
+static struct starpu_worker_profiling_info worker_info[STARPU_NMAXWORKERS];
+static pthread_mutex_t worker_info_mutex[STARPU_NMAXWORKERS];
+
+/*
+ *	Global control of profiling
+ */
 
 /* Disabled by default */
-static unsigned profiling = 0;
+static int profiling = 0;
 
-unsigned starpu_enable_profiling(void)
+int starpu_profiling_status_set(int status)
 {
-	unsigned prev_value = profiling;
-	profiling = 1;
+	int prev_value = profiling;
+
+	if (prev_value != status)
+	{
+		/* If we enable profiling, we reset the counters. */
+		if (status == STARPU_PROFILING_ENABLE)
+		{
+			int worker;
+			for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+				_starpu_worker_reset_profiling_info(worker);
+		}
+	
+		profiling = status;
+	}
 
 	return prev_value;
 }
 
-unsigned starpu_disable_profiling(void)
+int starpu_profiling_status_get(void)
 {
-	unsigned prev_value = profiling;
-	profiling = 0;
+	return profiling;
+}
 
-	return prev_value;
+void starpu_profiling_init(void)
+{
+	int worker;
+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+	{
+		PTHREAD_MUTEX_INIT(&worker_info_mutex[worker], NULL);
+		_starpu_worker_reset_profiling_info(worker);
+	}
 }
 
+void starpu_profiling_terminate(void)
+{
+
+}
+
+/*
+ *	Task profiling
+ */
+
 struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(void)
 {
 	struct starpu_task_profiling_info *info = NULL;
@@ -45,7 +84,73 @@ struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(voi
 	{
 		info = calloc(1, sizeof(struct starpu_task_profiling_info));
 		STARPU_ASSERT(info);
+
+		info->submit_time = -ENOSYS;
+		info->start_time = -ENOSYS;
+		info->end_time = -ENOSYS;
 	}
 
 	return info;
 }
+
+/*
+ *	Worker profiling
+ */
+
+static void _do_starpu_worker_reset_profiling_info(int workerid)
+{
+	worker_info[workerid].start_time = (int64_t)_starpu_timing_now();
+
+	/* This is computed in a lazy fashion when the application queries
+	 * profiling info. */
+	worker_info[workerid].total_time = -ENOSYS;
+
+	worker_info[workerid].executing_time = 0;
+	worker_info[workerid].sleeping_time = 0;
+	worker_info[workerid].executed_tasks = 0;
+}
+
+void _starpu_worker_reset_profiling_info(int workerid)
+{
+	PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+	_do_starpu_worker_reset_profiling_info(workerid);
+	PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+}
+
+void _starpu_worker_update_profiling_info(int workerid, int64_t executing_time,
+					int64_t sleeping_time, int executed_tasks)
+{
+	if (profiling)
+	{
+		PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+	
+		worker_info[workerid].executing_time += executing_time;
+		worker_info[workerid].sleeping_time += sleeping_time;
+		worker_info[workerid].executed_tasks += executed_tasks;
+	
+		PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+	}
+}
+
+int starpu_worker_get_profiling_info(int workerid, struct starpu_worker_profiling_info *info)
+{
+	if (!profiling)
+		return -EINVAL;
+
+	PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
+
+	if (info)
+	{
+		/* The total time is computed in a lazy fashion */
+		int64_t total_time = ((int64_t)_starpu_timing_now()) - worker_info[workerid].start_time;
+		worker_info[workerid].total_time = total_time;
+
+		memcpy(info, &worker_info[workerid], sizeof(struct starpu_worker_profiling_info));
+	}
+
+	_do_starpu_worker_reset_profiling_info(workerid);
+
+	PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
+
+	return 0;
+}

+ 2 - 0
src/profiling/profiling.h

@@ -22,5 +22,7 @@
 #include <common/config.h>
 
 struct starpu_task_profiling_info *_starpu_allocate_profiling_info_if_needed(void);
+void _starpu_worker_reset_profiling_info(int workerid);
+void _starpu_worker_update_profiling_info(int workerid, int64_t executing_time, int64_t sleeping_time, int executed_tasks);
 
 #endif // __PROFILING_H__