Forráskód Böngészése

add API to start/stop performance counter collection, do not auto start collection at startup by default

Olivier Aumage 5 éve
szülő
commit
c20be7e557

+ 7 - 2
examples/perf_monitoring/perf_counters_02.c

@@ -107,9 +107,14 @@ const enum starpu_perf_counter_scope c_scope = starpu_perf_counter_scope_per_cod
 
 int main(int argc, char **argv)
 {
-	int ret;
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	
+	/* Start collecting performance counters right after initialization */
+	conf.start_perf_counter_collection = 1;
 
-	ret = starpu_init(NULL);
+	int ret;
+	ret = starpu_init(&conf);
 	if (ret == -ENODEV)
 		return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

+ 6 - 0
include/starpu.h

@@ -426,6 +426,12 @@ struct starpu_conf
 	   \ref STARPU_CATCH_SIGNALS
 	 */
 	int catch_signals;
+
+	/**
+	   Specify whether StarPU should automatically start collecting performance
+	   counters right after initialization (starpu_conf_init() sets it to 0, i.e. off).
+	 */
+	unsigned start_perf_counter_collection;
 };
 
 /**

+ 9 - 0
include/starpu_perf_monitoring.h

@@ -63,6 +63,15 @@ struct starpu_perf_counter_sample;
 struct starpu_perf_counter_set;
 
 /**
+  Start collecting performance counter values. Each call removes one pause
+  level, such as the one added by starpu_perf_counter_collection_stop().
+  */
+void starpu_perf_counter_collection_start(void);
+/**
+  Stop collecting performance counter values. Each call adds one pause
+  level, to be undone by a starpu_perf_counter_collection_start() call.
+  */
+void starpu_perf_counter_collection_stop(void);
+
+/**
   Translate scope name constant string to scope id.
   */
 int starpu_perf_counter_scope_name_to_id(const char *name);

+ 26 - 1
src/common/knobs.c

@@ -65,8 +65,19 @@ void _starpu_perf_counter_sample_exit(struct starpu_perf_counter_sample *sample)
 
 /* - */
 
-void _starpu_perf_counter_init(void)
+void _starpu_perf_counter_init(struct _starpu_machine_config *pconfig)
 {
+	if (pconfig->conf.start_perf_counter_collection)
+	{
+		/* start perf counter collection immediately */
+		pconfig->perf_counter_pause_depth = 0;
+	}
+	else
+	{
+		/* defer perf counter collection until call to
+		 * starpu_perf_counter_collection_start() */
+		pconfig->perf_counter_pause_depth = 1;
+	}
 	STARPU_ASSERT(!_starpu_machine_is_running());
 	_starpu_perf_counter_sample_init(&global_sample, starpu_perf_counter_scope_global);
 
@@ -84,6 +95,20 @@ void _starpu_perf_counter_exit(void)
 
 /* - */
 
+/* Resume performance counter collection by removing one pause level from
+ * _starpu_config.perf_counter_pause_depth; collection is considered paused
+ * only while that depth is > 0. */
+void starpu_perf_counter_collection_start(void)
+{
+	/* The depth is updated atomically below; presumably this exempts it
+	 * from Helgrind race checking -- TODO confirm */
+	STARPU_HG_DISABLE_CHECKING(_starpu_config.perf_counter_pause_depth);
+	(void)STARPU_ATOMIC_ADD(&_starpu_config.perf_counter_pause_depth, -1);
+}
+
+/* Pause performance counter collection by adding one pause level to
+ * _starpu_config.perf_counter_pause_depth; collection is considered paused
+ * while that depth is > 0. */
+void starpu_perf_counter_collection_stop(void)
+{
+	/* The depth is updated atomically below; presumably this exempts it
+	 * from Helgrind race checking -- TODO confirm */
+	STARPU_HG_DISABLE_CHECKING(_starpu_config.perf_counter_pause_depth);
+	(void)STARPU_ATOMIC_ADD(&_starpu_config.perf_counter_pause_depth, +1);
+}
+
+/* - */
+
 int starpu_perf_counter_scope_name_to_id(const char * const name)
 {
 	if (strcmp(name, "global") == 0)

+ 1 - 1
src/common/knobs.h

@@ -202,7 +202,7 @@ static inline int _starpu_perf_counter_id_build(const enum starpu_perf_counter_s
 
 void _starpu_perf_counter_sample_init(struct starpu_perf_counter_sample *sample, enum starpu_perf_counter_scope scope);
 void _starpu_perf_counter_sample_exit(struct starpu_perf_counter_sample *sample);
-void _starpu_perf_counter_init(void);
+void _starpu_perf_counter_init(struct _starpu_machine_config *pconfig);
 void _starpu_perf_counter_exit(void);
 
 int _starpu_perf_counter_register(enum starpu_perf_counter_scope scope, const char *name, enum starpu_perf_counter_type type, const char *help);

+ 2 - 2
src/core/sched_policy.c

@@ -446,7 +446,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 		0
 #endif
 		;
-	if (!j->internal && !continuation)
+	if (!_starpu_perf_counter_paused() && !j->internal && !continuation)
 	{
 		(void) STARPU_ATOMIC_ADD64(& _starpu_task__g_current_submitted__value, -1);
 		int64_t value = STARPU_ATOMIC_ADD64(& _starpu_task__g_current_ready__value, 1);
@@ -494,7 +494,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 	 * corresponding dependencies */
 	if (task->cl == NULL || task->where == STARPU_NOWHERE)
 	{
-		if (!j->internal)
+		if (!_starpu_perf_counter_paused() && !j->internal)
 		{
 			(void)STARPU_ATOMIC_ADD64(& _starpu_task__g_current_ready__value, -1);
 			if (task->cl && task->cl->perf_counter_values)

+ 11 - 6
src/core/task.c

@@ -859,7 +859,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 		0
 #endif
 		;
-	if (!j->internal && !continuation)
+	if (!_starpu_perf_counter_paused() && !j->internal && !continuation)
 	{
 		(void) STARPU_ATOMIC_ADD64(&_starpu_task__g_total_submitted__value, 1);
 		int64_t value = STARPU_ATOMIC_ADD64(&_starpu_task__g_current_submitted__value, 1);
@@ -1137,7 +1137,8 @@ int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void)
 int starpu_task_wait_for_all(void)
 {
 	_starpu_task_wait_for_all_and_return_nb_waited_tasks();
-	_starpu_perf_counter_update_global_sample();
+	if (!_starpu_perf_counter_paused())
+		_starpu_perf_counter_update_global_sample();
 	return 0;
 }
 
@@ -1154,7 +1155,8 @@ int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_c
 int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx)
 {
 	_starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx);
-	_starpu_perf_counter_update_global_sample();
+	if (!_starpu_perf_counter_paused())
+		_starpu_perf_counter_update_global_sample();
 	return 0;
 }
 
@@ -1196,7 +1198,8 @@ int starpu_task_wait_for_n_submitted(unsigned n)
 		_STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id);
 		_starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx_id, n);
 	}
-	_starpu_perf_counter_update_global_sample();
+	if (!_starpu_perf_counter_paused())
+		_starpu_perf_counter_update_global_sample();
 	return 0;
 }
 
@@ -1204,7 +1207,8 @@ int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx, unsigned n)
 {
 	_starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx, n);
 
-	_starpu_perf_counter_update_global_sample();
+	if (!_starpu_perf_counter_paused())
+		_starpu_perf_counter_update_global_sample();
 	return 0;
 }
 /*
@@ -1240,7 +1244,8 @@ int starpu_task_wait_for_no_ready(void)
 		}
 	}
 
-	_starpu_perf_counter_update_global_sample();
+	if (!_starpu_perf_counter_paused())
+		_starpu_perf_counter_update_global_sample();
 	return 0;
 }
 

+ 4 - 1
src/core/workers.c

@@ -1143,6 +1143,9 @@ int starpu_conf_init(struct starpu_conf *conf)
 
 	/* 64MiB by default */
 	conf->trace_buffer_size = starpu_get_env_number_default("STARPU_TRACE_BUFFER_SIZE", 64) << 20;
+
+	/* Do not start performance counter collection by default */
+	conf->start_perf_counter_collection = 0;
 	return 0;
 }
 
@@ -1601,7 +1604,7 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	}
 
 	_starpu_initialize_registered_performance_models();
-	_starpu_perf_counter_init();
+	_starpu_perf_counter_init(&_starpu_config);
 	_starpu_perf_knob_init();
 
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)

+ 10 - 0
src/core/workers.h

@@ -432,6 +432,9 @@ struct _starpu_machine_config
 
 	int watchdog_ok;
 
+	/** When >0, StarPU should stop performance counters collection. */
+	int perf_counter_pause_depth;
+
 	starpu_pthread_mutex_t submitted_mutex;
 };
 
@@ -1180,6 +1183,13 @@ void _starpu_worker_refuse_task(struct _starpu_worker *worker, struct starpu_tas
 void _starpu_set_catch_signals(int do_catch_signal);
 int _starpu_get_catch_signals(void);
 
+/* Performance Monitoring: tell whether counter collection is currently paused (pause depth > 0) */
+static inline int _starpu_perf_counter_paused(void) 
+{
+	STARPU_RMB(); /* presumably a read memory barrier issued before sampling the depth -- TODO confirm */
+	return _starpu_config.perf_counter_pause_depth > 0; /* nonzero while paused */
+}
+
 /* @}*/
 
 #endif // __WORKERS_H__

+ 12 - 9
src/drivers/driver_common/driver_common.c

@@ -68,7 +68,7 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job
 	if (rank == 0)
 	{
 		STARPU_ASSERT(task->status == STARPU_TASK_READY);
-		if (!j->internal)
+		if (!_starpu_perf_counter_paused() && !j->internal)
 		{
 			(void)STARPU_ATOMIC_ADD64(& _starpu_task__g_current_ready__value, -1);
 			if (task->cl && task->cl->perf_counter_values)
@@ -211,15 +211,18 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	starpu_timespec_sub(&worker->cl_end, &worker->cl_start, &measured_ts);
 	double measured = starpu_timing_timespec_to_us(&measured_ts);
 
-	worker->__w_total_executed__value++;
-	worker->__w_cumul_execution_time__value += measured;
-	_starpu_perf_counter_update_per_worker_sample(worker->workerid);
-	if (cl->perf_counter_values)
+	if (!_starpu_perf_counter_paused())
 	{
-		struct starpu_perf_counter_sample_cl_values * const pcv = cl->perf_counter_values;
-		(void)STARPU_ATOMIC_ADD64(&pcv->task.total_executed, 1);
-		_starpu_perf_counter_update_acc_double(&pcv->task.cumul_execution_time, measured);
-		_starpu_perf_counter_update_per_codelet_sample(cl);
+		worker->__w_total_executed__value++;
+		worker->__w_cumul_execution_time__value += measured;
+		_starpu_perf_counter_update_per_worker_sample(worker->workerid);
+		if (cl->perf_counter_values)
+		{
+			struct starpu_perf_counter_sample_cl_values * const pcv = cl->perf_counter_values;
+			(void)STARPU_ATOMIC_ADD64(&pcv->task.total_executed, 1);
+			_starpu_perf_counter_update_acc_double(&pcv->task.cumul_execution_time, measured);
+			_starpu_perf_counter_update_per_codelet_sample(cl);
+		}
 	}
 
 	if ((profiling && profiling_info) || calibrate_model)