Browse Source

src: debug mechanism to log in and out functions calls (was helpful to debug MPI...)

Nathalie Furmento 15 years ago
parent
commit
553f253990

+ 11 - 0
src/common/utils.h

@@ -28,6 +28,17 @@
 #else
 #  define _STARPU_DEBUG(fmt, args ...)
 #endif
+
+#ifdef STARPU_VERBOSE0
+#  define _STARPU_LOG_IN()             fprintf(stderr, "[starpu][%ld][%s] -->\n", pthread_self(), __func__ );
+#  define _STARPU_LOG_OUT()            fprintf(stderr, "[starpu][%ld][%s] <--\n", pthread_self(), __func__ );
+#  define _STARPU_LOG_OUT_TAG(outtag)  fprintf(stderr, "[starpu][%ld][%s] <-- (%s)\n", pthread_self(), __func__, outtag);
+#else
+#  define _STARPU_LOG_IN()
+#  define _STARPU_LOG_OUT()
+#  define _STARPU_LOG_OUT_TAG(outtag)
+#endif
+
 #define _STARPU_DISP(fmt, args ...) fprintf(stderr, "[starpu][%s] " fmt ,__func__ ,##args)
 #define _STARPU_ERROR(fmt, args ...)                                                  \
 	do {                                                                          \

+ 2 - 0
src/core/dependencies/data_concurrency.c

@@ -67,6 +67,7 @@ unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle han
 						void (*callback)(void *), void *argcb)
 {
 	unsigned ret;
+        _STARPU_LOG_IN();
 
 	_starpu_spin_lock(&handle->header_lock);
 
@@ -109,6 +110,7 @@ unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle han
 	}
 
 	_starpu_spin_unlock(&handle->header_lock);
+        _STARPU_LOG_OUT();
 	return ret;
 }
 

+ 6 - 0
src/core/dependencies/implicit_data_deps.c

@@ -38,6 +38,7 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 						starpu_data_handle handle, starpu_access_mode mode)
 {
 	STARPU_ASSERT(!(mode & STARPU_SCRATCH));
+        _STARPU_LOG_IN();
 
 	if (handle->sequential_consistency)
 	{
@@ -204,12 +205,14 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 	
 		handle->last_submitted_mode = mode;
 	}
+        _STARPU_LOG_OUT();
 }
 
 /* Create the implicit dependencies for a newly submitted task */
 void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 {
 	STARPU_ASSERT(task->cl);
+        _STARPU_LOG_IN();
 
 	unsigned nbuffers = task->cl->nbuffers;
 
@@ -227,6 +230,7 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 		_starpu_detect_implicit_data_deps_with_handle(task, task, handle, mode);
 		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 	}
+        _STARPU_LOG_OUT();
 }
 
 /* This function is called when a task has been executed so that we don't
@@ -322,6 +326,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
 void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle handle)
 {
+        _STARPU_LOG_IN();
 	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
 
 	if (handle->sequential_consistency)
@@ -335,6 +340,7 @@ void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data
 	}
 
 	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
+        _STARPU_LOG_OUT();
 }
 
 void _starpu_unlock_post_sync_tasks(starpu_data_handle handle)

+ 15 - 3
src/core/jobs.c

@@ -55,6 +55,7 @@ void _starpu_exclude_task_from_dag(struct starpu_task *task)
 starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task)
 {
 	starpu_job_t job;
+        _STARPU_LOG_IN();
 
 	job = starpu_job_new();
 
@@ -84,6 +85,7 @@ starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task
 	if (task->use_tag)
 		_starpu_tag_declare(task->tag_id, job);
 
+        _STARPU_LOG_OUT();
 	return job;
 }
 
@@ -101,6 +103,7 @@ void _starpu_wait_job(starpu_job_t j)
 {
 	STARPU_ASSERT(j->task);
 	STARPU_ASSERT(!j->task->detach);
+        _STARPU_LOG_IN();
 
 	PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 
@@ -113,6 +116,7 @@ void _starpu_wait_job(starpu_job_t j)
 		PTHREAD_COND_WAIT(&j->sync_cond, &j->sync_mutex);
 
 	PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
+        _STARPU_LOG_OUT();
 }
 
 void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_locked)
@@ -280,21 +284,29 @@ static unsigned _starpu_not_all_task_deps_are_fulfilled(starpu_job_t j, unsigned
 unsigned _starpu_enforce_deps_and_schedule(starpu_job_t j, unsigned job_is_already_locked)
 {
 	unsigned ret;
+        _STARPU_LOG_IN();
 
 	/* enfore tag dependencies */
-	if (_starpu_not_all_tag_deps_are_fulfilled(j))
+	if (_starpu_not_all_tag_deps_are_fulfilled(j)) {
+                _STARPU_LOG_OUT_TAG("not_all_tag_deps_are_fulfilled");
 		return 0;
+        }
 
 	/* enfore task dependencies */
-	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked))
+	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked)) {
+                _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled");
 		return 0;
+        }
 
 	/* enforce data dependencies */
-	if (_starpu_submit_job_enforce_data_deps(j))
+	if (_starpu_submit_job_enforce_data_deps(j)) {
+                _STARPU_LOG_OUT_TAG("enforce_data_deps");
 		return 0;
+        }
 
 	ret = _starpu_push_task(j, job_is_already_locked);
 
+        _STARPU_LOG_OUT();
 	return ret;
 }
 

+ 7 - 2
src/core/sched_policy.c

@@ -203,6 +203,7 @@ void _starpu_deinit_sched_policy(struct starpu_machine_config_s *config)
 int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 {
 	struct starpu_task *task = j->task;
+        _STARPU_LOG_IN();
 
 	task->status = STARPU_TASK_READY;
 
@@ -212,9 +213,11 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 	if (task->cl == NULL)
 	{
 		_starpu_handle_job_termination(j, job_is_already_locked);
+                _STARPU_LOG_OUT_TAG("handle_job_termination");
 		return 0;
 	}
 
+        int ret;
 	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
 	{
 		unsigned workerid = task->workerid;
@@ -226,13 +229,15 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 			_starpu_prefetch_task_input_on_node(task, memory_node);
 		}
 
-		return _starpu_push_local_task(worker, j);
+		ret = _starpu_push_local_task(worker, j);
 	}
 	else {
 		STARPU_ASSERT(policy.push_task);
 
-		return policy.push_task(task);
+		ret = policy.push_task(task);
 	}
+        _STARPU_LOG_OUT();
+        return ret;
 }
 
 struct starpu_task *_starpu_pop_task(void)

+ 15 - 5
src/core/task.c

@@ -173,6 +173,7 @@ starpu_job_t _starpu_get_job_associated_to_task(struct starpu_task *task)
  * already counted. */
 int _starpu_submit_job(starpu_job_t j, unsigned do_not_increment_nsubmitted)
 {
+        _STARPU_LOG_IN();
 	/* notify bound computation of a new task */
 	_starpu_bound_record(j);
 
@@ -183,7 +184,9 @@ int _starpu_submit_job(starpu_job_t j, unsigned do_not_increment_nsubmitted)
 
 	j->submitted = 1;
 
-	return _starpu_enforce_deps_and_schedule(j, 0);
+	int ret = _starpu_enforce_deps_and_schedule(j, 0);
+        _STARPU_LOG_OUT();
+        return ret;
 }
 
 /* application should submit new tasks to StarPU through this function */
@@ -191,13 +194,16 @@ int starpu_task_submit(struct starpu_task *task)
 {
 	int ret;
 	unsigned is_sync = task->synchronous;
+        _STARPU_LOG_IN();
 
 	if (is_sync)
 	{
 		/* Perhaps it is not possible to submit a synchronous
 		 * (blocking) task */
-		if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
+                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
+                        _STARPU_LOG_OUT_TAG("EDEADLK");
 			return -EDEADLK;
+                }
 
 		task->detach = 0;
 	}
@@ -207,15 +213,18 @@ int starpu_task_submit(struct starpu_task *task)
 	if (task->cl)
 	{
 		uint32_t where = task->cl->where;
-		if (!_starpu_worker_exists(where))
+		if (!_starpu_worker_exists(where)) {
+                        _STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
+                }
 
 		/* In case we require that a task should be explicitely
 		 * executed on a specific worker, we make sure that the worker
 		 * is able to execute this task.  */
-		if (task->execute_on_a_specific_worker 
-			&& !_starpu_worker_may_execute_task(task->workerid, where))
+		if (task->execute_on_a_specific_worker && !_starpu_worker_may_execute_task(task->workerid, where)) {
+                        _STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
+                }
 
 		_starpu_detect_implicit_data_deps(task);
 	}
@@ -243,6 +252,7 @@ int starpu_task_submit(struct starpu_task *task)
 	if (is_sync)
 		_starpu_wait_job(j);
 
+        _STARPU_LOG_OUT();
 	return ret;
 }
 

+ 7 - 2
src/datawizard/coherency.c

@@ -145,6 +145,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_no
 				void (*callback_func)(void *), void *callback_arg)
 {
 	uint32_t local_node = _starpu_get_local_memory_node();
+        _STARPU_LOG_IN();
 
 	while (_starpu_spin_trylock(&handle->header_lock))
 		_starpu_datawizard_progress(local_node, 1);
@@ -162,6 +163,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_no
 		if (callback_func)
 			callback_func(callback_arg);
 
+                _STARPU_LOG_OUT_TAG("data available");
 		return 0;
 	}
 
@@ -260,7 +262,8 @@ int _starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_no
 
 			_starpu_spin_unlock(&handle->header_lock);
 
-			return 0;
+                        _STARPU_LOG_OUT_TAG("similar request");
+                        return 0;
 		}
 
 		r->refcnt++;
@@ -281,7 +284,9 @@ int _starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_no
 		_starpu_spin_unlock(&handle->header_lock);
 	}
 
-	return (is_prefetch?0:_starpu_wait_data_request_completion(r, 1));
+	int ret = is_prefetch?0:_starpu_wait_data_request_completion(r, 1);
+        _STARPU_LOG_OUT();
+        return ret;
 }
 
 static int prefetch_data_on_node(starpu_data_handle handle, starpu_access_mode mode, uint32_t node)

+ 8 - 3
src/datawizard/user_interactions.c

@@ -107,6 +107,7 @@ int starpu_data_acquire_cb(starpu_data_handle handle,
 		starpu_access_mode mode, void (*callback)(void *), void *arg)
 {
 	STARPU_ASSERT(handle);
+        _STARPU_LOG_IN();
 
 	struct user_interaction_wrapper *wrapper = malloc(sizeof(struct user_interaction_wrapper));
 	STARPU_ASSERT(wrapper);
@@ -156,6 +157,7 @@ int starpu_data_acquire_cb(starpu_data_handle handle,
 		starpu_data_acquire_cb_pre_sync_callback(wrapper);
 	}
 
+        _STARPU_LOG_OUT();
 	return 0;
 }
 
@@ -183,10 +185,13 @@ static inline void _starpu_data_acquire_continuation(void *arg)
 int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
 {
 	STARPU_ASSERT(handle);
+        _STARPU_LOG_IN();
 
 	/* it is forbidden to call this function from a callback or a codelet */
-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
+                _STARPU_LOG_OUT_TAG("EDEADLK");
 		return -EDEADLK;
+        }
 
 	struct user_interaction_wrapper wrapper =
 	{
@@ -232,8 +237,7 @@ int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
 	/* we try to get the data, if we do not succeed immediately, we set a
  	* callback function that will be executed automatically when the data is
  	* available again, otherwise we fetch the data directly */
-	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode,
-			_starpu_data_acquire_continuation, &wrapper))
+	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper))
 	{
 		/* no one has locked this data yet, so we proceed immediately */
 		int ret = _starpu_fetch_data_on_node(handle, 0, mode, 0, NULL, NULL);
@@ -252,6 +256,7 @@ int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
 	 * function. */
 	_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);
 
+        _STARPU_LOG_OUT();
 	return 0;
 }