Browse Source

tell helgrind about some races we are fine with

Samuel Thibault 12 years ago
parent
commit
d95e1c8398

+ 1 - 0
configure.ac

@@ -221,6 +221,7 @@ AM_CONDITIONAL([STARPU_LONG_CHECK], [test "x$enable_long_check" = "xyes"])
 AC_CHECK_HEADERS([malloc.h], [AC_DEFINE([STARPU_HAVE_MALLOC_H], [1], [Define to 1 if you have the <malloc.h> header file.])])
 
 AC_CHECK_HEADERS([valgrind/valgrind.h], [AC_DEFINE([STARPU_HAVE_VALGRIND_H], [1], [Define to 1 if you have the <valgrind/valgrind.h> header file.])])
+AC_CHECK_HEADERS([valgrind/helgrind.h], [AC_DEFINE([STARPU_HAVE_HELGRIND_H], [1], [Define to 1 if you have the <valgrind/helgrind.h> header file.])])
 
 # This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP
 STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP
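When both checks succeed, the corresponding defines end up in the generated configuration header (included elsewhere in the tree as <common/config.h>). An illustrative, not verbatim, excerpt of what that header would then contain:

/* Illustrative excerpt of the generated config header when both Valgrind
 * headers were found; the exact contents depend on the autoconf setup. */
#define STARPU_HAVE_VALGRIND_H 1
#define STARPU_HAVE_HELGRIND_H 1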

+ 28 - 0
src/common/utils.h

@@ -31,6 +31,34 @@
 #include <msg/msg.h>
 #endif
 
+#ifdef STARPU_HAVE_HELGRIND_H
+#include <valgrind/helgrind.h>
+#else
+#define VALGRIND_HG_MUTEX_LOCK_PRE(mutex, istrylock) ((void)0)
+#define VALGRIND_HG_MUTEX_LOCK_POST(mutex) ((void)0)
+#define VALGRIND_HG_MUTEX_UNLOCK_PRE(mutex) ((void)0)
+#define VALGRIND_HG_MUTEX_UNLOCK_POST(mutex) ((void)0)
+#define DO_CREQ_v_WW(_creqF, _ty1F, _arg1F, _ty2F, _arg2F) ((void)0)
+#define DO_CREQ_v_W(_creqF, _ty1F, _arg1F) ((void)0)
+#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0)
+#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0)
+#define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) ((void)0)
+#define ANNOTATE_RWLOCK_RELEASED(lock, is_w) ((void)0)
+#endif
+
+#define _STARPU_VALGRIND_HG_SPIN_LOCK_PRE(lock) \
+	DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_SPIN_LOCK_PRE, \
+			struct _starpu_spinlock *, lock, long, 0)
+#define _STARPU_VALGRIND_HG_SPIN_LOCK_POST(lock) \
+	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_LOCK_POST, \
+			struct _starpu_spinlock *, lock)
+#define _STARPU_VALGRIND_HG_SPIN_UNLOCK_PRE(lock) \
+	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_PRE, \
+			struct _starpu_spinlock *, lock)
+#define _STARPU_VALGRIND_HG_SPIN_UNLOCK_POST(lock) \
+	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_POST, \
+			struct _starpu_spinlock *, lock)
+
 #ifdef STARPU_VERBOSE
 #  define _STARPU_DEBUG(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%s] " fmt ,__func__ ,##args); fflush(stderr); }} while(0)
 #else
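With these fallbacks, call sites can use the annotations unconditionally: when <valgrind/helgrind.h> is missing, everything expands to ((void)0). As a usage illustration, here is a minimal sketch (the structure and function are made up, not part of this commit; <common/starpu_spinlock.h> is assumed to declare struct _starpu_spinlock) of reading a spinlock-protected counter without taking the lock while keeping Helgrind quiet. The same pattern is applied to handle->busy_count in data_interface.c below.

#include <common/utils.h>           /* the _STARPU_VALGRIND_HG_SPIN_* macros above */
#include <common/starpu_spinlock.h> /* assumed home of struct _starpu_spinlock */

/* Hypothetical example: the read is knowingly racy; the PRE/POST pairs only
 * tell Helgrind to treat it as if the spinlock were held. */
struct foo
{
	struct _starpu_spinlock lock;
	unsigned count;
};

static unsigned peek_count(struct foo *f)
{
	unsigned c;
	_STARPU_VALGRIND_HG_SPIN_LOCK_PRE(&f->lock);
	_STARPU_VALGRIND_HG_SPIN_LOCK_POST(&f->lock);
	c = f->count;
	_STARPU_VALGRIND_HG_SPIN_UNLOCK_PRE(&f->lock);
	_STARPU_VALGRIND_HG_SPIN_UNLOCK_POST(&f->lock);
	return c;
}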

+ 11 - 0
src/core/perfmodel/perfmodel_history.c

@@ -1065,6 +1065,10 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 		HASH_FIND_UINT32_T(history, &key, entry);
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
+		/* We do not care about racy accesses to the mean; we only want a
+		 * good-enough estimation, so simulate taking the rdlock */
+		ANNOTATE_RWLOCK_ACQUIRED(&model->model_rwlock, 0);
+
 		if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
 			exp = entry->history_entry->mean;
 		else if (!model->benchmarking)
@@ -1076,6 +1080,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 			_starpu_set_calibrate_flag(1);
 			model->benchmarking = 1;
 		}
+		ANNOTATE_RWLOCK_RELEASED(&model->model_rwlock, 0);
 	}
 
 	return exp;
@@ -1098,6 +1103,10 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 	entry = (elt == NULL) ? NULL : elt->history_entry;
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
+	/* We do not care about racy accesses to the mean; we only want a
+	 * good-enough estimation, so simulate taking the rdlock */
+	ANNOTATE_RWLOCK_ACQUIRED(&model->model_rwlock, 0);
+
 	exp = entry?entry->mean:NAN;
 
 	if (entry && entry->nsample < _STARPU_CALIBRATION_MINIMUM)
@@ -1116,6 +1125,8 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 		model->benchmarking = 1;
 	}
 
+	ANNOTATE_RWLOCK_RELEASED(&model->model_rwlock, 0);
+
 	return exp;
 }
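The annotation pattern used here, pulled out for illustration: the writer really takes the rwlock, while the deliberately unlocked read is bracketed with ANNOTATE_RWLOCK_ACQUIRED/RELEASED with is_w = 0, i.e. a pretended read lock, so Helgrind pairs it with the real writers instead of reporting a race. A minimal standalone sketch, using a plain pthread rwlock and a made-up stats structure (not StarPU code):

#include <pthread.h>
#include <valgrind/helgrind.h>   /* ANNOTATE_RWLOCK_ACQUIRED/RELEASED */

/* Hypothetical structure; 'mean' is normally updated under the write lock. */
struct stats
{
	pthread_rwlock_t lock;
	double mean;
};

/* Writer: really takes the lock, so Helgrind sees the regular protocol. */
static void stats_update(struct stats *s, double sample)
{
	pthread_rwlock_wrlock(&s->lock);
	s->mean = (s->mean + sample) / 2;   /* crude running mean, for the sketch */
	pthread_rwlock_unlock(&s->lock);
}

/* Reader: the read is knowingly racy (an estimation is good enough), so we
 * only pretend to Helgrind that a read lock was taken around it. */
static double stats_peek(struct stats *s)
{
	double m;
	ANNOTATE_RWLOCK_ACQUIRED(&s->lock, 0 /* rd */);
	m = s->mean;
	ANNOTATE_RWLOCK_RELEASED(&s->lock, 0 /* rd */);
	return m;
}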
 

+ 2 - 0
src/core/workers.c

@@ -895,6 +895,7 @@ unsigned _starpu_machine_is_running(void)
 {
 	/* running is just protected by a memory barrier */
 	STARPU_RMB();
+	ANNOTATE_HAPPENS_AFTER(&config.running);
 	return config.running;
 }
 
@@ -925,6 +926,7 @@ static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig)
 	/* set the flag which will tell workers to stop */
 	pconfig->running = 0;
 	/* running is just protected by a memory barrier */
+	ANNOTATE_HAPPENS_BEFORE(&config.running);
 	STARPU_WMB();
 	starpu_wake_all_blocked_workers();
 }
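The flag-plus-barrier pattern in isolation: the writer flips the flag and declares a happens-before edge before its write barrier, the reader issues the read barrier and declares the matching happens-after edge, so Helgrind accepts the unlocked accesses (this is what lets the starpu.suppr entries below be dropped). A minimal sketch with made-up function names, assuming StarPU's STARPU_WMB()/STARPU_RMB() barrier macros are in scope:

#include <valgrind/helgrind.h>   /* ANNOTATE_HAPPENS_BEFORE/AFTER */

static unsigned running = 1;     /* only protected by memory barriers */

/* Writer side (shutdown path): flip the flag, then publish it. */
static void request_termination(void)
{
	running = 0;
	ANNOTATE_HAPPENS_BEFORE(&running);
	STARPU_WMB();
}

/* Reader side (worker loops): observe the flag. */
static unsigned machine_is_running(void)
{
	STARPU_RMB();
	ANNOTATE_HAPPENS_AFTER(&running);
	return running;
}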

+ 11 - 1
src/datawizard/data_request.c

@@ -17,6 +17,7 @@
 
 #include <starpu.h>
 #include <common/config.h>
+#include <common/utils.h>
 #include <datawizard/datawizard.h>
 
 /* requests that have not been treated at all */
@@ -391,9 +392,18 @@ void _starpu_handle_node_data_requests(unsigned src_node, unsigned may_alloc)
 	struct _starpu_data_request *r;
 	struct _starpu_data_request_list *new_data_requests;
 
-	/* Note: this is not racy: list_empty just reads a pointer */
+	/* Note: here we tell valgrind that list_empty (reading a pointer) is
+	 * as safe as if we had the lock held */
+	VALGRIND_HG_MUTEX_LOCK_PRE(&data_requests_list_mutex[src_node], 0);
+	VALGRIND_HG_MUTEX_LOCK_POST(&data_requests_list_mutex[src_node]);
 	if (_starpu_data_request_list_empty(data_requests[src_node]))
+	{
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(&data_requests_list_mutex[src_node]);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(&data_requests_list_mutex[src_node]);
 		return;
+	}
+	VALGRIND_HG_MUTEX_UNLOCK_PRE(&data_requests_list_mutex[src_node]);
+	VALGRIND_HG_MUTEX_UNLOCK_POST(&data_requests_list_mutex[src_node]);
 
 	/* take all the entries from the request list */
         _STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[src_node]);
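Since the pretended LOCK/UNLOCK pre/post pairs are fairly verbose at each call site, they could be folded into a pair of convenience macros; a hypothetical sketch (these wrappers are not part of this commit) that would reduce the check above to pretend-lock, list_empty, pretend-unlock:

/* Hypothetical convenience wrappers around the Helgrind mutex client
 * requests: pretend to take and release 'mutex' around a read that is
 * knowingly performed without actually holding the lock. */
#define _STARPU_HG_PRETEND_LOCK(mutex) do { \
	VALGRIND_HG_MUTEX_LOCK_PRE(mutex, 0); \
	VALGRIND_HG_MUTEX_LOCK_POST(mutex); \
} while (0)

#define _STARPU_HG_PRETEND_UNLOCK(mutex) do { \
	VALGRIND_HG_MUTEX_UNLOCK_PRE(mutex); \
	VALGRIND_HG_MUTEX_UNLOCK_POST(mutex); \
} while (0)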

+ 14 - 1
src/datawizard/interfaces/data_interface.c

@@ -548,8 +548,21 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
 	/* Wait for all requests to finish (notably WT requests) */
 	_STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
-	while (handle->busy_count)
+	while (1) {
+		int busy;
+		/* Note: here we tell valgrind that reading busy_count is as
+		 * safe as if we had the lock held */
+		_STARPU_VALGRIND_HG_SPIN_LOCK_PRE(&handle->header_lock);
+		_STARPU_VALGRIND_HG_SPIN_LOCK_POST(&handle->header_lock);
+		busy = handle->busy_count;
+		_STARPU_VALGRIND_HG_SPIN_UNLOCK_PRE(&handle->header_lock);
+		_STARPU_VALGRIND_HG_SPIN_UNLOCK_POST(&handle->header_lock);
+		if (!busy)
+			break;
+		/* This is woken by _starpu_data_check_not_busy, always called
+		 * after decrementing busy_count */
 		_STARPU_PTHREAD_COND_WAIT(&handle->busy_cond, &handle->busy_mutex);
+	}
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
 
 	/* Wait for finished requests to release the handle */
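For context, the comment in the loop above points at the other side of the handshake: once busy_count reaches zero (it is decremented under the header lock), the waker broadcasts busy_cond under busy_mutex, so the loop cannot miss the wake-up even though it reads busy_count without really taking the header lock. A simplified sketch of that counterpart with a made-up handle structure and plain pthread calls (the real function is _starpu_data_check_not_busy; this is not its actual code):

#include <pthread.h>

/* Hypothetical handle mirroring the fields used above. */
struct handle
{
	pthread_mutex_t busy_mutex;
	pthread_cond_t busy_cond;
	unsigned busy_count;   /* decremented under the header lock by the caller */
};

/* Assumed to be called right after busy_count was decremented: wake any
 * waiter blocked in the unregister loop above. */
static void check_not_busy(struct handle *h)
{
	if (!h->busy_count)
	{
		pthread_mutex_lock(&h->busy_mutex);
		pthread_cond_broadcast(&h->busy_cond);
		pthread_mutex_unlock(&h->busy_mutex);
	}
}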

+ 0 - 42
tools/valgrind/starpu.suppr

@@ -1,25 +1,4 @@
 {
-   config.running is not racy from starpu_shutdown
-   Helgrind:Race
-   fun:starpu_shutdown
-   ...
-}
-
-{
-   config.running is not racy from _starpu_machine_is_running
-   Helgrind:Race
-   fun:_starpu_machine_is_running
-   ...
-}
-
-{
-   counterpart of the above
-   Helgrind:Race
-   fun:starpu_drivers_request_termination
-   ...
-}
-
-{
    don't care about cache hit stats
    Helgrind:Race
    fun:_starpu_msi_cache_hit
@@ -55,27 +34,6 @@
 }
 
 {
-   We do not care about the race on the entry->mean variable, we only want a good-enough estimation.
-   Helgrind:Race
-   fun:_starpu_history_based_job_expected_perf
-   ...
-}
-
-{
-   We do not care about the race on the entry->mean variable, we only want a good-enough estimation.
-   Helgrind:Race
-   fun:_starpu_non_linear_regression_based_job_expected_perf
-   ...
-}
-
-{
-   This is the counterpart of the suppressions above
-   Helgrind:Race
-   fun:_starpu_update_perfmodel_history
-   ...
-}
-
-{
    We do not care about races on profiling statistics
    Helgrind:Race
    fun:starpu_profiling_status_get