Browse Source

tell helgrind about some races we are fine with

Samuel Thibault 12 years ago
parent
commit
d95e1c8398

+ 1 - 0
configure.ac

@@ -221,6 +221,7 @@ AM_CONDITIONAL([STARPU_LONG_CHECK], [test "x$enable_long_check" = "xyes"])
 AC_CHECK_HEADERS([malloc.h], [AC_DEFINE([STARPU_HAVE_MALLOC_H], [1], [Define to 1 if you have the <malloc.h> header file.])])
 
 AC_CHECK_HEADERS([valgrind/valgrind.h], [AC_DEFINE([STARPU_HAVE_VALGRIND_H], [1], [Define to 1 if you have the <valgrind/valgrind.h> header file.])])
+AC_CHECK_HEADERS([valgrind/helgrind.h], [AC_DEFINE([STARPU_HAVE_HELGRIND_H], [1], [Define to 1 if you have the <valgrind/helgrind.h> header file.])])
 
 # This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP
 STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP
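When both checks succeed, the corresponding defines end up in the generated configuration header (included elsewhere in the tree as <common/config.h>). An illustrative, not verbatim, excerpt of what that header would then contain:

/* Illustrative excerpt of the generated config header when both Valgrind
 * headers were found; the exact contents depend on the autoconf setup. */
#define STARPU_HAVE_VALGRIND_H 1
#define STARPU_HAVE_HELGRIND_H 1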

+ 28 - 0
src/common/utils.h

@@ -31,6 +31,34 @@
 #include <msg/msg.h>
 #endif
 
+#ifdef STARPU_HAVE_HELGRIND_H
+#include <valgrind/helgrind.h>
+#else
+#define VALGRIND_HG_MUTEX_LOCK_PRE(mutex, istrylock) ((void)0)
+#define VALGRIND_HG_MUTEX_LOCK_POST(mutex) ((void)0)
+#define VALGRIND_HG_MUTEX_UNLOCK_PRE(mutex) ((void)0)
+#define VALGRIND_HG_MUTEX_UNLOCK_POST(mutex) ((void)0)
+#define DO_CREQ_v_WW(_creqF, _ty1F, _arg1F, _ty2F, _arg2F) ((void)0)
+#define DO_CREQ_v_W(_creqF, _ty1F, _arg1F) ((void)0)
+#define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0)
+#define ANNOTATE_HAPPENS_AFTER(obj) ((void)0)
+#define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) ((void)0)
+#define ANNOTATE_RWLOCK_RELEASED(lock, is_w) ((void)0)
+#endif
+
+#define _STARPU_VALGRIND_HG_SPIN_LOCK_PRE(lock) \
+	DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_SPIN_LOCK_PRE, \
+			struct _starpu_spinlock *, lock, long, 0)
+#define _STARPU_VALGRIND_HG_SPIN_LOCK_POST(lock) \
+	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_LOCK_POST, \
+			struct _starpu_spinlock *, lock)
+#define _STARPU_VALGRIND_HG_SPIN_UNLOCK_PRE(lock) \
+	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_PRE, \
+			struct _starpu_spinlock *, lock)
+#define _STARPU_VALGRIND_HG_SPIN_UNLOCK_POST(lock) \
+	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_POST, \
+			struct _starpu_spinlock *, lock)
+
 #ifdef STARPU_VERBOSE
 #  define _STARPU_DEBUG(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%s] " fmt ,__func__ ,##args); fflush(stderr); }} while(0)
 #else
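With these fallbacks, call sites can use the annotations unconditionally: when <valgrind/helgrind.h> is missing, everything expands to ((void)0). As a usage illustration, here is a minimal sketch (the structure and function are made up, not part of this commit; <common/starpu_spinlock.h> is assumed to declare struct _starpu_spinlock) of reading a spinlock-protected counter without taking the lock while keeping Helgrind quiet. The same pattern is applied to handle->busy_count in data_interface.c below.

#include <common/utils.h>           /* the _STARPU_VALGRIND_HG_SPIN_* macros above */
#include <common/starpu_spinlock.h> /* assumed home of struct _starpu_spinlock */

/* Hypothetical example: the read is knowingly racy; the PRE/POST pairs only
 * tell Helgrind to treat it as if the spinlock were held. */
struct foo
{
	struct _starpu_spinlock lock;
	unsigned count;
};

static unsigned peek_count(struct foo *f)
{
	unsigned c;
	_STARPU_VALGRIND_HG_SPIN_LOCK_PRE(&f->lock);
	_STARPU_VALGRIND_HG_SPIN_LOCK_POST(&f->lock);
	c = f->count;
	_STARPU_VALGRIND_HG_SPIN_UNLOCK_PRE(&f->lock);
	_STARPU_VALGRIND_HG_SPIN_UNLOCK_POST(&f->lock);
	return c;
}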

+ 11 - 0
src/core/perfmodel/perfmodel_history.c

@@ -1065,6 +1065,10 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 		HASH_FIND_UINT32_T(history, &key, entry);
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
+		/* We do not care about racy accesses to the mean; we only want a
+		 * good-enough estimation, so simulate taking the rdlock */
+		ANNOTATE_RWLOCK_ACQUIRED(&model->model_rwlock, 0);
+
 		if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
 			exp = entry->history_entry->mean;
 		else if (!model->benchmarking)
@@ -1076,6 +1080,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 			_starpu_set_calibrate_flag(1);
 			model->benchmarking = 1;
 		}
+		ANNOTATE_RWLOCK_RELEASED(&model->model_rwlock, 0);
 	}
 
 	return exp;
@@ -1098,6 +1103,10 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 	entry = (elt == NULL) ? NULL : elt->history_entry;
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
+	/* We do not care about racy accesses to the mean; we only want a
+	 * good-enough estimation, so simulate taking the rdlock */
+	ANNOTATE_RWLOCK_ACQUIRED(&model->model_rwlock, 0);
+
 	exp = entry?entry->mean:NAN;
 
 	if (entry && entry->nsample < _STARPU_CALIBRATION_MINIMUM)
@@ -1116,6 +1125,8 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 		model->benchmarking = 1;
 	}
 
+	ANNOTATE_RWLOCK_RELEASED(&model->model_rwlock, 0);
+
 	return exp;
 }
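The annotation pattern used here, pulled out for illustration: the writer really takes the rwlock, while the deliberately unlocked read is bracketed with ANNOTATE_RWLOCK_ACQUIRED/RELEASED with is_w = 0, i.e. a pretended read lock, so Helgrind pairs it with the real writers instead of reporting a race. A minimal standalone sketch, using a plain pthread rwlock and a made-up stats structure (not StarPU code):

#include <pthread.h>
#include <valgrind/helgrind.h>   /* ANNOTATE_RWLOCK_ACQUIRED/RELEASED */

/* Hypothetical structure; 'mean' is normally updated under the write lock. */
struct stats
{
	pthread_rwlock_t lock;
	double mean;
};

/* Writer: really takes the lock, so Helgrind sees the regular protocol. */
static void stats_update(struct stats *s, double sample)
{
	pthread_rwlock_wrlock(&s->lock);
	s->mean = (s->mean + sample) / 2;   /* crude running mean, for the sketch */
	pthread_rwlock_unlock(&s->lock);
}

/* Reader: the read is knowingly racy (an estimation is good enough), so we
 * only pretend to Helgrind that a read lock was taken around it. */
static double stats_peek(struct stats *s)
{
	double m;
	ANNOTATE_RWLOCK_ACQUIRED(&s->lock, 0 /* rd */);
	m = s->mean;
	ANNOTATE_RWLOCK_RELEASED(&s->lock, 0 /* rd */);
	return m;
}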
 

+ 2 - 0
src/core/workers.c

@@ -895,6 +895,7 @@ unsigned _starpu_machine_is_running(void)
 {
 	/* running is just protected by a memory barrier */
 	STARPU_RMB();
+	ANNOTATE_HAPPENS_AFTER(&config.running);
 	return config.running;
 }
 
@@ -925,6 +926,7 @@ static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig)
 	/* set the flag which will tell workers to stop */
 	pconfig->running = 0;
 	/* running is just protected by a memory barrier */
+	ANNOTATE_HAPPENS_BEFORE(&config.running);
 	STARPU_WMB();
 	starpu_wake_all_blocked_workers();
 }
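The flag-plus-barrier pattern in isolation: the writer flips the flag and declares a happens-before edge before its write barrier, the reader issues the read barrier and declares the matching happens-after edge, so Helgrind accepts the unlocked accesses (this is what lets the starpu.suppr entries below be dropped). A minimal sketch with made-up function names, assuming StarPU's STARPU_WMB()/STARPU_RMB() barrier macros are in scope:

#include <valgrind/helgrind.h>   /* ANNOTATE_HAPPENS_BEFORE/AFTER */

static unsigned running = 1;     /* only protected by memory barriers */

/* Writer side (shutdown path): flip the flag, then publish it. */
static void request_termination(void)
{
	running = 0;
	ANNOTATE_HAPPENS_BEFORE(&running);
	STARPU_WMB();
}

/* Reader side (worker loops): observe the flag. */
static unsigned machine_is_running(void)
{
	STARPU_RMB();
	ANNOTATE_HAPPENS_AFTER(&running);
	return running;
}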

+ 11 - 1
src/datawizard/data_request.c

@@ -17,6 +17,7 @@
 
 #include <starpu.h>
 #include <common/config.h>
+#include <common/utils.h>
 #include <datawizard/datawizard.h>
 
 /* requests that have not been treated at all */
@@ -391,9 +392,18 @@ void _starpu_handle_node_data_requests(unsigned src_node, unsigned may_alloc)
 	struct _starpu_data_request *r;
 	struct _starpu_data_request_list *new_data_requests;
 
-	/* Note: this is not racy: list_empty just reads a pointer */
+	/* Note: here we tell valgrind that list_empty (reading a pointer) is
+	 * as safe as if we had the lock held */
+	VALGRIND_HG_MUTEX_LOCK_PRE(&data_requests_list_mutex[src_node], 0);
+	VALGRIND_HG_MUTEX_LOCK_POST(&data_requests_list_mutex[src_node]);
 	if (_starpu_data_request_list_empty(data_requests[src_node]))
+	{
+		VALGRIND_HG_MUTEX_UNLOCK_PRE(&data_requests_list_mutex[src_node]);
+		VALGRIND_HG_MUTEX_UNLOCK_POST(&data_requests_list_mutex[src_node]);
 		return;
+	}
+	VALGRIND_HG_MUTEX_UNLOCK_PRE(&data_requests_list_mutex[src_node]);
+	VALGRIND_HG_MUTEX_UNLOCK_POST(&data_requests_list_mutex[src_node]);
 
 	/* take all the entries from the request list */
         _STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[src_node]);
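Since the pretended LOCK/UNLOCK pre/post pairs are fairly verbose at each call site, they could be folded into a pair of convenience macros; a hypothetical sketch (these wrappers are not part of this commit) that would reduce the check above to pretend-lock, list_empty, pretend-unlock:

/* Hypothetical convenience wrappers around the Helgrind mutex client
 * requests: pretend to take and release 'mutex' around a read that is
 * knowingly performed without actually holding the lock. */
#define _STARPU_HG_PRETEND_LOCK(mutex) do { \
	VALGRIND_HG_MUTEX_LOCK_PRE(mutex, 0); \
	VALGRIND_HG_MUTEX_LOCK_POST(mutex); \
} while (0)

#define _STARPU_HG_PRETEND_UNLOCK(mutex) do { \
	VALGRIND_HG_MUTEX_UNLOCK_PRE(mutex); \
	VALGRIND_HG_MUTEX_UNLOCK_POST(mutex); \
} while (0)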

+ 14 - 1
src/datawizard/interfaces/data_interface.c

@@ -548,8 +548,21 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
 	/* Wait for all requests to finish (notably WT requests) */
 	_STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
-	while (handle->busy_count)
+	while (1) {
+		int busy;
+		/* Note: here we tell valgrind that reading busy_count is as
+		 * safe as if we had the lock held */
+		_STARPU_VALGRIND_HG_SPIN_LOCK_PRE(&handle->header_lock);
+		_STARPU_VALGRIND_HG_SPIN_LOCK_POST(&handle->header_lock);
+		busy = handle->busy_count;
+		_STARPU_VALGRIND_HG_SPIN_UNLOCK_PRE(&handle->header_lock);
+		_STARPU_VALGRIND_HG_SPIN_UNLOCK_POST(&handle->header_lock);
+		if (!busy)
+			break;
+		/* This is woken by _starpu_data_check_not_busy, always called
+		 * after decrementing busy_count */
 		_STARPU_PTHREAD_COND_WAIT(&handle->busy_cond, &handle->busy_mutex);
+	}
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
 
 	/* Wait for finished requests to release the handle */
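For context, the comment in the loop above points at the other side of the handshake: once busy_count reaches zero (it is decremented under the header lock), the waker broadcasts busy_cond under busy_mutex, so the loop cannot miss the wake-up even though it reads busy_count without really taking the header lock. A simplified sketch of that counterpart with a made-up handle structure and plain pthread calls (the real function is _starpu_data_check_not_busy; this is not its actual code):

#include <pthread.h>

/* Hypothetical handle mirroring the fields used above. */
struct handle
{
	pthread_mutex_t busy_mutex;
	pthread_cond_t busy_cond;
	unsigned busy_count;   /* decremented under the header lock by the caller */
};

/* Assumed to be called right after busy_count was decremented: wake any
 * waiter blocked in the unregister loop above. */
static void check_not_busy(struct handle *h)
{
	if (!h->busy_count)
	{
		pthread_mutex_lock(&h->busy_mutex);
		pthread_cond_broadcast(&h->busy_cond);
		pthread_mutex_unlock(&h->busy_mutex);
	}
}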

+ 0 - 42
tools/valgrind/starpu.suppr

@@ -1,25 +1,4 @@
 {
-   config.running is not racy from starpu_shutdown
-   Helgrind:Race
-   fun:starpu_shutdown
-   ...
-}
-
-{
-   config.running is not racy from _starpu_machine_is_running
-   Helgrind:Race
-   fun:_starpu_machine_is_running
-   ...
-}
-
-{
-   counterpart of the above
-   Helgrind:Race
-   fun:starpu_drivers_request_termination
-   ...
-}
-
-{
    don't care about cache hit stats
    Helgrind:Race
    fun:_starpu_msi_cache_hit
@@ -55,27 +34,6 @@
 }
 
 {
-   We do not care about the race on the entry->mean variable, we only want a good-enough estimation.
-   Helgrind:Race
-   fun:_starpu_history_based_job_expected_perf
-   ...
-}
-
-{
-   We do not care about the race on the entry->mean variable, we only want a good-enough estimation.
-   Helgrind:Race
-   fun:_starpu_non_linear_regression_based_job_expected_perf
-   ...
-}
-
-{
-   This is the counterpart of the suppressions above
-   Helgrind:Race
-   fun:_starpu_update_perfmodel_history
-   ...
-}
-
-{
    We do not care about races on profiling statistics
    Helgrind:Race
    fun:starpu_profiling_status_get