Browse Source

Spinlocks now block after a hundred tries. This avoids typical 10ms pauses
when the application thread tries to submit tasks.

Samuel Thibault 11 years ago
parent
commit
039524c7cf
6 changed files with 118 additions and 9 deletions
  1. 2 0
      ChangeLog
  2. 2 0
      include/starpu_config.h.in
  3. 3 1
      include/starpu_thread.h
  4. 6 0
      include/starpu_util.h
  5. 13 0
      m4/acinclude.m4
  6. 92 8
      src/common/thread.c

+ 2 - 0
ChangeLog

@@ -71,6 +71,8 @@ Small features:
   * Add codelet size, footprint and tag id in the paje trace.
   * Add STARPU_TAG_ONLY, to specify a tag for traces without making StarPU
     manage the tag.
+  * Spinlocks now block after a hundred tries. This avoids typical 10ms pauses
+    when the application thread tries to submit tasks.
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define

+ 2 - 0
include/starpu_config.h.in

@@ -55,6 +55,7 @@
 #undef STARPU_HAVE_MALLOC_H
 
 #undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP
+#undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP
 #undef STARPU_HAVE_SYNC_FETCH_AND_ADD
 #undef STARPU_HAVE_SYNC_FETCH_AND_OR
 #undef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET
@@ -87,6 +88,7 @@
 #undef STARPU_HAVE_LIBNUMA
 
 #undef STARPU_HAVE_WINDOWS
+#undef STARPU_LINUX_SYS
 #undef STARPU_HAVE_UNSETENV
 
 #ifdef _MSC_VER

+ 3 - 1
include/starpu_thread.h

@@ -236,12 +236,14 @@ int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier);
  * Encapsulation of the pthread_spin_* functions.
  */
 
-#if defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)
+#if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)
 
 typedef struct
 {
 #ifdef STARPU_SIMGRID
 	int taken;
+#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
+	unsigned taken STARPU_ATTRIBUTE_ALIGNED(16);
 #else /* we only have a trivial implementation yet ! */
 	uint32_t taken STARPU_ATTRIBUTE_ALIGNED(16);
 #endif

+ 6 - 0
include/starpu_util.h

@@ -176,6 +176,12 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 #define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value) (starpu_cmpxchg((ptr), (old), (value)) == (old))
 #endif
 
+#ifdef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP
+#define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value)  (__sync_val_compare_and_swap ((ptr), (old), (value)))
+#elif defined(STARPU_HAVE_XCHG)
+#define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (starpu_cmpxchg((ptr), (old), (value)))
+#endif
+
 #ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET
 #define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set ((ptr), (value)))
 #define STARPU_RELEASE(ptr) (__sync_lock_release ((ptr)))

+ 13 - 0
m4/acinclude.m4

@@ -42,6 +42,19 @@ AC_DEFUN([STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP], [
 	      [Define to 1 if the target supports __sync_bool_compare_and_swap])
   fi])
 
+dnl Check whether the target supports __sync_val_compare_and_swap.
+AC_DEFUN([STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP], [
+  AC_CACHE_CHECK([whether the target supports __sync_val_compare_and_swap],
+		 ac_cv_have_sync_val_compare_and_swap, [
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
+			[bar = __sync_val_compare_and_swap(&foo, 0, 1);])],
+			[ac_cv_have_sync_val_compare_and_swap=yes],
+			[ac_cv_have_sync_val_compare_and_swap=no])])
+  if test $ac_cv_have_sync_val_compare_and_swap = yes; then
+    AC_DEFINE(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP, 1,
+	      [Define to 1 if the target supports __sync_val_compare_and_swap])
+  fi])
+
 dnl Check whether the target supports __sync_fetch_and_add.
 AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_ADD], [
   AC_CACHE_CHECK([whether the target supports __sync_fetch_and_add],

+ 92 - 8
src/common/thread.c

@@ -23,6 +23,21 @@
 #include <xbt/synchro_core.h>
 #endif
 
+#if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
+#include <linux/futex.h>
+#include <sys/syscall.h>
+
+/* Private futex operations are fairly recent: cope with older headers here, and with older kernels through the ENOSYS fallback at run time.  */
+#ifdef FUTEX_WAIT_PRIVATE
+static int _starpu_futex_wait = FUTEX_WAIT_PRIVATE;
+static int _starpu_futex_wake = FUTEX_WAKE_PRIVATE;
+#else
+static int _starpu_futex_wait = FUTEX_WAIT;
+static int _starpu_futex_wake = FUTEX_WAKE;
+#endif
+
+#endif
+
 #ifdef STARPU_SIMGRID
 
 extern int _starpu_simgrid_thread_start(int argc, char *argv[]);
@@ -490,15 +505,15 @@ int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier)
 }
 #endif /* STARPU_SIMGRID, _MSC_VER, STARPU_HAVE_PTHREAD_BARRIER */
 
-#if defined(STARPU_SIMGRID) || !defined(HAVE_PTHREAD_SPIN_LOCK)
+#if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(HAVE_PTHREAD_SPIN_LOCK)
 
-int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared)
+int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared STARPU_ATTRIBUTE_UNUSED)
 {
 	lock->taken = 0;
 	return 0;
 }
 
-int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock)
+int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED)
 {
 	/* we don't do anything */
 	return 0;
@@ -519,7 +534,53 @@ int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock)
 		MSG_process_sleep(0.000001);
 		STARPU_UYIELD();
 	}
-#else
+#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
+	if (STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0)
+		/* Got it on first try! */
+		return 0;
+
+	/* Busy, spin a bit.  */
+	unsigned i;
+	for (i = 0; i < 128; i++)
+	{
+		/* Pause a bit before retrying */
+		STARPU_UYIELD();
+		/* And synchronize with other threads */
+		STARPU_SYNCHRONIZE();
+		if (!lock->taken)
+			/* Holder released it, try again */
+			if (STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0)
+				/* Got it! */
+				return 0;
+	}
+
+	/* We have spent enough time with spinning, let's block */
+	while (1)
+	{
+		/* Tell releaser to wake us */
+		unsigned prev = starpu_xchg(&lock->taken, 2);
+		if (prev == 0)
+			/* Ah, it just got released and we actually acquired
+			 * it!
+			 * Note: the sad thing is that we have just written 2,
+			 * so will spuriously try to wake a thread on unlock,
+			 * but we can not avoid it since we do not know whether
+			 * there are other threads sleeping or not.
+			 */
+			return 0;
+
+		/* Now start sleeping (unless it was released in between).
+		 * We are sure to get woken because either
+		 * - some thread has not released the lock yet, and lock->taken
+		 *   is 2, so it will wake us, or
+		 * - some other thread started blocking, and will set
+		 *   lock->taken back to 2.
+		 */
+		if (syscall(SYS_futex, &lock->taken, _starpu_futex_wait, 2, NULL, NULL, 0))
+			if (errno == ENOSYS)
+				_starpu_futex_wait = FUTEX_WAIT;
+	}
+#else /* !SIMGRID && !LINUX */
 	uint32_t prev;
 	do
 	{
@@ -539,7 +600,11 @@ int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock)
 		return EBUSY;
 	lock->taken = 1;
 	return 0;
-#else
+#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
+	unsigned prev;
+	prev = STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1);
+	return (prev == 0)?0:EBUSY;
+#else /* !SIMGRID && !LINUX */
 	uint32_t prev;
 	prev = STARPU_TEST_AND_SET(&lock->taken, 1);
 	return (prev == 0)?0:EBUSY;
@@ -550,11 +615,27 @@ int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock)
 {
 #ifdef STARPU_SIMGRID
 	lock->taken = 0;
-	return 0;
-#else
+#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
+	STARPU_ASSERT(lock->taken != 0);
+	unsigned next = STARPU_ATOMIC_ADD(&lock->taken, -1);
+	if (next == 0)
+		/* Nobody to wake, we are done */
+		return 0;
+
+	/*
+	 * Somebody to wake. Clear 'taken' and wake him.
+	 * Note that he may not be sleeping yet, but if he is not, he won't
+	 * go to sleep, since the value of 'taken' will have changed.
+	 */
+	lock->taken = 0;
+	STARPU_SYNCHRONIZE();
+	if (syscall(SYS_futex, &lock->taken, _starpu_futex_wake, 1, NULL, NULL, 0))
+		if (errno == ENOSYS)
+			_starpu_futex_wake = FUTEX_WAKE;
+#else /* !SIMGRID && !LINUX */
 	STARPU_RELEASE(&lock->taken);
-	return 0;
 #endif
+	return 0;
 }
 
 #endif /* defined(STARPU_SIMGRID) || !defined(HAVE_PTHREAD_SPIN_LOCK) */
@@ -564,6 +645,9 @@ int _starpu_pthread_spin_checklocked(starpu_pthread_spinlock_t *lock)
 #ifdef STARPU_SIMGRID
 	STARPU_ASSERT(lock->taken);
 	return !lock->taken;
+#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
+	STARPU_ASSERT(lock->taken == 1 || lock->taken == 2);
+	return lock->taken == 0;
 #elif defined(HAVE_PTHREAD_SPIN_LOCK)
 	int ret = pthread_spin_trylock((pthread_spinlock_t *)lock);
 	STARPU_ASSERT(ret != 0);