瀏覽代碼

Merge branch 'master' into julia-autotools

Nathalie Furmento 5 年之前
父節點
當前提交
02ea12e013
共有 42 個文件被更改,包括 568 次插入595 次删除
  1. 37 32
      configure.ac
  2. 246 15
      include/starpu_bitmap.h
  3. 3 3
      include/starpu_sched_component.h
  4. 1 1
      mpi/Makefile.am
  5. 8 7
      mpi/src/mpi/starpu_mpi_mpi.c
  6. 4 0
      mpi/src/starpu_mpi_init.c
  7. 8 9
      mpi/src/starpu_mpi_task_insert.c
  8. 10 2
      mpi/tests/Makefile.am
  9. 4 0
      mpi/tests/bench_helper.h
  10. 35 3
      mpi/tests/sendrecv_bench.c
  11. 8 0
      mpi/tests/sendrecv_parallel_tasks_bench.c
  12. 2 2
      tools/replay-mpi/Makefile.am
  13. 0 1
      src/Makefile.am
  14. 0 265
      src/common/bitmap.c
  15. 1 1
      src/common/fxt.h
  16. 1 1
      src/core/topology.c
  17. 0 2
      src/debug/traces/starpu_fxt.c
  18. 1 1
      src/sched_policies/component_best_implementation.c
  19. 6 7
      src/sched_policies/component_composed.c
  20. 6 6
      src/sched_policies/component_eager.c
  21. 4 4
      src/sched_policies/component_eager_calibration.c
  22. 4 4
      src/sched_policies/component_eager_prio.c
  23. 12 13
      src/sched_policies/component_fifo.c
  24. 6 6
      src/sched_policies/component_heteroprio.c
  25. 6 6
      src/sched_policies/component_prio.c
  26. 2 2
      src/sched_policies/component_random.c
  27. 23 26
      src/sched_policies/component_sched.c
  28. 23 28
      src/sched_policies/component_work_stealing.c
  29. 9 9
      src/sched_policies/component_worker.c
  30. 23 39
      src/sched_policies/deque_modeling_policy_data_aware.c
  31. 15 24
      src/sched_policies/eager_central_policy.c
  32. 7 8
      src/sched_policies/eager_central_priority_policy.c
  33. 12 4
      src/sched_policies/fifo_queues.c
  34. 1 0
      src/sched_policies/fifo_queues.h
  35. 15 22
      src/sched_policies/graph_test_policy.c
  36. 6 8
      src/sched_policies/heteroprio.c
  37. 4 4
      src/sched_policies/modular_gemm.c
  38. 11 24
      src/sched_policies/parallel_eager.c
  39. 0 4
      tools/Makefile.am
  40. 1 1
      tools/dev/valgrind/valgrind.sh
  41. 1 1
      tools/starpu_replay.c
  42. 2 0
      tools/starpu_replay_sched.c

+ 37 - 32
configure.ac

@@ -91,11 +91,21 @@ AC_CHECK_PROGS(PROG_DATE,gdate date)
 dnl locate pkg-config
 PKG_PROG_PKG_CONFIG
 
+AC_ARG_ENABLE(simgrid, [AS_HELP_STRING([--enable-simgrid],
+			[Enable simulating execution in simgrid])],
+			enable_simgrid=$enableval, enable_simgrid=no)
+
 if test x$enable_perf_debug = xyes; then
     enable_shared=no
 fi
+
 default_enable_mpi_check=maybe
-default_enable_mpi=maybe
+
+if test x$enable_simgrid = xyes ; then
+	default_enable_mpi=no
+else
+	default_enable_mpi=maybe
+fi
 
 ###############################################################################
 #                                                                             #
@@ -138,9 +148,6 @@ AC_ARG_WITH(simgrid-lib-dir,
 		enable_simgrid=yes
 	], [simgrid_lib_dir=no])
 
-AC_ARG_ENABLE(simgrid, [AS_HELP_STRING([--enable-simgrid],
-			[Enable simulating execution in simgrid])],
-			enable_simgrid=$enableval, enable_simgrid=no)
 if test x$enable_simgrid = xyes ; then
    	if test -n "$SIMGRID_CFLAGS" ; then
 	   	CFLAGS="$SIMGRID_CFLAGS $CFLAGS"
@@ -375,6 +382,30 @@ AC_MSG_CHECKING(whether mpicxx is available)
 AC_MSG_RESULT($mpicxx_path)
 AC_SUBST(MPICXX, $mpicxx_path)
 
+# Check if mpiexec is available
+if test x$enable_simgrid = xyes ; then
+    DEFAULT_MPIEXEC=smpirun
+    AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=<name of smpirun or path to smpirun>]], [Name or path of the smpirun helper])], [DEFAULT_MPIEXEC=$withval])
+else
+    DEFAULT_MPIEXEC=mpiexec
+    AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec=<name of mpiexec or path to mpiexec>], [Name or path of mpiexec])], [DEFAULT_MPIEXEC=$withval])
+fi
+
+case $DEFAULT_MPIEXEC in
+    /*) mpiexec_path="$DEFAULT_MPIEXEC" ;;
+    *)  AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$MPIPATH])
+esac
+AC_MSG_CHECKING(whether mpiexec is available)
+AC_MSG_RESULT($mpiexec_path)
+
+# We test if MPIEXEC exists
+if test ! -x $mpiexec_path; then
+    AC_MSG_RESULT(The mpiexec script '$mpiexec_path' is not valid)
+    default_enable_mpi_check=no
+    mpiexec_path=""
+fi
+AC_SUBST(MPIEXEC,$mpiexec_path)
+
 ###############################################################################
 #                                                                             #
 #                                    MPI                                      #
@@ -507,32 +538,6 @@ if test x$enable_mpi = xno ; then
     running_mpi_check=no
 fi
 
-if test x$enable_mpi = xyes -a x$running_mpi_check = xyes ; then
-    # Check if mpiexec is available
-    if test x$enable_simgrid = xyes ; then
-	DEFAULT_MPIEXEC=smpirun
-        AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=<name of smpirun or path to smpirun>]], [Name or path of the smpirun helper])], [DEFAULT_MPIEXEC=$withval])
-    else
-	DEFAULT_MPIEXEC=mpiexec
-	AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec=<name of mpiexec or path to mpiexec>], [Name or path of mpiexec])], [DEFAULT_MPIEXEC=$withval])
-    fi
-
-    case $DEFAULT_MPIEXEC in
-	/*) mpiexec_path="$DEFAULT_MPIEXEC" ;;
-	*)  AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$MPIPATH])
-    esac
-    AC_MSG_CHECKING(whether mpiexec is available)
-    AC_MSG_RESULT($mpiexec_path)
-
-    # We test if MPIEXEC exists
-    if test ! -x $mpiexec_path; then
-        AC_MSG_RESULT(The mpiexec script '$mpiexec_path' is not valid)
-        running_mpi_check=no
-        mpiexec_path=""
-    fi
-    AC_SUBST(MPIEXEC,$mpiexec_path)
-fi
-
 AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes)
 AC_MSG_CHECKING(whether MPI tests should be run)
 AC_MSG_RESULT($running_mpi_check)
@@ -555,7 +560,7 @@ fi
 if test x$enable_mpi = xyes ; then
     if test x$enable_simgrid = xyes ; then
         if test x$enable_shared = xyes ; then
-	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, if you need the MPI support, theb use --disable-shared to fix this, else disable MPI with --disable-mpi])
+	    AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi])
         else
 	    CFLAGS="$CFLAGS -fPIC"
 	    CXXFLAGS="$CXXFLAGS -fPIC"
@@ -3537,7 +3542,6 @@ AC_OUTPUT([
 	Makefile
 	src/Makefile
 	tools/Makefile
-	tools/replay-mpi/Makefile
 	tools/starpu_env
 	tools/starpu_codelet_profile
 	tools/starpu_codelet_histo_profile
@@ -3588,6 +3592,7 @@ AC_OUTPUT([
 	mpi/src/Makefile
 	mpi/tests/Makefile
 	mpi/examples/Makefile
+	mpi/tools/Makefile
 	sc_hypervisor/Makefile
 	sc_hypervisor/src/Makefile
 	sc_hypervisor/examples/Makefile

+ 246 - 15
include/starpu_bitmap.h

@@ -18,6 +18,12 @@
 #ifndef __STARPU_BITMAP_H__
 #define __STARPU_BITMAP_H__
 
+#include <starpu_util.h>
+#include <starpu_config.h>
+
+#include <string.h>
+#include <stdlib.h>
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -28,43 +34,268 @@ extern "C"
    @brief This is the interface for the bitmap utilities provided by StarPU.
    @{
  */
+#ifndef _STARPU_LONG_BIT
+/* Number of bits in one unsigned long word of the bitmap */
+#define _STARPU_LONG_BIT ((int)(sizeof(unsigned long) * 8))
+#endif
+
+/* Number of unsigned long words needed to hold STARPU_NMAXWORKERS bits.
+ * The whole expansion is parenthesized so that the macro can safely be used
+ * inside a larger expression such as _STARPU_BITMAP_SIZE * sizeof(unsigned long). */
+#define _STARPU_BITMAP_SIZE (((STARPU_NMAXWORKERS - 1)/_STARPU_LONG_BIT) + 1)
 
 /** create an empty starpu_bitmap */
-struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
+static inline struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
+/** zero a starpu_bitmap */
+static inline void starpu_bitmap_init(struct starpu_bitmap *b);
 /** free \p b */
-void starpu_bitmap_destroy(struct starpu_bitmap *b);
+static inline void starpu_bitmap_destroy(struct starpu_bitmap *b);
 
 /** set bit \p e in \p b */
-void starpu_bitmap_set(struct starpu_bitmap *b, int e);
+static inline void starpu_bitmap_set(struct starpu_bitmap *b, int e);
 /** unset bit \p e in \p b */
-void starpu_bitmap_unset(struct starpu_bitmap *b, int e);
+static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e);
 /** unset all bits in \p b */
-void starpu_bitmap_unset_all(struct starpu_bitmap *b);
+static inline void starpu_bitmap_unset_all(struct starpu_bitmap *b);
 
 /** return true iff bit \p e is set in \p b */
-int starpu_bitmap_get(struct starpu_bitmap *b, int e);
+static inline int starpu_bitmap_get(struct starpu_bitmap *b, int e);
 /** Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c; */
-void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c);
+static inline void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c);
 /** Basically compute \p a |= \p b */
-void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b);
+static inline void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b);
 /** return 1 iff \p e is set in \p b1 AND \p e is set in \p b2 */
-int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e);
+static inline int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e);
 /** return the number of set bits in \p b */
-int starpu_bitmap_cardinal(struct starpu_bitmap *b);
+static inline int starpu_bitmap_cardinal(struct starpu_bitmap *b);
 
 /** return the index of the first set bit of \p b, -1 if none */
-int starpu_bitmap_first(struct starpu_bitmap *b);
+static inline int starpu_bitmap_first(struct starpu_bitmap *b);
 /** return the position of the last set bit of \p b, -1 if none */
-int starpu_bitmap_last(struct starpu_bitmap *b);
+static inline int starpu_bitmap_last(struct starpu_bitmap *b);
 /** return the position of set bit right after \p e in \p b, -1 if none */
-int starpu_bitmap_next(struct starpu_bitmap *b, int e);
+static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e);
 /** return 1 iff at least one bit is set in \p b at a position strictly after \p e, 0 otherwise */
-int starpu_bitmap_has_next(struct starpu_bitmap *b, int e);
+static inline int starpu_bitmap_has_next(struct starpu_bitmap *b, int e);
 
 /** @} */
 
-#ifdef __cplusplus
+/* Fixed-size bitmap made of _STARPU_BITMAP_SIZE unsigned long words */
+struct starpu_bitmap
+{
+	/* bit e lives in bits[e / _STARPU_LONG_BIT], at position e % _STARPU_LONG_BIT */
+	unsigned long bits[_STARPU_BITMAP_SIZE];
+	/* number of set bits, maintained by starpu_bitmap_set(), starpu_bitmap_unset(),
+	 * starpu_bitmap_unset_and() and starpu_bitmap_or() */
+	int cardinal;
+};
+
+#ifdef _STARPU_DEBUG_BITMAP
+/* Debug-only consistency check: walks the set bits of \p b with
+ * starpu_bitmap_first()/starpu_bitmap_next() and returns 1 iff exactly
+ * b->cardinal bits are found, 0 otherwise. */
+static int _starpu_check_bitmap(struct starpu_bitmap *b)
+{
+	int card = b->cardinal;
+	int i = starpu_bitmap_first(b);
+	int j;
+	for(j = 0; j < card; j++)
+	{
+		/* fewer set bits than the recorded cardinal */
+		if(i == -1)
+			return 0;
+		int tmp = starpu_bitmap_next(b,i);
+		/* the iteration must make progress */
+		if(tmp == i)
+			return 0;
+		i = tmp;
+	}
+	/* more set bits than the recorded cardinal */
+	if(i != -1)
+		return 0;
+	return 1;
 }
+#else
+/* Without _STARPU_DEBUG_BITMAP the check always succeeds */
+#define _starpu_check_bitmap(b) 1
 #endif
 
+/* Return the number of bits set in \p e. Uses __builtin_popcountl() when the
+ * GCC version macros advertise at least 3.4, and a bit-by-bit loop otherwise. */
+static int _starpu_count_bit_static(unsigned long e)
+{
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
+	return __builtin_popcountl(e);
+#else
+	int c = 0;
+	/* add the lowest bit, then shift it out, until no set bit remains */
+	while(e)
+	{
+		c += e&1;
+		e >>= 1;
+	}
+	return c;
 #endif
+}
+
+/* Allocate a zero-filled bitmap (no bit set, cardinal 0).
+ * Returns the calloc() result, i.e. NULL when the allocation fails; the
+ * bitmap is released with starpu_bitmap_destroy().
+ * The parameter list is spelled (void) to match the prototype above: an
+ * empty list is not a prototype in C. */
+static inline struct starpu_bitmap *starpu_bitmap_create(void)
+{
+	/* the cast is kept because this header can be included from C++ */
+	return (struct starpu_bitmap *) calloc(1, sizeof(struct starpu_bitmap));
+}
+
+/* Reset \p b to the empty set: the whole structure (bits and cardinal) is zeroed.
+ * Meant for bitmaps that are embedded in another structure rather than
+ * obtained from starpu_bitmap_create(). */
+static inline void starpu_bitmap_init(struct starpu_bitmap *b)
+{
+	memset(b, 0, sizeof(*b));
+}
+
+/* Release a bitmap with free(); \p b may be NULL since free(NULL) is a no-op.
+ * Only pointers returned by starpu_bitmap_create() may be passed here. */
+static inline void starpu_bitmap_destroy(struct starpu_bitmap * b)
+{
+	free(b);
+}
+
+/* Set bit \p e in \p b. The cardinal is only incremented when the bit was
+ * previously clear, so setting the same bit twice is harmless. */
+static inline void starpu_bitmap_set(struct starpu_bitmap * b, int e)
+{
+	/* already present: nothing to update */
+	if(starpu_bitmap_get(b, e))
+		return;
+	b->cardinal++;
+
+	const int word = e/_STARPU_LONG_BIT;
+	STARPU_ASSERT(word < _STARPU_BITMAP_SIZE);
+	const unsigned long mask = 1ul << (e%_STARPU_LONG_BIT);
+	b->bits[word] |= mask;
+	STARPU_ASSERT(_starpu_check_bitmap(b));
+}
+/* Clear bit \p e in \p b. The cardinal is only decremented when the bit was
+ * actually set. An index outside the bitmap trips the assertion; if the
+ * assertion does not abort, the call is a no-op. */
+static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e)
+{
+	STARPU_ASSERT(e >= 0 && e/_STARPU_LONG_BIT < _STARPU_BITMAP_SIZE);
+	/* reject out-of-range indices before touching the cardinal or the words
+	 * (the bound is >=, bits[_STARPU_BITMAP_SIZE] does not exist) */
+	if(e < 0 || e / _STARPU_LONG_BIT >= _STARPU_BITMAP_SIZE)
+		return;
+	/* already clear: nothing to update */
+	if(!starpu_bitmap_get(b, e))
+		return;
+	b->cardinal--;
+	b->bits[e/_STARPU_LONG_BIT] &= ~(1ul << (e%_STARPU_LONG_BIT));
+	STARPU_ASSERT(_starpu_check_bitmap(b));
+}
+
+/* Clear every bit of \p b and reset its cardinal accordingly. */
+static inline void starpu_bitmap_unset_all(struct starpu_bitmap * b)
+{
+	/* sizeof(b->bits) is the byte size of the whole word array, whatever the
+	 * way _STARPU_BITMAP_SIZE expands */
+	memset(b->bits, 0, sizeof(b->bits));
+	/* keep starpu_bitmap_cardinal() and starpu_bitmap_last() consistent with the now empty set */
+	b->cardinal = 0;
+}
+
+/* Overwrite \p a with the intersection of \p b and \p c, word by word, and
+ * recompute the cardinal of \p a from the resulting words. */
+static inline void starpu_bitmap_unset_and(struct starpu_bitmap * a, struct starpu_bitmap * b, struct starpu_bitmap * c)
+{
+	int word;
+	int count = 0;
+	for(word = 0; word < _STARPU_BITMAP_SIZE; word++)
+	{
+		const unsigned long both = b->bits[word] & c->bits[word];
+		a->bits[word] = both;
+		count += _starpu_count_bit_static(both);
+	}
+	a->cardinal = count;
+}
+
+/* Return 1 iff bit \p e is set in \p b, 0 otherwise.
+ * An index outside the bitmap (negative or too large) trips the assertion;
+ * if the assertion does not abort, such an index is reported as not set. */
+static inline int starpu_bitmap_get(struct starpu_bitmap * b, int e)
+{
+	STARPU_ASSERT(e >= 0 && e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE);
+	/* a negative index would lead to a negative shift count, a too large
+	 * one would read past the end of bits[] */
+	if(e < 0 || e / _STARPU_LONG_BIT >= _STARPU_BITMAP_SIZE)
+		return 0;
+	return (b->bits[e/_STARPU_LONG_BIT] & (1ul << (e%_STARPU_LONG_BIT))) ?
+		1:
+		0;
+}
+
+/* Add every bit of \p b to \p a (\p a |= \p b) and recompute the cardinal of
+ * \p a from the merged words. */
+static inline void starpu_bitmap_or(struct starpu_bitmap * a, struct starpu_bitmap * b)
+{
+	int word;
+	int count = 0;
+	for(word = 0; word < _STARPU_BITMAP_SIZE; word++)
+	{
+		a->bits[word] |= b->bits[word];
+		count += _starpu_count_bit_static(a->bits[word]);
+	}
+	a->cardinal = count;
+}
+
+
+/* Return 1 iff bit \p e is set in both \p b1 and \p b2, 0 otherwise.
+ * \p b2 is only consulted when the bit is set in \p b1. */
+static inline int starpu_bitmap_and_get(struct starpu_bitmap * b1, struct starpu_bitmap * b2, int e)
+{
+	if(!starpu_bitmap_get(b1,e))
+		return 0;
+	return starpu_bitmap_get(b2,e) ? 1 : 0;
+}
+
+/* Return the recorded number of set bits of \p b; this reads the cached
+ * cardinal field, the words are not scanned. */
+static inline int starpu_bitmap_cardinal(struct starpu_bitmap * b)
+{
+	return b->cardinal;
+}
+
+
+/* Return the 0-based rank of the lowest set bit of \p ms, which must not be 0. */
+static inline int _starpu_get_first_bit_rank(unsigned long ms)
+{
+	STARPU_ASSERT(ms != 0);
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
+	/* ffsl() numbers bits from 1 */
+	return __builtin_ffsl(ms) - 1;
+#else
+	/* slide a single-bit probe upwards until it hits a set bit */
+	int rank = 0;
+	unsigned long probe;
+	for(probe = 1ul; !(probe & ms); probe <<= 1)
+		rank++;
+	return rank;
+#endif
+}
+
+/* Return the 0-based rank of the highest set bit of \p l, which must not be 0
+ * (e.g. 0 for l == 1, _STARPU_LONG_BIT - 1 when the top bit is set). */
+static inline int _starpu_get_last_bit_rank(unsigned long l)
+{
+	STARPU_ASSERT(l != 0);
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
+	/* clzl() counts the zero bits above the highest set bit, so the rank
+	 * is the index of the top bit minus that count */
+	return _STARPU_LONG_BIT - 1 - __builtin_clzl(l);
+#else
+	int ibit = _STARPU_LONG_BIT - 1;
+	/* walk down from the top bit until a set bit of l is found */
+	while(ibit >= 0 && !((1ul << ibit) & l))
+		ibit--;
+	STARPU_ASSERT(ibit >= 0);
+	return ibit;
+#endif
+}
+
+/* Return the index of the lowest set bit of \p b, or -1 when no bit is set. */
+static inline int starpu_bitmap_first(struct starpu_bitmap * b)
+{
+	int word;
+	for(word = 0; word < _STARPU_BITMAP_SIZE; word++)
+	{
+		/* first non-empty word: its lowest set bit is the answer */
+		if(b->bits[word])
+			return (word * _STARPU_LONG_BIT) + _starpu_get_first_bit_rank(b->bits[word]);
+	}
+	return -1;
+}
+
+/* Return 1 iff at least one bit is set in \p b at a position strictly after
+ * \p e, 0 otherwise. Any negative \p e means "before the first bit". */
+static inline int starpu_bitmap_has_next(struct starpu_bitmap * b, int e)
+{
+	int nb_long;
+	int nb_bit;
+	unsigned long mask;
+
+	/* a negative start would give a negative shift count below */
+	if(e < -1)
+		e = -1;
+	/* nothing can follow an index beyond the bitmap (also keeps e+1 from overflowing) */
+	if(e / _STARPU_LONG_BIT >= _STARPU_BITMAP_SIZE)
+		return 0;
+	nb_long = (e+1) / _STARPU_LONG_BIT;
+	nb_bit = (e+1) % _STARPU_LONG_BIT;
+	/* e was the very last bit of the last word: bits[nb_long] does not exist */
+	if(nb_long >= _STARPU_BITMAP_SIZE)
+		return 0;
+	/* bits at position e+1 and above within the current word */
+	mask = (~0ul) << nb_bit;
+	if(b->bits[nb_long] & mask)
+		return 1;
+	/* any set bit in a later word */
+	for(nb_long++; nb_long < _STARPU_BITMAP_SIZE; nb_long++)
+		if(b->bits[nb_long])
+			return 1;
+	return 0;
+}
+
+/* Return the position computed from the highest non-empty word of \p b, or
+ * -1 when the recorded cardinal is 0. */
+static inline int starpu_bitmap_last(struct starpu_bitmap * b)
+{
+	int ilong = _STARPU_BITMAP_SIZE - 1;
+
+	if(b->cardinal == 0)
+		return -1;
+	/* scan downwards for the highest word holding at least one bit */
+	while(ilong >= 0 && !b->bits[ilong])
+		ilong--;
+	/* a non-zero cardinal implies that such a word exists */
+	STARPU_ASSERT(ilong >= 0);
+	return ilong * _STARPU_LONG_BIT + _starpu_get_last_bit_rank(b->bits[ilong]);
+}
+
+/* Return the position of the first set bit located strictly after \p e in
+ * \p b, or -1 when there is none. */
+static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e)
+{
+	int word = e / _STARPU_LONG_BIT;
+	const int bit = e % _STARPU_LONG_BIT;
+
+	/* look at the remainder of the current word, unless e was its top bit
+	 * (shifting by the full word width is not allowed) */
+	if(bit != (_STARPU_LONG_BIT - 1))
+	{
+		const unsigned long rest = (~0ul << (bit + 1)) & b->bits[word];
+		if(rest)
+		{
+			int i = _starpu_get_first_bit_rank(rest);
+			STARPU_ASSERT(i >= 0 && i < _STARPU_LONG_BIT);
+			return (word * _STARPU_LONG_BIT) + i;
+		}
+	}
+
+	/* otherwise the answer is the lowest bit of the next non-empty word */
+	for(word++; word < _STARPU_BITMAP_SIZE; word++)
+	{
+		if(b->bits[word])
+			return word * _STARPU_LONG_BIT + _starpu_get_first_bit_rank(b->bits[word]);
+	}
+	return -1;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_BITMAP_H__ */

+ 3 - 3
include/starpu_sched_component.h

@@ -69,14 +69,14 @@ struct starpu_sched_component
 	/** The tree containing the component*/
 	struct starpu_sched_tree *tree;
 	/** set of underlying workers */
-	struct starpu_bitmap *workers;
+	struct starpu_bitmap workers;
 	/**
 	   subset of starpu_sched_component::workers that is currently available in the context
 	   The push method should take this value into account, it is set with:
 	   component->workers UNION tree->workers UNION
 	   component->child[i]->workers_in_ctx iff exist x such as component->children[i]->parents[x] == component
 	*/
-	struct starpu_bitmap *workers_in_ctx;
+	struct starpu_bitmap workers_in_ctx;
 	/** private data */
 	void *data;
 	char *name;
@@ -188,7 +188,7 @@ struct starpu_sched_tree
 	/**
 	   set of workers available in this context, this value is used to mask workers in modules
 	*/
-	struct starpu_bitmap *workers;
+	struct starpu_bitmap workers;
 	/**
 	   context id of the scheduler
 	*/

+ 1 - 1
mpi/Makefile.am

@@ -16,7 +16,7 @@
 
 include $(top_srcdir)/starpu-subdirtests.mk
 
-SUBDIRS=src tests examples
+SUBDIRS=src tests examples tools
 
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = libstarpumpi.pc starpumpi-1.0.pc starpumpi-1.1.pc starpumpi-1.2.pc starpumpi-1.3.pc

+ 8 - 7
mpi/src/mpi/starpu_mpi_mpi.c

@@ -51,7 +51,10 @@ static unsigned nready_process;
 /* Number of send requests to submit to MPI at the same time */
 static unsigned ndetached_send;
 
+#ifdef STARPU_USE_FXT
 static void _starpu_mpi_add_sync_point_in_fxt(void);
+#endif
+
 static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req);
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req);
 #ifdef STARPU_MPI_VERBOSE
@@ -1173,6 +1176,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
 	starpu_pthread_setname("MPI");
 
+	_starpu_mpi_env_init();
+
 #ifndef STARPU_SIMGRID
 	if (_starpu_mpi_thread_cpuid < 0)
 	{
@@ -1189,11 +1194,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	if (_starpu_mpi_thread_cpuid >= 0)
 		/* In case MPI changed the binding */
 		starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI");
-#endif
-
-	_starpu_mpi_env_init();
-
-#ifdef STARPU_SIMGRID
+#else
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
 	char **argv_cpy;
 	_STARPU_MPI_MALLOC(argv_cpy, *(argc_argv->argc) * sizeof(char*));
@@ -1532,9 +1533,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	return NULL;
 }
 
+#ifdef STARPU_USE_FXT
 static void _starpu_mpi_add_sync_point_in_fxt(void)
 {
-#ifdef STARPU_USE_FXT
 	int rank;
 	int worldsize;
 	int ret;
@@ -1563,8 +1564,8 @@ static void _starpu_mpi_add_sync_point_in_fxt(void)
 	_STARPU_MPI_TRACE_BARRIER(rank, worldsize, random_number);
 
 	_STARPU_MPI_DEBUG(3, "unique key %x\n", random_number);
-#endif
 }
+#endif
 
 int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
 {

+ 4 - 0
mpi/src/starpu_mpi_init.c

@@ -228,6 +228,10 @@ int starpu_mpi_shutdown(void)
 	void *value;
 	int rank, world_size;
 
+	/* Make sure we do not have MPI communications pending in the task graph
+	 * before shutting down MPI */
+	starpu_mpi_wait_for_all(MPI_COMM_WORLD);
+
 	/* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);

+ 8 - 9
mpi/src/starpu_mpi_task_insert.c

@@ -818,8 +818,12 @@ void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle,
 	// need to count how many nodes have the data in redux mode
 	if (me == rank)
 	{
-		int i, j=0;
-		struct starpu_task *taskBs[nb_nodes];
+		int i;
+
+		// taskC depends on all taskBs created
+		struct starpu_task *taskC = starpu_task_create();
+		taskC->cl = &_starpu_mpi_redux_data_readwrite_cl;
+		STARPU_TASK_SET_HANDLE(taskC, data_handle, 0);
 
 		for(i=0 ; i<nb_nodes ; i++)
 		{
@@ -857,8 +861,8 @@ void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle,
 				args->taskB->cl = args->data_handle->redux_cl;
 				args->taskB->sequential_consistency = 0;
 				STARPU_TASK_SET_HANDLE(args->taskB, args->data_handle, 0);
-				taskBs[j] = args->taskB;
-				j++;
+
+				starpu_task_declare_deps_array(taskC, 1, &args->taskB);
 
 				// Submit taskA
 				starpu_task_insert(&_starpu_mpi_redux_data_read_cl,
@@ -868,11 +872,6 @@ void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle,
 			}
 		}
 
-		// Submit taskC which depends on all taskBs created
-		struct starpu_task *taskC = starpu_task_create();
-		taskC->cl = &_starpu_mpi_redux_data_readwrite_cl;
-		STARPU_TASK_SET_HANDLE(taskC, data_handle, 0);
-		starpu_task_declare_deps_array(taskC, j, taskBs);
 		int ret = starpu_task_submit(taskC);
 		STARPU_ASSERT(ret == 0);
 	}

+ 10 - 2
mpi/tests/Makefile.am

@@ -139,8 +139,12 @@ starpu_mpi_TESTS +=				\
 	temporary				\
 	user_defined_datatype			\
 	early_stuff				\
-	sendrecv_bench				\
+	sendrecv_bench
+
+if !STARPU_USE_MPI_MPI
+starpu_mpi_TESTS +=				\
 	sendrecv_parallel_tasks_bench
+endif
 
 if !NO_BLAS_LIB
 starpu_mpi_TESTS +=				\
@@ -235,9 +239,13 @@ noinst_PROGRAMS =				\
 	load_balancer				\
 	driver					\
 	sendrecv_bench				\
-	sendrecv_gemm_bench			\
 	sendrecv_parallel_tasks_bench
 
+if !NO_BLAS_LIB
+noinst_PROGRAMS +=				\
+	sendrecv_gemm_bench
+endif
+
 XFAIL_TESTS=					\
 	policy_register_toomany			\
 	policy_unregister			\

+ 4 - 0
mpi/tests/bench_helper.h

@@ -24,6 +24,10 @@
 	#define MULT_DEFAULT 4
 	#define LOOPS_DEFAULT 100
 	#define NX_MAX (64 * 1024 * 1024) // kB
+#elif !defined(STARPU_LONG_CHECK)
+	#define MULT_DEFAULT 4
+	#define LOOPS_DEFAULT 10000
+	#define NX_MAX (128 * 1024 * 1024) // kB
 #else
 	#define MULT_DEFAULT 2
 	#define LOOPS_DEFAULT 100000

+ 35 - 3
mpi/tests/sendrecv_bench.c

@@ -27,6 +27,30 @@ int main(int argc, char **argv)
 {
 	int ret, rank, worldsize;
 	int mpi_init;
+	int pause_workers = 0;
+
+
+	for (int i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-p") == 0)
+		{
+			pause_workers = 1;
+			printf("Workers will be paused during benchmark.\n");
+		}
+		else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
+		{
+			fprintf(stderr, "Options:\n");
+			fprintf(stderr, "\t-h --help   display this help\n");
+			fprintf(stderr, "\t-p          pause workers during benchmark\n");
+			exit(EXIT_SUCCESS);
+		}
+		else
+		{
+			fprintf(stderr,"Unrecognized option %s\n", argv[i]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
 
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
@@ -46,12 +70,20 @@ int main(int argc, char **argv)
 		return STARPU_TEST_SKIPPED;
 	}
 
-	/* Pause workers for this bench: all workers polling for tasks has a strong impact on performances */
-	starpu_pause();
+
+	if (pause_workers)
+	{
+		/* Pause workers for this bench: all workers polling for tasks has a strong impact on performances */
+		starpu_pause();
+	}
 
 	sendrecv_bench(rank, NULL);
 
-	starpu_resume();
+	if (pause_workers)
+	{
+		starpu_resume();
+	}
+
 	starpu_mpi_shutdown();
 	if (!mpi_init)
 		MPI_Finalize();

+ 8 - 0
mpi/tests/sendrecv_parallel_tasks_bench.c

@@ -13,6 +13,7 @@
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
+
 /*
  * sendrecv benchmark from different tasks, executed simultaneously on several
  * workers.
@@ -21,6 +22,13 @@
  * The goal is to measure impact of calls to starpu_mpi_* from different threads.
  *
  * Use STARPU_NCPU to set the number of parallel ping pongs
+ *
+ *
+ * Note: This currently can not work with the MPI backend with more than 1 CPU,
+ * since with big sizes, the MPI_Wait call in the MPI thread may block waiting
+ * for the peer to call MPI_Recv+Wait, and there is no guarantee that the peer
+ * will call MPI_Recv+Wait for the same data since tasks can proceed in any
+ * order.
  */
 
 #include <starpu_mpi.h>

+ 2 - 2
tools/replay-mpi/Makefile.am

@@ -29,14 +29,14 @@ CC=$(CC_OR_MPICC)
 CCLD=$(CC_OR_MPICC)
 
 if STARPU_USE_MPI
-LIBS += ../../mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 AM_CPPFLAGS += -I$(top_srcdir)/mpi/include
 endif
 
 bin_PROGRAMS = starpu_replay_mpi
 
 starpu_replay.c starpu_replay_sched.c:
-	$(LN_S) $(abs_srcdir)/../$(notdir $@) $@
+	$(LN_S) $(top_srcdir)/tools/$(notdir $@) $@
 
 starpu_replay_mpi_SOURCES = \
 	starpu_replay.c \

+ 0 - 1
src/Makefile.am

@@ -160,7 +160,6 @@ noinst_HEADERS = 						\
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	common/barrier.c					\
 	common/barrier_counter.c				\
-	common/bitmap.c						\
 	common/hash.c 						\
 	common/rwlock.c						\
 	common/starpu_spinlock.c				\

+ 0 - 265
src/common/bitmap.c

@@ -1,265 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2013       Simon Archipoff
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <starpu.h>
-#include <starpu_bitmap.h>
-
-#include <limits.h>
-#include <string.h>
-#include <stdlib.h>
-
-#ifndef LONG_BIT
-#define LONG_BIT (sizeof(unsigned long) * 8)
-#endif
-
-struct starpu_bitmap
-{
-	unsigned long * bits;
-	int size; /* the size of bits array in number of unsigned long */
-	int cardinal;
-};
-
-//#define DEBUG_BITMAP
-
-#ifdef DEBUG_BITMAP
-static int check_bitmap(struct starpu_bitmap *b)
-{
-	int card = b->cardinal;
-	int i = starpu_bitmap_first(b);
-	int j;
-	for(j = 0; j < card; j++)
-	{
-		if(i == -1)
-			return 0;
-		int tmp = starpu_bitmap_next(b,i);
-		if(tmp == i)
-			return 0;
-		i = tmp;
-	}
-	if(i != -1)
-		return 0;
-	return 1;
-}
-#else
-#define check_bitmap(b) 1
-#endif
-
-static int _count_bit(unsigned long e)
-{
-#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
-	return __builtin_popcountl(e);
-#else
-	int c = 0;
-	while(e)
-	{
-		c += e&1;
-		e >>= 1;
-	}
-	return c;
-#endif
-}
-
-struct starpu_bitmap * starpu_bitmap_create(void)
-{
-	struct starpu_bitmap *b;
-	_STARPU_CALLOC(b, 1, sizeof(*b));
-	return b;
-}
-void starpu_bitmap_destroy(struct starpu_bitmap * b)
-{
-	if(b)
-	{
-		free(b->bits);
-		free(b);
-	}
-}
-
-void starpu_bitmap_set(struct starpu_bitmap * b, int e)
-{
-
-	if(!starpu_bitmap_get(b, e))
-		b->cardinal++;
-	else
-		return;
-	if((e/LONG_BIT) + 1 > b->size)
-	{
-		_STARPU_REALLOC(b->bits, sizeof(unsigned long) * ((e/LONG_BIT) + 1));
-		memset(b->bits + b->size, 0, sizeof(unsigned long) * ((e/LONG_BIT + 1) - b->size));
-		b->size = (e/LONG_BIT) + 1;
-	}
-	b->bits[e/LONG_BIT] |= (1ul << (e%LONG_BIT));
-	STARPU_ASSERT(check_bitmap(b));
-}
-void starpu_bitmap_unset(struct starpu_bitmap *b, int e)
-{
-	if(starpu_bitmap_get(b, e))
-		b->cardinal--;
-	else
-		return;
-	if(e / LONG_BIT > b->size)
-		return;
-	b->bits[e/LONG_BIT] &= ~(1ul << (e%LONG_BIT));
-	STARPU_ASSERT(check_bitmap(b));
-}
-
-void starpu_bitmap_unset_all(struct starpu_bitmap * b)
-{
-	free(b->bits);
-	b->bits = NULL;
-	b->size = 0;
-}
-
-void starpu_bitmap_unset_and(struct starpu_bitmap * a, struct starpu_bitmap * b, struct starpu_bitmap * c)
-{
-	int n = STARPU_MIN(b->size, c->size);
-	_STARPU_REALLOC(a->bits, sizeof(unsigned long) * n);
-	a->size = n;
-	a->cardinal = 0;
-	int i;
-	for(i = 0; i < n; i++)
-	{
-		a->bits[i] = b->bits[i] & c->bits[i];
-		a->cardinal += _count_bit(a->bits[i]);
-	}
-}
-
-int starpu_bitmap_get(struct starpu_bitmap * b, int e)
-{
-	if(e / LONG_BIT >= b->size)
-		return 0;
-	return (b->bits[e/LONG_BIT] & (1ul << (e%LONG_BIT))) ?
-		1:
-		0;
-}
-
-void starpu_bitmap_or(struct starpu_bitmap * a, struct starpu_bitmap * b)
-{
-	if(a->size < b->size)
-	{
-		_STARPU_REALLOC(a->bits, b->size * sizeof(unsigned long));
-		memset(a->bits + a->size, 0, (b->size - a->size) * sizeof(unsigned long));
-		a->size = b->size;
-
-	}
-	int i;
-	for(i = 0; i < b->size; i++)
-	{
-		a->bits[i] |= b->bits[i];
-	}
-	a->cardinal = 0;
-	for(i = 0; i < a->size; i++)
-		a->cardinal += _count_bit(a->bits[i]);
-}
-
-
-int starpu_bitmap_and_get(struct starpu_bitmap * b1, struct starpu_bitmap * b2, int e)
-{
-	return starpu_bitmap_get(b1,e) && starpu_bitmap_get(b2,e);
-}
-
-int starpu_bitmap_cardinal(struct starpu_bitmap * b)
-{
-	return b->cardinal;
-}
-
-
-static inline int get_first_bit_rank(unsigned long ms)
-{
-	STARPU_ASSERT(ms != 0);
-#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
-	return __builtin_ffsl(ms) - 1;
-#else
-	unsigned long m = 1ul;
-	int i = 0;
-	while(!(m&ms))
-		i++,m<<=1;
-	return i;
-#endif
-}
-
-static inline int get_last_bit_rank(unsigned long l)
-{
-	STARPU_ASSERT(l != 0);
-#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
-	return 8*sizeof(l) - __builtin_clzl(l);
-#else
-	int ibit = LONG_BIT - 1;
-	while((!(1ul << ibit)) & l)
-		ibit--;
-	STARPU_ASSERT(ibit >= 0);
-	return ibit;
-#endif
-}
-
-int starpu_bitmap_first(struct starpu_bitmap * b)
-{
-	int i = 0;
-	while(i < b->size && !b->bits[i])
-		i++;
-	if( i == b->size)
-		return -1;
-	int nb_long = i;
-	unsigned long ms = b->bits[i];
-
-	return (nb_long * LONG_BIT) + get_first_bit_rank(ms);
-}
-
-int starpu_bitmap_has_next(struct starpu_bitmap * b, int e)
-{
-	int nb_long = (e+1) / LONG_BIT;
-	int nb_bit = (e+1) % LONG_BIT;
-	unsigned long mask = (~0ul) << nb_bit;
-	if(b->bits[nb_long] & mask)
-		return 1;
-	for(nb_long++; nb_long < b->size; nb_long++)
-		if(b->bits[nb_long])
-			return 1;
-	return 0;
-}
-
-int starpu_bitmap_last(struct starpu_bitmap * b)
-{
-	if(b->cardinal == 0)
-		return -1;
-	int ilong;
-	for(ilong = b->size - 1; ilong >= 0; ilong--)
-	{
-		if(b->bits[ilong])
-			break;
-	}
-	STARPU_ASSERT(ilong >= 0);
-	unsigned long l = b->bits[ilong];
-	return ilong * LONG_BIT + get_last_bit_rank(l);
-}
-
-int starpu_bitmap_next(struct starpu_bitmap *b, int e)
-{
-	int nb_long = e / LONG_BIT;
-	int nb_bit = e % LONG_BIT;
-	unsigned long rest = nb_bit == LONG_BIT - 1 ? 0 : (~0ul << (nb_bit + 1)) & b->bits[nb_long];
-	if(nb_bit != (LONG_BIT - 1) && rest)
-	{
-		int i = get_first_bit_rank(rest);
-		STARPU_ASSERT(i >= 0 && i < LONG_BIT);
-		return (nb_long * LONG_BIT) + i;
-	}
-
-	for(nb_long++;nb_long < b->size; nb_long++)
-		if(b->bits[nb_long])
-			return nb_long * LONG_BIT + get_first_bit_rank(b->bits[nb_long]);
-	return -1;
-}

+ 1 - 1
src/common/fxt.h

@@ -753,7 +753,7 @@ do {									\
 	{								\
 		/* we include the task name */			\
 		_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid), 1, name); \
-		if (model_name && strcmp(model_name, name))				\
+		if (model_name)					\
 			_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
 	}								\
 	else {                                                          \

+ 1 - 1
src/core/topology.c

@@ -2854,7 +2854,7 @@ static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_confi
 					config->nbindid = STARPU_NMAXWORKERS;
 				else
 					config->nbindid = 2 * old_nbindid;
-				if (bindid > config->nbindid)
+				if (bindid >= config->nbindid)
 				{
 					config->nbindid = bindid+1;
 				}

+ 0 - 2
src/debug/traces/starpu_fxt.c

@@ -199,8 +199,6 @@ static void task_dump(struct task_info *task, struct starpu_fxt_options *options
 	if (task->name)
 	{
 		fprintf(tasks_file, "Name: %s\n", task->name);
-		if (!task->model_name)
-			fprintf(tasks_file, "Model: %s\n", task->name);
 		free(task->name);
 	}
 	if (task->model_name)

+ 1 - 1
src/sched_policies/component_best_implementation.c

@@ -85,7 +85,7 @@ static void select_best_implementation_and_set_preds(unsigned sched_ctx_id, stru
 static int best_implementation_push_task(struct starpu_sched_component * component, struct starpu_task * task)
 {
 	STARPU_ASSERT(component->nchildren == 1);
-	select_best_implementation_and_set_preds(component->tree->sched_ctx_id, component->workers_in_ctx, task);
+	select_best_implementation_and_set_preds(component->tree->sched_ctx_id, &component->workers_in_ctx, task);
 	return starpu_sched_component_push_task(component,component->children[0],task);
 }
 

+ 6 - 7
src/sched_policies/component_composed.c

@@ -168,16 +168,16 @@ static void composed_component_remove_child(struct starpu_sched_component * comp
 static void composed_component_notify_change_workers(struct starpu_sched_component * component)
 {
 	struct composed_component * c = component->data;
-	struct starpu_bitmap * workers = component->workers;
-	struct starpu_bitmap * workers_in_ctx = component->workers_in_ctx;
+	struct starpu_bitmap * workers = &component->workers;
+	struct starpu_bitmap * workers_in_ctx = &component->workers_in_ctx;
 	struct starpu_sched_component * n;
 	for(n = c->top; ;n = n->children[0])
 	{
-		starpu_bitmap_unset_all(n->workers);
-		starpu_bitmap_or(n->workers, workers);
+		starpu_bitmap_unset_all(&n->workers);
+		starpu_bitmap_or(&n->workers, workers);
 
-		starpu_bitmap_unset_all(n->workers_in_ctx);
-		starpu_bitmap_or(n->workers_in_ctx, workers_in_ctx);
+		starpu_bitmap_unset_all(&n->workers_in_ctx);
+		starpu_bitmap_or(&n->workers_in_ctx, workers_in_ctx);
 
 		n->properties = component->properties;
 		if(n == c->bottom)
@@ -195,7 +195,6 @@ void composed_component_deinit_data(struct starpu_sched_component * _component)
 	do
 	{
 		component = next;
-		component->workers = NULL;
 		next = component->children ? component->children[0] : NULL;
 		starpu_sched_component_destroy(component);
 	}

+ 6 - 6
src/sched_policies/component_eager.c

@@ -35,9 +35,9 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 	{
 		/* target told us we could push to it, try to */
 		int idworker;
-		for(idworker = starpu_bitmap_first(target->workers);
+		for(idworker = starpu_bitmap_first(&target->workers);
 			idworker != -1;
-			idworker = starpu_bitmap_next(target->workers, idworker))
+			idworker = starpu_bitmap_next(&target->workers, idworker))
 		{
 			int nimpl;
 			for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
@@ -55,9 +55,9 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 
 	/* FIXME: should rather just loop over children before looping over its workers */
 	int workerid;
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    workerid != -1;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		int nimpl;
 		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
@@ -70,9 +70,9 @@ static int eager_push_task(struct starpu_sched_component * component, struct sta
 				for (i = 0; i < component->nchildren; i++)
 				{
 					int idworker;
-					for(idworker = starpu_bitmap_first(component->children[i]->workers);
+					for(idworker = starpu_bitmap_first(&component->children[i]->workers);
 						idworker != -1;
-						idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
+						idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
 					{
 						if (idworker == workerid)
 						{

+ 4 - 4
src/sched_policies/component_eager_calibration.c

@@ -25,9 +25,9 @@ static int eager_calibration_push_task(struct starpu_sched_component * component
 	starpu_task_bundle_t bundle = task->bundle;
 
 	int workerid;
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    workerid != -1;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		struct starpu_perfmodel_arch* archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id);
 		int nimpl;
@@ -49,9 +49,9 @@ static int eager_calibration_push_task(struct starpu_sched_component * component
 					for (i = 0; i < component->nchildren; i++)
 					{
 						int idworker;
-						for(idworker = starpu_bitmap_first(component->children[i]->workers);
+						for(idworker = starpu_bitmap_first(&component->children[i]->workers);
 							idworker != -1;
-							idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
+							idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
 						{
 							if (idworker == workerid)
 							{

+ 4 - 4
src/sched_policies/component_eager_prio.c

@@ -50,9 +50,9 @@ static int eager_prio_progress_one(struct starpu_sched_component *component)
 
 	/* FIXME: should rather just loop over children before looping over its workers */
 	int workerid;
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    workerid != -1;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		int nimpl;
 		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
@@ -65,9 +65,9 @@ static int eager_prio_progress_one(struct starpu_sched_component *component)
 				for (i = 0; i < component->nchildren; i++)
 				{
 					int idworker;
-					for(idworker = starpu_bitmap_first(component->children[i]->workers);
+					for(idworker = starpu_bitmap_first(&component->children[i]->workers);
 						idworker != -1;
-						idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
+						idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
 					{
 						if (idworker == workerid)
 						{

+ 12 - 13
src/sched_policies/component_fifo.c

@@ -23,7 +23,7 @@
 
 struct _starpu_fifo_data
 {
-	struct _starpu_fifo_taskq * fifo;
+	struct _starpu_fifo_taskq fifo;
 	starpu_pthread_mutex_t mutex;
 	unsigned ntasks_threshold;
 	double exp_len_threshold;
@@ -35,7 +35,6 @@ static void fifo_component_deinit_data(struct starpu_sched_component * component
 {
 	STARPU_ASSERT(component && component->data);
 	struct _starpu_fifo_data * f = component->data;
-	_starpu_destroy_fifo(f->fifo);
 	STARPU_PTHREAD_MUTEX_DESTROY(&f->mutex);
 	free(f);
 }
@@ -44,22 +43,22 @@ static double fifo_estimated_end(struct starpu_sched_component * component)
 {
 	STARPU_ASSERT(component && component->data);
 	struct _starpu_fifo_data * data = component->data;
-	struct _starpu_fifo_taskq * queue = data->fifo;
+	struct _starpu_fifo_taskq * queue = &data->fifo;
 	return starpu_sched_component_estimated_end_min_add(component, queue->exp_len);
 }
 
 static double fifo_estimated_load(struct starpu_sched_component * component)
 {
 	STARPU_ASSERT(component && component->data);
-	STARPU_ASSERT(starpu_bitmap_cardinal(component->workers_in_ctx) != 0);
+	STARPU_ASSERT(starpu_bitmap_cardinal(&component->workers_in_ctx) != 0);
 	struct _starpu_fifo_data * data = component->data;
-	struct _starpu_fifo_taskq * queue = data->fifo;
+	struct _starpu_fifo_taskq * queue = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	double relative_speedup = 0.0;
 	double load = starpu_sched_component_estimated_load(component);
 	if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component))
 	{
-		int first_worker = starpu_bitmap_first(component->workers_in_ctx);
+		int first_worker = starpu_bitmap_first(&component->workers_in_ctx);
 		relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker, component->tree->sched_ctx_id));
 		STARPU_COMPONENT_MUTEX_LOCK(mutex);
 		load += queue->ntasks / relative_speedup;
@@ -69,11 +68,11 @@ static double fifo_estimated_load(struct starpu_sched_component * component)
 	else
 	{
 		int i;
-		for(i = starpu_bitmap_first(component->workers_in_ctx);
+		for(i = starpu_bitmap_first(&component->workers_in_ctx);
 		    i != -1;
-		    i = starpu_bitmap_next(component->workers_in_ctx, i))
+		    i = starpu_bitmap_next(&component->workers_in_ctx, i))
 			relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i, component->tree->sched_ctx_id));
-		relative_speedup /= starpu_bitmap_cardinal(component->workers_in_ctx);
+		relative_speedup /= starpu_bitmap_cardinal(&component->workers_in_ctx);
 		STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup));
 		STARPU_COMPONENT_MUTEX_LOCK(mutex);
 		load += queue->ntasks / relative_speedup;
@@ -87,7 +86,7 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 	STARPU_ASSERT(component && component->data && task);
 	STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task));
 	struct _starpu_fifo_data * data = component->data;
-	struct _starpu_fifo_taskq * queue = data->fifo;
+	struct _starpu_fifo_taskq * queue = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	int ret = 0;
 	const double now = starpu_timing_now();
@@ -169,7 +168,7 @@ static struct starpu_task * fifo_pull_task(struct starpu_sched_component * compo
 {
 	STARPU_ASSERT(component && component->data);
 	struct _starpu_fifo_data * data = component->data;
-	struct _starpu_fifo_taskq * queue = data->fifo;
+	struct _starpu_fifo_taskq * queue = &data->fifo;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
 	const double now = starpu_timing_now();
 
@@ -182,7 +181,7 @@ static struct starpu_task * fifo_pull_task(struct starpu_sched_component * compo
 	STARPU_COMPONENT_MUTEX_LOCK(mutex);
 	struct starpu_task * task;
 	if (data->ready && to->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)
-		task = _starpu_fifo_pop_first_ready_task(queue, starpu_bitmap_first(to->workers_in_ctx), -1);
+		task = _starpu_fifo_pop_first_ready_task(queue, starpu_bitmap_first(&to->workers_in_ctx), -1);
 	else
 		task = _starpu_fifo_pop_task(queue, starpu_worker_get_id_check());
 	if(task && data->exp)
@@ -269,7 +268,7 @@ struct starpu_sched_component * starpu_sched_component_fifo_create(struct starpu
 	struct starpu_sched_component *component = starpu_sched_component_create(tree, "fifo");
 	struct _starpu_fifo_data *data;
 	_STARPU_MALLOC(data, sizeof(*data));
-	data->fifo = _starpu_create_fifo();
+	_starpu_init_fifo(&data->fifo);
 	STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL);
 	component->data = data;
 	component->estimated_end = fifo_estimated_end;

+ 6 - 6
src/sched_policies/component_heteroprio.c

@@ -128,9 +128,9 @@ static int heteroprio_progress_accel(struct starpu_sched_component *component, s
 		for (i = 0; i < component->nchildren; i++)
 		{
 			int idworker;
-			for(idworker = starpu_bitmap_first(component->children[i]->workers);
+			for(idworker = starpu_bitmap_first(&component->children[i]->workers);
 				idworker != -1;
-				idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
+				idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
 			{
 				if (starpu_worker_get_type(idworker) == archtype)
 					break;
@@ -173,9 +173,9 @@ static int heteroprio_progress_accel(struct starpu_sched_component *component, s
 	best_component = component->children[best_icomponent];
 
 	int idworker;
-	for(idworker = starpu_bitmap_first(best_component->workers);
+	for(idworker = starpu_bitmap_first(&best_component->workers);
 		idworker != -1;
-		idworker = starpu_bitmap_next(best_component->workers, idworker))
+		idworker = starpu_bitmap_next(&best_component->workers, idworker))
 	{
 		if (starpu_worker_get_type(idworker) == archtype)
 			break;
@@ -356,9 +356,9 @@ static int heteroprio_push_task(struct starpu_sched_component * component, struc
 
 	/* Compute acceleration between best-performing arch and least-performing arch */
 	int workerid;
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    workerid != -1;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		unsigned impl_mask;
 		if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask))

+ 6 - 6
src/sched_policies/component_prio.c

@@ -70,7 +70,7 @@ static double prio_estimated_end(struct starpu_sched_component * component)
 static double prio_estimated_load(struct starpu_sched_component * component)
 {
 	STARPU_ASSERT(component && component->data);
-	STARPU_ASSERT(starpu_bitmap_cardinal(component->workers_in_ctx) != 0);
+	STARPU_ASSERT(starpu_bitmap_cardinal(&component->workers_in_ctx) != 0);
 	struct _starpu_prio_data * data = component->data;
 	struct _starpu_prio_deque * queue = &data->prio;
 	starpu_pthread_mutex_t * mutex = &data->mutex;
@@ -78,7 +78,7 @@ static double prio_estimated_load(struct starpu_sched_component * component)
 	double load = starpu_sched_component_estimated_load(component);
 	if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component))
 	{
-		int first_worker = starpu_bitmap_first(component->workers_in_ctx);
+		int first_worker = starpu_bitmap_first(&component->workers_in_ctx);
 		relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker, component->tree->sched_ctx_id));
 		STARPU_COMPONENT_MUTEX_LOCK(mutex);
 		load += queue->ntasks / relative_speedup;
@@ -88,11 +88,11 @@ static double prio_estimated_load(struct starpu_sched_component * component)
 	else
 	{
 		int i;
-		for(i = starpu_bitmap_first(component->workers_in_ctx);
+		for(i = starpu_bitmap_first(&component->workers_in_ctx);
 		    i != -1;
-		    i = starpu_bitmap_next(component->workers_in_ctx, i))
+		    i = starpu_bitmap_next(&component->workers_in_ctx, i))
 			relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i, component->tree->sched_ctx_id));
-		relative_speedup /= starpu_bitmap_cardinal(component->workers_in_ctx);
+		relative_speedup /= starpu_bitmap_cardinal(&component->workers_in_ctx);
 		STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup));
 		STARPU_COMPONENT_MUTEX_LOCK(mutex);
 		load += queue->ntasks / relative_speedup;
@@ -204,7 +204,7 @@ static struct starpu_task * prio_pull_task(struct starpu_sched_component * compo
 	STARPU_COMPONENT_MUTEX_LOCK(mutex);
 	struct starpu_task * task;
 	if (data->ready && to->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)
-		task = _starpu_prio_deque_deque_first_ready_task(queue, starpu_bitmap_first(to->workers_in_ctx));
+		task = _starpu_prio_deque_deque_first_ready_task(queue, starpu_bitmap_first(&to->workers_in_ctx));
 	else
 		task = _starpu_prio_deque_pop_task(queue);
 	if(task && data->exp)

+ 2 - 2
src/sched_policies/component_random.c

@@ -24,9 +24,9 @@ static double compute_relative_speedup(struct starpu_sched_component * component
 {
 	double sum = 0.0;
 	int id;
-	for(id = starpu_bitmap_first(component->workers_in_ctx);
+	for(id = starpu_bitmap_first(&component->workers_in_ctx);
 	    id != -1;
-	    id = starpu_bitmap_next(component->workers_in_ctx, id))
+	    id = starpu_bitmap_next(&component->workers_in_ctx, id))
 	{
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(id, component->tree->sched_ctx_id);
 		sum += starpu_worker_get_relative_speedup(perf_arch);

+ 23 - 26
src/sched_policies/component_sched.c

@@ -45,9 +45,9 @@ int starpu_sched_component_execute_preds(struct starpu_sched_component * compone
 
 
 	int workerid;
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    workerid != -1;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		int nimpl;
 		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
@@ -100,9 +100,9 @@ int starpu_sched_component_can_execute_task(struct starpu_sched_component * comp
 	unsigned nimpl;
 	int worker;
 	for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
-		for(worker = starpu_bitmap_first(component->workers_in_ctx);
+		for(worker = starpu_bitmap_first(&component->workers_in_ctx);
 		    -1 != worker;
-		    worker = starpu_bitmap_next(component->workers_in_ctx, worker))
+		    worker = starpu_bitmap_next(&component->workers_in_ctx, worker))
 			if (starpu_worker_can_execute_task(worker, task, nimpl)
 			     || starpu_combined_worker_can_execute_task(worker, task, nimpl))
 			    return 1;
@@ -115,21 +115,21 @@ int starpu_sched_component_can_execute_task(struct starpu_sched_component * comp
 double starpu_sched_component_transfer_length(struct starpu_sched_component * component, struct starpu_task * task)
 {
 	STARPU_ASSERT(component && task);
-	int nworkers = starpu_bitmap_cardinal(component->workers_in_ctx);
+	int nworkers = starpu_bitmap_cardinal(&component->workers_in_ctx);
 	double sum = 0.0;
 	int worker;
 	if(STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component))
 	{
-		unsigned memory_node  = starpu_worker_get_memory_node(starpu_bitmap_first(component->workers_in_ctx));
+		unsigned memory_node  = starpu_worker_get_memory_node(starpu_bitmap_first(&component->workers_in_ctx));
 		if(task->bundle)
 			return starpu_task_bundle_expected_data_transfer_time(task->bundle,memory_node);
 		else
 			return starpu_task_expected_data_transfer_time(memory_node, task);
 	}
 
-	for(worker = starpu_bitmap_first(component->workers_in_ctx);
+	for(worker = starpu_bitmap_first(&component->workers_in_ctx);
 	    worker != -1;
-	    worker = starpu_bitmap_next(component->workers_in_ctx, worker))
+	    worker = starpu_bitmap_next(&component->workers_in_ctx, worker))
 	{
 		unsigned memory_node  = starpu_worker_get_memory_node(worker);
 		if(task->bundle)
@@ -156,7 +156,7 @@ void starpu_sched_component_prefetch_on_node(struct starpu_sched_component * com
 	if (starpu_get_prefetch_flag() && (!task->prefetched)
 		&& (component->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE))
 	{
-		int worker = starpu_bitmap_first(component->workers_in_ctx);
+		int worker = starpu_bitmap_first(&component->workers_in_ctx);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 		starpu_prefetch_task_input_on_node(task, memory_node);
 		task->prefetched = 1;
@@ -195,8 +195,6 @@ void starpu_sched_component_destroy(struct starpu_sched_component *component)
 	free(component->children);
 	free(component->parents);
 	free(component->name);
-	starpu_bitmap_destroy(component->workers);
-	starpu_bitmap_destroy(component->workers_in_ctx);
 	free(component);
 }
 
@@ -223,7 +221,7 @@ void set_properties(struct starpu_sched_component * component)
 	STARPU_ASSERT(component);
 	component->properties = 0;
 
-	int worker = starpu_bitmap_first(component->workers_in_ctx);
+	int worker = starpu_bitmap_first(&component->workers_in_ctx);
 	if (worker == -1)
 		return;
 	if (starpu_worker_is_combined_worker(worker))
@@ -237,7 +235,7 @@ void set_properties(struct starpu_sched_component * component)
 	int is_all_same_component = 1;
 	for(;
 	    worker != -1;
-	    worker = starpu_bitmap_next(component->workers_in_ctx, worker))
+	    worker = starpu_bitmap_next(&component->workers_in_ctx, worker))
 	{
 		if(starpu_worker_is_combined_worker(worker))
 			continue;
@@ -262,12 +260,12 @@ void _starpu_sched_component_update_workers(struct starpu_sched_component * comp
 	STARPU_ASSERT(component);
 	if(starpu_sched_component_is_worker(component))
 		return;
-	starpu_bitmap_unset_all(component->workers);
+	starpu_bitmap_unset_all(&component->workers);
 	unsigned i;
 	for(i = 0; i < component->nchildren; i++)
 	{
 		_starpu_sched_component_update_workers(component->children[i]);
-		starpu_bitmap_or(component->workers, component->children[i]->workers);
+		starpu_bitmap_or(&component->workers, &component->children[i]->workers);
 	}
 	component->notify_change_workers(component);
 }
@@ -282,11 +280,11 @@ void _starpu_sched_component_update_workers_in_ctx(struct starpu_sched_component
 	if(starpu_sched_component_is_worker(component))
 		return;
 	struct starpu_bitmap * workers_in_ctx = _starpu_get_worker_mask(sched_ctx_id);
-	starpu_bitmap_unset_and(component->workers_in_ctx,component->workers, workers_in_ctx);
+	starpu_bitmap_unset_and(&component->workers_in_ctx,&component->workers, workers_in_ctx);
 	unsigned i,j;
 	for(i = starpu_worker_get_count(); i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++)
 	{
-		if (starpu_bitmap_get(component->workers, i))
+		if (starpu_bitmap_get(&component->workers, i))
 		{
 			/* Component has this combined worker, check whether the
 			 * context has all the corresponding workers */
@@ -297,7 +295,7 @@ void _starpu_sched_component_update_workers_in_ctx(struct starpu_sched_component
 				if (!starpu_bitmap_get(workers_in_ctx, combined_workerid[j]))
 					goto nocombined;
 			/* We have all workers, add it */
-			starpu_bitmap_set(component->workers_in_ctx, i);
+			starpu_bitmap_set(&component->workers_in_ctx, i);
 		}
 nocombined:
 		(void)0;
@@ -324,7 +322,7 @@ struct starpu_bitmap * _starpu_get_worker_mask(unsigned sched_ctx_id)
 	STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS);
 	struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	STARPU_ASSERT(t);
-	return t->workers;
+	return &t->workers;
 }
 
 void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree * t)
@@ -442,7 +440,7 @@ void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsign
 
 	unsigned i;
 	for(i = 0; i < nworkers; i++)
-		starpu_bitmap_set(t->workers, workerids[i]);
+		starpu_bitmap_set(&t->workers, workerids[i]);
 
 	starpu_sched_tree_update_workers_in_ctx(t);
 
@@ -461,7 +459,7 @@ void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, uns
 
 	unsigned i;
 	for(i = 0; i < nworkers; i++)
-		starpu_bitmap_unset(t->workers, workerids[i]);
+		starpu_bitmap_unset(&t->workers, workerids[i]);
 
 	starpu_sched_tree_update_workers_in_ctx(t);
 
@@ -478,7 +476,7 @@ struct starpu_sched_tree * starpu_sched_tree_create(unsigned sched_ctx_id)
 	struct starpu_sched_tree *t;
 	_STARPU_CALLOC(t, 1, sizeof(*t));
 	t->sched_ctx_id = sched_ctx_id;
-	t->workers = starpu_bitmap_create();
+	starpu_bitmap_init(&t->workers);
 	STARPU_PTHREAD_MUTEX_INIT(&t->lock,NULL);
 	trees[sched_ctx_id] = t;
 	return t;
@@ -491,7 +489,6 @@ void starpu_sched_tree_destroy(struct starpu_sched_tree * tree)
 	trees[tree->sched_ctx_id] = NULL;
 	if(tree->root)
 		starpu_sched_component_destroy_rec(tree->root);
-	starpu_bitmap_destroy(tree->workers);
 	STARPU_PTHREAD_MUTEX_DESTROY(&tree->lock);
 	free(tree);
 }
@@ -694,7 +691,7 @@ double starpu_sched_component_estimated_end_min_add(struct starpu_sched_componen
 	{
 		/* We don't know which workers will do this, assume it will be
 		 * evenly distributed to existing work */
-		int card = starpu_bitmap_cardinal(component->workers_in_ctx);
+		int card = starpu_bitmap_cardinal(&component->workers_in_ctx);
 		if (card == 0)
 			/* Oops, no resources to compute our tasks. Let's just hope that
 			 * we will be given one at some point */
@@ -732,8 +729,8 @@ struct starpu_sched_component * starpu_sched_component_create(struct starpu_sche
 	struct starpu_sched_component *component;
 	_STARPU_CALLOC(component, 1, sizeof(*component));
 	component->tree = tree;
-	component->workers = starpu_bitmap_create();
-	component->workers_in_ctx = starpu_bitmap_create();
+	starpu_bitmap_init(&component->workers);
+	starpu_bitmap_init(&component->workers_in_ctx);
 	component->add_child = starpu_sched_component_add_child;
 	component->remove_child = starpu_sched_component_remove_child;
 	component->add_parent = starpu_sched_component_add_parent;

+ 23 - 28
src/sched_policies/component_work_stealing.c

@@ -37,7 +37,7 @@ struct _starpu_component_work_stealing_data
  */
 	unsigned performed_total, last_pop_child, last_push_child;
 
-	struct _starpu_prio_deque ** fifos;
+	struct _starpu_prio_deque * fifos;
 	starpu_pthread_mutex_t ** mutexes;
 	unsigned size;
 };
@@ -59,7 +59,7 @@ static struct starpu_task *  steal_task_round_robin(struct starpu_sched_componen
 	struct starpu_task * task = NULL;
 	while (1)
 	{
-		struct _starpu_prio_deque * fifo = wsd->fifos[i];
+		struct _starpu_prio_deque * fifo = &wsd->fifos[i];
 
 		STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
 		task = _starpu_prio_deque_deque_task_for_worker(fifo, workerid, NULL);
@@ -123,7 +123,7 @@ static inline unsigned select_worker(struct starpu_sched_component * component)
 
 static int is_worker_of_component(struct starpu_sched_component * component, int workerid)
 {
-	return starpu_bitmap_get(component->workers, workerid);
+	return starpu_bitmap_get(&component->workers, workerid);
 }
 
 
@@ -141,17 +141,17 @@ static struct starpu_task * pull_task(struct starpu_sched_component * component,
 	struct _starpu_component_work_stealing_data * wsd = component->data;
 	const double now = starpu_timing_now();
 	STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
-	struct starpu_task * task = _starpu_prio_deque_pop_task(wsd->fifos[i]);
+	struct starpu_task * task = _starpu_prio_deque_pop_task(&wsd->fifos[i]);
 	if(task)
 	{
 		if(!isnan(task->predicted))
 		{
-			wsd->fifos[i]->exp_len -= task->predicted;
-			wsd->fifos[i]->exp_start = now + task->predicted;
+			wsd->fifos[i].exp_len -= task->predicted;
+			wsd->fifos[i].exp_start = now + task->predicted;
 		}
 	}
 	else
-		wsd->fifos[i]->exp_len = 0.0;
+		wsd->fifos[i].exp_len = 0.0;
 
 	STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]);
 	if(task)
@@ -163,7 +163,7 @@ static struct starpu_task * pull_task(struct starpu_sched_component * component,
 	if(task)
 	{
 		STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
-		wsd->fifos[i]->nprocessed++;
+		wsd->fifos[i].nprocessed++;
 		STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]);
 
 		return task;
@@ -196,13 +196,13 @@ double _ws_estimated_end(struct starpu_sched_component * component)
 	for(i = 0; i < component->nchildren; i++)
 	{
 		STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
-		sum_len += wsd->fifos[i]->exp_len;
-		wsd->fifos[i]->exp_start = STARPU_MAX(now, wsd->fifos[i]->exp_start);
-		sum_start += wsd->fifos[i]->exp_start;
+		sum_len += wsd->fifos[i].exp_len;
+		wsd->fifos[i].exp_start = STARPU_MAX(now, wsd->fifos[i].exp_start);
+		sum_start += wsd->fifos[i].exp_start;
 		STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]);
 
 	}
-	int nb_workers = starpu_bitmap_cardinal(component->workers_in_ctx);
+	int nb_workers = starpu_bitmap_cardinal(&component->workers_in_ctx);
 
 	return (sum_start + sum_len) / nb_workers;
 }
@@ -216,14 +216,14 @@ double _ws_estimated_load(struct starpu_sched_component * component)
 	for(i = 0; i < component->nchildren; i++)
 	{
 		STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
-		ntasks += wsd->fifos[i]->ntasks;
+		ntasks += wsd->fifos[i].ntasks;
 		STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]);
 	}
 	double speedup = 0.0;
 	int workerid;
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    -1 != workerid;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id));
 	}
@@ -243,9 +243,9 @@ static int push_task(struct starpu_sched_component * component, struct starpu_ta
 	while(1)
 	{
 		int workerid;
-		for(workerid = starpu_bitmap_first(component->children[i]->workers_in_ctx);
+		for(workerid = starpu_bitmap_first(&component->children[i]->workers_in_ctx);
 		    -1 != workerid;
-		    workerid = starpu_bitmap_next(component->children[i]->workers_in_ctx, workerid))
+		    workerid = starpu_bitmap_next(&component->children[i]->workers_in_ctx, workerid))
 		{
 			unsigned impl;
 			int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl);
@@ -265,7 +265,7 @@ static int push_task(struct starpu_sched_component * component, struct starpu_ta
 
 	STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
 	starpu_sched_task_break(task);
-	ret = _starpu_prio_deque_push_front_task(wsd->fifos[i], task);
+	ret = _starpu_prio_deque_push_front_task(&wsd->fifos[i], task);
 	STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]);
 
 	wsd->last_push_child = i;
@@ -308,9 +308,9 @@ int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task)
 
 			struct _starpu_component_work_stealing_data * wsd = component->data;
 			STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]);
-			int ret = _starpu_prio_deque_push_front_task(wsd->fifos[i] , task);
+			int ret = _starpu_prio_deque_push_front_task(&wsd->fifos[i] , task);
 			if(ret == 0 && !isnan(task->predicted))
-				wsd->fifos[i]->exp_len += task->predicted;
+				wsd->fifos[i].exp_len += task->predicted;
 			STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]);
 
 			component->can_pull(component);
@@ -334,10 +334,7 @@ void _ws_add_child(struct starpu_sched_component * component, struct starpu_sche
 		wsd->size = component->nchildren;
 	}
 
-	struct _starpu_prio_deque *fifo;
-	_STARPU_MALLOC(fifo, sizeof(*fifo));
-	_starpu_prio_deque_init(fifo);
-	wsd->fifos[component->nchildren - 1] = fifo;
+	_starpu_prio_deque_init(&wsd->fifos[component->nchildren - 1]);
 
 	starpu_pthread_mutex_t *mutex;
 	_STARPU_MALLOC(mutex, sizeof(*mutex));
@@ -359,19 +356,17 @@ void _ws_remove_child(struct starpu_sched_component * component, struct starpu_s
 			break;
 	}
 	STARPU_ASSERT(i_component != component->nchildren);
-	struct _starpu_prio_deque * tmp_fifo = wsd->fifos[i_component];
+	struct _starpu_prio_deque tmp_fifo = wsd->fifos[i_component];
 	wsd->fifos[i_component] = wsd->fifos[component->nchildren - 1];
 
 
 	component->children[i_component] = component->children[component->nchildren - 1];
 	component->nchildren--;
 	struct starpu_task * task;
-	while ((task = _starpu_prio_deque_pop_task(tmp_fifo)))
+	while ((task = _starpu_prio_deque_pop_task(&tmp_fifo)))
 	{
 		starpu_sched_component_push_task(NULL, component, task);
 	}
-	_starpu_prio_deque_destroy(tmp_fifo);
-	free(tmp_fifo);
 }
 
 void _work_stealing_component_deinit_data(struct starpu_sched_component * component)

+ 9 - 9
src/sched_policies/component_worker.c

@@ -408,7 +408,7 @@ static int simple_worker_push_task(struct starpu_sched_component * component, st
 	t->task = task;
 	t->ntasks = 1;
 
-	task->workerid = starpu_bitmap_first(component->workers);
+	task->workerid = starpu_bitmap_first(&component->workers);
 #if 1 /* dead lock problem? */
 	if (starpu_get_prefetch_flag() && !task->prefetched)
 		starpu_prefetch_task_input_for(task, task->workerid);
@@ -522,7 +522,7 @@ static double simple_worker_estimated_load(struct starpu_sched_component * compo
 	int ntasks_in_fifo = l ? l->ntasks : 0;
 	return (double) (nb_task + ntasks_in_fifo)
 		/ starpu_worker_get_relative_speedup(
-				starpu_worker_get_perf_archtype(starpu_bitmap_first(component->workers), component->tree->sched_ctx_id));
+				starpu_worker_get_perf_archtype(starpu_bitmap_first(&component->workers), component->tree->sched_ctx_id));
 }
 
 static void _worker_component_deinit_data(struct starpu_sched_component * component)
@@ -567,8 +567,8 @@ static struct starpu_sched_component * starpu_sched_component_worker_create(stru
 	component->estimated_end = simple_worker_estimated_end;
 	component->estimated_load = simple_worker_estimated_load;
 	component->deinit_data = _worker_component_deinit_data;
-	starpu_bitmap_set(component->workers, workerid);
-	starpu_bitmap_or(component->workers_in_ctx, component->workers);
+	starpu_bitmap_set(&component->workers, workerid);
+	starpu_bitmap_or(&component->workers_in_ctx, &component->workers);
 	_worker_components[tree->sched_ctx_id][workerid] = component;
 
 	/*
@@ -616,7 +616,7 @@ static int combined_worker_push_task(struct starpu_sched_component * component,
 	struct _starpu_worker_component_data * data = component->data;
 	STARPU_ASSERT(data->parallel_worker.worker_size >= 1);
 	struct _starpu_task_grid * task_alias[data->parallel_worker.worker_size];
-	starpu_parallel_task_barrier_init(task, starpu_bitmap_first(component->workers));
+	starpu_parallel_task_barrier_init(task, starpu_bitmap_first(&component->workers));
 	task_alias[0] = _starpu_task_grid_create();
 	task_alias[0]->task = starpu_task_dup(task);
 	task_alias[0]->task->workerid = data->parallel_worker.workerids[0];
@@ -750,8 +750,8 @@ static struct starpu_sched_component  * starpu_sched_component_combined_worker_c
 
 	struct starpu_sched_component *component = starpu_sched_component_parallel_worker_create(tree, combined_worker->worker_size, (unsigned *) combined_worker->combined_workerid);
 
-	starpu_bitmap_set(component->workers, workerid);
-	starpu_bitmap_or(component->workers_in_ctx, component->workers);
+	starpu_bitmap_set(&component->workers, workerid);
+	starpu_bitmap_or(&component->workers_in_ctx, &component->workers);
 
 	_worker_components[tree->sched_ctx_id][workerid] = component;
 
@@ -803,8 +803,8 @@ int starpu_sched_component_worker_get_workerid(struct starpu_sched_component * w
 #ifndef STARPU_NO_ASSERT
 	STARPU_ASSERT(_worker_consistant(worker_component));
 #endif
-	STARPU_ASSERT(1 == starpu_bitmap_cardinal(worker_component->workers));
-	return starpu_bitmap_first(worker_component->workers);
+	STARPU_ASSERT(1 == starpu_bitmap_cardinal(&worker_component->workers));
+	return starpu_bitmap_first(&worker_component->workers);
 }
 
 void starpu_sched_component_worker_pre_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED)

+ 23 - 39
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -52,7 +52,7 @@ struct _starpu_dmda_data
 	double _gamma;
 	double idle_power;
 
-	struct _starpu_fifo_taskq **queue_array;
+	struct _starpu_fifo_taskq queue_array[STARPU_NMAXWORKERS];
 
 	long int total_task_cnt;
 	long int ready_task_cnt;
@@ -234,7 +234,7 @@ static struct starpu_task *_dmda_pop_task(unsigned sched_ctx_id, int ready)
 	struct starpu_task *task;
 
 	unsigned workerid = starpu_worker_get_id_check();
-	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
+	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 
 	/* Take the opportunity to update start time */
 	fifo->exp_start = STARPU_MAX(starpu_timing_now(), fifo->exp_start);
@@ -284,7 +284,7 @@ static struct starpu_task *dmda_pop_every_task(unsigned sched_ctx_id)
 	struct starpu_task *new_list, *task;
 
 	unsigned workerid = starpu_worker_get_id_check();
-	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
+	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 
 	/* Take the opportunity to update start time */
 	fifo->exp_start = STARPU_MAX(starpu_timing_now(), fifo->exp_start);
@@ -323,7 +323,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 			return 0;
 	}
 
-	struct _starpu_fifo_taskq *fifo = dt->queue_array[best_workerid];
+	struct _starpu_fifo_taskq *fifo = &dt->queue_array[best_workerid];
 
 	double now = starpu_timing_now();
 
@@ -405,13 +405,13 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	if (prio)
 	{
 		starpu_worker_lock(best_workerid);
-		ret =_starpu_fifo_push_sorted_task(dt->queue_array[best_workerid], task);
+		ret =_starpu_fifo_push_sorted_task(&dt->queue_array[best_workerid], task);
 		if(dt->num_priorities != -1)
 		{
 			int i;
 			int task_prio = _starpu_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx);
 			for(i = 0; i <= task_prio; i++)
-				dt->queue_array[best_workerid]->ntasks_per_priority[i]++;
+				dt->queue_array[best_workerid].ntasks_per_priority[i]++;
 		}
 
 
@@ -424,9 +424,9 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	else
 	{
 		starpu_worker_lock(best_workerid);
-		starpu_task_list_push_back (&dt->queue_array[best_workerid]->taskq, task);
-		dt->queue_array[best_workerid]->ntasks++;
-		dt->queue_array[best_workerid]->nprocessed++;
+		starpu_task_list_push_back (&dt->queue_array[best_workerid].taskq, task);
+		dt->queue_array[best_workerid].ntasks++;
+		dt->queue_array[best_workerid].nprocessed++;
 #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID)
 		starpu_wake_worker_locked(best_workerid);
 #endif
@@ -469,7 +469,7 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 		unsigned nimpl;
 		unsigned impl_mask;
 		unsigned worker = workers->get_next(workers, &it);
-		struct _starpu_fifo_taskq *fifo  = dt->queue_array[worker];
+		struct _starpu_fifo_taskq *fifo  = &dt->queue_array[worker];
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id);
 
 		/* Sometimes workers didn't take the tasks as early as we expected */
@@ -624,7 +624,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 		unsigned nimpl;
 		unsigned impl_mask;
 		unsigned workerid = workers->get_next(workers, &it);
-		struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
+		struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id);
 		unsigned memory_node = starpu_worker_get_memory_node(workerid);
 
@@ -965,15 +965,12 @@ static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nwo
 		int workerid = workerids[i];
 		/* if the worker has alreadry belonged to this context
 		   the queue and the synchronization variables have been already initialized */
-		q = dt->queue_array[workerid];
-		if(q == NULL)
-		{
-			q = dt->queue_array[workerid] = _starpu_create_fifo();
-			/* These are only stats, they can be read with races */
-			STARPU_HG_DISABLE_CHECKING(q->exp_start);
-			STARPU_HG_DISABLE_CHECKING(q->exp_len);
-			STARPU_HG_DISABLE_CHECKING(q->exp_end);
-		}
+		q = &dt->queue_array[workerid];
+		_starpu_init_fifo(q);
+		/* These are only stats, they can be read with races */
+		STARPU_HG_DISABLE_CHECKING(q->exp_start);
+		STARPU_HG_DISABLE_CHECKING(q->exp_len);
+		STARPU_HG_DISABLE_CHECKING(q->exp_end);
 
 		if(dt->num_priorities != -1)
 		{
@@ -997,16 +994,10 @@ static void dmda_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned
 	for (i = 0; i < nworkers; i++)
 	{
 		int workerid = workerids[i];
-		if(dt->queue_array[workerid] != NULL)
+		if(dt->num_priorities != -1)
 		{
-			if(dt->num_priorities != -1)
-			{
-				free(dt->queue_array[workerid]->exp_len_per_priority);
-				free(dt->queue_array[workerid]->ntasks_per_priority);
-			}
-
-			_starpu_destroy_fifo(dt->queue_array[workerid]);
-			dt->queue_array[workerid] = NULL;
+			free(dt->queue_array[workerid].exp_len_per_priority);
+			free(dt->queue_array[workerid].ntasks_per_priority);
 		}
 	}
 }
@@ -1018,12 +1009,6 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt);
 
-	_STARPU_MALLOC(dt->queue_array, STARPU_NMAXWORKERS*sizeof(struct _starpu_fifo_taskq*));
-
-	int i;
-	for(i = 0; i < STARPU_NMAXWORKERS; i++)
-		dt->queue_array[i] = NULL;
-
 	dt->alpha = starpu_get_env_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT);
 	dt->beta = starpu_get_env_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT);
 	dt->_gamma = starpu_get_env_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT);
@@ -1069,7 +1054,6 @@ static void deinitialize_dmda_policy(unsigned sched_ctx_id)
 	}
 #endif
 
-	free(dt->queue_array);
 	free(dt);
 }
 
@@ -1080,7 +1064,7 @@ static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
 {
 	unsigned workerid = starpu_worker_get_id_check();
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
+	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 	const double now = starpu_timing_now();
 
 	/* Once the task is executing, we can update the predicted amount
@@ -1099,7 +1083,7 @@ static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id)
 static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id)
 {
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
+	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 
 	/* Compute the expected penality */
 	double predicted = starpu_task_worker_expected_length(task, perf_workerid, STARPU_NMAX_SCHED_CTXS,
@@ -1174,7 +1158,7 @@ static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id
 {
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	unsigned workerid = starpu_worker_get_id_check();
-	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
+	struct _starpu_fifo_taskq *fifo = &dt->queue_array[workerid];
 	starpu_worker_lock_self();
 	_starpu_fifo_task_finished(fifo, task, dt->num_priorities);
 	starpu_worker_unlock_self();

+ 15 - 24
src/sched_policies/eager_central_policy.c

@@ -29,9 +29,9 @@
 
 struct _starpu_eager_center_policy_data
 {
-	struct _starpu_fifo_taskq *fifo;
+	struct _starpu_fifo_taskq fifo;
 	starpu_pthread_mutex_t policy_mutex;
-	struct starpu_bitmap *waiters;
+	struct starpu_bitmap waiters;
 };
 
 static void initialize_eager_center_policy(unsigned sched_ctx_id)
@@ -40,13 +40,8 @@ static void initialize_eager_center_policy(unsigned sched_ctx_id)
 	_STARPU_MALLOC(data, sizeof(struct _starpu_eager_center_policy_data));
 
 	/* there is only a single queue in that trivial design */
-	data->fifo =  _starpu_create_fifo();
-	data->waiters = starpu_bitmap_create();
-
-	 /* Tell helgrind that it's fine to check for empty fifo in
-	  * pop_task_eager_policy without actual mutex (it's just an integer)
-	  */
-	STARPU_HG_DISABLE_CHECKING(data->fifo->ntasks);
+	_starpu_init_fifo(&data->fifo);
+	starpu_bitmap_init(&data->waiters);
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
 	STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
@@ -55,14 +50,10 @@ static void initialize_eager_center_policy(unsigned sched_ctx_id)
 static void deinitialize_eager_center_policy(unsigned sched_ctx_id)
 {
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct _starpu_fifo_taskq *fifo = data->fifo;
+	struct _starpu_fifo_taskq *fifo = &data->fifo;
 
 	STARPU_ASSERT(starpu_task_list_empty(&fifo->taskq));
 
-	/* deallocate the job queue */
-	_starpu_destroy_fifo(fifo);
-	starpu_bitmap_destroy(data->waiters);
-
 	STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
 	free(data);
 }
@@ -75,9 +66,9 @@ static int push_task_eager_policy(struct starpu_task *task)
 	starpu_worker_relax_on();
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	starpu_worker_relax_off();
-	starpu_task_list_push_back(&data->fifo->taskq,task);
-	data->fifo->ntasks++;
-	data->fifo->nprocessed++;
+	starpu_task_list_push_back(&data->fifo.taskq,task);
+	data->fifo.ntasks++;
+	data->fifo.nprocessed++;
 
 	if (_starpu_get_nsched_ctxs() > 1)
 	{
@@ -105,7 +96,7 @@ static int push_task_eager_policy(struct starpu_task *task)
 		unsigned worker = workers->get_next(workers, &it);
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-		if (!starpu_bitmap_get(data->waiters, worker))
+		if (!starpu_bitmap_get(&data->waiters, worker))
 			/* This worker is not waiting for a task */
 			continue;
 #endif
@@ -114,7 +105,7 @@ static int push_task_eager_policy(struct starpu_task *task)
 		{
 			/* It can execute this one, tell him! */
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-			starpu_bitmap_unset(data->waiters, worker);
+			starpu_bitmap_unset(&data->waiters, worker);
 			/* We really woke at least somebody, no need to wake somebody else */
 			break;
 #else
@@ -146,7 +137,7 @@ static struct starpu_task *pop_every_task_eager_policy(unsigned sched_ctx_id)
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	unsigned workerid = starpu_worker_get_id_check();
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
-	struct starpu_task* task = _starpu_fifo_pop_every_task(data->fifo, workerid);
+	struct starpu_task* task = _starpu_fifo_pop_every_task(&data->fifo, workerid);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 
 	starpu_sched_ctx_list_task_counters_reset_all(task, sched_ctx_id);
@@ -163,13 +154,13 @@ static struct starpu_task *pop_task_eager_policy(unsigned sched_ctx_id)
 	/* Here helgrind would shout that this is unprotected, this is just an
 	 * integer access, and we hold the sched mutex, so we can not miss any
 	 * wake up. */
-	if (!STARPU_RUNNING_ON_VALGRIND && _starpu_fifo_empty(data->fifo))
+	if (!STARPU_RUNNING_ON_VALGRIND && _starpu_fifo_empty(&data->fifo))
 	{
 		return NULL;
 	}
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(data->waiters, workerid))
+	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid))
 		/* Nobody woke us, avoid bothering the mutex */
 	{
 		return NULL;
@@ -180,10 +171,10 @@ static struct starpu_task *pop_task_eager_policy(unsigned sched_ctx_id)
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	starpu_worker_relax_off();
 
-	chosen_task = _starpu_fifo_pop_task(data->fifo, workerid);
+	chosen_task = _starpu_fifo_pop_task(&data->fifo, workerid);
 	if (!chosen_task)
 		/* Tell pushers that we are waiting for tasks for us */
-		starpu_bitmap_set(data->waiters, workerid);
+		starpu_bitmap_set(&data->waiters, workerid);
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	if(chosen_task &&_starpu_get_nsched_ctxs() > 1)

+ 7 - 8
src/sched_policies/eager_central_priority_policy.c

@@ -35,7 +35,7 @@ struct _starpu_eager_central_prio_data
 {
 	struct _starpu_prio_deque taskq;
 	starpu_pthread_mutex_t policy_mutex;
-	struct starpu_bitmap *waiters;
+	struct starpu_bitmap waiters;
 };
 
 /*
@@ -49,7 +49,7 @@ static void initialize_eager_center_priority_policy(unsigned sched_ctx_id)
 
 	/* only a single queue (even though there are several internaly) */
 	_starpu_prio_deque_init(&data->taskq);
-	data->waiters = starpu_bitmap_create();
+	starpu_bitmap_init(&data->waiters);
 
 	/* Tell helgrind that it's fine to check for empty fifo in
 	 * _starpu_priority_pop_task without actual mutex (it's just an
@@ -72,7 +72,6 @@ static void deinitialize_eager_center_priority_policy(unsigned sched_ctx_id)
 
 	/* deallocate the job queue */
 	_starpu_prio_deque_destroy(&data->taskq);
-	starpu_bitmap_destroy(data->waiters);
 
 	STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
 	free(data);
@@ -115,7 +114,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 		unsigned worker = workers->get_next(workers, &it);
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-		if (!starpu_bitmap_get(data->waiters, worker))
+		if (!starpu_bitmap_get(&data->waiters, worker))
 			/* This worker is not waiting for a task */
 			continue;
 #endif
@@ -124,7 +123,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 		{
 			/* It can execute this one, tell him! */
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-			starpu_bitmap_unset(data->waiters, worker);
+			starpu_bitmap_unset(&data->waiters, worker);
 			/* We really woke at least somebody, no need to wake somebody else */
 			break;
 #else
@@ -170,7 +169,7 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 	}
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(data->waiters, workerid))
+	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid))
 		/* Nobody woke us, avoid bothering the mutex */
 	{
 		return NULL;
@@ -197,7 +196,7 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 			if(worker != workerid)
 			{
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-				starpu_bitmap_unset(data->waiters, worker);
+				starpu_bitmap_unset(&data->waiters, worker);
 #else
 				starpu_wake_worker_relax_light(worker);
 #endif
@@ -208,7 +207,7 @@ static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id)
 
 	if (!chosen_task)
 		/* Tell pushers that we are waiting for tasks for us */
-		starpu_bitmap_set(data->waiters, workerid);
+		starpu_bitmap_set(&data->waiters, workerid);
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	if(chosen_task &&_starpu_get_nsched_ctxs() > 1)

+ 12 - 4
src/sched_policies/fifo_queues.c

@@ -44,14 +44,14 @@ static int is_sorted_task_list(struct starpu_task * task)
 }
 */
 
-struct _starpu_fifo_taskq *_starpu_create_fifo(void)
+void _starpu_init_fifo(struct _starpu_fifo_taskq *fifo)
 {
-	struct _starpu_fifo_taskq *fifo;
-	_STARPU_MALLOC(fifo, sizeof(struct _starpu_fifo_taskq));
-
 	/* note that not all mechanisms (eg. the semaphore) have to be used */
 	starpu_task_list_init(&fifo->taskq);
 	fifo->ntasks = 0;
+	/* Tell helgrind that it's fine to check for empty fifo in
+	 * pop_task_graph_test_policy without actual mutex (it's just an integer)
+	 */
 	STARPU_HG_DISABLE_CHECKING(fifo->ntasks);
 	fifo->nprocessed = 0;
 
@@ -60,6 +60,14 @@ struct _starpu_fifo_taskq *_starpu_create_fifo(void)
 	fifo->exp_end = fifo->exp_start;
 	fifo->exp_len_per_priority = NULL;
 	fifo->pipeline_len = 0.0;
+}
+
+struct _starpu_fifo_taskq *_starpu_create_fifo(void)
+{
+	struct _starpu_fifo_taskq *fifo;
+	_STARPU_MALLOC(fifo, sizeof(struct _starpu_fifo_taskq));
+
+	_starpu_init_fifo(fifo);
 
 	return fifo;
 }

+ 1 - 0
src/sched_policies/fifo_queues.h

@@ -50,6 +50,7 @@ struct _starpu_fifo_taskq
 };
 
 struct _starpu_fifo_taskq*_starpu_create_fifo(void) STARPU_ATTRIBUTE_MALLOC;
+void _starpu_init_fifo(struct _starpu_fifo_taskq *fifo);
 void _starpu_destroy_fifo(struct _starpu_fifo_taskq *fifo);
 
 int _starpu_fifo_empty(struct _starpu_fifo_taskq *fifo);

+ 15 - 22
src/sched_policies/graph_test_policy.c

@@ -36,11 +36,11 @@
 
 struct _starpu_graph_test_policy_data
 {
-	struct _starpu_fifo_taskq *fifo;	/* Bag of tasks which are ready before do_schedule is called */
+	struct _starpu_fifo_taskq fifo;	/* Bag of tasks which are ready before do_schedule is called */
 	struct _starpu_prio_deque prio_cpu;
 	struct _starpu_prio_deque prio_gpu;
 	starpu_pthread_mutex_t policy_mutex;
-	struct starpu_bitmap *waiters;
+	struct starpu_bitmap waiters;
 	unsigned computed;
 	unsigned descendants;			/* Whether we use descendants, or depths, for priorities */
 };
@@ -51,20 +51,15 @@ static void initialize_graph_test_policy(unsigned sched_ctx_id)
 	_STARPU_MALLOC(data, sizeof(struct _starpu_graph_test_policy_data));
 
 	/* there is only a single queue in that trivial design */
-	data->fifo =  _starpu_create_fifo();
+	_starpu_init_fifo(&data->fifo);
 	 _starpu_prio_deque_init(&data->prio_cpu);
 	 _starpu_prio_deque_init(&data->prio_gpu);
-	data->waiters = starpu_bitmap_create();
+	starpu_bitmap_init(&data->waiters);
 	data->computed = 0;
 	data->descendants = starpu_get_env_number_default("STARPU_SCHED_GRAPH_TEST_DESCENDANTS", 0);
 
 	_starpu_graph_record = 1;
 
-	 /* Tell helgrind that it's fine to check for empty fifo in
-	  * pop_task_graph_test_policy without actual mutex (it's just an integer)
-	  */
-	STARPU_HG_DISABLE_CHECKING(data->fifo->ntasks);
-
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
 	STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
 }
@@ -72,15 +67,13 @@ static void initialize_graph_test_policy(unsigned sched_ctx_id)
 static void deinitialize_graph_test_policy(unsigned sched_ctx_id)
 {
 	struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	struct _starpu_fifo_taskq *fifo = data->fifo;
+	struct _starpu_fifo_taskq *fifo = &data->fifo;
 
 	STARPU_ASSERT(starpu_task_list_empty(&fifo->taskq));
 
 	/* deallocate the job queue */
-	_starpu_destroy_fifo(fifo);
 	 _starpu_prio_deque_destroy(&data->prio_cpu);
 	 _starpu_prio_deque_destroy(&data->prio_gpu);
-	starpu_bitmap_destroy(data->waiters);
 
 	_starpu_graph_record = 0;
 	STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
@@ -194,9 +187,9 @@ static void do_schedule_graph_test_policy(unsigned sched_ctx_id)
 	}
 
 	/* Now that we have priorities, move tasks from bag to priority queue */
-	while(!_starpu_fifo_empty(data->fifo))
+	while(!_starpu_fifo_empty(&data->fifo))
 	{
-		struct starpu_task *task = _starpu_fifo_pop_task(data->fifo, -1);
+		struct starpu_task *task = _starpu_fifo_pop_task(&data->fifo, -1);
 		struct _starpu_prio_deque *prio = select_prio(sched_ctx_id, data, task);
 		_starpu_prio_deque_push_back_task(prio, task);
 	}
@@ -210,7 +203,7 @@ static void do_schedule_graph_test_policy(unsigned sched_ctx_id)
 	{
 		/* Tell each worker is shouldn't sleep any more */
 		unsigned worker = workers->get_next(workers, &it);
-		starpu_bitmap_unset(data->waiters, worker);
+		starpu_bitmap_unset(&data->waiters, worker);
 	}
 #endif
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
@@ -237,9 +230,9 @@ static int push_task_graph_test_policy(struct starpu_task *task)
 	if (!data->computed)
 	{
 		/* Priorities are not computed, leave the task in the bag for now */
-		starpu_task_list_push_back(&data->fifo->taskq,task);
-		data->fifo->ntasks++;
-		data->fifo->nprocessed++;
+		starpu_task_list_push_back(&data->fifo.taskq,task);
+		data->fifo.ntasks++;
+		data->fifo.nprocessed++;
 		starpu_push_task_end(task);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 		return 0;
@@ -266,7 +259,7 @@ static int push_task_graph_test_policy(struct starpu_task *task)
 		unsigned worker = workers->get_next(workers, &it);
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-		if (!starpu_bitmap_get(data->waiters, worker))
+		if (!starpu_bitmap_get(&data->waiters, worker))
 			/* This worker is not waiting for a task */
 			continue;
 #endif
@@ -281,7 +274,7 @@ static int push_task_graph_test_policy(struct starpu_task *task)
 		{
 			/* It can execute this one, tell him! */
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-			starpu_bitmap_unset(data->waiters, worker);
+			starpu_bitmap_unset(&data->waiters, worker);
 			/* We really woke at least somebody, no need to wake somebody else */
 			break;
 #else
@@ -333,7 +326,7 @@ static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id)
 	if (!STARPU_RUNNING_ON_VALGRIND && !data->computed)
 		/* Not computed yet */
 		return NULL;
-	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(data->waiters, workerid))
+	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid))
 		/* Nobody woke us, avoid bothering the mutex */
 		return NULL;
 #endif
@@ -350,7 +343,7 @@ static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id)
 	chosen_task = _starpu_prio_deque_pop_task_for_worker(prio, workerid, NULL);
 	if (!chosen_task)
 		/* Tell pushers that we are waiting for tasks for us */
-		starpu_bitmap_set(data->waiters, workerid);
+		starpu_bitmap_set(&data->waiters, workerid);
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 

+ 6 - 8
src/sched_policies/heteroprio.c

@@ -88,7 +88,7 @@ struct _heteroprio_worker_wrapper
 struct _starpu_heteroprio_data
 {
 	starpu_pthread_mutex_t policy_mutex;
-	struct starpu_bitmap *waiters;
+	struct starpu_bitmap waiters;
 	/* The bucket to store the tasks */
 	struct _heteroprio_bucket buckets[STARPU_HETEROPRIO_MAX_PRIO];
 	/* The number of buckets for each arch */
@@ -216,7 +216,7 @@ static void initialize_heteroprio_policy(unsigned sched_ctx_id)
 	_STARPU_MALLOC(hp, sizeof(struct _starpu_heteroprio_data));
 	memset(hp, 0, sizeof(*hp));
 
-	hp->waiters = starpu_bitmap_create();
+	starpu_bitmap_init(&hp->waiters);
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hp);
 
@@ -295,8 +295,6 @@ static void deinitialize_heteroprio_policy(unsigned sched_ctx_id)
 		_heteroprio_bucket_release(&hp->buckets[idx_prio]);
 	}
 
-	starpu_bitmap_destroy(hp->waiters);
-
 	STARPU_PTHREAD_MUTEX_DESTROY(&hp->policy_mutex);
 	free(hp);
 }
@@ -404,7 +402,7 @@ static int push_task_heteroprio_policy(struct starpu_task *task)
 		unsigned worker = workers->get_next(workers, &it);
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-		if (!starpu_bitmap_get(hp->waiters, worker))
+		if (!starpu_bitmap_get(&hp->waiters, worker))
 			/* This worker is not waiting for a task */
 			continue;
 #endif
@@ -413,7 +411,7 @@ static int push_task_heteroprio_policy(struct starpu_task *task)
 		{
 			/* It can execute this one, tell him! */
 #ifdef STARPU_NON_BLOCKING_DRIVERS
-			starpu_bitmap_unset(hp->waiters, worker);
+			starpu_bitmap_unset(&hp->waiters, worker);
 			/* We really woke at least somebody, no need to wake somebody else */
 			break;
 #else
@@ -455,7 +453,7 @@ static struct starpu_task *pop_task_heteroprio_policy(unsigned sched_ctx_id)
 		return NULL;
 	}
 
-	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(hp->waiters, workerid))
+	if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&hp->waiters, workerid))
 	{
 		/* Nobody woke us, avoid bothering the mutex */
 		return NULL;
@@ -602,7 +600,7 @@ done:		;
 	if (!task)
 	{
 		/* Tell pushers that we are waiting for tasks_queue for us */
-		starpu_bitmap_set(hp->waiters, workerid);
+		starpu_bitmap_set(&hp->waiters, workerid);
 	}
 	STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex);
 

+ 4 - 4
src/sched_policies/modular_gemm.c

@@ -119,9 +119,9 @@ static int gemm_push_task(struct starpu_sched_component * component, struct star
 
 	int workerid;
 	/* It's not a GEMM, or no GPU wanted to take it, find somebody else */
-	for(workerid = starpu_bitmap_first(component->workers_in_ctx);
+	for(workerid = starpu_bitmap_first(&component->workers_in_ctx);
 	    workerid != -1;
-	    workerid = starpu_bitmap_next(component->workers_in_ctx, workerid))
+	    workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid))
 	{
 		int nimpl;
 		for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
@@ -133,9 +133,9 @@ static int gemm_push_task(struct starpu_sched_component * component, struct star
 				{
 					struct starpu_sched_component *child = component->children[i];
 					int idworker;
-					for(idworker = starpu_bitmap_first(component->children[i]->workers);
+					for(idworker = starpu_bitmap_first(&component->children[i]->workers);
 						idworker != -1;
-						idworker = starpu_bitmap_next(component->children[i]->workers, idworker))
+						idworker = starpu_bitmap_next(&component->children[i]->workers, idworker))
 					{
 						if (idworker == workerid)
 						{

+ 11 - 24
src/sched_policies/parallel_eager.c

@@ -35,8 +35,8 @@ struct _starpu_peager_common_data *_peager_common_data = NULL;
 struct _starpu_peager_data
 {
 	starpu_pthread_mutex_t policy_mutex;
-	struct _starpu_fifo_taskq *fifo;
-	struct _starpu_fifo_taskq *local_fifo[STARPU_NMAXWORKERS];
+	struct _starpu_fifo_taskq fifo;
+	struct _starpu_fifo_taskq local_fifo[STARPU_NMAXWORKERS];
 };
 
 static void initialize_peager_common(void)
@@ -136,22 +136,12 @@ static void peager_add_workers(unsigned sched_ctx_id, int *workerids, unsigned n
 		/* slaves pick up tasks from their local queue, their master
 		 * will put tasks directly in that local list when a parallel
 		 * tasks comes. */
-		data->local_fifo[workerid] = _starpu_create_fifo();
+		_starpu_init_fifo(&data->local_fifo[workerid]);
 	}
 }
 
-static void peager_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
+static void peager_remove_workers(unsigned sched_ctx_id, int *workerids STARPU_ATTRIBUTE_UNUSED, unsigned nworkers STARPU_ATTRIBUTE_UNUSED)
 {
-	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	unsigned i;
-	for(i = 0; i < nworkers; i++)
-        {
-		int workerid = workerids[i];
-		if(!starpu_worker_is_combined_worker(workerid))
-		{
-			_starpu_destroy_fifo(data->local_fifo[workerid]);
-		}
-	}
 	if (sched_ctx_id == 0)
 	{
 		deinitialize_peager_common();
@@ -166,7 +156,7 @@ static void initialize_peager_policy(unsigned sched_ctx_id)
 	_STARPU_DISP("Warning: the peager scheduler is mostly a proof of concept and not really very optimized\n");
 
 	/* masters pick tasks from that queue */
-	data->fifo = _starpu_create_fifo();
+	_starpu_init_fifo(&data->fifo);
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
         STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
@@ -177,9 +167,6 @@ static void deinitialize_peager_policy(unsigned sched_ctx_id)
 	/* TODO check that there is no task left in the queue */
 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
-	/* deallocate the job queue */
-	_starpu_destroy_fifo(data->fifo);
-
         STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
 
 	free(data);
@@ -193,7 +180,7 @@ static int push_task_peager_policy(struct starpu_task *task)
 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
-	ret_val = _starpu_fifo_push_task(data->fifo, task);
+	ret_val = _starpu_fifo_push_task(&data->fifo, task);
 #ifndef STARPU_NON_BLOCKING_DRIVERS
 	int is_parallel_task = task->cl && task->cl->max_parallelism > 1;
 #endif
@@ -249,7 +236,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 		starpu_worker_relax_on();
 		STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 		starpu_worker_relax_off();
-		task = _starpu_fifo_pop_task(data->fifo, workerid);
+		task = _starpu_fifo_pop_task(&data->fifo, workerid);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 
 		return task;
@@ -261,11 +248,11 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 	STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	starpu_worker_relax_off();
 	/* check if a slave task is available in the local queue */
-	task = _starpu_fifo_pop_task(data->local_fifo[workerid], workerid);
+	task = _starpu_fifo_pop_task(&data->local_fifo[workerid], workerid);
 	if (!task)
 	{
 		/* no slave task, try to pop a task as master */
-		task = _starpu_fifo_pop_task(data->fifo, workerid);
+		task = _starpu_fifo_pop_task(&data->fifo, workerid);
 		if (task)
 		{
 			_STARPU_DEBUG("poping master task %p\n", task);
@@ -277,7 +264,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 		{
 			/* task is potentially parallel, leave it for a combined worker master */
 			_STARPU_DEBUG("pushing back master task %p\n", task);
-			_starpu_fifo_push_back_task(data->fifo, task);
+			_starpu_fifo_push_back_task(&data->fifo, task);
 			task = NULL;
 		}
 #endif
@@ -339,7 +326,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 		int local_worker = combined_workerid[i];
 		alias->destroy = 1;
 		_STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0);
-		_starpu_fifo_push_task(data->local_fifo[local_worker], alias);
+		_starpu_fifo_push_task(&data->local_fifo[local_worker], alias);
 	}
 
 	/* The master also manipulated an alias */

+ 0 - 4
tools/Makefile.am

@@ -399,10 +399,6 @@ starpu_replay_SOURCES = \
 	starpu_replay.c \
 	starpu_replay_sched.c
 
-if STARPU_USE_MPI
-SUBDIRS += replay-mpi
-endif
-
 endif
 
 starpu_perfmodel_plot_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)

+ 1 - 1
tools/dev/valgrind/valgrind.sh

@@ -21,7 +21,7 @@ CLIMIT=$(ulimit -c)
 if [ "$CLIMIT" = unlimited ]
 then
 	# valgrind cores are often *huge*, 100MB will already be quite big...
-	ulimit -c 100000000
+	ulimit -c 100000
 fi
 
 if test "$EXEC" == "valgrind"

+ 1 - 1
tools/starpu_replay.c

@@ -431,7 +431,7 @@ static void arrays_managing(int mode)
 	{
 		_STARPU_MALLOC(handles_ptr, sizeof(*handles_ptr) * nb_parameters);
 		_STARPU_MALLOC(modes_ptr, sizeof(*modes_ptr) * nb_parameters);
-		_STARPU_CALLOC(reg_signal, nb_parameters, sizeof(char *));
+		_STARPU_CALLOC(reg_signal, nb_parameters, sizeof(char));
 
 	}
 }

+ 2 - 0
tools/starpu_replay_sched.c

@@ -344,6 +344,8 @@ void schedRecInit(const char * filename)
 	}
 
 	fclose(f);
+
+	free(s);
 }
 
 static void do_prefetch(void *arg)