Browse Source

merge from trunk

Corentin Salingue 8 years ago
parent
commit
f45e0a0e7c
55 changed files with 524 additions and 251 deletions
  1. 27 2
      ChangeLog
  2. 2 1
      configure.ac
  3. 10 0
      doc/doxygen/chapters/501_environment_variables.doxy
  4. 1 5
      examples/Makefile.am
  5. 2 2
      examples/basic_examples/mult.c
  6. 5 0
      include/starpu.h
  7. 4 1
      include/starpu_cusparse.h
  8. 2 0
      include/starpu_data.h
  9. 2 1
      include/starpu_perfmodel.h
  10. 1 0
      include/starpu_thread_util.h
  11. 1 6
      include/starpu_util.h
  12. 2 1
      include/starpu_worker.h
  13. 1 2
      mpi/src/starpu_mpi.c
  14. 37 56
      mpi/src/starpu_mpi_collective.c
  15. 1 1
      mpi/src/starpu_mpi_init.c
  16. 1 1
      mpi/src/starpu_mpi_task_insert.c
  17. 1 0
      src/common/barrier.c
  18. 1 1
      src/common/barrier.h
  19. 4 0
      src/common/thread.c
  20. 0 1
      src/common/thread.h
  21. 8 9
      src/core/combined_workers.c
  22. 1 1
      src/core/perfmodel/perfmodel_history.c
  23. 3 3
      src/core/sched_ctx.c
  24. 1 2
      src/core/simgrid.c
  25. 3 1
      src/core/simgrid.h
  26. 62 24
      src/core/topology.c
  27. 9 7
      src/core/workers.c
  28. 2 1
      src/core/workers.h
  29. 1 1
      src/datawizard/coherency.c
  30. 8 0
      src/datawizard/data_request.c
  31. 1 0
      src/datawizard/datawizard.c
  32. 3 0
      src/debug/traces/starpu_paje.c
  33. 1 2
      src/drivers/cuda/driver_cuda.c
  34. 1 2
      src/drivers/mic/driver_mic_common.c
  35. 1 2
      src/drivers/mic/driver_mic_sink.c
  36. 2 4
      src/drivers/mic/driver_mic_source.c
  37. 1 1
      src/drivers/mp_common/sink_common.c
  38. 1 2
      src/drivers/mp_common/source_common.c
  39. 18 19
      src/drivers/mpi/driver_mpi_common.c
  40. 2 4
      src/drivers/opencl/driver_opencl_utils.c
  41. 12 12
      src/top/starpu_top.c
  42. 10 15
      src/top/starpu_top_connection.c
  43. 5 5
      src/top/starpu_top_message_queue.c
  44. 12 13
      src/util/fstarpu.c
  45. 5 9
      src/util/starpu_task_insert_utils.c
  46. 7 1
      tests/Makefile.am
  47. 21 19
      tests/datawizard/reclaim.c
  48. 12 5
      tests/main/insert_task_where.c
  49. 10 4
      tests/model-checking/Makefile
  50. 19 0
      tests/model-checking/barrier.sh
  51. 5 0
      tests/model-checking/common/config.h
  52. 2 0
      tests/model-checking/prio_list.c
  53. 128 0
      tests/model-checking/starpu_barrier.c
  54. 40 0
      tests/model-checking/starpu_config.h
  55. 2 2
      tools/starpu_machine_display.c

+ 27 - 2
ChangeLog

@@ -61,14 +61,39 @@ StarPU 1.2.2 (svn revision xxx)
 New features:
   * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try.
   * Add NVCC_CC environment variable.
-  * Add -no-foo options to starpu_fxt_tool to make traces lighter
+  * Add -no-flops and -no-events options to starpu_fxt_tool to make
+    traces lighter
   * Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA
     overlapping with cusparse.
+  * Allow precise debugging by setting STARPU_TASK_BREAK_ON_PUSH,
+    STARPU_TASK_BREAK_ON_SCHED, STARPU_TASK_BREAK_ON_POP, and
+    STARPU_TASK_BREAK_ON_EXEC environment variables, with the job_id
+    of a task. StarPU will raise SIGTRAP when the task is being
+    pushed, scheduled, or popped by the scheduler, or executed.
 
+Small features:
+  * New function starpu_worker_get_job_id(struct starpu_task *task)
+    which returns the job identifier for a given task
+  * Show package/numa topology in starpu_machine_display
+  * MPI: Add mpi communications in dag.dot
+  * Add STARPU_PERF_MODEL_HOMOGENEOUS_CPU environment variable to
+    allow having one perfmodel per CPU core
 
 Small changes:
   * Output generated through STARPU_MPI_COMM has been modified to
     allow easier automated checking
+  * MPI: Fix reactivity at the beginning of the application: when a
+    lot of ready requests have to be processed at the same time, we
+    want to poll the pending requests from time to time.
+  * MPI: Fix gantt chart for starpu_mpi_irecv: it should use the
+    termination time of the request, not the submission time.
+  * MPI: Modify output generated through STARPU_MPI_COMM to allow
+    easier automated checking
+  * MPI: enable more tests in simgrid mode
+  * Use assumed-size instead of assumed-shape arrays for native
+    fortran API, for better backward compatibility.
+  * Fix odd ordering of CPU workers on CPUs due to GPUs stealing some
+    cores
 
 StarPU 1.2.1 (svn revision 20299)
 ==============================================
@@ -494,7 +519,7 @@ New features:
   * Add the Hypervisor to manage the Scheduling Contexts automatically
     	- The Contexts can be registered to the Hypervisor
 	- Only the registered contexts are managed by the Hypervisor
-	- The Hypervisor can detect the initial distribution of resources of 
+	- The Hypervisor can detect the initial distribution of resources of
 	a context and constructs it consequently (the cost of execution is required)
     	- Several policies can adapt dynamically the distribution of resources
 	in contexts if the initial one was not appropriate

+ 2 - 1
configure.ac

@@ -2968,7 +2968,7 @@ AS_IF([test "$use_hwloc" = "yes" -a "$have_valid_hwloc" = "no"],
      )
 # in case hwloc is not available but was not explicitly disabled, this is an error
 AS_IF([test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no"],
-      [AC_MSG_ERROR([libhwloc was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install libhwloc. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])]
+      [AC_MSG_ERROR([libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot.  It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])]
      )
 
 LDFLAGS="${HWLOC_LIBS} ${SAVED_LDFLAGS}"
@@ -3150,6 +3150,7 @@ AC_CONFIG_COMMANDS([executable-scripts], [
   test -e tests/datawizard/locality.sh || ln -sf $ac_abs_top_srcdir/tests/datawizard/locality.sh tests/datawizard/
   mkdir -p tests/model-checking
   test -e tests/model-checking/prio_list.sh || ln -sf $ac_abs_top_srcdir/tests/model-checking/prio_list.sh tests/model-checking/
+  test -e tests/model-checking/barrier.sh || ln -sf $ac_abs_top_srcdir/tests/model-checking/barrier.sh tests/model-checking/
   mkdir -p examples/heat
   test -e examples/heat/heat.sh || ln -sf $ac_abs_top_srcdir/examples/heat/heat.sh examples/heat/
   mkdir -p examples/lu

+ 10 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -680,6 +680,16 @@ This specifies the main directory in which StarPU stores its
 performance model files. The default is <c>$STARPU_HOME/.starpu/sampling</c>.
 </dd>
 
+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_CPU</dt>
+<dd>
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CPU
+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CPU
+When this is set to 0, StarPU will assume that CPU devices do not have the same
+performance, and will thus use different performance models for them. This makes
+kernel calibration much longer, since measurements have to be made for each CPU
+core.
+</dd>
+
 <dt>STARPU_PERF_MODEL_HOMOGENEOUS_CUDA</dt>
 <dd>
 \anchor STARPU_PERF_MODEL_HOMOGENEOUS_CUDA

+ 1 - 5
examples/Makefile.am

@@ -344,12 +344,8 @@ STARPU_EXAMPLES +=				\
 	sched_ctx/sched_ctx_without_sched_policy\
 	sched_ctx/nested_sched_ctxs		\
 	sched_ctx/sched_ctx_without_sched_policy_awake\
-	sched_ctx/parallel_tasks_reuse_handle
-
-if STARPU_LONG_CHECK
-STARPU_EXAMPLES +=				\
+	sched_ctx/parallel_tasks_reuse_handle	\
 	sched_ctx/parallel_code
-endif
 
 if STARPU_HAVE_HWLOC
 if STARPU_HWLOC_HAVE_TOPOLOGY_DUP

+ 2 - 2
examples/basic_examples/mult.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2011, 2013, 2015  Université de Bordeaux
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
- * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2013, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -136,7 +136,7 @@ static void init_problem_data(void)
 	C = (float *) malloc(xdim*ydim*sizeof(float));
 
 	/* fill the A and B matrices */
-	srand(2009);
+	starpu_srand48(2009);
 	for (j=0; j < ydim; j++)
 	{
 		for (i=0; i < zdim; i++)

+ 5 - 0
include/starpu.h

@@ -30,6 +30,11 @@ typedef unsigned short uint16_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
 typedef UINT_PTR uintptr_t;
+typedef char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef INT_PTR intptr_t;
 #endif
 
 #include <starpu_config.h>

+ 4 - 1
include/starpu_cusparse.h

@@ -18,6 +18,10 @@
 #ifndef __STARPU_CUSPARSE_H__
 #define __STARPU_CUSPARSE_H__
 
+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
+#include <cusparse.h>
+#endif
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -27,7 +31,6 @@ void starpu_cusparse_init(void);
 void starpu_cusparse_shutdown(void);
 
 #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
-#include <cusparse.h>
 cusparseHandle_t starpu_cusparse_get_local_handle(void);
 #endif
 

+ 2 - 0
include/starpu_data.h

@@ -160,6 +160,8 @@ void starpu_memchunk_tidy(unsigned memory_node);
 void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
 void *starpu_data_get_user_data(starpu_data_handle_t handle);
 
+int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
+
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
include/starpu_perfmodel.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2014, 2016  Université de Bordeaux
+ * Copyright (C) 2010-2014, 2016-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2016  Inria
@@ -25,6 +25,7 @@
 
 #include <starpu_util.h>
 #include <starpu_worker.h>
+#include <starpu_task.h>
 
 #ifdef __cplusplus
 extern "C"

+ 1 - 0
include/starpu_thread_util.h

@@ -19,6 +19,7 @@
 #define __STARPU_THREAD_UTIL_H__
 
 #include <starpu_util.h>
+#include <starpu_thread.h>
 #include <errno.h>
 
 #if !(defined(_MSC_VER) && !defined(BUILDING_STARPU))

+ 1 - 6
include/starpu_util.h

@@ -20,6 +20,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <string.h>
 #include <assert.h>
 
@@ -331,10 +332,6 @@ STARPU_ATOMIC_SOMETHINGL(or, old | value)
 }
 #endif
 
-/* Include this only here so that <starpu_data_interfaces.h> can use the
- * macros above.  */
-#include <starpu_task.h>
-
 #ifdef __cplusplus
 extern "C"
 {
@@ -416,8 +413,6 @@ void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t
 
 void starpu_execute_on_specific_workers(void (*func)(void*), void *arg, unsigned num_workers, unsigned *workers, const char *name);
 
-int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
-
 double starpu_timing_now(void);
 
 #ifdef _WIN32

+ 2 - 1
include/starpu_worker.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2013, 2016  Université de Bordeaux
+ * Copyright (C) 2009-2013, 2016-2017  Université de Bordeaux
  * Copyright (C) 2010-2014, 2017  CNRS
  * Copyright (C) 2016, 2017  INRIA
  * Copyright (C) 2016  Uppsala University
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #include <starpu_config.h>
 #include <starpu_thread.h>
+#include <starpu_task.h>
 
 #ifdef __cplusplus
 extern "C"

+ 1 - 2
mpi/src/starpu_mpi.c

@@ -1337,8 +1337,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*));
 	if (!smpi_process_set_user_data)
 	{
-		fprintf(stderr,"Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
-		exit(1);
+		_STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n");
 	}
 	smpi_process_set_user_data(tsd);
 #endif

+ 37 - 56
mpi/src/starpu_mpi_collective.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -39,25 +39,23 @@ void _callback_collective(void *arg)
 	}
 }
 
-int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
+static
+int _callback_set(int rank, starpu_data_handle_t *data_handles, int count, int root, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg, void (**callback_func)(void *), struct _callback_arg **callback_arg)
 {
-	int rank;
-	int x;
-	struct _callback_arg *callback_arg = NULL;
-	void (*callback_func)(void *) = NULL;
 	void (*callback)(void *);
 
-	starpu_mpi_comm_rank(comm, &rank);
-
 	callback = (rank == root) ? scallback : rcallback;
-	if (callback)
+	if (*callback)
 	{
-		callback_func = _callback_collective;
-		_STARPU_MPI_MALLOC(callback_arg, sizeof(struct _callback_arg));
-		callback_arg->count = 0;
-		callback_arg->nb = 0;
-		callback_arg->callback = (rank == root) ? scallback : rcallback;
-		callback_arg->arg = (rank == root) ? sarg : rarg;
+		int x;
+
+		*callback_func = _callback_collective;
+
+		_STARPU_MPI_MALLOC(*callback_arg, sizeof(struct _callback_arg));
+		(*callback_arg)->count = 0;
+		(*callback_arg)->nb = 0;
+		(*callback_arg)->callback = (rank == root) ? scallback : rcallback;
+		(*callback_arg)->arg = (rank == root) ? sarg : rarg;
 
 		for(x = 0; x < count ; x++)
 		{
@@ -68,22 +66,38 @@ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, i
 				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 				if ((rank == root) && (owner != root))
 				{
-					callback_arg->count ++;
+					(*callback_arg)->count ++;
 				}
 				if ((rank != root) && (owner == rank))
 				{
-					callback_arg->count ++;
+					(*callback_arg)->count ++;
 				}
 			}
 		}
 
-		if (!callback_arg->count)
+		if (!(*callback_arg)->count)
 		{
-			free(callback_arg);
-			return 0;
+			free(*callback_arg);
+			return 1;
 		}
 	}
 
+	return 0;
+}
+
+int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
+{
+	int rank;
+	int x;
+	struct _callback_arg *callback_arg = NULL;
+	void (*callback_func)(void *) = NULL;
+
+	starpu_mpi_comm_rank(comm, &rank);
+
+	x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg);
+	if (x == 1)
+		return 0;
+
 	for(x = 0; x < count ; x++)
 	{
 		if (data_handles[x])
@@ -112,45 +126,12 @@ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, in
 	int x;
 	struct _callback_arg *callback_arg = NULL;
 	void (*callback_func)(void *) = NULL;
-	void (*callback)(void *);
 
 	starpu_mpi_comm_rank(comm, &rank);
 
-	callback = (rank == root) ? scallback : rcallback;
-	if (callback)
-	{
-		callback_func = _callback_collective;
-
-		_STARPU_MPI_MALLOC(callback_arg, sizeof(struct _callback_arg));
-		callback_arg->count = 0;
-		callback_arg->nb = 0;
-		callback_arg->callback = callback;
-		callback_arg->arg = (rank == root) ? sarg : rarg;
-
-		for(x = 0; x < count ; x++)
-		{
-			if (data_handles[x])
-			{
-				int owner = starpu_mpi_data_get_rank(data_handles[x]);
-				int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
-				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
-				if ((rank == root) && (owner != root))
-				{
-					callback_arg->count ++;
-				}
-				if ((rank != root) && (owner == rank))
-				{
-					callback_arg->count ++;
-				}
-			}
-		}
-
-		if (!callback_arg->count)
-		{
-			free(callback_arg);
-			return 0;
-		}
-	}
+	x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg);
+	if (x == 1)
+		return 0;
 
 	for(x = 0; x < count ; x++)
 	{

+ 1 - 1
mpi/src/starpu_mpi_init.c

@@ -48,7 +48,7 @@ static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg)
 	{
 		case MPI_THREAD_SERIALIZED:
 		{
-			_STARPU_DISP("MPI%s MPI_THREAD_SERIALIZED; Multiple threads may make MPI calls, but only one at a time.\n", msg);
+			_STARPU_DEBUG("MPI%s MPI_THREAD_SERIALIZED; Multiple threads may make MPI calls, but only one at a time.\n", msg);
 			break;
 		}
 		case MPI_THREAD_FUNNELED:

+ 1 - 1
mpi/src/starpu_mpi_task_insert.c

@@ -390,7 +390,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 		{
 			// the flag is decoded and set later when
 			// calling function _starpu_task_insert_create()
-			(void)va_arg(varg_list_copy, uint32_t);
+			(void)va_arg(varg_list_copy, unsigned long long);
 		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{

+ 1 - 0
src/common/barrier.c

@@ -16,6 +16,7 @@
 
 #include <common/barrier.h>
 #include <common/utils.h>
+#include <starpu_thread_util.h>
 
 int _starpu_barrier_init(struct _starpu_barrier *barrier, int count)
 {

+ 1 - 1
src/common/barrier.h

@@ -17,7 +17,7 @@
 #ifndef __COMMON_BARRIER_H__
 #define __COMMON_BARRIER_H__
 
-#include <starpu.h>
+#include <starpu_thread.h>
 
 struct _starpu_barrier
 {

+ 4 - 0
src/common/thread.c

@@ -18,7 +18,11 @@
 
 #include <starpu.h>
 #include <core/simgrid.h>
+#ifdef STARPU_DEBUG
 #include <core/workers.h>
+#endif
+#include <common/thread.h>
+#include <common/fxt.h>
 
 #include <errno.h>
 #include <limits.h>

+ 0 - 1
src/common/thread.h

@@ -18,7 +18,6 @@
 #ifndef __COMMON_THREAD_H__
 #define __COMMON_THREAD_H__
 
-#include <starpu.h>
 #include <common/utils.h>
 
 #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)

+ 8 - 9
src/core/combined_workers.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2014, 2016, 2017  CNRS
  * Copyright (C) 2017  Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -88,14 +88,13 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 	new_workerid = basic_worker_count + combined_worker_id;
 	config->topology.ncombinedworkers++;
 
-#if 0
-	fprintf(stderr, "COMBINED WORKERS ");
-	for (i = 0; i < nworkers; i++)
-	{
-		fprintf(stderr, "%d ", workerid_array[i]);
-	}
-	fprintf(stderr, "into worker %d\n", new_workerid);
-#endif
+//	fprintf(stderr, "COMBINED WORKERS ");
+//	for (i = 0; i < nworkers; i++)
+//	{
+//		fprintf(stderr, "%d ", workerid_array[i]);
+//	}
+//	fprintf(stderr, "into worker %d\n", new_workerid);
+
 	for(i = 0; i < nworkers; i++)
 		_starpu_get_worker_struct(workerid_array[i])->combined_workerid = new_workerid;
 

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -1076,7 +1076,7 @@ void _starpu_initialize_registered_performance_models(void)
 	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
-	/* ignore_devid[STARPU_CPU_WORKER]; */ /* Always true for now */
+	ignore_devid[STARPU_CPU_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
 	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
 	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
 	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);

+ 3 - 3
src/core/sched_ctx.c

@@ -1097,13 +1097,13 @@ unsigned _starpu_can_push_task(struct _starpu_sched_ctx *sched_ctx, struct starp
 			expected_len = expected_end - hyp_actual_start_sample[sched_ctx->id] ;
 		else
 		{
-			printf("%d: sc start is 0.0\n", sched_ctx->id);
+			_STARPU_MSG("%d: sc start is 0.0\n", sched_ctx->id);
 			expected_len = expected_end - starpu_timing_now();
 		}
 		if(expected_len < 0.0)
-			printf("exp len negative %lf \n", expected_len);
+			_STARPU_MSG("exp len negative %lf \n", expected_len);
 		expected_len /= 1000000.0;
-//		printf("exp_end %lf start %lf expected_len %lf \n", expected_end, hyp_actual_start_sample[sched_ctx->id], expected_len);
+		//		_STARPU_MSG("exp_end %lf start %lf expected_len %lf \n", expected_end, hyp_actual_start_sample[sched_ctx->id], expected_len);
 		if(expected_len > (window_size + 0.2*window_size))
 			return 0;
 	}

+ 1 - 2
src/core/simgrid.c

@@ -270,8 +270,7 @@ int main(int argc, char **argv)
 	{
 		if (!smpi_process_get_user_data)
 		{
-			fprintf(stderr,"Your version of simgrid does not provide smpi_process_get_user_data, we can not continue without it\n");
-			exit(1);
+			_STARPU_ERROR("Your version of simgrid does not provide smpi_process_get_user_data, we can not continue without it\n");
 		}
 
 #if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR >= 16)

+ 3 - 1
src/core/simgrid.h

@@ -25,7 +25,6 @@
 #include <msg/msg.h>
 #endif
 
-#include <datawizard/data_request.h>
 #include <xbt/xbt_os_time.h>
 
 struct _starpu_pthread_args
@@ -43,8 +42,11 @@ void _starpu_simgrid_init_early(int *argc, char ***argv);
 void _starpu_simgrid_init(void);
 void _starpu_simgrid_deinit(void);
 void _starpu_simgrid_wait_tasks(int workerid);
+struct _starpu_job;
 void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *job, struct starpu_perfmodel_arch* perf_arch, double length, unsigned *finished);
+struct _starpu_data_request;
 int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req);
+union _starpu_async_channel_event;
 int _starpu_simgrid_wait_transfer_event(union _starpu_async_channel_event *event);
 int _starpu_simgrid_test_transfer_event(union _starpu_async_channel_event *event);
 void _starpu_simgrid_sync_gpus(void);

+ 62 - 24
src/core/topology.c

@@ -880,6 +880,9 @@ _starpu_initialize_workers_bindid (struct _starpu_machine_config *config)
 
 	for (i = 0; i < STARPU_MAXCPUS;i++)
 		cpu_worker[i] = STARPU_NOWORKERID;
+
+	/* no binding yet */
+	memset(&config->currently_bound, 0, sizeof(config->currently_bound));
 }
 
 /* This function gets the identifier of the next core on which to bind a
@@ -895,44 +898,51 @@ _starpu_get_next_bindid (struct _starpu_machine_config *config,
 	unsigned found = 0;
 	int current_preferred;
 	int nhyperthreads = topology->nhwpus / topology->nhwcpus;
+	unsigned i;
 
 	/* loop over the preference list */
 	for (current_preferred = 0;
 	     current_preferred < npreferred;
 	     current_preferred++)
 	{
-		if (found)
-			break;
-
 		/* Try to get this core */
 		unsigned requested_core = preferred_binding[current_preferred];
+		unsigned requested_bindid = requested_core * nhyperthreads;
 
 		/* can we bind the worker on the preferred core ? */
 		unsigned ind;
 		/* Look at the remaining cores to be bound to */
-		for (ind = config->current_bindid;
+		for (ind = 0;
 		     ind < topology->nhwpus / nhyperthreads;
 		     ind++)
 		{
-			if (topology->workers_bindid[ind] == requested_core * nhyperthreads)
+			if (topology->workers_bindid[ind] == requested_bindid && !config->currently_bound[ind])
 			{
-				/* the cpu is available, we use it ! In order
-				 * to make sure that it will not be used again
-				 * later on, we exchange it with the next bindid we were supposed to use */
-				topology->workers_bindid[ind] =
-					topology->workers_bindid[config->current_bindid];
-				topology->workers_bindid[config->current_bindid] = requested_core * nhyperthreads;
-
-				found = 1;
-
-				break;
+				/* the cpu is available, we use it ! */
+				config->currently_bound[ind] = 1;
+				return requested_bindid;
 			}
 		}
 	}
 
-	unsigned i = ((config->current_bindid++) % STARPU_NMAXWORKERS);
+	for (i = config->current_bindid; i < topology->nhwpus / nhyperthreads; i++)
+		if (!config->currently_bound[i])
+			/* Found a cpu ready for use, use it! */
+			break;
 
-	return (int)topology->workers_bindid[i];
+	STARPU_ASSERT(i < topology->nhwpus / nhyperthreads);
+	int bindid = topology->workers_bindid[i];
+	config->currently_bound[i] = 1;
+	i++;
+	if (i == topology->nhwpus / nhyperthreads)
+	{
+		/* Finished binding on all cpus, restart from start in
+		 * case the user really wants overloading */
+		memset(&config->currently_bound, 0, sizeof(config->currently_bound));
+		i = 0;
+	}
+	config->current_bindid = i;
+	return bindid;
 }
 
 unsigned
@@ -1127,9 +1137,8 @@ _starpu_init_mpi_config (struct _starpu_machine_config *config,
                 if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
                 {
                         /* The user requires more MPI cores than there is available */
-                        fprintf(stderr,
-                                        "# Warning: %d MPI cores requested. Only %d available.\n",
-                                        nmpicores, topology->nhwmpicores[mpi_idx]);
+                        _STARPU_MSG("# Warning: %d MPI cores requested. Only %d available.\n",
+				    nmpicores, topology->nhwmpicores[mpi_idx]);
                         nmpicores = topology->nhwmpicores[mpi_idx];
                 }
         }
@@ -1714,6 +1723,7 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
 
 	unsigned cpu;
+	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
 	for (cpu = 0; cpu < topology->ncpus; cpu++)
 	{
 		int worker_idx = topology->nworkers + cpu;
@@ -1721,7 +1731,7 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 		_STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices,  sizeof(struct starpu_perfmodel_device));
 		config->workers[worker_idx].perf_arch.ndevices = 1;
 		config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
-		config->workers[worker_idx].perf_arch.devices[0].devid = 0;
+		config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : cpu;
 		config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
 		config->workers[worker_idx].subworkerid = 0;
 		config->workers[worker_idx].devid = cpu;
@@ -2703,10 +2713,38 @@ starpu_topology_print (FILE *output)
 	unsigned nthreads_per_core = topology->nhwpus / topology->nhwcpus;
 	unsigned numa;
 
-	for (numa = 0; numa < nb_numa_nodes; numa++)
+#ifdef STARPU_HAVE_HWLOC
+	hwloc_topology_t topo = topology->hwtopology;
+	hwloc_obj_t pu_obj;
+	hwloc_obj_t last_numa_obj = NULL, numa_obj;
+	hwloc_obj_t last_package_obj = NULL, package_obj;
+#endif
+
+	for (pu = 0; pu < topology->nhwpus; pu++)
 	{
-		fprintf(output, "------\tNUMA %u\t------\n", numa);
-		for (pu = 0; pu < topology->nhwpus; pu++)
+#ifdef STARPU_HAVE_HWLOC
+		pu_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, pu);
+		numa_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NODE, pu_obj);
+		if (numa_obj != last_numa_obj)
+		{
+			fprintf(output, "numa %u", numa_obj->logical_index);
+			last_numa_obj = numa_obj;
+		}
+		fprintf(output, "\t");
+		package_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_SOCKET, pu_obj);
+		if (package_obj != last_package_obj)
+		{
+			fprintf(output, "pack %u", package_obj->logical_index);
+			last_package_obj = package_obj;
+		}
+		fprintf(output, "\t");
+#endif
+		if ((pu % nthreads_per_core) == 0)
+			fprintf(output, "core %u", pu / nthreads_per_core);
+		fprintf(output, "\tPU %u\t", pu);
+		for (worker = 0;
+		     worker < nworkers + ncombinedworkers;
+		     worker++)
 		{
 			if (starpu_memory_nodes_numa_id_to_hwloclogid(numa) == _starpu_numa_get_logical_id_from_pu(pu))
 			{

+ 9 - 7
src/core/workers.c

@@ -1171,13 +1171,13 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 #ifdef STARPU_USE_MP
 	_starpu_set_argc_argv(argc, argv);
 
-#	ifdef STARPU_USE_SCC
+#ifdef STARPU_USE_SCC
 	/* In SCC case we look at the rank to know if we are a sink */
 	if (_starpu_scc_common_mp_init() && !_starpu_scc_common_is_src_node())
 		setenv("STARPU_SINK", "STARPU_SCC", 1);
-#	endif
+#endif
 
-#       ifdef STARPU_USE_MPI_MASTER_SLAVE
+#ifdef STARPU_USE_MPI_MASTER_SLAVE
         if (_starpu_mpi_common_mp_init() == -ENODEV)
         {
                 initialized = UNINITIALIZED;
@@ -1187,7 +1187,7 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
         /* In MPI case we look at the rank to know if we are a sink */
         if (!_starpu_mpi_common_is_src_node())
                 setenv("STARPU_SINK", "STARPU_MPI_MS", 1);
-#       endif
+# endif
 
 	/* If StarPU was configured to use MP sinks, we have to control the
 	 * kind on node we are running on : host or sink ? */
@@ -1257,8 +1257,6 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	WSAStartup(MAKEWORD(1,0), &wsadata);
 #endif
 
-	srand(2008);
-
 	STARPU_AYU_PREINIT();
 	/* store the pointer to the user explicit configuration during the
 	 * initialization */
@@ -1323,7 +1321,7 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	/* Depending on whether we are a MP sink or not, we must build the
 	 * topology with MP nodes or not. */
 	ret = _starpu_build_topology(&_starpu_config, is_a_sink);
-    /* sink doesn't exit even if no worker discovered */
+	/* sink doesn't exit even if no worker discovered */
 	if (ret && !is_a_sink)
 	{
 		starpu_perfmodel_free_sampling_directories();
@@ -1345,6 +1343,10 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 		/* Let somebody else try to do it */
 		STARPU_PTHREAD_COND_SIGNAL(&init_cond);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
+
+#ifdef STARPU_USE_FXT
+		_starpu_stop_fxt_profiling();
+#endif
 		return ret;
 	}
 

+ 2 - 1
src/core/workers.h

@@ -358,8 +358,9 @@ struct _starpu_machine_config
 	int pu_depth;
 #endif
 
-	/* Where to bind workers ? */
+	/* Where to bind next worker ? */
 	int current_bindid;
+	char currently_bound[STARPU_NMAXWORKERS];
 
 	/* Which GPU(s) do we use for CUDA ? */
 	int current_cuda_gpuid;

+ 1 - 1
src/datawizard/coherency.c

@@ -376,7 +376,7 @@ static int determine_request_path(starpu_data_handle_t handle,
 
 	if (!link_is_valid)
 	{
-		int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy;
+		int (*can_copy)(void *, unsigned, void *, unsigned, unsigned) = handle->ops->copy_methods->can_copy;
 		void *src_interface = handle->per_node[src_node].data_interface;
 		void *dst_interface = handle->per_node[dst_node].data_interface;
 

+ 8 - 0
src/datawizard/data_request.c

@@ -31,6 +31,7 @@
 #define MAX_PENDING_REQUESTS_PER_NODE 20
 #define MAX_PENDING_PREFETCH_REQUESTS_PER_NODE 10
 #define MAX_PENDING_IDLE_REQUESTS_PER_NODE 1
+#define MAX_PUSH_TIME 1000 /* Maximum time in us that we can afford pushing requests before going back to the driver loop, e.g. for checking GPU task termination */
 
 /* requests that have not been treated at all */
 static struct _starpu_data_request_prio_list data_requests[STARPU_MAXNODES];
@@ -584,6 +585,7 @@ static int __starpu_handle_node_data_requests(struct _starpu_data_request_prio_l
 	for (i = 0; i <= prefetch; i++)
 		_starpu_data_request_prio_list_init(&new_data_requests[i]);
 
+	double start = starpu_timing_now();
 	/* for all entries of the list */
 	while (!_starpu_data_request_prio_list_empty(&local_list))
 	{
@@ -612,6 +614,12 @@ static int __starpu_handle_node_data_requests(struct _starpu_data_request_prio_l
 		}
 
 		(*pushed)++;
+		if (starpu_timing_now() - start >= MAX_PUSH_TIME)
+		{
+			/* We have spent a lot of time doing requests, skip pushing more for now */
+			ret = -EBUSY;
+			break;
+		}
 	}
 
 	/* Push back requests we didn't handle on the proper list */

+ 1 - 0
src/datawizard/datawizard.c

@@ -45,6 +45,7 @@ int ___starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsi
 
 	if (ret || push_requests)
 	{
+		/* Some transfers have finished, or the driver requests to really push more */
 		unsigned pushed;
 		if (_starpu_handle_node_data_requests(memory_node, may_alloc, &pushed) == 0)
 		{

+ 3 - 0
src/debug/traces/starpu_paje.c

@@ -135,6 +135,8 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 	fprintf(file, "%%	Container	string\n");
 	fprintf(file, "%%	Type	string\n");
 	fprintf(file, "%%	Value	string\n");
+#if 0
+	/* TODO: implement in worker_set_detailed_state() and handle_codelet_details() */
 	fprintf(file, "%%	Size	string\n");
 	fprintf(file, "%%	Params	string\n");
 	fprintf(file, "%%	Footprint	string\n");
@@ -146,6 +148,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED)
 	fprintf(file, "%%	Z	string\n");
 	fprintf(file, "%%	Iteration	string\n");
 	fprintf(file, "%%	Subiteration	string\n");
+#endif
 	fprintf(file, "%%EndEventDef\n");
 #endif
 

+ 1 - 2
src/drivers/cuda/driver_cuda.c

@@ -1060,8 +1060,7 @@ void starpu_cublas_report_error(const char *func, const char *file, int line, in
 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
 {
 	const char *errormsg = cudaGetErrorString(status);
-	printf("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ABORT();
+	_STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
 }
 #endif /* STARPU_USE_CUDA */
 

+ 1 - 2
src/drivers/mic/driver_mic_common.c

@@ -23,8 +23,7 @@
 void _starpu_mic_common_report_scif_error(const char *func, const char *file, const int line, const int status)
 {
 	const char *errormsg = strerror(status);
-	printf("Common: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ASSERT(0);
+	_STARPU_ERROR("Common: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
 }
 
 /* Handles the error so the caller (which must be generic) doesn't have to

+ 1 - 2
src/drivers/mic/driver_mic_sink.c

@@ -154,8 +154,7 @@ void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
 void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status)
 {
 	const char *errormsg = strerror(status);
-	printf("SINK: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ASSERT(0);
+	_STARPU_ERROR("SINK: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
 }
 
 /* Allocate memory on the MIC.

+ 2 - 4
src/drivers/mic/driver_mic_source.c

@@ -193,8 +193,7 @@ void _starpu_mic_src_report_coi_error(const char *func, const char *file,
 				      const int line, const COIRESULT status)
 {
 	const char *errormsg = COIResultGetName(status);
-	printf("SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ASSERT(0);
+	_STARPU_ERROR("SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
 }
 
 /* Report an error which occured when using a MIC device
@@ -205,8 +204,7 @@ void _starpu_mic_src_report_coi_error(const char *func, const char *file,
 void _starpu_mic_src_report_scif_error(const char *func, const char *file, const int line, const int status)
 {
 	const char *errormsg = strerror(status);
-	printf("SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ASSERT(0);
+	_STARPU_ERROR("SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
 }
 
 /* Return the number of MIC devices in the system.

+ 1 - 1
src/drivers/mp_common/sink_common.c

@@ -425,7 +425,7 @@ void _starpu_sink_common_worker(void)
 					_starpu_sink_common_recv_workers(node, arg, arg_size);
 					break;
 				default:
-					printf("Oops, command %x unrecognized\n", command);
+					_STARPU_MSG("Oops, command %x unrecognized\n", command);
 			}
 		}
 

+ 1 - 2
src/drivers/mp_common/source_common.c

@@ -273,8 +273,7 @@ static void _starpu_src_common_recv_async(struct _starpu_mp_node * node)
 	answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
 	if(!_starpu_src_common_handle_async(node,arg,arg_size,answer, 0))
 	{
-		printf("incorrect commande: unknown command or sync command");
-		STARPU_ASSERT(0);
+		_STARPU_ERROR("incorrect command: unknown command or sync command");
 	}
 }
 

+ 18 - 19
src/drivers/mpi/driver_mpi_common.c

@@ -162,7 +162,7 @@ void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int
         int id_proc;
         MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-        //printf("envoi %d B to %d\n", len, node->mp_connection.mpi_remote_nodeid);
+        //_STARPU_MSG("envoi %d B to %d\n", len, node->mp_connection.mpi_remote_nodeid);
 
         if (event)
         {
@@ -177,7 +177,7 @@ void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int
                 /* Initialize the list */
                 if (channel->event.mpi_ms_event.requests == NULL)
                 {
-                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();
                         _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
                 }
 
@@ -189,7 +189,7 @@ void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int
                 channel->starpu_mp_common_finished_sender++;
 
                 _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-        } 
+        }
         else
         {
                 /* Synchronous send */
@@ -212,7 +212,7 @@ void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int
         MPI_Status s;
         MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-        //printf("recv %d B from %d in %p\n", len, node->mp_connection.mpi_remote_nodeid, msg);
+        //_STARPU_MSG("recv %d B from %d in %p\n", len, node->mp_connection.mpi_remote_nodeid, msg);
 
         if (event)
         {
@@ -227,7 +227,7 @@ void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int
                 /* Initialize the list */
                 if (channel->event.mpi_ms_event.requests == NULL)
                 {
-                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();
                         _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
                 }
 
@@ -239,7 +239,7 @@ void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int
                 channel->starpu_mp_common_finished_sender++;
 
                 _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-        } 
+        }
         else
         {
                 /* Synchronous recv */
@@ -259,12 +259,12 @@ void _starpu_mpi_common_mp_recv(const struct _starpu_mp_node *node, void *msg, i
 
 /* SEND to any node */
 void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len, void * event)
-{   
+{
         int res;
         int id_proc;
         MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-        //printf("S_to_D send %d bytes from %d from %p\n", len, dst_devid, msg);
+        //_STARPU_MSG("S_to_D send %d bytes from %d from %p\n", len, dst_devid, msg);
 
         if (event)
         {
@@ -279,7 +279,7 @@ void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node, int d
                 /* Initialize the list */
                 if (channel->event.mpi_ms_event.requests == NULL)
                 {
-                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();
                         _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
                 }
 
@@ -291,12 +291,12 @@ void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node, int d
                 channel->starpu_mp_common_finished_sender++;
 
                 _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-        } 
+        }
         else
         {
                 /* Synchronous send */
                 res = MPI_Send(msg, len, MPI_BYTE, dst_devid, SYNC_TAG, MPI_COMM_WORLD);
-        }    
+        }
 
         STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
 }
@@ -308,7 +308,7 @@ void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node, int
         int id_proc;
         MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-        //printf("R_to_D nop recv %d bytes from %d\n", len, src_devid);
+        //_STARPU_MSG("R_to_D nop recv %d bytes from %d\n", len, src_devid);
 
         if (event)
         {
@@ -323,7 +323,7 @@ void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node, int
                 /* Initialize the list */
                 if (channel->event.mpi_ms_event.requests == NULL)
                 {
-                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();
                         _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
                 }
 
@@ -335,7 +335,7 @@ void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node, int
                 channel->starpu_mp_common_finished_sender++;
 
                 _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-        } 
+        }
         else
         {
                 /* Synchronous recv */
@@ -363,8 +363,7 @@ static void _starpu_mpi_common_polling_node(struct _starpu_mp_node * node)
                         answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
                         if(!_starpu_src_common_store_message(node,arg,arg_size,answer))
                         {
-                                printf("incorrect commande: unknown command or sync command");
-                                STARPU_ASSERT(0);
+                                _STARPU_ERROR("incorrect command: unknown command or sync command");
                         }
                 }
                 STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex);
@@ -485,7 +484,7 @@ void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXM
         unsigned sender, receiver;
         for(sender = 0; sender < nb_proc; sender++)
         {
-                for(receiver = 0; receiver < nb_proc; receiver++) 
+                for(receiver = 0; receiver < nb_proc; receiver++)
                 {
                         //Node can't be a sender and a receiver
                         if(sender == receiver)
@@ -501,7 +500,7 @@ void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXM
                                 start = starpu_timing_now();
                                 for (iter = 0; iter < NITER; iter++)
                                 {
-                                        ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
+                                        ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD);
                                         STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
                                 }
                                 end = starpu_timing_now();
@@ -511,7 +510,7 @@ void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXM
                                 start = starpu_timing_now();
                                 for (iter = 0; iter < NITER; iter++)
                                 {
-                                        ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
+                                        ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD);
                                         STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !");
                                 }
                                 end = starpu_timing_now();

+ 2 - 4
src/drivers/opencl/driver_opencl_utils.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011, 2012, 2014, 2015, 2016  CNRS
+ * Copyright (C) 2010, 2011, 2012, 2014, 2015, 2016, 2017  CNRS
  * Copyright (C) 2010-2016  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -776,9 +776,7 @@ const char *starpu_opencl_error_string(cl_int status)
 
 void starpu_opencl_display_error(const char *func, const char *file, int line, const char* msg, cl_int status)
 {
-	printf("oops in %s (%s:%d) (%s) ... <%s> (%d) \n", func, file, line, msg,
-	       starpu_opencl_error_string (status), status);
-
+	_STARPU_MSG("oops in %s (%s:%d) (%s) ... <%s> (%d) \n", func, file, line, msg, starpu_opencl_error_string (status), status);
 }
 
 int starpu_opencl_set_kernel_args(cl_int *error, cl_kernel *kernel, ...)

+ 12 - 12
src/top/starpu_top.c

@@ -69,7 +69,7 @@ char *message_for_topparam_init(struct starpu_top_param* param);
 static
 void copy_data_and_param(void)
 {
-	printf("%s:%d trace\n", __FILE__, __LINE__);
+	_STARPU_MSG("trace\n");
 	//copying datas
 	_STARPU_MALLOC(starpu_top_datas, starpu_top_data_cpt*sizeof(struct starpu_top_data*));
 	struct starpu_top_data* cur = starpu_top_first_data;
@@ -159,7 +159,7 @@ void starpu_top_init_and_wait(const char* server_name)
 	STARPU_ASSERT(_starpu_top_mt);
 
 	//waiting for UI to connect
-	printf("%s:%d launching network threads\n", __FILE__, __LINE__);
+	_STARPU_MSG("launching network threads\n");
 	_starpu_top_communications_threads_launcher();
 
 	//sending server information (report to protocol)
@@ -197,22 +197,22 @@ void starpu_top_init_and_wait(const char* server_name)
 	sprintf(message, "%s", "PARAMS\n");
 	_starpu_top_message_add(_starpu_top_mt,message);
 	struct starpu_top_param * cur_param = starpu_top_first_param;
-	printf("%s:%d sending parameters\n", __FILE__, __LINE__);
+	_STARPU_MSG("sending parameters\n");
 	while(cur_param != NULL)
 	{
 	  _starpu_top_message_add(_starpu_top_mt,message_for_topparam_init(cur_param));
 	  cur_param = cur_param->next;
 	}
-	printf("%s:%d parameters sended\n", __FILE__, __LINE__);
+	_STARPU_MSG("parameters sended\n");
 	_STARPU_MALLOC(message, strlen("/PARAMS\n")+1);
 	sprintf(message, "%s", "/PARAMS\n");
 	_starpu_top_message_add(_starpu_top_mt,message);
 
 
 	//sending DEVICE list
-	printf("%s:%d sending devices info\n", __FILE__, __LINE__);
+	_STARPU_MSG("sending devices info\n");
 	starpu_top_send_devices_info();
-	printf("%s:%d devices_info sended\n", __FILE__, __LINE__);
+	_STARPU_MSG("devices_info sended\n");
 	//copying data and params
 	copy_data_and_param();
 
@@ -222,7 +222,7 @@ void starpu_top_init_and_wait(const char* server_name)
 	_starpu_top_message_add(_starpu_top_mt,message);
 
 	//This threads keeps locked while we don't receive an GO message from UI
-	printf("%s:%d waiting for GO message\n", __FILE__, __LINE__);
+	_STARPU_MSG("waiting for GO message\n");
 	sem_wait(&starpu_top_wait_for_go);
 }
 
@@ -721,7 +721,7 @@ static
 void starpu_top_unlock_starpu(void)
 {
 	sem_post(&starpu_top_wait_for_go);
-	printf("%s:%d starpu started\n", __FILE__, __LINE__);
+	_STARPU_MSG("starpu started\n");
 }
 
 static
@@ -731,7 +731,7 @@ void starpu_top_change_data_active(char* message, int active)
 	char* fin = strstr(debut+1, "\n");
 	*fin = '\0';
 	int data_id = atoi(debut);
-	printf("%s:%d data %d %s\n", __FILE__, __LINE__, data_id, active ? "ENABLED" : "DISABLE");
+	_STARPU_MSG("data %d %s\n", data_id, active ? "ENABLED" : "DISABLE");
 	starpu_top_datas[data_id]->active = active;
 }
 
@@ -775,12 +775,12 @@ void starpu_top_change_debug_mode(const char*message)
 	if(!strncmp("ON",debut, 2))
 	{
 		starpu_top_debug_on = 1;
-		printf("%s:%d debug is now ON\n", __FILE__, __LINE__);
+		_STARPU_MSG("debug is now ON\n");
 	}
 	else
 	{
 		starpu_top_debug_on = 0;
-		printf("%s:%d debug is now OFF\n", __FILE__, __LINE__);
+		_STARPU_MSG("debug is now OFF\n");
 	}
 
 	char *m;
@@ -823,7 +823,7 @@ void _starpu_top_process_input_message(char *buffer)
 			starpu_top_debug_next_step();
 		break;
 		default:
-			printf("%s:%d unknow message : '%s'\n", __FILE__, __LINE__, buffer);
+			_STARPU_MSG("unknown message : '%s'\n", buffer);
 	}
 }
 

+ 10 - 15
src/top/starpu_top_connection.c

@@ -65,14 +65,14 @@ void * message_from_ui(void * p)
 	{
 		char * check=fgets (str, STARPU_TOP_BUFFER_SIZE, starpu_top_socket_fd_read);
 
-		printf("Message from UI : %s",str);
+		_STARPU_MSG("Message from UI : %s",str);
 		if (check)
 		{
 			_starpu_top_process_input_message(str);
 		}
 		else
 		{
-			fprintf(stderr,"Connection dropped\n");
+			_STARPU_MSG("Connection dropped\n");
 			//unlocking StarPU.
 			_starpu_top_process_input_message("GO\n");
 			_starpu_top_process_input_message("DEBUG;OFF\n");
@@ -96,7 +96,7 @@ void * message_to_ui(void * p)
 		free(message);
 		if (check!=len || check2==EOF )
 		{
-			fprintf(stderr,"Connection dropped : message no longer send\n");
+			_STARPU_MSG("Connection dropped : message no longer send\n");
 			while(1)
 			{
 				message=_starpu_top_message_remove(_starpu_top_mt);
@@ -115,7 +115,7 @@ void _starpu_top_communications_threads_launcher(void)
 
 
 	//Connection to UI & Socket Initilization
-	printf("%s:%d Connection to UI initilization\n",__FILE__, __LINE__);
+	_STARPU_MSG("Connection to UI initilization\n");
 	struct sockaddr_storage from;
 	struct addrinfo req, *ans;
 	int code;
@@ -126,7 +126,7 @@ void _starpu_top_communications_threads_launcher(void)
 
 	if ((code = getaddrinfo(NULL, STARPU_TOP_PORT, &req, &ans)) != 0)
 	{
-		fprintf(stderr, " getaddrinfo failed %d\n", code);
+		_STARPU_MSG(" getaddrinfo failed %d\n", code);
 		exit(EXIT_FAILURE);
    	}
   	int sock=socket(ans->ai_family, ans->ai_socktype, ans->ai_protocol);
@@ -156,34 +156,29 @@ void _starpu_top_communications_threads_launcher(void)
 
    	if ((starpu_top_socket_fd=accept(sock, (struct sockaddr *) &from, &len)) ==-1)
 	{
-		fprintf(stderr, "accept error\n");
-		perror("accept");
-		exit(EXIT_FAILURE);
+		_STARPU_ERROR("accept error %s\n", strerror(errno));
 	}
 
 	if ( (starpu_top_socket_fd_read=fdopen(starpu_top_socket_fd, "r")) == NULL)
 	{
-		perror("fdopen");
-		exit(EXIT_FAILURE);
+		_STARPU_ERROR("fdopen error %s\n", strerror(errno));
 	}
 
 	starpu_top_socket_fd=dup(starpu_top_socket_fd);
 	if (starpu_top_socket_fd == -1)
 	{
-		perror("dup");
-		exit(EXIT_FAILURE);
+		_STARPU_ERROR("dup error %s\n", strerror(errno));
 	}
 
 	if ((starpu_top_socket_fd_write=fdopen(starpu_top_socket_fd, "w")) == NULL)
 	{
-		perror("fdopen");
-		exit(EXIT_FAILURE);
+		_STARPU_ERROR("fdopen error %s\n", strerror(errno));
 	}
 
 	close(sock);
 
 	//Threads creation
-	fprintf(stderr,"Threads Creation\n");
+	_STARPU_MSG("Threads Creation\n");
 	starpu_pthread_attr_init(&threads_attr);
 	starpu_pthread_attr_setdetachstate(&threads_attr, PTHREAD_CREATE_DETACHED);
 

+ 5 - 5
src/top/starpu_top_message_queue.c

@@ -31,7 +31,7 @@ struct _starpu_top_message_queue* _starpu_top_message_add(struct _starpu_top_mes
 {
 	if( NULL == s )
 	{
-		printf("Queue not initialized\n");
+		_STARPU_MSG("Queue not initialized\n");
 		free(msg);
 		return s;
 	}
@@ -40,7 +40,7 @@ struct _starpu_top_message_queue* _starpu_top_message_add(struct _starpu_top_mes
 	STARPU_PTHREAD_MUTEX_LOCK(&(s->mutex));
 	if( NULL == p )
 	{
-		fprintf(stderr, "IN %s, %s: malloc() failed\n", __FILE__, "list_add");
+		_STARPU_MSG("IN %s, %s: malloc() failed\n", __FILE__, "list_add");
 		free(msg);
 		STARPU_PTHREAD_MUTEX_UNLOCK(&(s->mutex));
 		return s;
@@ -51,7 +51,7 @@ struct _starpu_top_message_queue* _starpu_top_message_add(struct _starpu_top_mes
 
 	if( NULL == s->head && NULL == s->tail )
 	{
-		/* printf("Empty list, adding p->num: %d\n\n", p->num);  */
+		/* _STARPU_MSG("Empty list, adding p->num: %d\n\n", p->num);  */
 		sem_post(&(s->semaphore));
 		s->head = s->tail = p;
 		STARPU_PTHREAD_MUTEX_UNLOCK(&(s->mutex));
@@ -59,7 +59,7 @@ struct _starpu_top_message_queue* _starpu_top_message_add(struct _starpu_top_mes
 	}
 	else
 	{
-		/* printf("List not empty, adding element to tail\n"); */
+		/* _STARPU_MSG("List not empty, adding element to tail\n"); */
 		sem_post(&(s->semaphore));
 		s->tail->next = p;
 		s->tail = p;
@@ -73,7 +73,7 @@ char* _starpu_top_message_remove(struct _starpu_top_message_queue* s)
 {
 	if( NULL == s )
 	{
-		printf("List is null\n");
+		_STARPU_MSG("List is null\n");
 		return NULL;
 	}
 

+ 12 - 13
src/util/fstarpu.c

@@ -18,8 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <starpu.h>
-
-#define _FSTARPU_ERROR(msg) do {fprintf(stderr, "fstarpu error: %s\n", (msg));abort();} while(0)
+#include <common/utils.h>
 
 typedef void (*_starpu_callback_func_t)(void *);
 
@@ -160,7 +159,7 @@ intptr_t fstarpu_get_constant(char *s)
 	else if (!strcmp(s, "FSTARPU_CUDA_ASYNC"))	{ return fstarpu_starpu_cuda_async; }
 	else if (!strcmp(s, "FSTARPU_OPENCL_ASYNC"))	{ return fstarpu_starpu_opencl_async; }
 
-	else { _FSTARPU_ERROR("unknown constant"); }
+	else { _STARPU_ERROR("unknown constant"); }
 }
 
 struct starpu_conf *fstarpu_conf_allocate(void)
@@ -270,7 +269,7 @@ void fstarpu_codelet_add_cpu_func(struct starpu_codelet *cl, void *f_ptr)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many cpu functions in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many cpu functions in Fortran codelet");
 }
 
 void fstarpu_codelet_add_cuda_func(struct starpu_codelet *cl, void *f_ptr)
@@ -285,7 +284,7 @@ void fstarpu_codelet_add_cuda_func(struct starpu_codelet *cl, void *f_ptr)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many cuda functions in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many cuda functions in Fortran codelet");
 }
 
 void fstarpu_codelet_add_cuda_flags(struct starpu_codelet *cl, intptr_t flags)
@@ -300,7 +299,7 @@ void fstarpu_codelet_add_cuda_flags(struct starpu_codelet *cl, intptr_t flags)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many cuda flags in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many cuda flags in Fortran codelet");
 }
 
 void fstarpu_codelet_add_opencl_func(struct starpu_codelet *cl, void *f_ptr)
@@ -315,7 +314,7 @@ void fstarpu_codelet_add_opencl_func(struct starpu_codelet *cl, void *f_ptr)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many opencl functions in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many opencl functions in Fortran codelet");
 }
 
 void fstarpu_codelet_add_opencl_flags(struct starpu_codelet *cl, intptr_t flags)
@@ -330,7 +329,7 @@ void fstarpu_codelet_add_opencl_flags(struct starpu_codelet *cl, intptr_t flags)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many opencl flags in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many opencl flags in Fortran codelet");
 }
 
 void fstarpu_codelet_add_mic_func(struct starpu_codelet *cl, void *f_ptr)
@@ -345,7 +344,7 @@ void fstarpu_codelet_add_mic_func(struct starpu_codelet *cl, void *f_ptr)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many mic functions in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many mic functions in Fortran codelet");
 }
 
 void fstarpu_codelet_add_scc_func(struct starpu_codelet *cl, void *f_ptr)
@@ -360,7 +359,7 @@ void fstarpu_codelet_add_scc_func(struct starpu_codelet *cl, void *f_ptr)
 			return;
 		}
 	}
-	_FSTARPU_ERROR("fstarpu: too many scc functions in Fortran codelet");
+	_STARPU_ERROR("fstarpu: too many scc functions in Fortran codelet");
 }
 
 void fstarpu_codelet_add_buffer(struct starpu_codelet *cl, intptr_t _mode)
@@ -370,7 +369,7 @@ void fstarpu_codelet_add_buffer(struct starpu_codelet *cl, intptr_t _mode)
 	const size_t max_modes = sizeof(cl->modes)/sizeof(cl->modes[0])-1;
 	if ((mode & (STARPU_ACCESS_MODE_MAX-1)) != mode)
 	{
-		_FSTARPU_ERROR("fstarpu: invalid data mode");
+		_STARPU_ERROR("fstarpu: invalid data mode");
 	}
 	if  (cl->nbuffers < (int) max_modes)
 	{
@@ -379,7 +378,7 @@ void fstarpu_codelet_add_buffer(struct starpu_codelet *cl, intptr_t _mode)
 	}
 	else
 	{
-		_FSTARPU_ERROR("fstarpu: too many buffers in Fortran codelet");
+		_STARPU_ERROR("fstarpu: too many buffers in Fortran codelet");
 	}
 }
 
@@ -396,7 +395,7 @@ void fstarpu_codelet_set_nbuffers(struct starpu_codelet *cl, int nbuffers)
 	}
 	else
 	{
-		_FSTARPU_ERROR("fstarpu: invalid nbuffers parameter");
+		_STARPU_ERROR("fstarpu: invalid nbuffers parameter");
 	}
 }
 

+ 5 - 9
src/util/starpu_task_insert_utils.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2013-2016   Université Bordeaux
+ * Copyright (C) 2011, 2013-2017   Université Bordeaux
  * Copyright (C) 2011-2017         CNRS
  * Copyright (C) 2011, 2014        INRIA
  * Copyright (C) 2016-2017 Inria
@@ -121,7 +121,7 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 		}
 		else if (arg_type==STARPU_EXECUTE_WHERE)
 		{
-			(void)va_arg(varg_list, uint32_t);
+			(void)va_arg(varg_list, unsigned long long);
 		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
@@ -388,7 +388,7 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **t
 		}
 		else if (arg_type==STARPU_EXECUTE_WHERE)
 		{
-			(*task)->where = va_arg(varg_list, uint32_t);
+			(*task)->where = va_arg(varg_list, unsigned long long);
 		}
 		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
 		{
@@ -605,12 +605,8 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 		{
 			assert(0);
 			arg_i++;
-			int worker = *(int *)arglist[arg_i];
-			if (worker != -1)
-			{
-				(*task)->workerid = worker;
-				(*task)->execute_on_a_specific_worker = 1;
-			}
+			unsigned long long where = *(unsigned long long *)arglist[arg_i];
+			(*task)->where = where;
 		}
 		else if (arg_type == STARPU_EXECUTE_ON_WORKER)
 		{

+ 7 - 1
tests/Makefile.am

@@ -396,11 +396,17 @@ TESTS += datawizard/locality.sh
 if STARPU_SIMGRID_MC
 model_checking_prio_list_LDADD = 
 model_checking_prio_list_LDFLAGS = 
-model_checking_prio_list_SOURCES = model-checking/prio_list.c ../src/common/rbtree.c
 noinst_PROGRAMS += model-checking/prio_list
 if !STARPU_QUICK_CHECK
 TESTS += model-checking/prio_list.sh
 endif
+
+model_checking_starpu_barrier_LDADD = 
+model_checking_starpu_barrier_LDFLAGS = 
+noinst_PROGRAMS += model-checking/starpu_barrier
+if !STARPU_QUICK_CHECK
+#TESTS += model-checking/barrier.sh
+endif
 endif
 
 #######################

+ 21 - 19
tests/datawizard/reclaim.c

@@ -80,16 +80,12 @@ static struct starpu_codelet dummy_cl =
 };
 
 /* Number of chunks */
-static int mb = 16;
+static unsigned mb = 16;
 
 int main(int argc, char **argv)
 {
-	int i, ret;
-	int taskid;
-
-        ret = starpu_initialize(NULL, &argc, &argv);
-	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	unsigned j, taskid;
+	int ret;
 
 #ifdef STARPU_HAVE_HWLOC
 	/* We allocate 50% of the memory */
@@ -103,6 +99,10 @@ int main(int argc, char **argv)
 
 	setenv("STARPU_LIMIT_OPENCL_MEM", "1000", 1);
 
+        ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
 	/* An optional argument indicates the number of MB to allocate */
 	if (argc > 1)
 		mb = atoi(argv[1]);
@@ -116,26 +116,28 @@ int main(int argc, char **argv)
 		mb = 1;
 #endif
 
-	FPRINTF(stderr, "Allocate %d buffers and create %u tasks\n", mb, ntasks);
+	FPRINTF(stderr, "Allocate %d buffers of size %d and create %u tasks\n", mb, BLOCK_SIZE, ntasks);
 
 	float **host_ptr_array;
 	starpu_data_handle_t *handle_array;
 
-	host_ptr_array = (float **) calloc(mb, sizeof(float *));
-	handle_array = (starpu_data_handle_t *) calloc(mb, sizeof(starpu_data_handle_t));
+	host_ptr_array = calloc(mb, sizeof(float *));
+	STARPU_ASSERT(host_ptr_array);
+	handle_array = calloc(mb, sizeof(starpu_data_handle_t));
+	STARPU_ASSERT(handle_array);
 
 	/* Register mb buffers of 1MB */
-	for (i = 0; i < mb; i++)
+	for (j = 0; j < mb; j++)
 	{
-		host_ptr_array[i] = (float *) malloc(BLOCK_SIZE);
-		if (host_ptr_array[i] == NULL)
+		host_ptr_array[j] = calloc(BLOCK_SIZE, 1);
+		if (host_ptr_array[j] == NULL)
 		{
-			mb = i;
+			mb = j;
 			FPRINTF(stderr, "Cannot allocate more than %d buffers\n", mb);
 			break;
 		}
-		starpu_variable_data_register(&handle_array[i], STARPU_MAIN_RAM, (uintptr_t)host_ptr_array[i], BLOCK_SIZE);
-		STARPU_ASSERT(handle_array[i]);
+		starpu_variable_data_register(&handle_array[j], STARPU_MAIN_RAM, (uintptr_t)host_ptr_array[j], BLOCK_SIZE);
+		STARPU_ASSERT(handle_array[j]);
 	}
 
 	for (taskid = 0; taskid < ntasks; taskid++)
@@ -156,10 +158,10 @@ int main(int argc, char **argv)
 	ret = starpu_task_wait_for_all();
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
 
-	for (i = 0; i < mb; i++)
+	for (j = 0; j < mb; j++)
 	{
-		starpu_data_unregister(handle_array[i]);
-		free(host_ptr_array[i]);
+		starpu_data_unregister(handle_array[j]);
+		free(host_ptr_array[j]);
 	}
 
 	free(host_ptr_array);

+ 12 - 5
tests/main/insert_task_where.c

@@ -69,11 +69,18 @@ int main(int argc, char **argv)
 
 	starpu_shutdown();
 
-	if (ret1 != -ENODEV && x != 14) ret = 1;
-	if (ret2 != -ENODEV && y != 13) ret = 1;
-
-	FPRINTF(stderr, "Value x = %d (expected 14)\n", x);
-	FPRINTF(stderr, "Value y = %d (expected 13)\n", y);
+	if (ret1 != -ENODEV)
+	{
+		if (x != 14)
+			ret = 1;
+		FPRINTF(stderr, "Value x = %d (expected 14)\n", x);
+	}
+	if (ret2 != -ENODEV)
+	{
+		if (y != 13)
+			ret = 1;
+		FPRINTF(stderr, "Value y = %d (expected 13)\n", y);
+	}
 
 	STARPU_RETURN(ret);
 }

+ 10 - 4
tests/model-checking/Makefile

@@ -14,8 +14,8 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 STARPU=../../
-CPPFLAGS=-I$(STARPU)/src
-CFLAGS=-Wall -Wextra -g $(STARPU)/src/common/rbtree.c -DNOCONFIG
+CPPFLAGS=-I$(STARPU)/src -I$(STARPU)/include -I.
+CFLAGS=-Wall -Wextra -g -DNOCONFIG
 LDFLAGS=-lsimgrid
 
 MC_FLAGS=--cfg=model-check/reduction:none
@@ -42,7 +42,13 @@ test: prio_list
 debug: prio_list
 	simgrid-mc ./prio_list platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS)
 
-all: prio_list prio_list2
+test-barrier: starpu_barrier
+	simgrid-mc ./starpu_barrier platform.xml MAIN $(MC_FLAGS)
+
+debug-barrier: starpu_barrier
+	simgrid-mc ./starpu_barrier platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS)
+
+all: prio_list prio_list2 starpu_barrier
 
 clean:
-	rm -f prio_list
+	rm -f prio_list prio_list2 starpu_barrier

+ 19 - 0
tests/model-checking/barrier.sh

@@ -0,0 +1,19 @@
+#!/bin/bash -x
+#
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2017  Université de Bordeaux
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+# Load the helper script that sits next to this one.  The quoting keeps the
+# path intact when the directory name contains whitespace.
+source "$(dirname "$0")/starpu-mc.sh"
+# NOTE(review): `test` is presumably a function provided by starpu-mc.sh
+# (otherwise this would resolve to the shell builtin) -- confirm.
+test starpu_barrier

+ 5 - 0
tests/model-checking/common/config.h

@@ -0,0 +1,5 @@
+/* Configuration macros for the model-checking tests.
+ * STARPU_SIMGRID is defined (presumably to select the SimGrid code paths --
+ * confirm); the remaining macros fix the maximum counts to small constants. */
+#define STARPU_SIMGRID
+#define STARPU_MAXIMPLEMENTATIONS 4
+#define STARPU_NMAXBUFS 8
+#define STARPU_MAXNODES 2
+#define STARPU_NMAXWORKERS 16

+ 2 - 0
tests/model-checking/prio_list.c

@@ -43,6 +43,8 @@
 #include <xbt/synchro_core.h>
 #endif
 
+#include <common/rbtree.c>
+
 #ifndef NLISTS
 #define NLISTS 1
 #endif

+ 128 - 0
tests/model-checking/starpu_barrier.c

@@ -0,0 +1,128 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Pre-define what looks like the include guard of common/utils.h, presumably
+ * so that header is skipped when the StarPU sources are included below; the
+ * macros that follow stand in for the ones this test needs (TODO confirm). */
+#define __COMMON_UTILS_H__
+/* Allocation wrappers reduced to the plain libc calls, without any failure check. */
+#define _STARPU_MALLOC(p, s) do {p = malloc(s);} while (0)
+#define _STARPU_CALLOC(p, n, s) do {p = calloc(n, s);} while (0)
+#define _STARPU_REALLOC(p, s) do {p = realloc(p, s);} while (0)
+//#define STARPU_ATTRIBUTE_UNUSED __attribute((__unused__))
+
+/* Debug output: prints to stderr (unless _starpu_silent is set) only when
+ * STARPU_VERBOSE is defined; otherwise it expands to an empty statement. */
+#define STARPU_DEBUG_PREFIX "[starpu]"
+#ifdef STARPU_VERBOSE
+#  define _STARPU_DEBUG(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0)
+#else
+#  define _STARPU_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+/* Yield hook defined as a no-op for this test. */
+#define STARPU_UYIELD() ((void)0)
+
+#ifndef NOCONFIG
+#include <config.h>
+#else
+#define _GNU_SOURCE
+// Assuming recent simgrid
+#define STARPU_HAVE_SIMGRID_MSG_H
+#define STARPU_HAVE_XBT_SYNCHRO_H
+#endif
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <common/barrier.h>
+#ifdef STARPU_HAVE_SIMGRID_MSG_H
+#include <simgrid/msg.h>
+#else
+#include <msg/msg.h>
+#endif
+#include <simgrid/modelchecker.h>
+#ifdef STARPU_HAVE_XBT_SYNCHRO_H
+#include <xbt/synchro.h>
+#else
+#include <xbt/synchro_core.h>
+#endif
+
+/* Stub: takes the usual (argc, argv) pair, ignores both and reports success.
+ * NOTE(review): presumably present only to satisfy a reference from the
+ * StarPU sources included below -- confirm. */
+int _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0;
+}
+
+#include <common/barrier.c>
+#include <common/thread.c>
+
+/* Number of worker processes spawned by master(), also the count passed to
+ * _starpu_barrier_init().  May be predefined at build time. */
+#ifndef NTHREADS
+#define NTHREADS 2
+#endif
+
+/* Number of barrier waits performed by each worker.  May be predefined at
+ * build time. */
+#ifndef NITERS
+#define NITERS 1
+#endif
+
+/* The barrier under test, shared by master() and every worker(). */
+struct _starpu_barrier barrier;
+
+/* Body of each simulated worker process: NITERS times in a row, check that
+ * the shared barrier's count does not exceed NTHREADS, then wait on the
+ * barrier.  Both parameters are unused; always returns 0. */
+int worker(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
+{
+	unsigned round = 0;
+
+	while (round < NITERS)
+	{
+		MC_assert(barrier.count <= NTHREADS);
+		_starpu_barrier_wait(&barrier);
+		round++;
+	}
+
+	return 0;
+}
+
+/* Master process: initialises the shared barrier with NTHREADS, then creates
+ * NTHREADS processes named "test" that run worker() on the current host.
+ * Each process is given a one-element argument vector holding its index.
+ * Aborts if the argument string or the vector cannot be allocated.
+ * Both parameters are unused; always returns 0. */
+int master(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
+{
+	unsigned i;
+
+	_starpu_barrier_init(&barrier, NTHREADS);
+
+	for (i = 0; i < NTHREADS; i++)
+	{
+		char *s;
+		char **args;
+
+		/* i is unsigned, hence %u.  On failure asprintf leaves s
+		 * indeterminate, so its result must be checked before use. */
+		if (asprintf(&s, "%u\n", i) < 0)
+			abort();
+
+		args = malloc(sizeof(*args) * 2);
+		if (!args)
+			abort();
+		args[0] = s;
+		args[1] = NULL;
+
+		/* NOTE(review): s and args are not freed here; presumably SimGrid
+		 * takes ownership of the argument vector -- confirm. */
+		MSG_process_create_with_arguments("test", worker, NULL, MSG_host_self(), 1, args);
+	}
+
+	return 0;
+}
+
+/* Program entry point.
+ * Expects at least two arguments: the platform description passed to
+ * MSG_create_environment() (argv[1]) and the name of the host on which the
+ * master process is created (argv[2]).  Exits with EXIT_FAILURE and a usage
+ * message when fewer arguments are given; otherwise runs the simulation and
+ * returns 0.
+ * NOTE(review): the #undef presumably cancels a redefinition of main made by
+ * one of the included headers -- confirm. */
+#undef main
+int main(int argc, char *argv[])
+{
+	if (argc < 3)
+	{
+		fprintf(stderr,"usage: %s platform.xml host\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+	/* Fixed seed; no use of the drand48 family is visible in this file. */
+	srand48(0);
+	MSG_init(&argc, argv);
+	/* Set contexts/stack-size to 128 (unit not visible here); the call's
+	 * signature depends on the SimGrid version. */
+#if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 13)
+	extern xbt_cfg_t _sg_cfg_set;
+	xbt_cfg_set_int(_sg_cfg_set, "contexts/stack-size", 128);
+#else
+	xbt_cfg_set_int("contexts/stack-size", 128);
+#endif
+	MSG_create_environment(argv[1]);
+	/* NOTE(review): the result of the host lookup is passed on unchecked. */
+	MSG_process_create("master", master, NULL, MSG_get_host_by_name(argv[2]));
+	MSG_main();
+	return 0;
+}

+ 40 - 0
tests/model-checking/starpu_config.h

@@ -0,0 +1,40 @@
+/* Configuration header for the model-checking tests.
+ * STARPU_SIMGRID is defined (presumably to select the SimGrid code paths --
+ * confirm); the remaining macros fix the maximum counts to small constants. */
+#define STARPU_SIMGRID
+#define STARPU_MAXIMPLEMENTATIONS 4
+#define STARPU_NMAXBUFS 8
+#define STARPU_MAXNODES 2
+#define STARPU_NMAXWORKERS 16
+
+/* Fixed-width integer types: taken from <stdint.h>, or declared by hand when
+ * building with MSVC. */
+#ifndef _MSC_VER
+#include <stdint.h>
+#else
+#include <windows.h>
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef UINT_PTR uintptr_t;
+typedef char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef INT_PTR intptr_t;
+#endif
+
+/* starpu_ssize_t (signed size type) and __starpu_func__ (name of the current
+ * function), each mapped to what the compiler provides. */
+#ifdef _MSC_VER
+typedef long starpu_ssize_t;
+#define __starpu_func__ __FUNCTION__
+#else
+#  include <sys/types.h>
+typedef ssize_t starpu_ssize_t;
+#define __starpu_func__ __func__
+#endif
+
+/* __starpu_inline: the spelling of "inline" accepted by the current compiler. */
+#if defined(c_plusplus) || defined(__cplusplus)
+/* inline is part of C++ */
+#  define __starpu_inline inline
+#elif defined(_MSC_VER) || defined(__HP_cc)
+#  define __starpu_inline __inline
+#else
+#  define __starpu_inline __inline__
+#endif
+

+ 2 - 2
tools/starpu_machine_display.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011-2012, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2011-2012, 2014-2015, 2017  Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -157,7 +157,7 @@ int main(int argc, char **argv)
 
 	if (ret != -ENODEV)
 	{
-		fprintf(stdout, "\ntopology ...\n");
+		fprintf(stdout, "\ntopology ... (hwloc logical indexes)\n");
 		starpu_topology_print(stdout);
 
 		fprintf(stdout, "\nbandwidth and latency ...\n");