Browse Source

nmad/ : tests from starpu-mpi trunk added.
Added user defined datatype.

Guillaume Beauchamp 8 years ago
parent
commit
defced0015
59 changed files with 3774 additions and 616 deletions
  1. 10 10
      nmad/src/starpu_mpi.c
  2. 99 3
      nmad/src/starpu_mpi_datatype.c
  3. 2 0
      nmad/src/starpu_mpi_datatype.h
  4. 100 18
      nmad/tests/Makefile.am
  5. 14 12
      nmad/tests/block_interface.c
  6. 14 12
      nmad/tests/block_interface_pinned.c
  7. 27 13
      nmad/tests/cache.c
  8. 31 13
      nmad/tests/cache_disable.c
  9. 128 0
      nmad/tests/callback.c
  10. 155 120
      nmad/tests/datatypes.c
  11. 252 0
      nmad/tests/early_request.c
  12. 75 0
      nmad/tests/gather.c
  13. 97 0
      nmad/tests/gather2.c
  14. 18 6
      nmad/tests/helper.h
  15. 26 14
      nmad/tests/insert_task.c
  16. 23 7
      nmad/tests/insert_task_block.c
  17. 128 23
      nmad/tests/insert_task_compute.c
  18. 38 16
      nmad/tests/insert_task_count.c
  19. 178 0
      nmad/tests/insert_task_dyn_handles.c
  20. 122 0
      nmad/tests/insert_task_node_choice.c
  21. 55 27
      nmad/tests/insert_task_owner.c
  22. 49 25
      nmad/tests/insert_task_owner2.c
  23. 30 8
      nmad/tests/insert_task_owner_data.c
  24. 40 15
      nmad/tests/insert_task_recv_cache.c
  25. 41 16
      nmad/tests/insert_task_sent_cache.c
  26. 33 10
      nmad/tests/matrix.c
  27. 37 18
      nmad/tests/matrix2.c
  28. 16 12
      nmad/tests/mpi_detached_tag.c
  29. 129 0
      nmad/tests/mpi_earlyrecv.c
  30. 253 0
      nmad/tests/mpi_earlyrecv2.c
  31. 237 0
      nmad/tests/mpi_earlyrecv2_sync.c
  32. 14 12
      nmad/tests/mpi_irecv.c
  33. 16 12
      nmad/tests/mpi_irecv_detached.c
  34. 14 12
      nmad/tests/mpi_isend.c
  35. 16 12
      nmad/tests/mpi_isend_detached.c
  36. 61 16
      nmad/tests/mpi_reduction.c
  37. 8 11
      nmad/tests/mpi_reduction_kernels.c
  38. 25 12
      nmad/tests/mpi_redux.c
  39. 29 6
      nmad/tests/mpi_scatter_gather.c
  40. 16 15
      nmad/tests/mpi_test.c
  41. 7 7
      nmad/tests/multiple_send.c
  42. 14 12
      nmad/tests/pingpong.c
  43. 146 0
      nmad/tests/policy_register.c
  44. 54 0
      nmad/tests/policy_register_many.c
  45. 52 0
      nmad/tests/policy_register_toomany.c
  46. 187 0
      nmad/tests/policy_selection.c
  47. 131 0
      nmad/tests/policy_selection2.c
  48. 37 0
      nmad/tests/policy_unregister.c
  49. 39 17
      nmad/tests/ring.c
  50. 41 18
      nmad/tests/ring_async.c
  51. 34 14
      nmad/tests/ring_async_implicit.c
  52. 1 1
      nmad/tests/ring_kernel.cu
  53. 38 16
      nmad/tests/ring_sync.c
  54. 36 14
      nmad/tests/ring_sync_detached.c
  55. 43 0
      nmad/tests/starpu_redefine.c
  56. 97 0
      nmad/tests/sync.c
  57. 148 0
      nmad/tests/tags_checking.c
  58. 7 8
      nmad/tests/user_defined_datatype.c
  59. 6 3
      nmad/tests/user_defined_datatype_value.h

+ 10 - 10
nmad/src/starpu_mpi.c

@@ -1030,7 +1030,7 @@ int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi)
 	_starpu_mpi_comm_amounts_init(MPI_COMM_WORLD);
 	_starpu_mpi_cache_init(MPI_COMM_WORLD);	
 	_starpu_mpi_select_node_init();
-
+	_starpu_mpi_datatype_init();
 	return 0;
 }
 
@@ -1088,7 +1088,7 @@ int starpu_mpi_shutdown(void)
 	_starpu_mpi_comm_amounts_display(rank);
 	_starpu_mpi_comm_amounts_free();
 	_starpu_mpi_cache_free(world_size);
-
+	_starpu_mpi_datatype_shutdown();
 	return 0;
 }
 
@@ -1191,8 +1191,8 @@ void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t da
 	if (me == node)
 	{
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
-		if (already_received == 0)
+		void* already_received = _starpu_mpi_cache_received_data_set(data_handle);
+		if (already_received == NULL)
 		{
 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
 			starpu_mpi_irecv_detached(data_handle, rank, tag, comm, callback, arg);
@@ -1201,8 +1201,8 @@ void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t da
 	else if (me == rank)
 	{
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
-		if (already_sent == 0)
+		void* already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
+		if (already_sent == NULL)
 		{
 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
 			starpu_mpi_isend_detached(data_handle, node, tag, comm, NULL, NULL);
@@ -1233,8 +1233,8 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 	{
 		MPI_Status status;
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_received = _starpu_mpi_cache_received_data_set(data_handle);
-		if (already_received == 0)
+		void* already_received = _starpu_mpi_cache_received_data_set(data_handle);
+		if (already_received == NULL)
 		{
 			_STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank);
 			starpu_mpi_recv(data_handle, rank, tag, comm, &status);
@@ -1243,8 +1243,8 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 	else if (me == rank)
 	{
 		_STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node);
-		int already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
-		if (already_sent == 0)
+		void* already_sent = _starpu_mpi_cache_sent_data_set(data_handle, node);
+		if (already_sent == NULL)
 		{
 			_STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node);
 			starpu_mpi_send(data_handle, node, tag, comm);

+ 99 - 3
nmad/src/starpu_mpi_datatype.c

@@ -16,10 +16,35 @@
  */
 
 #include <starpu_mpi_datatype.h>
+#include <common/uthash.h>
+#include <datawizard/coherency.h>
+#include <starpu_mpi_private.h>
 
 typedef void (*handle_to_datatype_func)(starpu_data_handle_t, MPI_Datatype *);
 typedef void (*handle_free_datatype_func)(MPI_Datatype *);
 
+struct _starpu_mpi_datatype_funcs
+{
+	enum starpu_data_interface_id id;
+	starpu_mpi_datatype_allocate_func_t allocate_datatype_func;
+	starpu_mpi_datatype_free_func_t free_datatype_func;
+	UT_hash_handle hh;
+};
+
+static starpu_pthread_mutex_t _starpu_mpi_datatype_funcs_table_mutex;
+static struct _starpu_mpi_datatype_funcs *_starpu_mpi_datatype_funcs_table = NULL;
+
+void _starpu_mpi_datatype_init(void)
+{
+	STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_datatype_funcs_table_mutex, NULL);
+}
+
+void _starpu_mpi_datatype_shutdown(void)
+{
+	STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_datatype_funcs_table_mutex);
+}
+
+
 /*
  * 	Matrix
  */
@@ -148,9 +173,22 @@ void _starpu_mpi_handle_allocate_datatype(starpu_data_handle_t data_handle, MPI_
 	}
 	else
 	{
-		/* The datatype is not predefined by StarPU */
-		*datatype = MPI_BYTE;
-		*user_datatype = 1;
+		struct _starpu_mpi_datatype_funcs *table;
+		STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+		HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+		if (table)
+		{
+			STARPU_ASSERT_MSG(table->allocate_datatype_func, "Handle To Datatype Function not defined for StarPU data interface %d", id);
+			table->allocate_datatype_func(data_handle, datatype);
+			*user_datatype = 0;
+		}
+		else
+		{
+			/* The datatype is not predefined by StarPU */
+			*datatype = MPI_BYTE;
+			*user_datatype = 1;
+		}
 	}
 }
 
@@ -206,6 +244,19 @@ void _starpu_mpi_handle_free_datatype(starpu_data_handle_t data_handle, MPI_Data
 		STARPU_ASSERT_MSG(func, "Handle free datatype function not defined for StarPU data interface %d", id);
 		func(datatype);
 	}
+	else
+	{
+		struct _starpu_mpi_datatype_funcs *table;
+		STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+		HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+		if (table)
+		{
+			STARPU_ASSERT_MSG(table->free_datatype_func, "Free Datatype Function not defined for StarPU data interface %d", id);
+			table->free_datatype_func(datatype);
+		}
+
+	}
 	/* else the datatype is not predefined by StarPU */
 }
 
@@ -243,3 +294,48 @@ char *_starpu_mpi_datatype(MPI_Datatype datatype)
      if (datatype == MPI_PACKED) return "MPI_PACKED";
      return "User defined MPI Datatype";
 }
+
+int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func)
+{
+	enum starpu_data_interface_id id = starpu_data_get_interface_id(handle);
+	struct _starpu_mpi_datatype_funcs *table;
+
+	STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype");
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+	HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
+	if (table)
+	{
+		table->allocate_datatype_func = allocate_datatype_func;
+		table->free_datatype_func = free_datatype_func;
+	}
+	else
+	{
+		_STARPU_MPI_MALLOC(table, sizeof(struct _starpu_mpi_datatype_funcs));
+		table->id = id;
+		table->allocate_datatype_func = allocate_datatype_func;
+		table->free_datatype_func = free_datatype_func;
+		HASH_ADD_INT(_starpu_mpi_datatype_funcs_table, id, table);
+	}
+	STARPU_ASSERT_MSG(handle->ops->handle_to_pointer, "The data interface must define the operation 'handle_to_pointer'\n");
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+	return 0;
+}
+
+int starpu_mpi_datatype_unregister(starpu_data_handle_t handle)
+{
+	enum starpu_data_interface_id id = starpu_data_get_interface_id(handle);
+	struct _starpu_mpi_datatype_funcs *table;
+
+	STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype");
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+	HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table);
+	if (table)
+	{
+		HASH_DEL(_starpu_mpi_datatype_funcs_table, table);
+		free(table);
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex);
+	return 0;
+}

+ 2 - 0
nmad/src/starpu_mpi_datatype.h

@@ -24,6 +24,8 @@
 extern "C" {
 #endif
 
+void _starpu_mpi_datatype_init(void);
+void _starpu_mpi_datatype_shutdown(void);
 void _starpu_mpi_handle_allocate_datatype(starpu_data_handle_t data_handle, MPI_Datatype *datatype, int *user_datatype);
 void _starpu_mpi_handle_free_datatype(starpu_data_handle_t data_handle, MPI_Datatype *datatype);
 char *_starpu_mpi_datatype(MPI_Datatype datatype);

+ 100 - 18
nmad/tests/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2012, 2016  Université de Bordeaux
-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
+# Copyright (C) 2009-2012, 2015-2016  Université de Bordeaux
+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -28,17 +28,18 @@ LOADER_BIN		=	$(abs_top_builddir)/nmad/tests/$(LOADER)
 loader_SOURCES		=	../../tests/loader.c
 endif
 
+# we always test on 4 processes, the execution time is not that bigger
 if STARPU_QUICK_CHECK
-MPI			=	$(MPIEXEC) -np 2
+MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 else
-MPI			=	$(MPIEXEC) -np 4
+MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
 endif
 
 if STARPU_HAVE_AM111
-TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
+TESTS_ENVIRONMENT	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
 LOG_COMPILER	 	=	$(MPI) $(LOADER_BIN)
 else
-TESTS_ENVIRONMENT 	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(MPI) $(LOADER_BIN)
+TESTS_ENVIRONMENT 	=	STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(MPI) $(LOADER_BIN)
 endif
 
 if !STARPU_SIMGRID
@@ -51,16 +52,20 @@ check_PROGRAMS = $(LOADER) $(starpu_mpi_TESTS)
 
 BUILT_SOURCES =
 
-CLEANFILES = *.gcno *.gcda *.linkinfo
+CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log
 
 EXTRA_DIST = 					\
-	user_defined_datatype_value.h
+	user_defined_datatype_value.h		\
+	helper.h
 
-examplebindir = $(libdir)/starpu/examples/mpi
+examplebindir = $(libdir)/starpu/examples/nmad
 
 examplebin_PROGRAMS =
 
 if STARPU_USE_CUDA
+if STARPU_COVERITY
+include $(top_srcdir)/starpu-mynvcc.mk
+else
 NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(HWLOC_CFLAGS)
 
 .cu.cubin:
@@ -70,21 +75,26 @@ NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -
 .cu.o:
 	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
 endif
+endif
 
 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/nmad/include -I$(top_srcdir)/nmad/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS)
+AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)
 
 ########################
 # Unit testcases       #
 ########################
 
+if BUILD_TESTS
 starpu_mpi_TESTS =				\
 	datatypes				\
 	pingpong				\
 	mpi_test				\
 	mpi_isend				\
+	mpi_earlyrecv				\
+	mpi_earlyrecv2				\
+	mpi_earlyrecv2_sync			\
 	mpi_irecv				\
 	mpi_isend_detached			\
 	mpi_irecv_detached			\
@@ -99,10 +109,10 @@ starpu_mpi_TESTS =				\
 	block_interface_pinned			\
 	cache					\
 	cache_disable				\
+	callback				\
 	matrix					\
 	matrix2					\
 	insert_task				\
-	insert_task_cache			\
 	insert_task_compute			\
 	insert_task_sent_cache			\
 	insert_task_recv_cache			\
@@ -110,18 +120,34 @@ starpu_mpi_TESTS =				\
 	insert_task_owner			\
 	insert_task_owner2			\
 	insert_task_owner_data			\
+	insert_task_node_choice			\
 	insert_task_count			\
+	insert_task_dyn_handles			\
 	multiple_send				\
 	mpi_scatter_gather			\
 	mpi_reduction				\
 	user_defined_datatype			\
-	comm
+	tags_checking				\
+	sync					\
+	gather					\
+	gather2					\
+	policy_register				\
+	policy_register_many			\
+	policy_register_toomany			\
+	policy_unregister			\
+	policy_selection			\
+	policy_selection2			\
+	early_request				\
+	starpu_redefine
 
 noinst_PROGRAMS =				\
 	datatypes				\
 	pingpong				\
 	mpi_test				\
 	mpi_isend				\
+	mpi_earlyrecv				\
+	mpi_earlyrecv2				\
+	mpi_earlyrecv2_sync			\
 	mpi_irecv				\
 	mpi_isend_detached			\
 	mpi_irecv_detached			\
@@ -136,10 +162,10 @@ noinst_PROGRAMS =				\
 	block_interface_pinned			\
 	cache					\
 	cache_disable				\
+	callback				\
 	matrix					\
 	matrix2					\
 	insert_task				\
-	insert_task_cache			\
 	insert_task_compute			\
 	insert_task_sent_cache			\
 	insert_task_recv_cache			\
@@ -147,15 +173,40 @@ noinst_PROGRAMS =				\
 	insert_task_owner			\
 	insert_task_owner2			\
 	insert_task_owner_data			\
+	insert_task_node_choice			\
 	insert_task_count			\
+	insert_task_dyn_handles			\
 	multiple_send				\
 	mpi_scatter_gather			\
 	mpi_reduction				\
 	user_defined_datatype			\
-	comm
+	tags_checking				\
+	sync					\
+	gather					\
+	gather2					\
+	policy_register				\
+	policy_register_many			\
+	policy_register_toomany			\
+	policy_unregister			\
+	policy_selection			\
+	policy_selection2			\
+	early_request				\
+	starpu_redefine
+
+
+XFAIL_TESTS=					\
+	policy_register_toomany			\
+	policy_unregister			\
+	starpu_redefine
 
 mpi_isend_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+mpi_earlyrecv_LDADD =					\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+mpi_earlyrecv2_LDADD =					\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+mpi_earlyrecv2_sync_LDADD =					\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 mpi_irecv_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 mpi_isend_detached_LDADD =			\
@@ -190,14 +241,14 @@ cache_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 cache_disable_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+callback_LDADD =				\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 matrix_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 matrix2_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 insert_task_LDADD =				\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-insert_task_cache_LDADD =				\
-	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 insert_task_compute_LDADD =				\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 insert_task_sent_cache_LDADD =				\
@@ -212,8 +263,12 @@ insert_task_owner2_LDADD =			\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 insert_task_owner_data_LDADD =			\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+insert_task_node_choice_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 insert_task_count_LDADD =				\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+insert_task_dyn_handles_LDADD =				\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 multiple_send_LDADD =				\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 mpi_scatter_gather_LDADD =			\
@@ -222,7 +277,29 @@ mpi_reduction_LDADD =			\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 user_defined_datatype_LDADD =			\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-comm_LDADD =			\
+tags_checking_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+sync_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+gather_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+gather2_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+policy_register_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+policy_register_many_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+policy_register_toomany_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+policy_unregister_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+policy_selection_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+policy_selection2_LDADD =			\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+early_request_LDADD =					\
+	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+starpu_redefine_LDADD =					\
 	../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 
 ring_SOURCES = ring.c
@@ -242,5 +319,10 @@ endif
 mpi_reduction_SOURCES = mpi_reduction.c
 mpi_reduction_SOURCES += mpi_reduction_kernels.c
 user_defined_datatype_SOURCES = user_defined_datatype.c
-user_defined_datatype_SOURCES += $(top_srcdir)/examples/interface/complex_interface.c
+user_defined_datatype_SOURCES += ../../examples/interface/complex_interface.c
 
+mpi_earlyrecv2_SOURCES = mpi_earlyrecv2.c
+mpi_earlyrecv2_SOURCES += ../../examples/interface/complex_interface.c
+mpi_earlyrecv2_sync_SOURCES = mpi_earlyrecv2_sync.c
+mpi_earlyrecv2_sync_SOURCES += ../../examples/interface/complex_interface.c
+endif

+ 14 - 12
nmad/tests/block_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2014  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2014, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,24 +28,26 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size < 2)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 2 processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	/* Node 0 will allocate a big block and only register an inner part of
 	 * it as the block data, Node 1 will allocate a block of small size and
 	 * register it directly. Node 0 and 1 will then exchange the content of
@@ -68,7 +70,7 @@ int main(int argc, char **argv)
 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
 		}
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}
@@ -77,7 +79,7 @@ int main(int argc, char **argv)
 		block = calloc(SIZE*SIZE*SIZE, sizeof(float));
 		assert(block);
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, SIZE, SIZE*SIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}

+ 14 - 12
nmad/tests/block_interface_pinned.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,24 +28,26 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size < 2)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need at least 2 processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	/* Node 0 will allocate a big block and only register an inner part of
 	 * it as the block data, Node 1 will allocate a block of small size and
 	 * register it directly. Node 0 and 1 will then exchange the content of
@@ -69,7 +71,7 @@ int main(int argc, char **argv)
 			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
 		}
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}
@@ -79,7 +81,7 @@ int main(int argc, char **argv)
 			SIZE*SIZE*SIZE*sizeof(float));
 		memset(block, 0, SIZE*SIZE*SIZE*sizeof(float));
 
-		starpu_block_data_register(&block_handle, 0,
+		starpu_block_data_register(&block_handle, STARPU_MAIN_RAM,
 			(uintptr_t)block, SIZE, SIZE*SIZE,
 			SIZE, SIZE, SIZE, sizeof(float));
 	}

+ 27 - 13
nmad/tests/cache.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,46 +23,60 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type          = STARPU_COMMON,
+	.cost_function = cost_function
+};
+
 struct starpu_codelet mycodelet_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_R}
+	.modes = {STARPU_R},
+	.model = &dumb_model
 };
 
 struct starpu_codelet mycodelet_w =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_W}
+	.modes = {STARPU_W},
+	.model = &dumb_model
 };
 
 struct starpu_codelet mycodelet_rw =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 void test(struct starpu_codelet *codelet, enum starpu_data_access_mode mode, starpu_data_handle_t data, int rank, int in_cache)
 {
-	void *ptr;
+	void* cache;
 	int ret;
 
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 
-	ptr = _starpu_mpi_cache_received_data_get(data);
+	cache = _starpu_mpi_cache_received_data_get(data);
 
 	if (rank == 1)
 	{
 	     if (in_cache)
 	     {
-		     STARPU_ASSERT_MSG(ptr != NULL, "Data should be in cache\n");
+		     STARPU_ASSERT_MSG(cache != NULL, "Data should be in cache\n");
 	     }
 	     else
 	     {
-		     STARPU_ASSERT_MSG(ptr == NULL, "Data should NOT be in cache\n");
+		     STARPU_ASSERT_MSG(cache == NULL, "Data should NOT be in cache\n");
 	     }
 	}
 }
@@ -71,19 +85,19 @@ int main(int argc, char **argv)
 {
 	int rank, n;
 	int ret;
-	unsigned val;
+	unsigned val = 42;
 	starpu_data_handle_t data;
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
 	if (starpu_mpi_cache_is_enabled() == 0) goto skip;
 
 	if (rank == 0)
-		starpu_variable_data_register(&data, 0, (uintptr_t)&val, sizeof(unsigned));
+		starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t)&val, sizeof(unsigned));
 	else
 		starpu_variable_data_register(&data, -1, (uintptr_t)NULL, sizeof(unsigned));
 	starpu_mpi_data_register(data, 42, 0);

+ 31 - 13
nmad/tests/cache_disable.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,43 +23,60 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type          = STARPU_COMMON,
+	.cost_function = cost_function
+};
+
 struct starpu_codelet mycodelet_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_R}
+	.modes = {STARPU_R},
+	.model = &dumb_model
 };
 
 int main(int argc, char **argv)
 {
 	int rank, n;
 	int ret;
-	unsigned val;
+	unsigned *val;
 	starpu_data_handle_t data;
-	void *ptr;
+	void* ptr;
+	int cache;
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+
+	cache = starpu_mpi_cache_is_enabled();
+	if (cache == 0) goto skip;
 
-	if (starpu_mpi_cache_is_enabled() == 0) goto skip;
+	val = malloc(sizeof(*val));
+	*val = 12;
 
 	if (rank == 0)
-		starpu_variable_data_register(&data, 0, (uintptr_t)&val, sizeof(unsigned));
+		starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t)val, sizeof(unsigned));
 	else
 		starpu_variable_data_register(&data, -1, (uintptr_t)NULL, sizeof(unsigned));
 	starpu_mpi_data_register(data, 42, 0);
 	FPRINTF_MPI(stderr, "Registering data %p with tag %d and node %d\n", data, 42, 0);
 
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 
 	ptr = _starpu_mpi_cache_received_data_get(data);
 	if (rank == 1)
 	{
-	     STARPU_ASSERT_MSG(ptr != NULL, "Data should be in cache\n");
+		STARPU_ASSERT_MSG(ptr != NULL, "Data should be in cache\n");
 	}
 
 	// We clean the cache
@@ -69,25 +86,26 @@ int main(int argc, char **argv)
 	ptr = _starpu_mpi_cache_received_data_get(data);
 	if (rank == 1)
 	{
-	     STARPU_ASSERT_MSG(ptr == NULL, "Data should NOT be in cache\n");
+		STARPU_ASSERT_MSG(ptr == NULL, "Data should NOT be in cache\n");
 	}
 
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	ptr = _starpu_mpi_cache_received_data_get(data);
 	if (rank == 1)
 	{
-	     STARPU_ASSERT_MSG(ptr == NULL, "Data should NOT be in cache\n");
+		STARPU_ASSERT_MSG(ptr == NULL, "Data should NOT be in cache\n");
 	}
 
 	FPRINTF(stderr, "Waiting ...\n");
 	starpu_task_wait_for_all();
 
 	starpu_data_unregister(data);
+	free(val);
 
 skip:
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 
-	return starpu_mpi_cache_is_enabled() == 0 ? STARPU_TEST_SKIPPED : 0;
+	return cache == 0 ? STARPU_TEST_SKIPPED : 0;
 }

+ 128 - 0
nmad/tests/callback.c

@@ -0,0 +1,128 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013, 2014, 2015  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+/* Expected values of x/y after each task's callback has fired; incremented
+ * on the executing node (rank 0) only, since callbacks attached through
+ * starpu_mpi_task_insert run only where the task executes. */
+static
+int expected_x=40;
+static
+int expected_y=12;
+
+/* Dummy kernel: the test only exercises the callback machinery. */
+void my_func(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
+{
+	FPRINTF_MPI(stderr, "i am here\n");
+}
+
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
+struct starpu_codelet my_codelet =
+{
+	.cpu_funcs = {my_func},
+	.cuda_funcs = {my_func},
+	.opencl_funcs = {my_func},
+	.model = &dumb_model
+};
+
+/* Task callback: checks the shared counter and increments it. */
+static
+void callback(void *ptr)
+{
+	int *x = (int *)ptr;
+	FPRINTF_MPI(stderr, "x=%d\n", *x);
+	STARPU_ASSERT_MSG(*x == expected_x, "%d != %d\n", *x, expected_x);
+	(*x)++;
+}
+
+/* Prologue callback: same contract as callback(), on the y counter. */
+static
+void prologue_callback(void *ptr)
+{
+	int *y = (int *)ptr;
+	FPRINTF_MPI(stderr, "y=%d\n", *y);
+	STARPU_ASSERT_MSG(*y == expected_y, "%d != %d\n", *y, expected_y);
+	(*y)++;
+}
+
+/* Checks the STARPU_CALLBACK{,_WITH_ARG,_ARG} and STARPU_PROLOGUE_CALLBACK*
+ * arguments of starpu_mpi_task_insert().  Tasks execute on node 0, so the
+ * counters only move there; the other ranks just check nothing happened. */
+int main(int argc, char **argv)
+{
+	int ret;
+	int x=40;
+	int y=12;
+	int rank, size;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
+				     NULL,
+				     STARPU_EXECUTE_ON_NODE, 0,
+				     STARPU_CALLBACK_WITH_ARG, callback, &x,
+				     0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+
+	/* The callback runs asynchronously: wait for it before bumping the
+	 * expected value, otherwise callback() may race with the increment. */
+	starpu_task_wait_for_all();
+	if (rank == 0) expected_x ++;
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
+				     NULL,
+				     STARPU_EXECUTE_ON_NODE, 0,
+				     STARPU_CALLBACK, callback,
+				     STARPU_CALLBACK_ARG, &x,
+				     0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+
+	starpu_task_wait_for_all();
+	if (rank == 0) expected_x ++;
+	STARPU_ASSERT_MSG(x == expected_x, "x should be equal to %d and not %d\n", expected_x, x);
+
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
+				     NULL,
+				     STARPU_EXECUTE_ON_NODE, 0,
+				     STARPU_PROLOGUE_CALLBACK, prologue_callback,
+				     STARPU_PROLOGUE_CALLBACK_ARG, &y,
+				     0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+
+	/* Same synchronization for the prologue callbacks on y. */
+	starpu_task_wait_for_all();
+	if (rank == 0) expected_y ++;
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD,
+				     &my_codelet,
+				     STARPU_EXECUTE_ON_NODE, 0,
+				     STARPU_PROLOGUE_CALLBACK_POP, prologue_callback,
+				     STARPU_PROLOGUE_CALLBACK_POP_ARG, &y,
+				     0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+
+	starpu_task_wait_for_all();
+	if (rank == 0) expected_y ++;
+	STARPU_ASSERT_MSG(y == expected_y, "y should be equal to %d and not %d\n", expected_y, y);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}
+

+ 155 - 120
nmad/tests/datatypes.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013, 2014, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,7 +27,6 @@ void check_void(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, in
 
 void check_variable(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
 {
-	int ret;
 	float *v_s, *v_r;
 
 	STARPU_ASSERT(starpu_variable_get_elemsize(handle_s) == starpu_variable_get_elemsize(handle_r));
@@ -48,7 +47,7 @@ void check_variable(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r
 
 void check_vector(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
 {
-	int ret, i;
+	int i;
 	int nx;
 	int *v_r, *v_s;
 
@@ -90,6 +89,7 @@ void check_matrix(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r,
 	int x, y;
 
 	for(y=0 ; y<ny ; y++)
+	{
 		for(x=0 ; x<nx ; x++)
 		{
 			int index=(y*ldy)+x;
@@ -103,6 +103,7 @@ void check_matrix(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r,
 				FPRINTF_MPI(stderr, "Error with matrix[%d,%d --> %d] value: %c != %c\n", x, y, index, matrix_s[index], matrix_r[index]);
 			}
 		}
+	}
 }
 
 void check_block(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
@@ -114,6 +115,9 @@ void check_block(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, i
 	STARPU_ASSERT(starpu_block_get_local_ldy(handle_s) == starpu_block_get_local_ldy(handle_r));
 	STARPU_ASSERT(starpu_block_get_local_ldz(handle_s) == starpu_block_get_local_ldz(handle_r));
 
+	starpu_data_acquire(handle_s, STARPU_R);
+	starpu_data_acquire(handle_r, STARPU_R);
+
 	float *block_s = (float *)starpu_block_get_local_ptr(handle_s);
 	float *block_r = (float *)starpu_block_get_local_ptr(handle_r);
 
@@ -127,6 +131,7 @@ void check_block(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, i
 	int x, y, z;
 
 	for(z=0 ; z<nz ; z++)
+	{
 		for(y=0 ; y<ny ; y++)
 			for(x=0 ; x<nx ; x++)
 			{
@@ -141,6 +146,10 @@ void check_block(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, i
 					FPRINTF_MPI(stderr, "Error with block[%d,%d,%d --> %d] value: %f != %f\n", x, y, z, index, block_s[index], block_r[index]);
 				}
 			}
+	}
+
+	starpu_data_release(handle_s);
+	starpu_data_release(handle_r);
 }
 
 void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int tag_s, starpu_data_handle_t handle_r, int tag_r, int *error, check_func func)
@@ -157,7 +166,7 @@ void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int
 
 		func(handle_s, handle_r, error);
 	}
-	else
+	else if (rank == 1)
 	{
 		ret = starpu_mpi_recv(handle_s, node, tag_s, MPI_COMM_WORLD, &status);
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv");
@@ -166,164 +175,190 @@ void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int
 	}
 }
 
-int main(int argc, char **argv)
+void exchange_void(int rank, int *error)
 {
-	int ret, rank, size;
-	int error=0;
+	STARPU_SKIP_IF_VALGRIND;
 
-	int nx=3;
-	int ny=2;
-	int nz=4;
+	if (rank == 0)
+	{
+		starpu_data_handle_t void_handle[2];
+		starpu_void_data_register(&void_handle[0]);
+		starpu_void_data_register(&void_handle[1]);
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+		send_recv_and_check(rank, 1, void_handle[0], 0x42, void_handle[1], 0x1337, error, check_void);
 
-	if (size < 2)
+		starpu_data_unregister(void_handle[0]);
+		starpu_data_unregister(void_handle[1]);
+	}
+	else if (rank == 1)
 	{
-		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		MPI_Finalize();
-		return STARPU_TEST_SKIPPED;
+		starpu_data_handle_t void_handle;
+		starpu_void_data_register(&void_handle);
+		send_recv_and_check(rank, 0, void_handle, 0x42, NULL, 0x1337, NULL, NULL);
+		starpu_data_unregister(void_handle);
 	}
+}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
+void exchange_variable(int rank, int *error)
+{
 	if (rank == 0)
 	{
-		MPI_Status status;
+		float v = 42.12;
+		starpu_data_handle_t variable_handle[2];
+		starpu_variable_data_register(&variable_handle[0], STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(v));
+		starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v));
 
-		{
-			starpu_data_handle_t void_handle[2];
-			starpu_void_data_register(&void_handle[0]);
-			starpu_void_data_register(&void_handle[1]);
-
-			send_recv_and_check(rank, 1, void_handle[0], 0x42, void_handle[1], 0x1337, &error, check_void);
+		send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, error, check_variable);
 
-			starpu_data_unregister(void_handle[0]);
-			starpu_data_unregister(void_handle[1]);
-		}
-		{
-			float v = 42.12;
-			starpu_data_handle_t variable_handle[2];
-			starpu_variable_data_register(&variable_handle[0], 0, (uintptr_t)&v, sizeof(v));
-			starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v));
+		starpu_data_unregister(variable_handle[0]);
+		starpu_data_unregister(variable_handle[1]);
+	}
+	else if (rank == 1)
+	{
+		starpu_data_handle_t variable_handle;
+		starpu_variable_data_register(&variable_handle, -1, (uintptr_t)NULL, sizeof(float));
+		send_recv_and_check(rank, 0, variable_handle, 0x42, NULL, 0x1337, NULL, NULL);
+		starpu_data_unregister(variable_handle);
+	}
+}
 
-			send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, &error, check_variable);
+void exchange_vector(int rank, int *error)
+{
+	if (rank == 0)
+	{
+		int vector[4] = {1, 2, 3, 4};
+		starpu_data_handle_t vector_handle[2];
 
-			starpu_data_unregister(variable_handle[0]);
-			starpu_data_unregister(variable_handle[1]);
-		}
+		starpu_vector_data_register(&vector_handle[0], STARPU_MAIN_RAM, (uintptr_t)vector, 4, sizeof(vector[0]));
+		starpu_vector_data_register(&vector_handle[1], -1, (uintptr_t)NULL, 4, sizeof(vector[0]));
 
-		{
-			int vector[4] = {1, 2, 3, 4};
-			starpu_data_handle_t vector_handle[2];
+		send_recv_and_check(rank, 1, vector_handle[0], 0x43, vector_handle[1], 0x2337, error, check_vector);
 
-			starpu_vector_data_register(&vector_handle[0], 0, (uintptr_t)vector, 4, sizeof(vector[0]));
-			starpu_vector_data_register(&vector_handle[1], -1, (uintptr_t)NULL, 4, sizeof(vector[0]));
+		starpu_data_unregister(vector_handle[0]);
+		starpu_data_unregister(vector_handle[1]);
+	}
+	else if (rank == 1)
+	{
+		starpu_data_handle_t vector_handle;
+		starpu_vector_data_register(&vector_handle, -1, (uintptr_t)NULL, 4, sizeof(int));
+		send_recv_and_check(rank, 0, vector_handle, 0x43, NULL, 0x2337, NULL, NULL);
+		starpu_data_unregister(vector_handle);
+	}
+}
 
-			send_recv_and_check(rank, 1, vector_handle[0], 0x43, vector_handle[1], 0x2337, &error, check_vector);
+void exchange_matrix(int rank, int *error)
+{
+	int nx=3;
+	int ny=2;
 
-			starpu_data_unregister(vector_handle[0]);
-			starpu_data_unregister(vector_handle[1]);
-		}
+	if (rank == 0)
+	{
+		char *matrix, n='a';
+		int x, y;
+		starpu_data_handle_t matrix_handle[2];
 
+		matrix = (char*)malloc(nx*ny*sizeof(char));
+		assert(matrix);
+		for(y=0 ; y<ny ; y++)
 		{
-			char *matrix, n='a';
-			int x, y;
-			starpu_data_handle_t matrix_handle[2];
-
-			matrix = (char*)malloc(nx*ny*nz*sizeof(char));
-			assert(matrix);
-			for(y=0 ; y<ny ; y++)
+			for(x=0 ; x<nx ; x++)
 			{
-				for(x=0 ; x<nx ; x++)
-				{
-					matrix[(y*nx)+x] = n++;
-				}
+				matrix[(y*nx)+x] = n++;
 			}
+		}
 
-			starpu_matrix_data_register(&matrix_handle[0], 0, (uintptr_t)matrix, nx, nx, ny, sizeof(char));
-			starpu_matrix_data_register(&matrix_handle[1], -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
+		starpu_matrix_data_register(&matrix_handle[0], STARPU_MAIN_RAM, (uintptr_t)matrix, nx, nx, ny, sizeof(char));
+		starpu_matrix_data_register(&matrix_handle[1], -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
 
-			send_recv_and_check(rank, 1, matrix_handle[0], 0x75, matrix_handle[1], 0x8555, &error, check_matrix);
+		send_recv_and_check(rank, 1, matrix_handle[0], 0x75, matrix_handle[1], 0x8555, error, check_matrix);
 
-			starpu_data_unregister(matrix_handle[0]);
-			starpu_data_unregister(matrix_handle[1]);
-			free(matrix);
-		}
+		starpu_data_unregister(matrix_handle[0]);
+		starpu_data_unregister(matrix_handle[1]);
+		free(matrix);
+	}
+	else if (rank == 1)
+	{
+		starpu_data_handle_t matrix_handle;
+		starpu_matrix_data_register(&matrix_handle, -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
+		send_recv_and_check(rank, 0, matrix_handle, 0x75, NULL, 0x8555, NULL, NULL);
+		starpu_data_unregister(matrix_handle);
+	}
+}
 
-		{
-			float *block, n=1.0;
-			int x, y, z;
-			starpu_data_handle_t block_handle[2];
+void exchange_block(int rank, int *error)
+{
+	int nx=3;
+	int ny=2;
+	int nz=4;
+
+	if (rank == 0)
+	{
+		float *block, n=1.0;
+		int x, y, z;
+		starpu_data_handle_t block_handle[2];
 
-			block = (float*)malloc(nx*ny*nz*sizeof(float));
-			assert(block);
-			for(z=0 ; z<nz ; z++)
+		block = (float*)malloc(nx*ny*nz*sizeof(float));
+		assert(block);
+		for(z=0 ; z<nz ; z++)
+		{
+			for(y=0 ; y<ny ; y++)
 			{
-				for(y=0 ; y<ny ; y++)
+				for(x=0 ; x<nx ; x++)
 				{
-					for(x=0 ; x<nx ; x++)
-					{
-						block[(z*nx*ny)+(y*nx)+x] = n++;
-					}
+					block[(z*nx*ny)+(y*nx)+x] = n++;
 				}
 			}
+		}
 
-			starpu_block_data_register(&block_handle[0], 0, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
-			starpu_block_data_register(&block_handle[1], -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
+		starpu_block_data_register(&block_handle[0], STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
+		starpu_block_data_register(&block_handle[1], -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
 
-			send_recv_and_check(rank, 1, block_handle[0], 0x73, block_handle[1], 0x8337, &error, check_block);
+		send_recv_and_check(rank, 1, block_handle[0], 0x73, block_handle[1], 0x8337, error, check_block);
 
-			starpu_data_unregister(block_handle[0]);
-			starpu_data_unregister(block_handle[1]);
-			free(block);
-		}
+		starpu_data_unregister(block_handle[0]);
+		starpu_data_unregister(block_handle[1]);
+		free(block);
 	}
 	else if (rank == 1)
 	{
-		MPI_Status status;
+		starpu_data_handle_t block_handle;
+		starpu_block_data_register(&block_handle, -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
+		send_recv_and_check(rank, 0, block_handle, 0x73, NULL, 0x8337, NULL, NULL);
+		starpu_data_unregister(block_handle);
+	}
+}
 
-		{
-			starpu_data_handle_t void_handle;
-			starpu_void_data_register(&void_handle);
-			send_recv_and_check(rank, 0, void_handle, 0x42, NULL, 0x1337, NULL, NULL);
-			starpu_data_unregister(void_handle);
-		}
-		{
-			starpu_data_handle_t variable_handle;
-			starpu_variable_data_register(&variable_handle, -1, (uintptr_t)NULL, sizeof(float));
-			send_recv_and_check(rank, 0, variable_handle, 0x42, NULL, 0x1337, NULL, NULL);
-			starpu_data_unregister(variable_handle);
-		}
+int main(int argc, char **argv)
+{
+	int ret, rank, size;
+	int error=0;
 
-		{
-			starpu_data_handle_t vector_handle;
-			starpu_vector_data_register(&vector_handle, -1, (uintptr_t)NULL, 4, sizeof(int));
-			send_recv_and_check(rank, 0, vector_handle, 0x43, NULL, 0x2337, NULL, NULL);
-			starpu_data_unregister(vector_handle);
-		}
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
-		{
-			starpu_data_handle_t matrix_handle;
-			starpu_matrix_data_register(&matrix_handle, -1, (uintptr_t)NULL, nx, nx, ny, sizeof(char));
-			send_recv_and_check(rank, 0, matrix_handle, 0x75, NULL, 0x8555, NULL, NULL);
-			starpu_data_unregister(matrix_handle);
-		}
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
-		{
-			starpu_data_handle_t block_handle;
-			starpu_block_data_register(&block_handle, -1, (uintptr_t)NULL, nx, nx*ny, nx, ny, nz, sizeof(float));
-			send_recv_and_check(rank, 0, block_handle, 0x73, NULL, 0x8337, NULL, NULL);
-			starpu_data_unregister(block_handle);
-		}
+	if (size < 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 2 processes.\n");
+
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
 	}
 
+	exchange_void(rank, &error);
+	exchange_variable(rank, &error);
+	exchange_vector(rank, &error);
+	exchange_matrix(rank, &error);
+	exchange_block(rank, &error);
+
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 

+ 252 - 0
nmad/tests/early_request.c

@@ -0,0 +1,252 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2016, 2017  CNRS
+ * Copyright (C) 2015  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_mpi.h>
+#include "helper.h"
+
+#define NUM_EL 5
+#define NUM_LOOPS 10
+
+/*
+ * This testcase written by J-M Couteyen allows to test that several
+ * early requests for a given source and tag can be posted to StarPU
+ * by the application before data arrive.
+ *
+ * In this test case, multiples processes (called "domains") exchanges
+ * informations between multiple "elements" multiple times, with
+ * different sizes (in order to catch error more easily).
+ * The communications are independent between the elements (each one
+ * as its proper tag), but must occur in the submitted order for an
+ * element taken independtly.
+*/
+
+/* One communicating "element": its own MPI tag (== payload size, so each
+ * element exchanges a different length), the peer domain it talks to, and
+ * void handles used only to serialize its submitted operations. */
+struct element
+{
+	int tag;
+	int foreign_domain;
+
+	int array_send[100];
+	int array_recv[100];
+
+	starpu_data_handle_t ensure_submitted_order_send;
+	starpu_data_handle_t ensure_submitted_order_recv;
+	starpu_data_handle_t send;
+	starpu_data_handle_t recv;
+};
+
+/* functions/codelet to fill the buffers */
+void fill_tmp_buffer(void *buffers[], void *cl_arg)
+{
+	int *tmp = (int *) STARPU_VECTOR_GET_PTR(buffers[0]);
+	int nx = STARPU_VECTOR_GET_NX(buffers[0]);
+	int i;
+
+	/* tmp[i] = nx+i: read_ghost() checks exactly this pattern on arrival. */
+	for (i=0; i<nx; i++)
+		tmp[i]=nx+i;
+}
+
+#ifdef STARPU_SIMGRID
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+#endif
+
+static struct starpu_codelet fill_tmp_buffer_cl =
+{
+	.where = STARPU_CPU,
+	.cpu_funcs = {fill_tmp_buffer, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_W},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
+	.name = "fill_tmp_buffer"
+};
+
+/* Verifies the pattern written by fill_tmp_buffer(): a mismatch means a
+ * message was matched with the wrong early request. */
+void read_ghost(void *buffers[], void *cl_arg)
+{
+	int *tmp = (int *) STARPU_VECTOR_GET_PTR(buffers[0]);
+	int nx=STARPU_VECTOR_GET_NX(buffers[0]);
+	int i;
+	for(i=0; i<nx;i++)
+	{
+		assert(tmp[i]==nx+i);
+	}
+}
+
+static struct starpu_codelet read_ghost_value_cl =
+{
+	.where = STARPU_CPU,
+	.cpu_funcs = {read_ghost, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_R},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
+	.name = "read_ghost_value"
+};
+
+/*codelet to ensure submitted order for a given element*/
+/* NOTE(review): noop() appears unused — submitted_order uses
+ * submitted_order_fun below; candidate for removal. */
+void noop(void *buffers[], void *cl_arg)
+{
+}
+
+void submitted_order_fun(void *buffers[], void *cl_arg)
+{
+}
+
+/* Empty task chained on a per-element void handle so that the isend/irecv
+ * of one element are forced to happen in submission order. */
+static struct starpu_codelet submitted_order =
+{
+	.where = STARPU_CPU,
+	.cpu_funcs = {submitted_order_fun, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_W},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
+	.name = "submitted_order_enforcer"
+};
+
+/* Registers the per-element data handles.  "size" doubles as the element's
+ * MPI tag so every element exchanges messages of a distinct length. */
+void init_element(struct element *el, int size, int foreign_domain)
+{
+	el->tag=size;
+	el->foreign_domain=foreign_domain;
+
+	/* Use STARPU_MAIN_RAM rather than the bare constant 0, consistently
+	 * with the other registrations updated by this commit. */
+	starpu_vector_data_register(&el->recv, STARPU_MAIN_RAM, (uintptr_t)el->array_recv, size, sizeof(int));
+	starpu_vector_data_register(&el->send, STARPU_MAIN_RAM, (uintptr_t)el->array_send, size, sizeof(int));
+	starpu_void_data_register(&el->ensure_submitted_order_send);
+	starpu_void_data_register(&el->ensure_submitted_order_recv);
+}
+
+/* Unregisters the four handles registered by init_element(); call only
+ * after starpu_task_wait_for_all(), once no task still uses them. */
+void free_element(struct element *el)
+{
+	starpu_data_unregister(el->recv);
+	starpu_data_unregister(el->send);
+	starpu_data_unregister(el->ensure_submitted_order_send);
+	starpu_data_unregister(el->ensure_submitted_order_recv);
+}
+
+/* Submits one fill/send/recv/check round for an element, using fresh
+ * temporary handles so several rounds for the same tag can be in flight:
+ * this is what creates multiple early requests per (source, tag). */
+void insert_work_for_one_element(struct element *el)
+{
+	starpu_data_handle_t tmp_recv;
+	starpu_data_handle_t tmp_send;
+
+	/* Home node -1: StarPU allocates the buffers lazily. */
+	starpu_vector_data_register(&tmp_recv, -1, 0, el->tag, sizeof(int));
+	starpu_vector_data_register(&tmp_send, -1, 0, el->tag, sizeof(int));
+
+	//Emulate the work to fill the send buffer
+	starpu_insert_task(&fill_tmp_buffer_cl,
+			   STARPU_W,tmp_send,
+			   0);
+	//Send operation
+	starpu_insert_task(&submitted_order,
+			   STARPU_RW,el->ensure_submitted_order_send,
+			   STARPU_W,tmp_send,
+			   0);
+	starpu_mpi_isend_detached(tmp_send,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL);
+
+	//Recv operation for current element
+	starpu_insert_task(&submitted_order,
+			   STARPU_RW,el->ensure_submitted_order_recv,
+			   STARPU_W,tmp_recv,
+			   0);
+	starpu_mpi_irecv_detached(tmp_recv,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL);
+	//Emulate the "reading" of the recv value.
+	starpu_insert_task(&read_ghost_value_cl,
+			   STARPU_R,tmp_recv,
+			   0);
+
+	/* Deferred unregister: the handles die once the tasks above finish. */
+	starpu_data_unregister_submit(tmp_send);
+	starpu_data_unregister_submit(tmp_recv);
+}
+
+/* Main program: domains are connected as a ring; each rank exchanges
+ * NUM_LOOPS rounds with both neighbours for NUM_EL elements of sizes 1..NUM_EL. */
+int main(int argc, char * argv[])
+{
+	/* Init */
+	int ret;
+	int mpi_rank, mpi_size;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	/* All codelets are CPU-only: skip on accelerator-only machines. */
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (mpi_rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	/*element initialization : domains are connected as a ring for this test*/
+	int num_elements=NUM_EL;
+	struct element * el_left=malloc(num_elements*sizeof(el_left[0]));
+	struct element * el_right=malloc(num_elements*sizeof(el_right[0]));
+	int i;
+	for(i=0;i<num_elements;i++)
+	{
+		/* element i uses tag/size i+1; left/right are the ring neighbours */
+		init_element(el_left+i,i+1,((mpi_rank-1)+mpi_size)%mpi_size);
+		init_element(el_right+i,i+1,(mpi_rank+1)%mpi_size);
+	}
+
+	/* Communication loop */
+	for (i=0; i<NUM_LOOPS; i++) //number of "computations loops"
+	{
+		int e;
+		for (e=0;e<num_elements;e++) //Do something for each elements
+		{
+			insert_work_for_one_element(el_right+e);
+			insert_work_for_one_element(el_left+e);
+		}
+	}
+	/* End */
+	starpu_task_wait_for_all();
+
+	for(i=0;i<num_elements;i++)
+	{
+		free_element(el_left+i);
+		free_element(el_right+i);
+	}
+	free(el_left);
+	free(el_right);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	MPI_Finalize();
+	FPRINTF(stderr, "No assert until end\n");
+	return 0;
+}

+ 75 - 0
nmad/tests/gather.c

@@ -0,0 +1,75 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013, 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+/* Simple gather: every rank n>0 sends its rank to rank 0, which receives
+ * and checks the values one sender at a time. */
+int main(int argc, char **argv)
+{
+	int ret, rank, size;
+	starpu_data_handle_t handle;
+	int var;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size<3)
+	{
+		FPRINTF(stderr, "We need more than 2 processes.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	if (rank == 0)
+	{
+		int n;
+		for(n=1 ; n<size ; n++)
+		{
+			MPI_Status status;
+
+			FPRINTF_MPI(stderr, "receiving from node %d\n", n);
+			/* Re-register var for each sender; acquire before
+			 * reading it so the received value is visible. */
+			starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
+			starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status);
+			starpu_data_acquire(handle, STARPU_R);
+			STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n);
+			FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n);
+			starpu_data_release(handle);
+			starpu_data_unregister(handle);
+		}
+	}
+	else
+	{
+		FPRINTF_MPI(stderr, "sending to node %d\n", 0);
+		var = rank;
+		starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
+		starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD);
+		starpu_data_unregister(handle);
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}

+ 97 - 0
nmad/tests/gather2.c

@@ -0,0 +1,97 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013, 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+/* Gather with several messages per sender: rank n sends n (tag 42), then
+ * 2n (tag 42 again) and 4n (tag 44); rank 0 checks that matching by tag
+ * and order delivers them correctly. */
+int main(int argc, char **argv)
+{
+	int ret, rank, size;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size<3)
+	{
+		FPRINTF(stderr, "We need more than 2 processes.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	if (rank == 0)
+	{
+		int n;
+		for(n=1 ; n<size ; n++)
+		{
+			int i, var[2];
+			MPI_Status status[3];
+			starpu_data_handle_t handle[2];
+
+			FPRINTF_MPI(stderr, "receiving from node %d\n", n);
+			for(i=0 ; i<2 ; i++)
+				starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i]));
+
+			/* First message on tag 42 must carry n. */
+			starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]);
+			starpu_data_acquire(handle[0], STARPU_R);
+			STARPU_ASSERT_MSG(var[0] == n, "Received incorrect value <%d> from node <%d>\n", var[0], n);
+			FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n);
+			starpu_data_release(handle[0]);
+
+			/* Second tag-42 message carries 2n, tag-44 carries 4n. */
+			starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[1]);
+			starpu_mpi_recv(handle[1], n, 44, MPI_COMM_WORLD, &status[2]);
+			for(i=0 ; i<2 ; i++)
+				starpu_data_acquire(handle[i], STARPU_R);
+			STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n);
+			/* Fixed: report var[1] (the value actually checked), not var[0]. */
+			STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[1], n);
+			FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n);
+			for(i=0 ; i<2 ; i++)
+				starpu_data_release(handle[i]);
+			for(i=0 ; i<2 ; i++)
+				starpu_data_unregister(handle[i]);
+		}
+	}
+	else
+	{
+		int i, var[3];
+		starpu_data_handle_t handle[3];
+
+		FPRINTF_MPI(stderr, "sending to node %d\n", 0);
+		var[0] = rank;
+		var[1] = var[0] * 2;
+		var[2] = var[0] * 4;
+		for(i=0 ; i<3 ; i++)
+			starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i]));
+		starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD);
+		starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD);
+		starpu_mpi_send(handle[2], 0, 44, MPI_COMM_WORLD);
+		for(i=0 ; i<3 ; i++)
+			starpu_data_unregister(handle[i]);
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}

+ 18 - 6
nmad/tests/helper.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,12 +15,24 @@
  */
 
 #include <errno.h>
-
-#define STARPU_TEST_SKIPPED 77
+#include <starpu_mpi.h>
+#include "../../tests/helper.h"
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
-    						int _disp_rank; MPI_Comm_rank(MPI_COMM_WORLD, &_disp_rank);       \
-                                                fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
-                                                fflush(ofile); }} while(0);
+			int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \
+			fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
+			fflush(ofile); }} while(0);
+
+/* Initialise MPI requesting the given thread-support level; aborts on
+ * failure and warns when the runtime grants less than requested.  The
+ * do/while(0) wrapper must NOT end with a ';' so that call sites can
+ * write MPI_INIT_THREAD(...); safely, including inside if/else. */
+#define MPI_INIT_THREAD(argc, argv, required) do {	    \
+		int thread_support;					\
+		if (MPI_Init_thread(argc, argv, required, &thread_support) != MPI_SUCCESS) \
+		{						\
+			fprintf(stderr,"MPI_Init_thread failed\n");	\
+			exit(1);					\
+		}							\
+		if (thread_support == MPI_THREAD_FUNNELED)		\
+			fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); \
+		if (thread_support < MPI_THREAD_FUNNELED)		\
+			fprintf(stderr,"Warning: MPI does not have thread support!\n"); } while(0)
 

+ 26 - 14
nmad/tests/insert_task.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,11 +27,23 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*x = (*x + *y) / 2;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &dumb_model
 };
 
 #define X     4
@@ -40,7 +52,7 @@ struct starpu_codelet mycodelet =
 /* Returns the MPI node number where data indexes index is */
 int my_distrib(int x, int y, int nb_nodes)
 {
-	return x % nb_nodes;
+	return (x + y) % nb_nodes;
 }
 
 
@@ -55,8 +67,8 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 	for(x = 0; x < X; x++)
 	{
@@ -86,7 +98,7 @@ int main(int argc, char **argv)
 			if (mpi_rank == rank)
 			{
 				//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
+				starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned));
 			}
 			else
 			{
@@ -101,14 +113,14 @@ int main(int argc, char **argv)
 		}
 	}
 
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0][1], STARPU_R, data_handles[0][0], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
-	ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1][1], STARPU_R, data_handles[0][1], 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0][1], STARPU_R, data_handles[0][0], 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 
 	FPRINTF(stderr, "Waiting ...\n");
 	starpu_task_wait_for_all();

+ 23 - 7
nmad/tests/insert_task_block.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,10 +44,26 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	}
 }
 
+#ifdef STARPU_SIMGRID
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+#endif
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 1,
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
 	.modes = {STARPU_RW}
 };
 
@@ -57,7 +73,7 @@ struct starpu_codelet mycodelet =
 /* Returns the MPI node number where data indexes index is */
 int my_distrib(int x, int y, int nb_nodes)
 {
-	return x % nb_nodes;
+	return (x + y) % nb_nodes;
 }
 
 
@@ -72,8 +88,8 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_initialize_extended");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 	for(x = 0; x < SIZE; x++)
 	{
@@ -103,7 +119,7 @@ int main(int argc, char **argv)
 			if (mpi_rank == rank)
 			{
 				//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-				starpu_matrix_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]),
+				starpu_matrix_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]),
 							    SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned));
 			}
 			else
@@ -124,10 +140,10 @@ int main(int argc, char **argv)
 	{
 		for (y = 0; y < BLOCKS; y++)
 		{
-			ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
+			ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
 						     STARPU_RW, data_handles[x][y],
 						     0);
-			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 		}
 	}
 

+ 128 - 23
nmad/tests/insert_task_compute.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013, 2014, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,21 +25,34 @@ void func_cpu(void *descr[], void *_args)
 
 	starpu_codelet_unpack_args(_args, &rank);
 
-	FPRINTF(stdout, "[%d] VALUES: %u %u\n", rank, *x, *y);
+	FPRINTF(stdout, "[%d] VALUES: %d %d\n", rank, *x, *y);
 	*x = *x * *y;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &dumb_model
 };
 
-int test(int rank, int node, int *before, int *after, int data_array)
+int test(int rank, int node, int *before, int *after, int task_insert, int data_array)
 {
 	int ok, ret, i, x[2];
 	starpu_data_handle_t data_handles[2];
+	struct starpu_data_descr descrs[2];
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
@@ -50,10 +63,11 @@ int test(int rank, int node, int *before, int *after, int data_array)
 	{
 		// If there is no cpu to execute the codelet, mpi will block trying to do the post-execution communication
 		ret = -ENODEV;
+		FPRINTF_MPI(stderr, "No CPU is available\n");
 		goto nodata;
 	}
 
-	FPRINTF_MPI(stderr, "Testing with data_array=%d and node=%d\n", data_array, node);
+	FPRINTF_MPI(stderr, "Testing with node=%d - task_insert=%d - data_array=%d - \n", node, task_insert, data_array);
 
 	for(i=0 ; i<2 ; i++)
 	{
@@ -69,36 +83,122 @@ int test(int rank, int node, int *before, int *after, int data_array)
 		else
 			starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int));
 		starpu_mpi_data_register(data_handles[i], i, i);
+		descrs[i].handle = data_handles[i];
 	}
+	descrs[0].mode = STARPU_RW;
+	descrs[1].mode = STARPU_R;
 
-	switch(data_array)
+	switch(task_insert)
 	{
 		case 0:
 		{
-			ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
-						     STARPU_RW, data_handles[0], STARPU_R, data_handles[1],
-						     STARPU_VALUE, &rank, sizeof(rank),
-						     STARPU_EXECUTE_ON_NODE, node, 0);
+			struct starpu_task *task = NULL;
+			switch(data_array)
+			{
+				case 0:
+				{
+					task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+								     STARPU_RW, data_handles[0], STARPU_R, data_handles[1],
+								     STARPU_VALUE, &rank, sizeof(rank),
+								     STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+				case 1:
+				{
+					task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+								     STARPU_DATA_ARRAY, data_handles, 2,
+								     STARPU_VALUE, &rank, sizeof(rank),
+								     STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+				case 2:
+				{
+					task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+								     STARPU_DATA_MODE_ARRAY, descrs, 2,
+								     STARPU_VALUE, &rank, sizeof(rank),
+								     STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+			}
+
+			if (task)
+			{
+				ret = starpu_task_submit(task);
+				if (ret == -ENODEV) goto enodev;
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+			}
+
+			switch(data_array)
+			{
+				case 0:
+				{
+					starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet,
+								   STARPU_RW, data_handles[0], STARPU_R, data_handles[1],
+								   STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+				case 1:
+				{
+					starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet,
+								   STARPU_DATA_ARRAY, data_handles, 2,
+								   STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+				case 2:
+				{
+					starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet,
+								   STARPU_DATA_MODE_ARRAY, descrs, 2,
+								   STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+			}
+
 			break;
 		}
 		case 1:
 		{
-			ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
-						     STARPU_DATA_ARRAY, data_handles, 2,
-						     STARPU_VALUE, &rank, sizeof(rank),
-						     STARPU_EXECUTE_ON_NODE, node, 0);
+			switch(data_array)
+			{
+				case 0:
+				{
+					ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+								     STARPU_RW, data_handles[0], STARPU_R, data_handles[1],
+								     STARPU_VALUE, &rank, sizeof(rank),
+								     STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+				case 1:
+				{
+					ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+								     STARPU_DATA_ARRAY, data_handles, 2,
+								     STARPU_VALUE, &rank, sizeof(rank),
+								     STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+				case 2:
+				{
+					ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+								     STARPU_DATA_MODE_ARRAY, descrs, 2,
+								     STARPU_VALUE, &rank, sizeof(rank),
+								     STARPU_EXECUTE_ON_NODE, node, 0);
+					break;
+				}
+			}
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 			break;
 		}
 	}
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+
 	starpu_task_wait_for_all();
 
+enodev:
 	for(i=0; i<2; i++)
 	{
 		starpu_data_unregister(data_handles[i]);
 	}
 
 	ok = 1;
+#ifndef STARPU_SIMGRID
 	if (rank <= 1)
 	{
 		for(i=0; i<2; i++)
@@ -108,8 +208,10 @@ int test(int rank, int node, int *before, int *after, int data_array)
 		}
 		FPRINTF_MPI(stderr, "result is %s\n", ok?"CORRECT":"NOT CORRECT");
 	}
+#endif
 
 nodata:
+	MPI_Barrier(MPI_COMM_WORLD);
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 
@@ -119,24 +221,27 @@ nodata:
 int main(int argc, char **argv)
 {
 	int rank;
-	int ret;
+	int global_ret, ret;
 	int before[4] = {10, 20, 11, 22};
 	int after_node[2][4] = {{220, 20, 11, 22}, {220, 20, 11, 22}};
 	int node, insert_task, data_array;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 
+	global_ret = 0;
 	for(node=0 ; node<=1 ; node++)
 	{
-		for(data_array=0 ; data_array<=1 ; data_array++)
+		for(insert_task=0 ; insert_task<=1 ; insert_task++)
 		{
-			ret = test(rank, node, before, after_node[node], data_array);
-			if (ret == -ENODEV || ret) goto end;
+			for(data_array=0 ; data_array<=2 ; data_array++)
+			{
+				ret = test(rank, node, before, after_node[node], insert_task, data_array);
+				if (ret == -ENODEV || ret) global_ret = ret;
+			}
 		}
 	}
 
-end:
 	MPI_Finalize();
-	return ret==-ENODEV?STARPU_TEST_SKIPPED:ret;
+	return global_ret==-ENODEV?STARPU_TEST_SKIPPED:global_ret;
 }

+ 38 - 16
nmad/tests/insert_task_count.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	32
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -34,6 +36,17 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -41,7 +54,8 @@ static struct starpu_codelet increment_cl =
 #endif
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 int main(int argc, char **argv)
@@ -50,24 +64,30 @@ int main(int argc, char **argv)
 	int token = 0;
 	starpu_data_handle_t token_handle;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
-	if (size < 2)
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	if (rank == 1)
 		starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));
 	else
@@ -82,12 +102,12 @@ int main(int argc, char **argv)
 	for (loop = 0; loop < nloops; loop++)
 	{
 		if (loop % 2)
-			starpu_mpi_insert_task(MPI_COMM_WORLD, &increment_cl,
+			starpu_mpi_task_insert(MPI_COMM_WORLD, &increment_cl,
 					       STARPU_RW|STARPU_SSEND, token_handle,
 					       STARPU_EXECUTE_ON_NODE, 0,
 					       0);
 		else
-			starpu_mpi_insert_task(MPI_COMM_WORLD, &increment_cl,
+			starpu_mpi_task_insert(MPI_COMM_WORLD, &increment_cl,
 					       STARPU_RW, token_handle,
 					       STARPU_EXECUTE_ON_NODE, 0,
 					       0);
@@ -102,6 +122,7 @@ int main(int argc, char **argv)
 
 	MPI_Finalize();
 
+#ifndef STARPU_SIMGRID
 	if (rank == 1)
 	{
 		STARPU_ASSERT_MSG(token == nloops, "token==%d != expected_value==%d\n", token, nloops);
@@ -111,6 +132,7 @@ int main(int argc, char **argv)
 		STARPU_ASSERT_MSG(token == 0, "token==%d != expected_value==0\n", token);
 
 	}
+#endif
 
 	return 0;
 }

+ 178 - 0
nmad/tests/insert_task_dyn_handles.c

@@ -0,0 +1,178 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <config.h>
+#include <starpu_mpi.h>
+#include <starpu_config.h>
+#include "helper.h"
+
+#define FFACTOR 42
+
+void func_cpu(void *descr[], void *_args)
+{
+	int num = starpu_task_get_current()->nbuffers;
+	int *factor = (int *)STARPU_VARIABLE_GET_PTR(descr[num-1]);
+	int i;
+
+	for (i = 0; i < num-1; i++)
+	{
+		int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]);
+
+		*x = *x + 1**factor;
+	}
+}
+
+#ifdef STARPU_SIMGRID
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+#endif
+
+struct starpu_codelet codelet =
+{
+	.cpu_funcs = {func_cpu},
+	.cpu_funcs_name = {"func_cpu"},
+	.nbuffers = STARPU_VARIABLE_NBUFFERS,
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
+};
+
+int main(int argc, char **argv)
+{
+        int *x;
+        int i, ret, loop;
+	int rank;
+	int factor=0;
+
+#ifdef STARPU_QUICK_CHECK
+	int nloops = 4;
+#else
+	int nloops = 16;
+#endif
+        starpu_data_handle_t *data_handles;
+        starpu_data_handle_t factor_handle;
+	struct starpu_data_descr *descrs;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	x = calloc(1, (STARPU_NMAXBUFS+15) * sizeof(int));
+	data_handles = malloc((STARPU_NMAXBUFS+15) * sizeof(starpu_data_handle_t));
+	descrs = malloc((STARPU_NMAXBUFS+15) * sizeof(struct starpu_data_descr));
+	for(i=0 ; i<STARPU_NMAXBUFS+15 ; i++)
+	{
+		starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(x[i]));
+		starpu_mpi_data_register(data_handles[i], i, 0);
+		descrs[i].handle = data_handles[i];
+		descrs[i].mode = STARPU_RW;
+	}
+	if (rank == 1) factor=FFACTOR;
+	starpu_variable_data_register(&factor_handle, STARPU_MAIN_RAM, (uintptr_t)&factor, sizeof(factor));
+	starpu_mpi_data_register(factor_handle, FFACTOR, 1);
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet,
+					     STARPU_DATA_MODE_ARRAY, descrs, STARPU_NMAXBUFS-1,
+					     STARPU_R, factor_handle,
+					     0);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet,
+					     STARPU_DATA_MODE_ARRAY, descrs, STARPU_NMAXBUFS+15,
+					     STARPU_R, factor_handle,
+					     0);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
+	}
+
+enodev:
+        for(i=0 ; i<STARPU_NMAXBUFS+15 ; i++)
+	{
+                starpu_data_unregister(data_handles[i]);
+        }
+	starpu_data_unregister(factor_handle);
+
+	free(data_handles);
+	free(descrs);
+
+	if (ret == -ENODEV)
+	{
+		fprintf(stderr, "WARNING: No one can execute this task\n");
+		/* yes, we do not perform the computation but we did detect that no one
+		 * could perform the kernel, so this is not an error from StarPU */
+		free(x);
+		ret = STARPU_TEST_SKIPPED;
+	}
+	else if (rank == 0)
+	{
+		for(i=0 ; i<STARPU_NMAXBUFS-1 ; i++)
+		{
+			if (x[i] != nloops * FFACTOR * 2)
+			{
+				FPRINTF_MPI(stderr, "[end loop] value[%d] = %d != Expected value %d\n", i, x[i], nloops*2);
+				ret = 1;
+			}
+		}
+		for(i=STARPU_NMAXBUFS-1 ; i<STARPU_NMAXBUFS+15 ; i++)
+		{
+			if (x[i] != nloops * FFACTOR)
+			{
+				FPRINTF_MPI(stderr, "[end loop] value[%d] = %d != Expected value %d\n", i, x[i], nloops);
+				ret = 1;
+			}
+		}
+		if (ret == 0)
+		{
+			FPRINTF_MPI(stderr, "[end of loop] all values are correct\n");
+		}
+		free(x);
+	}
+	else
+	{
+		FPRINTF_MPI(stderr, "[end of loop] no computation on this node\n");
+		ret = 0;
+		free(x);
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+	return ret;
+}

+ 122 - 0
nmad/tests/insert_task_node_choice.c

@@ -0,0 +1,122 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <math.h>
+#include "helper.h"
+
+void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
+{
+	int node;
+	int rank;
+
+	starpu_codelet_unpack_args(_args, &node);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", node, rank);
+
+	assert(node == rank);
+}
+
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW},
+	.model = &dumb_model,
+	.name = "insert_task_node_choice"
+};
+
+int main(int argc, char **argv)
+{
+	int ret, rank, size, err, node;
+	int x0=32;
+	long long x1=23;
+	starpu_data_handle_t data_handlesx0;
+	starpu_data_handle_t data_handlesx1;
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (rank != 0 && rank != 1) goto end;
+
+	if (rank == 0)
+	{
+		starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0));
+		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1));
+	}
+	else
+	{
+		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0));
+		starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1));
+	}
+	starpu_mpi_data_register(data_handlesx0, 100, 0);
+	starpu_mpi_data_register(data_handlesx1, 200, 1);
+
+	node = 0;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_EXECUTE_ON_NODE, 0,
+				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
+				     0);
+	assert(err == 0);
+
+	node = starpu_data_get_rank(data_handlesx1);
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_EXECUTE_ON_DATA, data_handlesx1,
+				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
+				     0);
+	assert(err == 0);
+
+	// Node 1 has a long long data which has a bigger size than a
+	// int, so it is going to be selected by the node selection
+	// policy to execute the codelet
+	err = starpu_mpi_node_selection_set_current_policy(STARPU_MPI_NODE_SELECTION_MOST_R_DATA);
+	assert(err == 0);
+	node = 1;
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_VALUE, &node, sizeof(node),
+				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
+				     0);
+	assert(err == 0);
+
+	FPRINTF_MPI(stderr, "Waiting ...\n");
+	starpu_task_wait_for_all();
+	starpu_data_unregister(data_handlesx0);
+	starpu_data_unregister(data_handlesx1);
+
+end:
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return 0;
+}
+

+ 55 - 27
nmad/tests/insert_task_owner.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -24,51 +24,68 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	int rank;
 
 	starpu_codelet_unpack_args(_args, &node);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	FPRINTF(stderr, "Expected node: %d - Actual node: %d\n", node, rank);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", node, rank);
 
 	assert(node == rank);
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet_r_w =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_W}
+	.modes = {STARPU_R, STARPU_W},
+	.model = &dumb_model
 };
 
 struct starpu_codelet mycodelet_rw_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &dumb_model
 };
 
 struct starpu_codelet mycodelet_rw_rw =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_RW}
+	.modes = {STARPU_RW, STARPU_RW},
+	.model = &dumb_model
 };
 
 struct starpu_codelet mycodelet_w_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_W, STARPU_R}
+	.modes = {STARPU_W, STARPU_R},
+	.model = &dumb_model
 };
 
 struct starpu_codelet mycodelet_r_r =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_R}
+	.modes = {STARPU_R, STARPU_R},
+	.model = &dumb_model
 };
 
 int main(int argc, char **argv)
 {
 	int ret, rank, size, err, node;
-	int x0=32, x1=23;
+	long x0=32;
+	int x1=23;
 	starpu_data_handle_t data_handlesx0;
 	starpu_data_handle_t data_handlesx1;
 
@@ -76,60 +93,71 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	if (rank != 0 && rank != 1) goto end;
 
 	if (rank == 0)
 	{
-		starpu_variable_data_register(&data_handlesx0, 0, (uintptr_t)&x0, sizeof(x0));
-		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(int));
+		starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0));
+		starpu_mpi_data_register(data_handlesx0, 0, rank);
+		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1));
+		starpu_mpi_data_register(data_handlesx1, 1, 1);
 	}
 	else if (rank == 1)
 	{
-		starpu_variable_data_register(&data_handlesx1, 0, (uintptr_t)&x1, sizeof(x1));
-		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(int));
+		starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1));
+		starpu_mpi_data_register(data_handlesx1, 1, rank);
+		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0));
+		starpu_mpi_data_register(data_handlesx0, 0, 0);
 	}
-	starpu_mpi_data_register(data_handlesx0, 0, 0);
-	starpu_mpi_data_register(data_handlesx1, 1, 1);
 
 	node = starpu_mpi_data_get_rank(data_handlesx1);
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r_w,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
 				     0);
 	assert(err == 0);
 
 	node = starpu_mpi_data_get_rank(data_handlesx0);
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_r,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1,
 				     0);
 	assert(err == 0);
 
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_rw,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
 				     0);
-	assert(err == -EINVAL);
+	assert(err == 0);
 
 	node = 1;
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_rw,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
 				     0);
 	assert(err == 0);
 
 	node = 0;
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_rw_rw,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
 				     0);
 	assert(err == 0);
 
 	node = 0;
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r_r,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
 				     0);
@@ -139,7 +167,7 @@ int main(int argc, char **argv)
 	   going to overwrite the node even though the data model clearly specifies
 	   which node is going to execute the codelet */
 	node = 0;
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_r_w,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
 				     0);
@@ -149,13 +177,13 @@ int main(int argc, char **argv)
 	   going to overwrite the node even though the data model clearly specifies
 	   which node is going to execute the codelet */
 	node = 0;
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet_w_r,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r,
 				     STARPU_VALUE, &node, sizeof(node),
 				     STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
 				     0);
 	assert(err == 0);
 
-	fprintf(stderr, "Waiting ...\n");
+	FPRINTF_MPI(stderr, "Waiting ...\n");
 	starpu_task_wait_for_all();
 	starpu_data_unregister(data_handlesx0);
 	starpu_data_unregister(data_handlesx1);

+ 49 - 25
nmad/tests/insert_task_owner2.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2012, 2015                    Université Bordeaux
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,50 +26,68 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	int *x2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]);
 	int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[3]);
 
-	//FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
-	//*x2 = 45;
-	//*y = 144;
-
-	FPRINTF(stderr, "-------> CODELET VALUES: %d %d (x2) %d\n", *x0, *x1, *y);
+	FPRINTF(stderr, "-------> CODELET VALUES: %d %d nan %d\n", *x0, *x1, *y);
+	*x2 = *y;
 	*y = (*x0 + *x1) * 100;
 	*x1 = 12;
-	*x2 = 24;
-	*x0 = 36;
 	FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y);
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 4,
-	.modes = {STARPU_R, STARPU_RW, STARPU_W, STARPU_W}
+	.modes = {STARPU_R, STARPU_RW, STARPU_W, STARPU_W},
+	.model = &dumb_model
 };
 
 int main(int argc, char **argv)
 {
 	int rank, size, err;
 	int x[3], y=0;
-	int i, ret;
+	int oldx[3];
+	int i, ret=0;
 	starpu_data_handle_t data_handles[4];
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	if (rank == 0)
 	{
 		for(i=0 ; i<3 ; i++)
 		{
 			x[i] = 10*(i+1);
-			starpu_variable_data_register(&data_handles[i], 0, (uintptr_t)&x[i], sizeof(x[i]));
+			oldx[i] = 10*(i+1);
+			starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(x[i]));
 		}
 		y = -1;
 		starpu_variable_data_register(&data_handles[3], -1, (uintptr_t)NULL, sizeof(int));
 	}
-	else if (rank == 1)
+	else
 	{
 		for(i=0 ; i<3 ; i++)
 		{
@@ -76,27 +95,22 @@ int main(int argc, char **argv)
 			starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int));
 		}
 		y=200;
-		starpu_variable_data_register(&data_handles[3], 0, (uintptr_t)&y, sizeof(int));
+		starpu_variable_data_register(&data_handles[3], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(int));
 	}
-	else
-	{
-		for(i=0 ; i<4 ; i++)
-			starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int));
-	}
-	FPRINTF(stderr, "[%d][init] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
-
 	for(i=0 ; i<3 ; i++)
 	{
 		starpu_mpi_data_register(data_handles[i], i, 0);
 	}
 	starpu_mpi_data_register(data_handles[3], 3, 1);
 
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
+	FPRINTF(stderr, "[%d][init] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
+
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
 				     STARPU_R, data_handles[0], STARPU_RW, data_handles[1],
 				     STARPU_W, data_handles[2],
 				     STARPU_W, data_handles[3],
 				     STARPU_EXECUTE_ON_NODE, 1, 0);
-	STARPU_CHECK_RETURN_VALUE(err, "starpu_mpi_insert_task");
+	STARPU_CHECK_RETURN_VALUE(err, "starpu_mpi_task_insert");
 	starpu_task_wait_for_all();
 
 	int *values = malloc(4 * sizeof(int));
@@ -114,6 +128,16 @@ int main(int argc, char **argv)
 	if (rank == 0)
 	{
 		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d %d %d\n", rank, values[0], values[1], values[2], values[3]);
+		if (values[0] != oldx[0] || values[1] != 12 || values[2] != 200 || values[3] != ((oldx[0] + oldx[1]) * 100))
+		{
+			FPRINTF(stderr, "[%d][error] values[0] %d != x[0] %d && values[1] %d != 12 && values[2] %d != 200 && values[3] %d != ((x[0] %d + x[1] %d) * 100)\n",
+				rank, values[0], oldx[0], values[1], values[2], values[3], oldx[0], oldx[1]);
+			ret = 1;
+		}
+		else
+		{
+			FPRINTF(stderr, "[%d] correct computation\n", rank);
+		}
 	}
         FPRINTF(stderr, "[%d][end] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y);
 
@@ -121,6 +145,6 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 
-	return 0;
+	return (rank == 0) ? ret : 0;
 }
 

+ 30 - 8
nmad/tests/insert_task_owner_data.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,11 +27,23 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*x1 *= *x1;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_RW}
+	.modes = {STARPU_RW, STARPU_RW},
+	.model = &dumb_model
 };
 
 int main(int argc, char **argv)
@@ -46,20 +58,29 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	if (rank == 0)
 	{
 		x[0] = 11;
-		starpu_variable_data_register(&data_handles[0], 0, (uintptr_t)&x[0], sizeof(x[0]));
+		starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x[0], sizeof(x[0]));
 		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1]));
 	}
 	else if (rank == 1)
 	{
 		x[1] = 12;
 		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0]));
-		starpu_variable_data_register(&data_handles[1], 0, (uintptr_t)&x[1], sizeof(x[1]));
+		starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&x[1], sizeof(x[1]));
 	}
 	else
 	{
@@ -70,7 +91,7 @@ int main(int argc, char **argv)
 	starpu_mpi_data_register(data_handles[0], 0, 0);
 	starpu_mpi_data_register(data_handles[1], 1, 1);
 
-	err = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
+	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
 				     STARPU_RW, data_handles[0], STARPU_RW, data_handles[1],
 				     STARPU_EXECUTE_ON_DATA, data_handles[1],
 				     0);
@@ -84,7 +105,8 @@ int main(int argc, char **argv)
 		{
 			starpu_data_acquire(data_handles[i], STARPU_R);
 			values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
-			starpu_data_release(data_handles[i]);		}
+			starpu_data_release(data_handles[i]);
+		}
 	}
 	ret = 0;
 	if (rank == 0)

+ 40 - 15
nmad/tests/insert_task_recv_cache.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,11 +32,23 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &dumb_model
 };
 
 #define N     1000
@@ -47,11 +59,11 @@ int my_distrib(int x)
 	return x;
 }
 
-void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
+void test_cache(int rank, char *enabled, size_t *comm_amount)
 {
 	int i;
 	int ret;
-	unsigned v[2][N];
+	unsigned *v[2];
 	starpu_data_handle_t data_handles[2];
 
 	FPRINTF_MPI(stderr, "Testing with STARPU_MPI_CACHE=%s\n", enabled);
@@ -64,10 +76,20 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 
 	for(i = 0; i < 2; i++)
 	{
+		int j;
+		v[i] = calloc(N, sizeof(unsigned));
+		for(j=0 ; j<N ; j++)
+		{
+			v[i][j] = 12;
+		}
+	}
+
+	for(i = 0; i < 2; i++)
+	{
 		int mpi_rank = my_distrib(i);
 		if (mpi_rank == rank)
 		{
-			starpu_vector_data_register(&data_handles[i], 0, (uintptr_t)&(v[i]), N, sizeof(unsigned));
+			starpu_vector_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)v[i], N, sizeof(unsigned));
 		}
 		else
 		{
@@ -77,12 +99,12 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 		starpu_mpi_data_register(data_handles[i], i, mpi_rank);
 	}
 
-	// We call starpu_mpi_insert_task twice, when the cache is enabled, the 1st time puts the
+	// We call starpu_mpi_task_insert twice, when the cache is enabled, the 1st time puts the
 	// data in the cache, the 2nd time allows to check the data is not sent again
 	for(i = 0; i < 2; i++)
 	{
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	}
 
 	// Flush the cache for data_handles[1] which has been sent from node1 to node0
@@ -91,8 +113,8 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 	// Check again
 	for(i = 0; i < 2; i++)
 	{
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	}
 
 	starpu_task_wait_for_all();
@@ -100,6 +122,7 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 	for(i = 0; i < 2; i++)
 	{
 		starpu_data_unregister(data_handles[i]);
+		free(v[i]);
 	}
 
 	starpu_mpi_comm_amounts_retrieve(comm_amount);
@@ -114,9 +137,9 @@ int main(int argc, char **argv)
 	size_t *comm_amount_with_cache;
 	size_t *comm_amount_without_cache;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 	setenv("STARPU_COMM_STATS", "1", 1);
 	setenv("STARPU_MPI_CACHE_STATS", "1", 1);
@@ -124,8 +147,8 @@ int main(int argc, char **argv)
 	comm_amount_with_cache = malloc(size * sizeof(size_t));
 	comm_amount_without_cache = malloc(size * sizeof(size_t));
 
-	test_cache(rank, size, "0", comm_amount_with_cache);
-	test_cache(rank, size, "1", comm_amount_without_cache);
+	test_cache(rank, "0", comm_amount_with_cache);
+	test_cache(rank, "1", comm_amount_without_cache);
 
 	if (rank == 1)
 	{
@@ -133,7 +156,9 @@ int main(int argc, char **argv)
 		FPRINTF_MPI(stderr, "Communication cache mechanism is %sworking (with cache: %ld) (without cache: %ld)\n", result?"":"NOT ", comm_amount_with_cache[0], comm_amount_without_cache[0]);
 	}
 	else
+	{
 		result = 1;
+	}
 
 	free(comm_amount_without_cache);
 	free(comm_amount_with_cache);

+ 41 - 16
nmad/tests/insert_task_sent_cache.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,11 +32,23 @@ void func_cpu(STARPU_ATTRIBUTE_UNUSED void *descr[], STARPU_ATTRIBUTE_UNUSED voi
 {
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 2,
-	.modes = {STARPU_RW, STARPU_R}
+	.modes = {STARPU_RW, STARPU_R},
+	.model = &dumb_model
 };
 
 #define N     1000
@@ -47,11 +59,11 @@ int my_distrib(int x)
 	return x;
 }
 
-void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
+void test_cache(int rank, char *enabled, size_t *comm_amount)
 {
 	int i;
 	int ret;
-	unsigned v[2][N];
+	unsigned *v[2];
 	starpu_data_handle_t data_handles[2];
 
 	setenv("STARPU_MPI_CACHE", enabled, 1);
@@ -63,11 +75,21 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 
 	for(i = 0; i < 2; i++)
 	{
+		int j;
+		v[i] = malloc(N * sizeof(unsigned));
+		for(j=0 ; j<N ; j++)
+		{
+			v[i][j] = 12;
+		}
+	}
+
+	for(i = 0; i < 2; i++)
+	{
 		int mpi_rank = my_distrib(i);
 		if (mpi_rank == rank)
 		{
 			//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y);
-			starpu_vector_data_register(&data_handles[i], 0, (uintptr_t)&(v[i]), N, sizeof(unsigned));
+			starpu_vector_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)v[i], N, sizeof(unsigned));
 		}
 		else
 		{
@@ -80,14 +102,14 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 
 	for(i = 0; i < 5; i++)
 	{
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	}
 
 	for(i = 0; i < 5; i++)
 	{
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1], STARPU_R, data_handles[0], 0);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1], STARPU_R, data_handles[0], 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	}
 
 	for(i = 0; i < 5; i++)
@@ -97,8 +119,8 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 
 	for(i = 0; i < 5; i++)
 	{
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1], STARPU_R, data_handles[0], 0);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1], STARPU_R, data_handles[0], 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	}
 
 	starpu_task_wait_for_all();
@@ -106,6 +128,7 @@ void test_cache(int rank, int size, char *enabled, size_t *comm_amount)
 	for(i = 0; i < 2; i++)
 	{
 		starpu_data_unregister(data_handles[i]);
+		free(v[i]);
 	}
 
 	starpu_mpi_comm_amounts_retrieve(comm_amount);
@@ -120,17 +143,17 @@ int main(int argc, char **argv)
 	size_t *comm_amount_with_cache;
 	size_t *comm_amount_without_cache;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 	setenv("STARPU_COMM_STATS", "1", 1);
 
 	comm_amount_with_cache = malloc(size * sizeof(size_t));
 	comm_amount_without_cache = malloc(size * sizeof(size_t));
 
-	test_cache(rank, size, "0", comm_amount_with_cache);
-	test_cache(rank, size, "1", comm_amount_without_cache);
+	test_cache(rank, "0", comm_amount_with_cache);
+	test_cache(rank, "1", comm_amount_without_cache);
 
 	if (rank == 0 || rank == 1)
 	{
@@ -139,7 +162,9 @@ int main(int argc, char **argv)
 		FPRINTF_MPI(stderr, "Communication cache mechanism is %sworking\n", result?"":"NOT ");
 	}
 	else
+	{
 		result = 1;
+	}
 
 	free(comm_amount_without_cache);
 	free(comm_amount_with_cache);

+ 33 - 10
nmad/tests/matrix.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,11 +28,23 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*Y = *Y + *A * *X;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW}
+	.modes = {STARPU_R, STARPU_R, STARPU_RW},
+	.model = &dumb_model
 };
 
 #define N 4
@@ -52,7 +64,16 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	for(n = 0; n < N; n++)
 	{
@@ -77,29 +98,29 @@ int main(int argc, char **argv)
 	for(n = 0; n < N; n++)
 	{
 		if (rank == n%2)
-			starpu_variable_data_register(&data_A[n], 0, (uintptr_t)&A[n], sizeof(unsigned));
+			starpu_variable_data_register(&data_A[n], STARPU_MAIN_RAM, (uintptr_t)&A[n], sizeof(unsigned));
 		else
 			starpu_variable_data_register(&data_A[n], -1, (uintptr_t)NULL, sizeof(unsigned));
-		starpu_mpi_data_register_comm(data_A[n], n+100, n%2, MPI_COMM_WORLD);
+		starpu_mpi_data_register(data_A[n], n+100, n%2);
 		FPRINTF_MPI(stderr, "Registering A[%d] to %p with tag %d and node %d\n", n, data_A[n], n+100, n%2);
 
 		if (rank == n%2)
-			starpu_variable_data_register(&data_X[n], 0, (uintptr_t)&X[n], sizeof(unsigned));
+			starpu_variable_data_register(&data_X[n], STARPU_MAIN_RAM, (uintptr_t)&X[n], sizeof(unsigned));
 		else
 			starpu_variable_data_register(&data_X[n], -1, (uintptr_t)NULL, sizeof(unsigned));
-		starpu_mpi_data_register_comm(data_X[n], n+200, n%2, MPI_COMM_WORLD);
+		starpu_mpi_data_register(data_X[n], n+200, n%2);
 		FPRINTF_MPI(stderr, "Registering X[%d] to %p with tag %d and node %d\n", n, data_X[n], n+200, n%2);
 	}
 	if (rank == 0)
-		starpu_variable_data_register(&data_Y, 0, (uintptr_t)&Y, sizeof(unsigned));
+		starpu_variable_data_register(&data_Y, STARPU_MAIN_RAM, (uintptr_t)&Y, sizeof(unsigned));
 	else
 		starpu_variable_data_register(&data_Y, -1, (uintptr_t)NULL, sizeof(unsigned));
-	starpu_mpi_data_register_comm(data_Y, 10, 0, MPI_COMM_WORLD);
+	starpu_mpi_data_register(data_Y, 10, 0);
 	FPRINTF_MPI(stderr, "Registering Y to %p with tag %d and node %d\n", data_Y, 10, 0);
 
 	for(n = 0; n < N; n++)
 	{
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
 					     STARPU_R, data_A[n],
 					     STARPU_R, data_X[n],
 					     STARPU_RW, data_Y,
@@ -123,10 +144,12 @@ int main(int argc, char **argv)
 
 	FPRINTF(stdout, "[%d] Y=%u\n", rank, Y);
 
+#ifndef STARPU_SIMGRID
 	if (rank == 0)
 	{
 		STARPU_ASSERT_MSG(Y==300, "Error when calculating Y=%u\n", Y);
 	}
+#endif
 
 	return 0;
 }

+ 37 - 18
nmad/tests/matrix2.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,11 +28,23 @@ void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	*Y = *Y + *A * *X;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 struct starpu_codelet mycodelet =
 {
 	.cpu_funcs = {func_cpu},
 	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW}
+	.modes = {STARPU_R, STARPU_R, STARPU_RW},
+	.model = &dumb_model
 };
 
 #define N 4
@@ -47,24 +59,30 @@ int main(int argc, char **argv)
 	starpu_data_handle_t data_A[N];
 	starpu_data_handle_t data_X[N];
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
-	if (size < 3)
+	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 3 processes.\n");
-
+		{
+			if (size < 3)
+				FPRINTF(stderr, "We need at least 3 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	for(n = 0; n < N; n++)
 	{
 		A[n] = (n+1)*10;
@@ -87,7 +105,7 @@ int main(int argc, char **argv)
 	for(n = 0; n < N; n++)
 	{
 		if (rank == n%2)
-			starpu_variable_data_register(&data_A[n], 0, (uintptr_t)&A[n], sizeof(unsigned));
+			starpu_variable_data_register(&data_A[n], STARPU_MAIN_RAM, (uintptr_t)&A[n], sizeof(unsigned));
 		else
 			starpu_variable_data_register(&data_A[n], -1, (uintptr_t)NULL, sizeof(unsigned));
 		starpu_mpi_data_register(data_A[n], n+100, n%2);
@@ -97,7 +115,7 @@ int main(int argc, char **argv)
 	for(n = 0; n < N; n++)
 	{
 		if (rank == 2)
-			starpu_variable_data_register(&data_X[n], 0, (uintptr_t)&X[n], sizeof(unsigned));
+			starpu_variable_data_register(&data_X[n], STARPU_MAIN_RAM, (uintptr_t)&X[n], sizeof(unsigned));
 		else
 			starpu_variable_data_register(&data_X[n], -1, (uintptr_t)NULL, sizeof(unsigned));
 		starpu_mpi_data_register(data_X[n], n+200, 2);
@@ -106,14 +124,13 @@ int main(int argc, char **argv)
 
 	for(n = 0; n < N-1; n++)
 	{
-	     fprintf(stderr, "loop %d\n", n);
-		ret = starpu_mpi_insert_task(MPI_COMM_WORLD, &mycodelet,
+		ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
 					     STARPU_R, data_A[n],
 					     STARPU_R, data_X[n],
 					     STARPU_RW, data_X[N-1],
 					     STARPU_EXECUTE_ON_DATA, data_A[n],
 					     0);
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_insert_task");
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert");
 	}
 
 	FPRINTF(stderr, "Waiting ...\n");
@@ -130,10 +147,12 @@ int main(int argc, char **argv)
 
 	FPRINTF(stdout, "[%d] X[%d]=%u\n", rank, N-1, X[N-1]);
 
+#ifndef STARPU_SIMGRID
 	if (rank == 2)
 	{
 		STARPU_ASSERT_MSG(X[N-1]==144, "Error when calculating X[N-1]=%u\n", X[N-1]);
 	}
+#endif
 
 	MPI_Finalize();
 	return 0;

+ 16 - 12
nmad/tests/mpi_detached_tag.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	16
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -32,27 +34,29 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 129 - 0
nmad/tests/mpi_earlyrecv.c

@@ -0,0 +1,129 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+#include <unistd.h>
+
+int main(int argc, char **argv)
+{
+	int ret, rank, size, i;
+	starpu_data_handle_t tab_handle[4];
+	int values[4];
+	starpu_mpi_req request[2] = {NULL, NULL};
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size%2 != 0)
+	{
+		FPRINTF_MPI(stderr, "We need a even number of processes.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	for(i=0 ; i<4 ; i++)
+	{
+		if (i<3 || rank%2)
+		{
+			// all data are registered on all nodes, but the 4th data which is not registered on the receiving node
+			values[i] = (rank+1) * (i+1);
+			starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&values[i], sizeof(values[i]));
+			starpu_mpi_data_register(tab_handle[i], i, rank);
+		}
+	}
+
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+
+	FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank);
+
+	if (rank%2)
+	{
+		FPRINTF_MPI(stderr, "Sending values %d and %d to node %d\n", values[0], values[3], other_rank);
+		// this data will be received as an early registered data
+		starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
+		// this data will be received as an early UNregistered data
+		starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD);
+
+		starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD);
+		starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+	}
+	else
+	{
+		starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD);
+
+		// we register the data
+		starpu_variable_data_register(&tab_handle[3], -1, (uintptr_t)NULL, sizeof(int));
+		starpu_mpi_data_register(tab_handle[3], 3, rank);
+		starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD);
+		starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
+	}
+
+	int finished=0;
+	while (!finished)
+	{
+		for(i=0 ; i<2 ; i++)
+		{
+			if (request[i])
+			{
+				int flag;
+				MPI_Status status;
+				starpu_mpi_test(&request[i], &flag, &status);
+				if (flag)
+					FPRINTF_MPI(stderr, "request[%d] = %d %p\n", i, flag, request[i]);
+			}
+		}
+		finished = request[0] == NULL && request[1] == NULL;
+	}
+
+	if (rank%2 == 0)
+	{
+		void *ptr0;
+		void *ptr3;
+
+		starpu_data_acquire(tab_handle[0], STARPU_RW);
+		ptr0 = starpu_data_get_local_ptr(tab_handle[0]);
+		starpu_data_release(tab_handle[0]);
+
+		starpu_data_acquire(tab_handle[3], STARPU_RW);
+		ptr3 = starpu_data_get_local_ptr(tab_handle[3]);
+		starpu_data_release(tab_handle[3]);
+
+		ret = (*((int *)ptr0) == (other_rank+1)*1) && (*((int *)ptr3) == (other_rank+1)*4);
+		ret = !ret;
+		FPRINTF_MPI(stderr, "[%s] Received values %d and %d from node %d\n", ret?"FAILURE":"SUCCESS", *((int *)ptr0), *((int *)ptr3), other_rank);
+	}
+
+	for(i=0 ; i<4 ; i++)
+		starpu_data_unregister(tab_handle[i]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	MPI_Finalize();
+
+	return ret;
+}

+ 253 - 0
nmad/tests/mpi_earlyrecv2.c

@@ -0,0 +1,253 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+#include <unistd.h>
+#include <interface/complex_interface.h>
+
+#define NB 10
+
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+
+/* Completion callback for detached receives: increments the shared
+ * counter under 'mutex' and signals 'cond' so the main thread waiting
+ * in exchange() can re-check how many messages have arrived. */
+void callback(void *arg)
+{
+	unsigned *received = arg;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	*received = *received + 1;
+	FPRINTF_MPI(stderr, "Requests %u received\n", *received);
+	STARPU_PTHREAD_COND_SIGNAL(&cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/* Per-handle verification callback run on the receiver after a receive completes. */
+typedef void (*check_func)(starpu_data_handle_t handle, int i, int rank, int *error);
+
+/* Pairwise exchange between 'rank' and its even/odd neighbour.
+ * Odd ranks send the NB handles, transmitting tags 0 and NB-1 first so
+ * that the receiver's deliberately delayed posts exercise StarPU-MPI's
+ * early-receive path.  Even ranks receive either with detached irecvs
+ * (completion counted via callback()) or with explicit requests plus
+ * starpu_mpi_wait(); in the non-detached case each handle is verified
+ * with 'func'.  Returns 0 on success, non-zero if a check failed.
+ * NOTE(review): received values are only verified in the non-detached
+ * path; the detached path only waits for completion. */
+int exchange(int rank, starpu_data_handle_t *handles, check_func func, int detached)
+{
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+	int i;
+
+	if (rank%2)
+	{
+		/* Send the boundary tags first: the peer only posts the recv
+		 * for tag 0 before sleeping, so tag NB-1 arrives unexpected. */
+		starpu_mpi_send(handles[0], other_rank, 0, MPI_COMM_WORLD);
+		starpu_mpi_send(handles[NB-1], other_rank, NB-1, MPI_COMM_WORLD);
+		for(i=1 ; i<NB-1 ; i++)
+		{
+			starpu_mpi_send(handles[i], other_rank, i, MPI_COMM_WORLD);
+		}
+		return 0;
+	}
+	else
+	{
+		int ret=0;
+		starpu_mpi_req req[NB];
+		int received = 0;
+
+		if (detached)
+		{
+			starpu_mpi_irecv_detached(handles[0], other_rank, 0, MPI_COMM_WORLD, callback, &received);
+		}
+		else
+		{
+			memset(req, 0, NB*sizeof(starpu_mpi_req));
+			starpu_mpi_irecv(handles[0], &req[0], other_rank, 0, MPI_COMM_WORLD);
+			STARPU_ASSERT(req[0] != NULL);
+		}
+
+		// We sleep to make sure that the data for the tag 9 will be received before the recv is posted
+		usleep(2000000);
+		for(i=1 ; i<NB ; i++)
+		{
+			if (detached)
+			{
+				starpu_mpi_irecv_detached(handles[i], other_rank, i, MPI_COMM_WORLD, callback, &received);
+			}
+			else
+			{
+				starpu_mpi_irecv(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
+				STARPU_ASSERT(req[i] != NULL);
+			}
+		}
+
+		if (detached)
+		{
+			/* Wait until callback() has been fired for all NB messages. */
+			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+			while (received != NB)
+			{
+			     FPRINTF_MPI(stderr, "Received %d messages\n", received);
+			     STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+			}
+			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		}
+		else
+		{
+			for(i=0 ; i<NB ; i++)
+			{
+			     starpu_mpi_wait(&req[i], MPI_STATUS_IGNORE);
+			     func(handles[i], i, rank, &ret);
+			}
+		}
+		return ret;
+	}
+}
+
+/* Verify a received variable handle holds i*other_rank — the value the
+ * sending peer initialised slot i with.  Sets *error on mismatch. */
+void check_variable(starpu_data_handle_t handle, int i, int rank, int *error)
+{
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+
+	int *rvalue = (int *)starpu_data_get_local_ptr(handle);
+	if (*rvalue != i*other_rank)
+	{
+		FPRINTF_MPI(stderr, "Incorrect received value: %d != %d\n", *rvalue, i*other_rank);
+		*error = 1;
+	}
+}
+
+/* Run the early-receive exchange with the variable data interface:
+ * register NB integers (value i*rank), exchange them, check on the
+ * receiver, unregister.  Returns exchange()'s error status. */
+int exchange_variable(int rank, int detached)
+{
+	int ret, i;
+	starpu_data_handle_t tab_handle[NB];
+	int value[NB];
+
+	FPRINTF_MPI(stderr, "Exchanging variable data with detached=%d\n", detached);
+
+	for(i=0 ; i<NB ; i++)
+	{
+		value[i]=i*rank;
+		starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&value[i], sizeof(int));
+		starpu_mpi_data_register(tab_handle[i], i, rank);
+	}
+	ret = exchange(rank, tab_handle, check_variable, detached);
+	for(i=0 ; i<NB ; i++)
+		starpu_data_unregister(tab_handle[i]);
+
+	return ret;
+}
+
+/* Void data carries no payload: nothing to verify. */
+void check_void(starpu_data_handle_t handle, int i, int rank, int *error)
+{
+}
+
+/* Run the early-receive exchange with the void data interface (NB empty
+ * handles).  Only exercises the communication path; there is no payload
+ * to check.  Returns exchange()'s error status. */
+int exchange_void(int rank, int detached)
+{
+	int ret, i;
+	starpu_data_handle_t tab_handle[NB];
+
+	// This test is not run with valgrind as valgrind falsely detects error when exchanging NULL pointers
+	STARPU_SKIP_IF_VALGRIND_RETURN_ZERO;
+
+	FPRINTF_MPI(stderr, "Exchanging void data with detached=%d\n", detached);
+
+	for(i=0 ; i<NB ; i++)
+	{
+		starpu_void_data_register(&tab_handle[i]);
+		starpu_mpi_data_register(tab_handle[i], i, rank);
+	}
+	ret = exchange(rank, tab_handle, check_void, detached);
+	for(i=0 ; i<NB ; i++)
+		starpu_data_unregister(tab_handle[i]);
+
+	return ret;
+}
+
+/* Verify a received complex handle: the peer initialised slot i with
+ * real = i*rank+12 and imaginary = i*rank+45.  Sets *error on mismatch. */
+void check_complex(starpu_data_handle_t handle, int i, int rank, int *error)
+{
+	double *real = starpu_complex_get_real(handle);
+	double *imaginary = starpu_complex_get_imaginary(handle);
+
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+
+	if ((*real != ((i*other_rank)+12)) || (*imaginary != ((i*other_rank)+45)))
+	{
+		FPRINTF_MPI(stderr, "Incorrect received value: %f != %d || %f != %d\n", *real, ((i*other_rank)+12), *imaginary, ((i*other_rank)+45));
+		*error = 1;
+	}
+}
+
+/* Run the early-receive exchange with the user-defined complex data
+ * interface (one real + one imaginary double per handle).  Returns
+ * exchange()'s error status. */
+int exchange_complex(int rank, int detached)
+{
+	int ret, i;
+	starpu_data_handle_t handle[NB];
+	double real[NB];
+	double imaginary[NB];
+
+	FPRINTF_MPI(stderr, "Exchanging complex data with detached=%d\n", detached);
+
+	for(i=0 ; i<NB ; i++)
+	{
+		real[i] = (i*rank)+12;
+		imaginary[i] = (i*rank)+45;
+		starpu_complex_data_register(&handle[i], STARPU_MAIN_RAM, &real[i], &imaginary[i], 1);
+		starpu_mpi_data_register(handle[i], i, rank);
+	}
+	ret = exchange(rank, handle, check_complex, detached);
+	for(i=0 ; i<NB ; i++)
+		starpu_data_unregister(handle[i]);
+
+	return ret;
+}
+
+/* Early-receive test driver: requires an even number of MPI processes
+ * (ranks are paired even<->odd).  Runs each data interface (variable,
+ * void, complex) twice: detached=0 (explicit requests + wait) and
+ * detached=1 (detached irecv + callback).  Returns the last non-zero
+ * per-exchange error, or 0 on full success. */
+int main(int argc, char **argv)
+{
+	int ret=0, global_ret=0;
+	int rank, size;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size%2 != 0)
+	{
+		FPRINTF(stderr, "We need a even number of processes.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	ret = exchange_variable(rank, 0);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_variable(rank, 1);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_void(rank, 0);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_void(rank, 1);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_complex(rank, 0);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_complex(rank, 1);
+	if (ret != 0) global_ret = ret;
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	MPI_Finalize();
+
+	return global_ret;
+}

+ 237 - 0
nmad/tests/mpi_earlyrecv2_sync.c

@@ -0,0 +1,237 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+#include <unistd.h>
+#include <interface/complex_interface.h>
+
+#define NB 6
+
+/* Per-handle verification callback run on the receiver (may be NULL). */
+typedef void (*check_func)(starpu_data_handle_t handle, int i, int rank, int *error);
+
+/* Synchronous-send variant of the early-receive exchange.  Odd ranks
+ * post the boundary tags first — issend for tags 0 and NB-2, isend for
+ * tag NB-1 — then alternate issend/isend for the middle tags, and wait
+ * for all requests.  Even ranks post recvs only for tags 0 and 1 before
+ * sleeping, so the later tags arrive before their recvs are posted.
+ * After each receive completes, 'func' (if non-NULL) checks the data.
+ * Returns 0 on success, non-zero if a check reported an error. */
+int exchange(int rank, starpu_data_handle_t *handles, check_func func)
+{
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+	int i;
+	int ret=0;
+	starpu_mpi_req req[NB];
+
+	memset(req, 0, NB*sizeof(starpu_mpi_req));
+
+	if (rank%2)
+	{
+		starpu_mpi_issend(handles[0], &req[0], other_rank, 0, MPI_COMM_WORLD);
+		starpu_mpi_isend(handles[NB-1], &req[NB-1], other_rank, NB-1, MPI_COMM_WORLD);
+		starpu_mpi_issend(handles[NB-2], &req[NB-2], other_rank, NB-2, MPI_COMM_WORLD);
+
+		/* Alternate synchronous and standard sends for the middle tags. */
+		for(i=1 ; i<NB-2 ; i++)
+		{
+			if (i%2)
+			{
+				FPRINTF_MPI(stderr, "iSsending value %d\n", i);
+				starpu_mpi_issend(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
+			}
+			else
+			{
+				FPRINTF_MPI(stderr, "isending value %d\n", i);
+				starpu_mpi_isend(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
+			}
+		}
+		for(i=0 ; i<NB ; i++)
+		{
+			starpu_mpi_wait(&req[i], MPI_STATUS_IGNORE);
+		}
+	}
+	else
+	{
+		starpu_mpi_irecv(handles[0], &req[0], other_rank, 0, MPI_COMM_WORLD);
+		STARPU_ASSERT(req[0] != NULL);
+		starpu_mpi_irecv(handles[1], &req[1], other_rank, 1, MPI_COMM_WORLD);
+		STARPU_ASSERT(req[1] != NULL);
+
+		// We sleep to make sure that the data for tags NB-2 and NB-1 (sent first by the peer) will be received before the corresponding recvs are posted
+		usleep(2000000);
+		for(i=2 ; i<NB ; i++)
+		{
+			starpu_mpi_irecv(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
+			STARPU_ASSERT(req[i] != NULL);
+		}
+
+		for(i=0 ; i<NB ; i++)
+		{
+			starpu_mpi_wait(&req[i], MPI_STATUS_IGNORE);
+			if (func)
+				func(handles[i], i, rank, &ret);
+		}
+	}
+	return ret;
+}
+
+/* Verify a received variable handle holds i*other_rank — the value the
+ * sending peer initialised slot i with.  Sets *error on mismatch. */
+void check_variable(starpu_data_handle_t handle, int i, int rank, int *error)
+{
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+
+	int *rvalue = (int *)starpu_data_get_local_ptr(handle);
+	if (*rvalue != i*other_rank)
+	{
+		FPRINTF_MPI(stderr, "Incorrect received value: %d != %d\n", *rvalue, i*other_rank);
+		*error = 1;
+	}
+	else
+	{
+		FPRINTF_MPI(stderr, "Correct received value: %d == %d\n", *rvalue, i*other_rank);
+	}
+}
+
+/* Run the synchronous-send exchange with the variable data interface.
+ * Unlike the non-sync test, each sub-test performs its own StarPU
+ * init/shutdown cycle.  Returns exchange()'s error status. */
+int exchange_variable(int rank)
+{
+	int ret, i;
+	starpu_data_handle_t tab_handle[NB];
+	int value[NB];
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	FPRINTF_MPI(stderr, "Exchanging variable data\n");
+
+	for(i=0 ; i<NB ; i++)
+	{
+		value[i]=i*rank;
+		starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&value[i], sizeof(int));
+		starpu_mpi_data_register(tab_handle[i], i, rank);
+	}
+	ret = exchange(rank, tab_handle, check_variable);
+	for(i=0 ; i<NB ; i++)
+		starpu_data_unregister(tab_handle[i]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return ret;
+}
+
+/* Run the synchronous-send exchange with the void data interface
+ * (no payload to check, so exchange() is called with a NULL checker).
+ * Performs its own StarPU init/shutdown cycle. */
+int exchange_void(int rank)
+{
+	int ret, i;
+	starpu_data_handle_t tab_handle[NB];
+
+	// This test is not run with valgrind as valgrind falsely detects error when exchanging NULL pointers
+	STARPU_SKIP_IF_VALGRIND_RETURN_ZERO;
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	FPRINTF_MPI(stderr, "Exchanging void data\n");
+
+	for(i=0 ; i<NB ; i++)
+	{
+		starpu_void_data_register(&tab_handle[i]);
+		starpu_mpi_data_register(tab_handle[i], i, rank);
+	}
+	ret = exchange(rank, tab_handle, NULL);
+	for(i=0 ; i<NB ; i++)
+		starpu_data_unregister(tab_handle[i]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return ret;
+}
+
+/* Verify a received complex handle: the peer initialised slot i with
+ * real = i*rank+12 and imaginary = i*rank+45.  Sets *error on mismatch. */
+void check_complex(starpu_data_handle_t handle, int i, int rank, int *error)
+{
+	double *real = starpu_complex_get_real(handle);
+	double *imaginary = starpu_complex_get_imaginary(handle);
+
+	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
+
+	if ((*real != ((i*other_rank)+12)) || (*imaginary != ((i*other_rank)+45)))
+	{
+		FPRINTF_MPI(stderr, "Incorrect received value: %f != %d || %f != %d\n", *real, ((i*other_rank)+12), *imaginary, ((i*other_rank)+45));
+		*error = 1;
+	}
+	else
+	{
+		FPRINTF_MPI(stderr, "Correct received value: %f == %d || %f == %d\n", *real, ((i*other_rank)+12), *imaginary, ((i*other_rank)+45));
+	}
+}
+
+/* Run the synchronous-send exchange with the user-defined complex data
+ * interface (one real + one imaginary double per handle).  Performs its
+ * own StarPU init/shutdown cycle.  Returns exchange()'s error status. */
+int exchange_complex(int rank)
+{
+	int ret, i;
+	starpu_data_handle_t handle[NB];
+	double real[NB];
+	double imaginary[NB];
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	FPRINTF_MPI(stderr, "Exchanging complex data\n");
+
+	for(i=0 ; i<NB ; i++)
+	{
+		real[i] = (i*rank)+12;
+		imaginary[i] = (i*rank)+45;
+		starpu_complex_data_register(&handle[i], STARPU_MAIN_RAM, &real[i], &imaginary[i], 1);
+		starpu_mpi_data_register(handle[i], i, rank);
+	}
+	ret = exchange(rank, handle, check_complex);
+	for(i=0 ; i<NB ; i++)
+		starpu_data_unregister(handle[i]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return ret;
+}
+
+/* Synchronous-send early-receive test driver: requires an even number
+ * of MPI processes (ranks are paired even<->odd).  Runs the variable,
+ * void and complex exchanges in sequence; each performs its own StarPU
+ * init/shutdown.  Returns the last non-zero error, or 0 on success. */
+int main(int argc, char **argv)
+{
+	int ret=0, global_ret=0;
+	int rank, size;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (size%2 != 0)
+	{
+		FPRINTF(stderr, "We need a even number of processes.\n");
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	ret = exchange_variable(rank);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_void(rank);
+	if (ret != 0) global_ret = ret;
+
+	ret = exchange_complex(rank);
+	if (ret != 0) global_ret = ret;
+
+	MPI_Finalize();
+
+	return global_ret;
+}

+ 14 - 12
nmad/tests/mpi_irecv.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,27 +32,29 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 16 - 12
nmad/tests/mpi_irecv_detached.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,6 +21,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	16
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -47,27 +49,29 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 14 - 12
nmad/tests/mpi_isend.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -32,27 +32,29 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 16 - 12
nmad/tests/mpi_isend_detached.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2012, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,6 +22,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	16
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -46,27 +48,29 @@ int main(int argc, char **argv)
 	float *tab;
 	starpu_data_handle_t tab_handle;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 61 - 16
nmad/tests/mpi_reduction.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013  Université de Bordeaux
- * Copyright (C) 2012, 2013, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2013, 2015  Université de Bordeaux
+ * Copyright (C) 2012, 2013, 2014, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,17 +17,34 @@
 
 #include <starpu_mpi.h>
 #include <math.h>
+#include "helper.h"
 
 extern void init_cpu_func(void *descr[], void *cl_arg);
 extern void redux_cpu_func(void *descr[], void *cl_arg);
 extern void dot_cpu_func(void *descr[], void *cl_arg);
 extern void display_cpu_func(void *descr[], void *cl_arg);
 
+#ifdef STARPU_SIMGRID
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+#endif
+
 static struct starpu_codelet init_codelet =
 {
 	.cpu_funcs = {init_cpu_func},
 	.nbuffers = 1,
 	.modes = {STARPU_W},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
 	.name = "init_codelet"
 };
 
@@ -36,6 +53,9 @@ static struct starpu_codelet redux_codelet =
 	.cpu_funcs = {redux_cpu_func},
 	.modes = {STARPU_RW, STARPU_R},
 	.nbuffers = 2,
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
 	.name = "redux_codelet"
 };
 
@@ -44,6 +64,9 @@ static struct starpu_codelet dot_codelet =
 	.cpu_funcs = {dot_cpu_func},
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_REDUX},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
 	.name = "dot_codelet"
 };
 
@@ -52,6 +75,9 @@ static struct starpu_codelet display_codelet =
 	.cpu_funcs = {display_cpu_func},
 	.nbuffers = 1,
 	.modes = {STARPU_R},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
 	.name = "display_codelet"
 };
 
@@ -71,12 +97,25 @@ int main(int argc, char **argv)
 
 	int nb_elements, step, loops;
 
+	/* Not supported yet */
+	if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
+		return STARPU_TEST_SKIPPED;
+
 	int ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (my_rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	nb_elements = size*8000;
 	step = 4;
@@ -100,7 +139,7 @@ int main(int argc, char **argv)
 		sum = (nb_elements * (nb_elements + 1)) / 2;
 		sum *= loops;
 		sum += dot;
-		starpu_variable_data_register(&dot_handle, 0, (uintptr_t)&dot, sizeof(dot));
+		starpu_variable_data_register(&dot_handle, STARPU_MAIN_RAM, (uintptr_t)&dot, sizeof(dot));
 	}
 	else
 	{
@@ -111,11 +150,12 @@ int main(int argc, char **argv)
 	handles = (starpu_data_handle_t *) malloc(nb_elements*sizeof(handles[0]));
 	for(x = 0; x < nb_elements; x+=step)
 	{
+		handles[x] = NULL;
 		int mpi_rank = my_distrib(x/step, size);
 		if (mpi_rank == my_rank)
 		{
 			/* Owning data */
-			starpu_vector_data_register(&handles[x], 0, (uintptr_t)&(vector[x]), step, sizeof(vector[0]));
+			starpu_vector_data_register(&handles[x], STARPU_MAIN_RAM, (uintptr_t)&(vector[x]), step, sizeof(vector[0]));
 		}
 		else
 		{
@@ -134,27 +174,24 @@ int main(int argc, char **argv)
 	{
 		for (x = 0; x < nb_elements; x+=step)
 		{
-			starpu_mpi_insert_task(MPI_COMM_WORLD,
+			starpu_mpi_task_insert(MPI_COMM_WORLD,
 					       &dot_codelet,
 					       STARPU_R, handles[x],
 					       STARPU_REDUX, dot_handle,
 					       0);
 		}
 		starpu_mpi_redux_data(MPI_COMM_WORLD, dot_handle);
-		starpu_mpi_insert_task(MPI_COMM_WORLD, &display_codelet, STARPU_R, dot_handle, 0);
+		starpu_mpi_task_insert(MPI_COMM_WORLD, &display_codelet, STARPU_R, dot_handle, 0);
 	}
 
-	fprintf(stderr, "Waiting ...\n");
+	FPRINTF_MPI(stderr, "Waiting ...\n");
 	starpu_task_wait_for_all();
 
 	for(x = 0; x < nb_elements; x+=step)
 	{
 		if (handles[x]) starpu_data_unregister(handles[x]);
 	}
-	if (dot_handle)
-	{
-		starpu_data_unregister(dot_handle);
-	}
+	starpu_data_unregister(dot_handle);
 	free(vector);
 	free(handles);
 
@@ -163,10 +200,18 @@ int main(int argc, char **argv)
 
 	if (my_rank == 0)
 	{
-		fprintf(stderr, "[%d] sum=%ld\n", my_rank, sum);
-		fprintf(stderr, "[%d] dot=%ld\n", my_rank, dot);
-		fprintf(stderr, "%s when computing reduction\n", (sum == dot) ? "Success" : "Error");
+		FPRINTF(stderr, "[%d] sum=%ld\n", my_rank, sum);
+	}
+
+#ifndef STARPU_SIMGRID
+	if (my_rank == 0)
+	{
+		FPRINTF(stderr, "[%d] dot=%ld\n", my_rank, dot);
+		FPRINTF(stderr, "%s when computing reduction\n", (sum == dot) ? "Success" : "Error");
+		if (sum != dot)
+			return 1;
 	}
+#endif
 
 	return 0;
 }

+ 8 - 11
nmad/tests/mpi_reduction_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2012, 2013, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,10 +17,7 @@
 #include <starpu.h>
 #include <mpi.h>
 
-#define _DISPLAY(fmt, ...) do { \
-		int _display_rank; MPI_Comm_rank(MPI_COMM_WORLD, &_display_rank);	\
-		fprintf(stderr, "[%d][%s] " fmt , _display_rank, __starpu_func__ ,## __VA_ARGS__); 	\
-		fflush(stderr); } while(0)
+#include "helper.h"
 
 /*
  *	Codelet to create a neutral element
@@ -29,7 +26,7 @@ void init_cpu_func(void *descr[], void *cl_arg)
 {
 	long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*dot = 0;
-	_DISPLAY("Init dot\n");
+	FPRINTF_MPI(stderr, "Init dot\n");
 }
 
 /*
@@ -41,7 +38,7 @@ void redux_cpu_func(void *descr[], void *cl_arg)
 	long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);
 
 	*dota = *dota + *dotb;
-	_DISPLAY("Calling redux %ld=%ld+%ld\n", *dota, *dota-*dotb, *dotb);
+	FPRINTF_MPI(stderr, "Calling redux %ld=%ld+%ld\n", *dota, *dota-*dotb, *dotb);
 }
 
 /*
@@ -54,14 +51,14 @@ void dot_cpu_func(void *descr[], void *cl_arg)
 
 	long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);
 
-//	_DISPLAY("Before dot=%ld (adding %d elements...)\n", *dot, n);
+	//FPRINTF_MPI(stderr, "Before dot=%ld (adding %d elements...)\n", *dot, n);
 	unsigned i;
 	for (i = 0; i < n; i++)
 	{
-//		_DISPLAY("Adding %ld\n", local_x[i]);
+		//FPRINTF_MPI(stderr, "Adding %ld\n", local_x[i]);
 		*dot += local_x[i];
 	}
-//	_DISPLAY("After dot=%ld\n", *dot);
+	//FPRINTF_MPI(stderr, "After dot=%ld\n", *dot);
 }
 
 /*
@@ -71,6 +68,6 @@ void display_cpu_func(void *descr[], void *cl_arg)
 {
 	long int *local_x = (long int *)STARPU_VECTOR_GET_PTR(descr[0]);
 
-	_DISPLAY("Local=%ld\n", *local_x);
+	FPRINTF_MPI(stderr, "Local=%ld\n", *local_x);
 }
 

+ 25 - 12
nmad/tests/mpi_redux.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2013, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -26,20 +26,22 @@ void callback(void *arg)
 
 	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	*received = *received + 1;
-	fprintf(stderr, "received = %d\n", *received);
+	FPRINTF_MPI(stderr, "received = %u\n", *received);
 	STARPU_PTHREAD_COND_SIGNAL(&cond);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 }
 
 int main(int argc, char **argv)
 {
-	int ret, rank, size;
+	int ret, rank, size, sum;
 	int value=0;
 	starpu_data_handle_t *handles;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	sum = ((size-1) * (size) / 2);
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
@@ -48,7 +50,7 @@ int main(int argc, char **argv)
 
 	if (rank == 0)
 	{
-		int src, sum;
+		int src;
 		int received = 1;
 
 		handles = malloc(size * sizeof(starpu_data_handle_t));
@@ -56,7 +58,7 @@ int main(int argc, char **argv)
 		for(src=1 ; src<size ; src++)
 		{
 			starpu_variable_data_register(&handles[src], -1, (uintptr_t)NULL, sizeof(int));
-			starpu_mpi_irecv_detached(handles[src], src, 12, MPI_COMM_WORLD, callback, &received);
+			starpu_mpi_irecv_detached(handles[src], src, 12+src, MPI_COMM_WORLD, callback, &received);
 		}
 
 		STARPU_PTHREAD_MUTEX_LOCK(&mutex);
@@ -70,16 +72,25 @@ int main(int argc, char **argv)
 			value += *((int *)ptr);
 			starpu_data_unregister(handles[src]);
 		}
-		sum = ((size-1) * (size) / 2);
-		STARPU_ASSERT_MSG(sum == value, "Sum of first %d integers is %d, not %d\n", size-1, sum, value);
+
+		for(src=1 ; src<size ; src++)
+		{
+			starpu_variable_data_register(&handles[src], STARPU_MAIN_RAM, (uintptr_t)&sum, sizeof(int));
+			starpu_mpi_send(handles[src], src, 12+src, MPI_COMM_WORLD);
+			starpu_data_unregister(handles[src]);
+		}
 	}
 	else
 	{
 		value = rank;
 		handles = malloc(sizeof(starpu_data_handle_t));
-		starpu_variable_data_register(&handles[0], 0, (uintptr_t)&value, sizeof(int));
-		starpu_mpi_send(handles[0], 0, 12, MPI_COMM_WORLD);
+		starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int));
+		starpu_mpi_send(handles[0], 0, 12+rank, MPI_COMM_WORLD);
 		starpu_data_unregister_submit(handles[0]);
+
+		starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int));
+		starpu_mpi_recv(handles[0], 0, 12+rank, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		starpu_data_unregister(handles[0]);
 	}
 
 	starpu_task_wait_for_all();
@@ -90,5 +101,7 @@ int main(int argc, char **argv)
 
 	MPI_Finalize();
 
+	STARPU_ASSERT_MSG(sum == value, "Sum of first %d integers is %d, not %d\n", size-1, sum, value);
+
 	return 0;
 }

+ 29 - 6
nmad/tests/mpi_scatter_gather.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -38,11 +38,27 @@ void cpu_codelet(void *descr[], void *_args)
 	}
 }
 
+#ifdef STARPU_SIMGRID
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+#endif
+
 static struct starpu_codelet cl =
 {
 	.cpu_funcs = {cpu_codelet},
 	.nbuffers = 1,
 	.modes = {STARPU_RW},
+#ifdef STARPU_SIMGRID
+	.model = &dumb_model,
+#endif
 };
 
 void scallback(void *arg STARPU_ATTRIBUTE_UNUSED)
@@ -68,17 +84,24 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &nodes);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	if (rank == 0)
 	{
 		/* Allocate the vector */
 		vector = malloc(size * sizeof(int));
 		for(x=0 ; x<size ; x++)
-		{
 			vector[x] = x+10;
-		}
 
 		// Print vector
 		FPRINTF_MPI(stderr, " Input vector: ");
@@ -134,7 +157,7 @@ int main(int argc, char **argv)
 			if (owner == rank)
 			{
 				FPRINTF_MPI(stderr,"Computing on data[%d]\n", x);
-				starpu_insert_task(&cl,
+				starpu_task_insert(&cl,
 						   STARPU_VALUE, &rank, sizeof(rank),
 						   STARPU_RW, data_handles[x],
 						   0);

+ 16 - 15
nmad/tests/mpi_test.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -26,34 +26,35 @@
 
 #define SIZE	16
 
-float *tab;
-starpu_data_handle_t tab_handle;
-
 int main(int argc, char **argv)
 {
 	int ret, rank, size;
+	float *tab;
+	starpu_data_handle_t tab_handle;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 7 - 7
nmad/tests/multiple_send.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,8 +30,8 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
 	if (size < 2)
 	{
@@ -43,10 +43,10 @@ int main(int argc, char **argv)
 		return STARPU_TEST_SKIPPED;
 	}
 
-	starpu_variable_data_register(&send_handle[0], 0, (uintptr_t)&send[0], sizeof(unsigned));
-	starpu_variable_data_register(&send_handle[1], 0, (uintptr_t)&send[1], sizeof(unsigned));
-	starpu_variable_data_register(&recv_handle[0], 0, (uintptr_t)&recv[0], sizeof(unsigned));
-	starpu_variable_data_register(&recv_handle[1], 0, (uintptr_t)&recv[1], sizeof(unsigned));
+	starpu_variable_data_register(&send_handle[0], STARPU_MAIN_RAM, (uintptr_t)&send[0], sizeof(unsigned));
+	starpu_variable_data_register(&send_handle[1], STARPU_MAIN_RAM, (uintptr_t)&send[1], sizeof(unsigned));
+	starpu_variable_data_register(&recv_handle[0], STARPU_MAIN_RAM, (uintptr_t)&recv[0], sizeof(unsigned));
+	starpu_variable_data_register(&recv_handle[1], STARPU_MAIN_RAM, (uintptr_t)&recv[1], sizeof(unsigned));
 
 	if (rank == 0)
 	{

+ 14 - 12
nmad/tests/pingpong.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -33,27 +33,29 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	if (size%2 != 0)
 	{
 		if (rank == 0)
 			FPRINTF(stderr, "We need a even number of processes.\n");
 
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	tab = malloc(SIZE*sizeof(float));
+	tab = calloc(SIZE, sizeof(float));
 
-	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));
+	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));
 
 	int nloops = NITER;
 	int loop;

+ 146 - 0
nmad/tests/policy_register.c

@@ -0,0 +1,146 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+void func_cpu(void *descr[], void *_args)
+{
+	(void)descr;
+	(void)_args;
+}
+
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W},
+	.model = &dumb_model
+};
+
+int starpu_mpi_select_node_my_policy_0(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) nb_data;
+
+	starpu_data_handle_t data = descr[0].handle;
+	return starpu_data_get_rank(data);
+}
+
+int starpu_mpi_select_node_my_policy_1(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) nb_data;
+
+	starpu_data_handle_t data = descr[1].handle;
+	return starpu_data_get_rank(data);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	int rank, size;
+	int policy;
+	struct starpu_task *task;
+	starpu_data_handle_t handles[2];
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size < 2)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 2 processes.\n");
+
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	if (rank == 0)
+		starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	else
+		starpu_variable_data_register(&handles[0], -1, (uintptr_t)NULL, sizeof(int));
+	starpu_mpi_data_register(handles[0], 10, 0);
+	if (rank == 1)
+		starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	else
+		starpu_variable_data_register(&handles[1], -1, (uintptr_t)NULL, sizeof(int));
+	starpu_mpi_data_register(handles[1], 20, 1);
+
+	policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy_1);
+	starpu_mpi_node_selection_set_current_policy(policy);
+
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 1\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1\n");
+	}
+
+	policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy_0);
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     STARPU_NODE_SELECTION_POLICY, policy,
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 0)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 0\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0\n");
+	}
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}

+ 54 - 0
nmad/tests/policy_register_many.c

@@ -0,0 +1,54 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_select_node.h>
+#include "helper.h"
+
+int starpu_mpi_select_node_my_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) descr;
+	(void) nb_data;
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	int i, policy;
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	for(i=0 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY-1 ; i++)
+	{
+		policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy);
+		FPRINTF_MPI(stderr, "New policy %d\n", policy);
+	}
+	starpu_mpi_node_selection_unregister_policy(_STARPU_MPI_NODE_SELECTION_MAX_POLICY-2);
+	policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy);
+	FPRINTF_MPI(stderr, "New policy %d\n", policy);
+	STARPU_ASSERT(policy==_STARPU_MPI_NODE_SELECTION_MAX_POLICY-2);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return 0;
+}

+ 52 - 0
nmad/tests/policy_register_toomany.c

@@ -0,0 +1,52 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2016  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <starpu_mpi_select_node.h>
+#include "helper.h"
+
+int starpu_mpi_select_node_my_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
+{
+	(void) me;
+	(void) nb_nodes;
+	(void) descr;
+	(void) nb_data;
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	int i;
+
+	disable_coredump();
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	for(i=0 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY+1 ; i++)
+	{
+		int policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy);
+		FPRINTF_MPI(stderr, "New policy %d\n", policy);
+	}
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return 0;
+}

+ 187 - 0
nmad/tests/policy_selection.c

@@ -0,0 +1,187 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+void func_cpu(void *descr[], void *_args)
+{
+	(void)descr;
+	(void)_args;
+}
+
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
+struct starpu_codelet mycodelet_2 =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W},
+	.model = &dumb_model
+};
+struct starpu_codelet mycodelet_3 =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_W, STARPU_W},
+	.model = &dumb_model
+};
+
+int main(int argc, char **argv)
+{
+	int ret;
+	int rank, size;
+	int policy = 12;
+	struct starpu_task *task;
+	starpu_data_handle_t handles[3];
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size < 3)
+	{
+		if (rank == 0)
+			FPRINTF(stderr, "We need at least 3 processes.\n");
+
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	if (rank == 0)
+	{
+		starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	}
+	else
+	{
+		starpu_variable_data_register(&handles[0], -1, (uintptr_t)NULL, sizeof(int));
+	}
+	starpu_mpi_data_register(handles[0], 10, 0);
+
+	if (rank == 1)
+	{
+		starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	}
+	else
+	{
+		starpu_variable_data_register(&handles[1], -1, (uintptr_t)NULL, sizeof(int));
+	}
+	starpu_mpi_data_register(handles[1], 20, 1);
+
+	if (rank == 2)
+	{
+	     starpu_variable_data_register(&handles[2], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int));
+	}
+	else
+	{
+	     starpu_variable_data_register(&handles[2], -1, (uintptr_t)NULL, sizeof(int));
+	}
+	starpu_mpi_data_register(handles[2], 30, 2);
+
+	// Force the execution on node 1
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_3,
+				     STARPU_R, handles[2],
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     STARPU_EXECUTE_ON_NODE, 1,
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 1\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1\n");
+	}
+
+	// Force the execution on node 1
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_2,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     STARPU_EXECUTE_ON_NODE, 1,
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 1\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1\n");
+	}
+
+	// Let StarPU choose the node
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_3,
+				     STARPU_R, handles[2],
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 2)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 2\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 2\n");
+	}
+
+	// Let StarPU choose the node
+	task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_2,
+				     STARPU_W, handles[0], STARPU_W, handles[1],
+				     0);
+	FPRINTF_MPI(stderr, "Task %p\n", task);
+	if (rank == 0)
+	{
+		STARPU_ASSERT_MSG(task, "Task should be executed by rank 0\n");
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0\n");
+	}
+
+	starpu_data_unregister(handles[0]);
+	starpu_data_unregister(handles[1]);
+	starpu_data_unregister(handles[2]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}

+ 131 - 0
nmad/tests/policy_selection2.c

@@ -0,0 +1,131 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+void func_cpu(void *descr[], void *_args)
+{
+	(void)_args;
+
+	int *data0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	int *data1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);
+	int *data2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]);
+	*data1 += *data0;
+	*data2 += *data0;
+}
+
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
+struct starpu_codelet mycodelet =
+{
+	.cpu_funcs = {func_cpu},
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_W, STARPU_W},
+	.model = &dumb_model
+};
+
+int main(int argc, char **argv)
+{
+	int ret;
+	int i;
+	int rank, size;
+	int data[3];
+	starpu_data_handle_t handles[3];
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if ((size < 3) || (starpu_cpu_worker_get_count() == 0))
+	{
+		if (rank == 0)
+		{
+			if (size < 3)
+				FPRINTF(stderr, "We need at least 3 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
+		MPI_Finalize();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	data[0] = 12;
+	starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&data[0], sizeof(int));
+	starpu_mpi_data_register(handles[0], 10, 0);
+
+	data[1] = 12;
+	starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&data[1], sizeof(int));
+	starpu_mpi_data_register(handles[1], 20, 1);
+
+	data[2] = 12;
+	starpu_variable_data_register(&handles[2], STARPU_MAIN_RAM, (uintptr_t)&data[2], sizeof(int));
+	starpu_mpi_data_register(handles[2], 30, 2);
+
+	starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+			       STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1],
+			       0);
+	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
+	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
+	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+	if (rank == 2)
+	{
+		STARPU_ASSERT_MSG(data[0] == 2*data[2] && data[1] == 2*data[2], "Computation incorrect. data[%d] (%d) != 2*data[%d] (%d) && data[%d] (%d) != 2*data[%d] (%d)\n",
+				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
+	}
+
+	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_W);
+	for(i=0 ; i<2 ; i++) data[i] = 12;
+	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+
+	// Let StarPU choose the node
+	starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
+			       STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1],
+			       STARPU_EXECUTE_ON_NODE, 1,
+			       0);
+	for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R);
+	FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]);
+	for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]);
+	if (rank == 1)
+	{
+		STARPU_ASSERT_MSG(data[0] == 2*data[2] && data[1] == 2*data[2], "Computation incorrect. data[%d] (%d) != 2*data[%d] (%d) && data[%d] (%d) != 2*data[%d] (%d)\n",
+				  0, data[0], 2, data[2], 1, data[1], 2, data[2]);
+	}
+
+	for(i=0 ; i<3 ; i++) starpu_data_unregister(handles[i]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}

+ 37 - 0
nmad/tests/policy_unregister.c

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	disable_coredump();
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(&argc, &argv, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	starpu_mpi_node_selection_unregister_policy(STARPU_MPI_NODE_SELECTION_MOST_R_DATA);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+
+	return 0;
+}

+ 39 - 17
nmad/tests/ring.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	32
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -37,6 +39,17 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -44,7 +57,8 @@ static struct starpu_codelet increment_cl =
 #endif
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 void increment_token(void)
@@ -63,25 +77,31 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
-	if (size < 2)
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));
+	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));
 
 	int nloops = NITER;
 	int loop;
@@ -96,7 +116,7 @@ int main(int argc, char **argv)
 		if (loop == 0 && rank == 0)
 		{
 			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
+			FPRINTF(stdout, "Start with token value %d\n", token);
 		}
 		else
 		{
@@ -109,7 +129,7 @@ int main(int argc, char **argv)
 		if (loop == last_loop && rank == last_rank)
 		{
 			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
 			starpu_data_release(token_handle);
 		}
 		else
@@ -124,10 +144,12 @@ int main(int argc, char **argv)
 
 	MPI_Finalize();
 
+#ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	{
 		STARPU_ASSERT(token == nloops*size);
 	}
+#endif
 
 	return 0;
 }

+ 41 - 18
nmad/tests/ring_async.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	32
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -37,6 +39,17 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -44,7 +57,8 @@ static struct starpu_codelet increment_cl =
 #endif
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 void increment_token(void)
@@ -63,25 +77,31 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
-	if (size < 2)
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
-	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));
+	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));
 
 	int nloops = NITER;
 	int loop;
@@ -96,7 +116,7 @@ int main(int argc, char **argv)
 		if (loop == 0 && rank == 0)
 		{
 			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
+			FPRINTF(stdout, "Start with token value %d\n", token);
 		}
 		else
 		{
@@ -111,10 +131,11 @@ int main(int argc, char **argv)
 		if (loop == last_loop && rank == last_rank)
 		{
 			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
 			starpu_data_release(token_handle);
 		}
-		else {
+		else
+		{
 			starpu_mpi_req req;
 			MPI_Status status;
 			starpu_mpi_isend(token_handle, &req, (rank+1)%size, tag+1, MPI_COMM_WORLD);
@@ -128,10 +149,12 @@ int main(int argc, char **argv)
 
 	MPI_Finalize();
 
+#ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	{
 		STARPU_ASSERT(token == nloops*size);
 	}
+#endif
 
 	return 0;
 }

+ 34 - 14
nmad/tests/ring_async_implicit.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2015-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	32
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -37,6 +39,17 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -44,7 +57,8 @@ static struct starpu_codelet increment_cl =
 #endif
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 void increment_token(void)
@@ -66,20 +80,24 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(NULL, NULL, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
 
-	if (size < 2)
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
-		MPI_Finalize();
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		return STARPU_TEST_SKIPPED;
 	}
 
-
-	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));
+	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));
 
 	int nloops = NITER;
 	int loop;
@@ -94,7 +112,7 @@ int main(int argc, char **argv)
 		if (loop == 0 && rank == 0)
 		{
 			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
+			FPRINTF(stdout, "Start with token value %d\n", token);
 		}
 		else
 		{
@@ -106,7 +124,7 @@ int main(int argc, char **argv)
 		if (loop == last_loop && rank == last_rank)
 		{
 			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
 			starpu_data_release(token_handle);
 		}
 		else
@@ -121,11 +139,13 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 
+#ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	{
-		FPRINTF(stderr, "[%d] token = %u == %u * %d ?\n", rank, token, nloops, size);
+		FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size);
 		STARPU_ASSERT(token == nloops*size);
 	}
+#endif
 
 	return 0;
 }

+ 1 - 1
nmad/tests/ring_kernel.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux
- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 38 - 16
nmad/tests/ring_sync.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	32
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -37,6 +39,17 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -44,7 +57,8 @@ static struct starpu_codelet increment_cl =
 #endif
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 void increment_token(void)
@@ -63,24 +77,30 @@ int main(int argc, char **argv)
 {
 	int ret, rank, size;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
-	if (size < 2)
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));
 
 	int nloops = NITER;
@@ -96,7 +116,7 @@ int main(int argc, char **argv)
 		if (loop == 0 && rank == 0)
 		{
 			token = 0;
-			FPRINTF(stdout, "Start with token value %u\n", token);
+			FPRINTF(stdout, "Start with token value %d\n", token);
 		}
 		else
 		{
@@ -109,7 +129,7 @@ int main(int argc, char **argv)
 		if (loop == last_loop && rank == last_rank)
 		{
 			starpu_data_acquire(token_handle, STARPU_R);
-			FPRINTF(stdout, "Finished : token value %u\n", token);
+			FPRINTF(stdout, "Finished : token value %d\n", token);
 			starpu_data_release(token_handle);
 		}
 		else
@@ -127,10 +147,12 @@ int main(int argc, char **argv)
 
 	MPI_Finalize();
 
+#ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	{
 		STARPU_ASSERT(token == nloops*size);
 	}
+#endif
 
 	return 0;
 }

+ 36 - 14
nmad/tests/ring_sync_detached.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2014  Université de Bordeaux
- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
 
 #ifdef STARPU_QUICK_CHECK
 #  define NITER	32
+#elif !defined(STARPU_LONG_CHECK)
+#  define NITER	256
 #else
 #  define NITER	2048
 #endif
@@ -34,6 +36,17 @@ void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	(*tokenptr)++;
 }
 
+/* Dummy cost function for simgrid */
+static double cost_function(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED)
+{
+	return 0.000001;
+}
+static struct starpu_perfmodel dumb_model =
+{
+	.type		= STARPU_COMMON,
+	.cost_function	= cost_function
+};
+
 static struct starpu_codelet increment_cl =
 {
 #ifdef STARPU_USE_CUDA
@@ -41,7 +54,8 @@ static struct starpu_codelet increment_cl =
 #endif
 	.cpu_funcs = {increment_cpu},
 	.nbuffers = 1,
-	.modes = {STARPU_RW}
+	.modes = {STARPU_RW},
+	.model = &dumb_model
 };
 
 void increment_token(starpu_data_handle_t handle)
@@ -75,24 +89,30 @@ int main(int argc, char **argv)
 	int token = 42;
 	starpu_data_handle_t token_handle;
 
-	MPI_Init(&argc, &argv);
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
-	if (size < 2)
+	if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0))
 	{
 		if (rank == 0)
-			FPRINTF(stderr, "We need at least 2 processes.\n");
-
+		{
+			if (size < 2)
+				FPRINTF(stderr, "We need at least 2 processes.\n");
+			else
+				FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n");
+		}
+		starpu_mpi_shutdown();
+		starpu_shutdown();
 		MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 
-	ret = starpu_init(NULL);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-
 	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));
 
 	int nloops = NITER;
@@ -143,10 +163,12 @@ int main(int argc, char **argv)
 	FPRINTF_MPI(stderr, "Final value for token %d\n", token);
 	MPI_Finalize();
 
+#ifndef STARPU_SIMGRID
 	if (rank == last_rank)
 	{
 		STARPU_ASSERT(token == nloops*size);
 	}
+#endif
 
 
 	return 0;

+ 43 - 0
nmad/tests/starpu_redefine.c

@@ -0,0 +1,43 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+int main(int argc, char **argv)
+{
+	int ret;
+	starpu_data_handle_t handle;
+
+	disable_coredump();
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	ret = starpu_mpi_init(NULL, NULL, 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&ret, 1, sizeof(int));
+	starpu_mpi_datatype_register(handle, NULL, NULL);
+	starpu_data_unregister(handle);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	MPI_Finalize();
+
+	return 0;
+}

+ 97 - 0
nmad/tests/sync.c

@@ -0,0 +1,97 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+int main(int argc, char **argv)
+{
+	int size, x=789;
+	int rank, other_rank;
+	int ret;
+	starpu_data_handle_t data[2];
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+        starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+        starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+        if (size % 2)
+        {
+		FPRINTF(stderr, "We need a even number of processes.\n");
+                MPI_Finalize();
+                return STARPU_TEST_SKIPPED;
+        }
+
+	other_rank = rank%2 == 0 ? rank+1 : rank-1;
+	FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank);
+
+	if (rank % 2)
+	{
+		MPI_Send(&rank, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD);
+		FPRINTF(stderr, "[%d] sending %d\n", rank, rank);
+	}
+	else
+	{
+		MPI_Recv(&x, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		FPRINTF(stderr, "[%d] received %d\n", rank, x);
+	}
+
+        ret = starpu_init(NULL);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+        ret = starpu_mpi_init(NULL, NULL, 0);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (rank % 2)
+	{
+		starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned));
+		starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned));
+		starpu_mpi_data_register(data[1], 22, 0);
+	}
+	else
+		starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(unsigned));
+	starpu_mpi_data_register(data[0], 12, 0);
+
+	if (rank % 2)
+	{
+		starpu_mpi_req req;
+		starpu_mpi_issend(data[1], &req, other_rank, 22, MPI_COMM_WORLD);
+		starpu_mpi_send(data[0], other_rank, 12, MPI_COMM_WORLD);
+		starpu_mpi_wait(&req, MPI_STATUS_IGNORE);
+	}
+	else
+	{
+		int *xx;
+
+		starpu_mpi_recv(data[0], other_rank, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		xx = (int *)starpu_variable_get_local_ptr(data[0]);
+		FPRINTF_MPI(stderr, "received %d\n", *xx);
+		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
+
+		starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(unsigned));
+		starpu_mpi_data_register(data[1], 22, 0);
+		starpu_mpi_recv(data[0],  other_rank, 22, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		xx = (int *)starpu_variable_get_local_ptr(data[0]);
+		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
+	}
+
+	starpu_data_unregister(data[0]);
+	starpu_data_unregister(data[1]);
+
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+        MPI_Finalize();
+	return 0;
+}

+ 148 - 0
nmad/tests/tags_checking.c

@@ -0,0 +1,148 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2015, 2016, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+#define VAL0 12
+#define VAL1 24
+
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;
+
+void callback(void *arg)
+{
+	unsigned *received = arg;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	*received = *received + 1;
+	FPRINTF_MPI(stderr, "Request %u received\n", *received);
+	STARPU_PTHREAD_COND_SIGNAL(&cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+int do_test(int rank, int sdetached, int rdetached)
+{
+	int ret, i;
+	int val[2];
+	starpu_data_handle_t data[2];
+
+	ret = starpu_init(NULL);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+        ret = starpu_mpi_init(NULL, NULL, 0);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
+
+	if (rank == 1)
+	{
+		val[0] = VAL0;
+		val[1] = VAL1;
+	}
+	else
+	{
+		val[0] = -1;
+		val[1] = -1;
+	}
+	starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0]));
+	starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1]));
+	starpu_mpi_data_register(data[0], 77, 1);
+	starpu_mpi_data_register(data[1], 88, 1);
+
+	if (rank == 1)
+	{
+		for(i=1 ; i>=0 ; i--)
+		{
+			if (sdetached)
+				starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL);
+			else
+				starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD);
+		}
+	}
+	else if (rank == 0)
+	{
+		int received = 0;
+
+		for(i=0 ; i<2 ; i++)
+			FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]);
+		for(i=0 ; i<2 ; i++)
+		{
+			if (rdetached)
+				starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received);
+			else
+				starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		}
+
+		if (rdetached)
+		{
+			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+			while (received != 2)
+			{
+				FPRINTF_MPI(stderr, "Received %d messages\n", received);
+				STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+			}
+			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+		}
+
+		for(i=0 ; i<2 ; i++)
+			starpu_data_acquire(data[i], STARPU_R);
+		for(i=0 ; i<2 ; i++)
+			FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]);
+		for(i=0 ; i<2 ; i++)
+			starpu_data_release(data[i]);
+	}
+	FPRINTF_MPI(stderr, "Waiting ...\n");
+	starpu_task_wait_for_all();
+
+	starpu_data_unregister(data[0]);
+	starpu_data_unregister(data[1]);
+
+	if (rank == 0)
+	{
+		ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1;
+	}
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int size;
+	int rank;
+	int ret=0;
+	int sdetached, rdetached;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED);
+        starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+        starpu_mpi_comm_size(MPI_COMM_WORLD, &size);
+
+        if (size < 2)
+        {
+		FPRINTF_MPI(stderr, "We need at least 2 processes.\n");
+                MPI_Finalize();
+                return STARPU_TEST_SKIPPED;
+        }
+
+	for(sdetached=0 ; sdetached<=1 ; sdetached++)
+	{
+		for(rdetached=0 ; rdetached<=1 ; rdetached++)
+		{
+			ret += do_test(rank, sdetached, rdetached);
+		}
+	}
+
+        MPI_Finalize();
+	return ret;
+}

+ 7 - 8
nmad/tests/user_defined_datatype.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2012, 2013, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2012, 2013, 2014, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -69,8 +69,8 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &nodes);
+	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
+	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);
 
 	if (nodes < 2)
 	{
@@ -100,7 +100,7 @@ int main(int argc, char **argv)
 			float foo_compare=42.0;
 			int value_compare=36;
 
-			FPRINTF_MPI(stderr, "Testing with function %p\n", f);
+			FPRINTF_MPI(stderr, "\nTesting with function %p\n", f);
 
 			if (rank == 0)
 			{
@@ -128,9 +128,9 @@ int main(int argc, char **argv)
 			}
 			for(i=0 ; i<ELEMENTS ; i++)
 			{
-				starpu_complex_data_register(&handle_complex[i], 0, real[i], imaginary[i], 2);
-				starpu_value_data_register(&handle_values[i], 0, &values[i]);
-				starpu_variable_data_register(&handle_vars[i], 0, (uintptr_t)&foo[i], sizeof(float));
+				starpu_complex_data_register(&handle_complex[i], STARPU_MAIN_RAM, real[i], imaginary[i], 2);
+				starpu_value_data_register(&handle_values[i], STARPU_MAIN_RAM, &values[i]);
+				starpu_variable_data_register(&handle_vars[i], STARPU_MAIN_RAM, (uintptr_t)&foo[i], sizeof(float));
 			}
 
 			f(handle_vars, ELEMENTS, rank, ELEMENTS);
@@ -151,7 +151,6 @@ int main(int argc, char **argv)
 				for(i=0 ; i<ELEMENTS ; i++)
 				{
 					int j;
-
 					compare = (foo[i] == foo_compare);
 					FPRINTF_MPI(stderr, "%s. foo[%d] = %f %s %f\n", compare==0?"ERROR":"SUCCESS", i, foo[i], compare==0?"!=":"==", foo_compare);
 

+ 6 - 3
nmad/tests/user_defined_datatype_value.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2013, 2015  Centre National de la Recherche Scientifique
+ * Copyright (C) 2013, 2014  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -26,7 +26,7 @@ struct starpu_value_interface
 int *starpu_value_get(starpu_data_handle_t handle)
 {
 	struct starpu_value_interface *value_interface =
-		(struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, 0);
+		(struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
 	return value_interface->value;
 }
 
@@ -40,7 +40,10 @@ static void value_register_data_handle(starpu_data_handle_t handle, unsigned hom
 		struct starpu_value_interface *local_interface = (struct starpu_value_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		local_interface->value = value_interface->value;
+		if (node == home_node)
+			local_interface->value = value_interface->value;
+		else
+			local_interface->value = 0;
 	}
 }