Преглед на файлове

Merge branch 'master' into knobs

Olivier Aumage преди 5 години
родител
ревизия
a1a4b06d30

+ 1 - 1
configure.ac

@@ -225,7 +225,7 @@ if test x$enable_simgrid = xyes ; then
 	fi
 
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
-			  #ifdef HAVE_SIMGRID_MSG_H
+			  #ifdef STARPU_HAVE_SIMGRID_MSG_H
 			  #include <simgrid/msg.h>
 			  #include <simgrid/host.h>
 			  #else

+ 2 - 0
doc/doxygen/refman.tex

@@ -297,6 +297,7 @@ Documentation License”.
 \input{starpu__mod_8f90}
 \input{starpu__mpi_8h}
 \input{starpu__mpi__lb_8h}
+\input{starpu__mpi__ms_8h}
 \input{starpu__opencl_8h}
 \input{starpu__openmp_8h}
 \input{starpu__perf__monitoring_8h}
@@ -313,6 +314,7 @@ Documentation License”.
 \input{starpu__stdlib_8h}
 \input{starpu__task_8h}
 \input{starpu__task__bundle_8h}
+\input{starpu__task__dep_8h}
 \input{starpu__task__list_8h}
 \input{starpu__task__util_8h}
 \input{starpu__thread_8h}

+ 2 - 1
include/starpu_task.h

@@ -1183,7 +1183,8 @@ struct starpu_task
    equivalent to initializing a structure starpu_task
    with the function starpu_task_init().
 */
-/* Note: remember to update starpu_task_init as well */
+/* Note: remember to update starpu_task_init and starpu_task_ft_create_retry
+ * as well */
 #define STARPU_TASK_INITIALIZER 			\
 {							\
 	.cl = NULL,					\

+ 74 - 19
mpi/src/mpi/starpu_mpi_early_data.c

@@ -20,15 +20,16 @@
 #include <mpi/starpu_mpi_early_data.h>
 #include <mpi/starpu_mpi_mpi_backend.h>
 #include <starpu_mpi_private.h>
-#include <common/uthash.h>
 
 #ifdef STARPU_USE_MPI_MPI
 
+/** the hashlist is on 2 levels, the first top level is indexed on (comm, rank), the second lower level is indexed on the data tag */
+
 struct _starpu_mpi_early_data_handle_hashlist
 {
-	struct _starpu_mpi_early_data_handle_list list;
+	struct _starpu_mpi_early_data_handle_tag_hashlist *datahash;
 	UT_hash_handle hh;
-	struct _starpu_mpi_node_tag node_tag;
+	struct _starpu_mpi_node node;
 };
 
 /** stores data which have been received by MPI but have not been requested by the application */
@@ -50,7 +51,11 @@ void _starpu_mpi_early_data_check_termination(void)
 		struct _starpu_mpi_early_data_handle_hashlist *current=NULL, *tmp=NULL;
 		HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp)
 		{
-			_STARPU_MSG("Unexpected message with comm %ld source %d tag %ld\n", (long int)current->node_tag.comm, current->node_tag.rank, current->node_tag.data_tag);
+			struct _starpu_mpi_early_data_handle_tag_hashlist *tag_current=NULL, *tag_tmp=NULL;
+			HASH_ITER(hh, current->datahash, tag_current, tag_tmp)
+			{
+				_STARPU_MSG("Unexpected message with comm %ld source %d tag %ld\n", (long int)current->node.comm, current->node.rank, tag_current->data_tag);
+			}
 		}
 		STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of unexpected received messages left is not 0 (but %d), did you forget to post a receive corresponding to a send?", _starpu_mpi_early_data_handle_hashmap_count);
 	}
@@ -61,7 +66,15 @@ void _starpu_mpi_early_data_shutdown(void)
 	struct _starpu_mpi_early_data_handle_hashlist *current=NULL, *tmp=NULL;
 	HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp)
 	{
-		STARPU_ASSERT(_starpu_mpi_early_data_handle_list_empty(&current->list));
+		_STARPU_MPI_DEBUG(600, "Hash early_data with comm %ld source %d\n", (long int) current->node.comm, current->node.rank);
+		struct _starpu_mpi_early_data_handle_tag_hashlist *tag_entry=NULL, *tag_tmp=NULL;
+		HASH_ITER(hh, current->datahash, tag_entry, tag_tmp)
+		{
+			_STARPU_MPI_DEBUG(600, "Hash 2nd level with tag %ld\n", tag_entry->data_tag);
+			STARPU_ASSERT(_starpu_mpi_early_data_handle_list_empty(&tag_entry->list));
+			HASH_DEL(current->datahash, tag_entry);
+			free(tag_entry);
+		}
 		HASH_DEL(_starpu_mpi_early_data_handle_hashmap, current);
 		free(current);
 	}
@@ -75,8 +88,8 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _star
 	STARPU_PTHREAD_MUTEX_INIT(&early_data_handle->req_mutex, NULL);
 	STARPU_PTHREAD_COND_INIT(&early_data_handle->req_cond, NULL);
 	early_data_handle->env = envelope;
-	early_data_handle->node_tag.comm = comm;
-	early_data_handle->node_tag.rank = source;
+	early_data_handle->node_tag.node.comm = comm;
+	early_data_handle->node_tag.node.rank = source;
 	early_data_handle->node_tag.data_tag = envelope->data_tag;
 	return early_data_handle;
 }
@@ -87,45 +100,87 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu
 	struct _starpu_mpi_early_data_handle *early_data_handle;
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %ld\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
-	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
+	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %ld\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
+	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &node_tag->node, sizeof(struct _starpu_mpi_node), hashlist);
 	if (hashlist == NULL)
 	{
+		_STARPU_MPI_DEBUG(600, "No entry for (comm %ld, source %d)\n", (long int)node_tag->node.comm, node_tag->node.rank);
 		early_data_handle = NULL;
 	}
 	else
 	{
-		if (_starpu_mpi_early_data_handle_list_empty(&hashlist->list))
+		struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist;
+		HASH_FIND(hh, hashlist->datahash, &node_tag->data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist == NULL)
+		{
+			_STARPU_MPI_DEBUG(600, "No entry for tag %ld\n", node_tag->data_tag);
+			early_data_handle = NULL;
+		}
+		else if (_starpu_mpi_early_data_handle_list_empty(&tag_hashlist->list))
 		{
+			_STARPU_MPI_DEBUG(600, "List empty for tag %ld\n", node_tag->data_tag);
 			early_data_handle = NULL;
 		}
 		else
 		{
 			_starpu_mpi_early_data_handle_hashmap_count --;
-			early_data_handle = _starpu_mpi_early_data_handle_list_pop_front(&hashlist->list);
+			early_data_handle = _starpu_mpi_early_data_handle_list_pop_front(&tag_hashlist->list);
 		}
 	}
-	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %ld\n", early_data_handle, (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %ld\n", early_data_handle, (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
 	return early_data_handle;
 }
 
+/**
+ * Detach and return the second-level entry (the list of early data handles)
+ * stored for node_tag's (comm, rank) and data tag, or NULL when there is none.
+ * The global early-data counter is decreased by the number of handles held in
+ * the detached list. Lookup and removal run with
+ * _starpu_mpi_early_data_handle_mutex held. The entry is only unlinked from
+ * the hash here, it is not freed; the first-level (comm, rank) entry is left
+ * in place.
+ */
+struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag)
+{
+	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
+	struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist = NULL;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
+	_STARPU_MPI_DEBUG(60, "Looking for hashlist for (comm %ld, source %d)\n", (long int)node_tag->node.comm, node_tag->node.rank);
+	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &node_tag->node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist)
+	{
+		_STARPU_MPI_DEBUG(60, "Looking for hashlist for (tag %ld)\n", node_tag->data_tag);
+		HASH_FIND(hh, hashlist->datahash, &node_tag->data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist)
+		{
+			_starpu_mpi_early_data_handle_hashmap_count -= _starpu_mpi_early_data_handle_list_size(&tag_hashlist->list);
+			HASH_DEL(hashlist->datahash, tag_hashlist);
+		}
+	}
+	_STARPU_MPI_DEBUG(60, "Found hashlist %p for (comm %ld, source %d) and (tag %ld)\n", tag_hashlist, (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
+	return tag_hashlist;
+}
+
 void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle)
 {
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Trying to add early_data_handle %p with comm %ld source %d tag %ld\n", early_data_handle, (long int)early_data_handle->node_tag.comm,
-			  early_data_handle->node_tag.rank, early_data_handle->node_tag.data_tag);
+	_STARPU_MPI_DEBUG(60, "Adding early_data_handle %p with comm %ld source %d tag %ld (%p)\n", early_data_handle, (long int)early_data_handle->node_tag.node.comm, early_data_handle->node_tag.node.rank, early_data_handle->node_tag.data_tag, &early_data_handle->node_tag.node);
 
 	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
-	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &early_data_handle->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
+	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &early_data_handle->node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
 	if (hashlist == NULL)
 	{
 		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_data_handle_hashlist));
-		_starpu_mpi_early_data_handle_list_init(&hashlist->list);
-		hashlist->node_tag = early_data_handle->node_tag;
-		HASH_ADD(hh, _starpu_mpi_early_data_handle_hashmap, node_tag, sizeof(hashlist->node_tag), hashlist);
+		hashlist->node = early_data_handle->node_tag.node;
+		hashlist->datahash = NULL;
+		HASH_ADD(hh, _starpu_mpi_early_data_handle_hashmap, node, sizeof(hashlist->node), hashlist);
 	}
-	_starpu_mpi_early_data_handle_list_push_back(&hashlist->list, early_data_handle);
+
+	struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist;
+	HASH_FIND(hh, hashlist->datahash, &early_data_handle->node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+	if (tag_hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(tag_hashlist, sizeof(struct _starpu_mpi_early_data_handle_tag_hashlist));
+		tag_hashlist->data_tag = early_data_handle->node_tag.data_tag;
+		HASH_ADD(hh, hashlist->datahash, data_tag, sizeof(tag_hashlist->data_tag), tag_hashlist);
+		_starpu_mpi_early_data_handle_list_init(&tag_hashlist->list);
+	}
+
+	_starpu_mpi_early_data_handle_list_push_back(&tag_hashlist->list, early_data_handle);
 	_starpu_mpi_early_data_handle_hashmap_count ++;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
 }

+ 11 - 1
mpi/src/mpi/starpu_mpi_early_data.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2017,2019                           CNRS
  * Copyright (C) 2009-2014,2016                           Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -23,6 +23,7 @@
 #include <mpi.h>
 #include <common/config.h>
 #include <common/list.h>
+#include <common/uthash.h>
 #include <starpu_mpi_private.h>
 
 #ifdef STARPU_USE_MPI_MPI
@@ -43,6 +44,13 @@ LIST_TYPE(_starpu_mpi_early_data_handle,
 	  starpu_pthread_cond_t req_cond;
 );
 
+/** Second-level hash entry: the early data handles queued for one data tag. */
+struct _starpu_mpi_early_data_handle_tag_hashlist
+{
+	/** early data handles queued under this tag */
+	struct _starpu_mpi_early_data_handle_list list;
+	/** uthash bookkeeping handle */
+	UT_hash_handle hh;
+	/** hash key of this entry */
+	starpu_mpi_tag_t data_tag;
+};
+
 void _starpu_mpi_early_data_init(void);
 void _starpu_mpi_early_data_check_termination(void);
 void _starpu_mpi_early_data_shutdown(void);
@@ -51,6 +59,8 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _star
 struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag);
 void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle);
 
+struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag);
+
 #ifdef __cplusplus
 }
 #endif

+ 69 - 16
mpi/src/mpi/starpu_mpi_early_request.c

@@ -24,11 +24,12 @@
 #ifdef STARPU_USE_MPI_MPI
 
 /** stores application requests for which data have not been received yet */
+/** the hashlist is on 2 levels, the first top level is indexed on (comm, rank), the second lower level is indexed on the data tag */
 struct _starpu_mpi_early_request_hashlist
 {
-	struct _starpu_mpi_req_list list;
+	struct _starpu_mpi_early_request_tag_hashlist *datahash;
 	UT_hash_handle hh;
-	struct _starpu_mpi_node_tag node_tag;
+	struct _starpu_mpi_node node;
 };
 
 static starpu_pthread_mutex_t _starpu_mpi_early_request_mutex;
@@ -47,7 +48,14 @@ void _starpu_mpi_early_request_shutdown()
 	struct _starpu_mpi_early_request_hashlist *entry=NULL, *tmp=NULL;
 	HASH_ITER(hh, _starpu_mpi_early_request_hash, entry, tmp)
 	{
-		STARPU_ASSERT(_starpu_mpi_req_list_empty(&entry->list));
+		struct _starpu_mpi_early_request_tag_hashlist *tag_entry=NULL, *tag_tmp=NULL;
+		HASH_ITER(hh, entry->datahash, tag_entry, tag_tmp)
+		{
+			STARPU_ASSERT(_starpu_mpi_req_list_empty(&tag_entry->list));
+			HASH_DEL(entry->datahash, tag_entry);
+			free(tag_entry);
+		}
+
 		HASH_DEL(_starpu_mpi_early_request_hash, entry);
 		free(entry);
 	}
@@ -71,49 +79,94 @@ struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_
 	struct _starpu_mpi_early_request_hashlist *hashlist;
 
 	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
-	node_tag.comm = comm;
-	node_tag.rank = source;
+	node_tag.node.comm = comm;
+	node_tag.node.rank = source;
 	node_tag.data_tag = data_tag;
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
-	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %ld\n", (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
-	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
+	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %ld\n", (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag);
+	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
 	if (hashlist == NULL)
 	{
 		found = NULL;
 	}
 	else
 	{
-		if (_starpu_mpi_req_list_empty(&hashlist->list))
+		struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist;
+		HASH_FIND(hh, hashlist->datahash, &node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist == NULL)
+		{
+			found = NULL;
+		}
+		else if (_starpu_mpi_req_list_empty(&tag_hashlist->list))
 		{
 			found = NULL;
 		}
 		else
 		{
-			found = _starpu_mpi_req_list_pop_front(&hashlist->list);
+			found = _starpu_mpi_req_list_pop_front(&tag_hashlist->list);
 			_starpu_mpi_early_request_hash_count --;
 		}
 	}
-	_STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %ld\n", found, (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
+	_STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %ld\n", found, (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
 	return found;
 }
 
+/**
+ * Detach and return the second-level entry (the list of early requests)
+ * stored for (comm, source) and data_tag, or NULL when there is none.
+ * The global early-request counter is decreased by the number of requests
+ * held in the detached list. Lookup and removal run with
+ * _starpu_mpi_early_request_mutex held. The entry is only unlinked from the
+ * hash here, it is not freed; the first-level (comm, rank) entry is left in
+ * place.
+ */
+struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm)
+{
+	struct _starpu_mpi_node_tag node_tag;
+	struct _starpu_mpi_early_request_hashlist *hashlist;
+	struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist = NULL;
+
+	/* Zero the whole key first so that padding bytes do not disturb the byte-wise hash lookup */
+	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
+	node_tag.node.comm = comm;
+	node_tag.node.rank = source;
+	node_tag.data_tag = data_tag;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
+	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %ld\n", (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag);
+	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
+	if (hashlist)
+	{
+		HASH_FIND(hh, hashlist->datahash, &node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+		if (tag_hashlist)
+		{
+			_starpu_mpi_early_request_hash_count -= _starpu_mpi_req_list_size(&tag_hashlist->list);
+			HASH_DEL(hashlist->datahash, tag_hashlist);
+		}
+	}
+	/* Trace the entry that is actually returned, not the first-level one */
+	_STARPU_MPI_DEBUG(100, "Found hashlist %p with comm %ld source %d tag %ld\n", tag_hashlist, (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
+	return tag_hashlist;
+}
+
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req)
 {
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
-	_STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %ld in the application request hashmap\n", req, (long int)req->node_tag.comm, req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %ld in the application request hashmap\n", req, (long int)req->node_tag.node.comm, req->node_tag.node.rank, req->node_tag.data_tag);
 
 	struct _starpu_mpi_early_request_hashlist *hashlist;
-	HASH_FIND(hh, _starpu_mpi_early_request_hash, &req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
+	HASH_FIND(hh, _starpu_mpi_early_request_hash, &req->node_tag.node, sizeof(struct _starpu_mpi_node), hashlist);
 	if (hashlist == NULL)
 	{
 		_STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_request_hashlist));
-		_starpu_mpi_req_list_init(&hashlist->list);
-		hashlist->node_tag = req->node_tag;
-		HASH_ADD(hh, _starpu_mpi_early_request_hash, node_tag, sizeof(hashlist->node_tag), hashlist);
+		hashlist->node = req->node_tag.node;
+		hashlist->datahash = NULL;
+		HASH_ADD(hh, _starpu_mpi_early_request_hash, node, sizeof(hashlist->node), hashlist);
+	}
+
+	struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist;
+	HASH_FIND(hh, hashlist->datahash, &req->node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist);
+	if (tag_hashlist == NULL)
+	{
+		_STARPU_MPI_MALLOC(tag_hashlist, sizeof(struct _starpu_mpi_early_request_tag_hashlist));
+		tag_hashlist->data_tag = req->node_tag.data_tag;
+		HASH_ADD(hh, hashlist->datahash, data_tag, sizeof(tag_hashlist->data_tag), tag_hashlist);
+		_starpu_mpi_req_list_init(&tag_hashlist->list);
 	}
-	_starpu_mpi_req_list_push_back(&hashlist->list, req);
+
+	_starpu_mpi_req_list_push_back(&tag_hashlist->list, req);
 	_starpu_mpi_early_request_hash_count ++;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
 }

+ 10 - 1
mpi/src/mpi/starpu_mpi_early_request.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2017,2019                           CNRS
  * Copyright (C) 2009-2014                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -31,6 +31,13 @@ extern "C"
 {
 #endif
 
+/** Second-level hash entry: the early requests queued for one data tag. */
+struct _starpu_mpi_early_request_tag_hashlist
+{
+	/** requests queued under this tag */
+	struct _starpu_mpi_req_list list;
+	/** uthash bookkeeping handle */
+	UT_hash_handle hh;
+	/** hash key of this entry */
+	starpu_mpi_tag_t data_tag;
+};
+
 void _starpu_mpi_early_request_init(void);
 void _starpu_mpi_early_request_shutdown(void);
 int _starpu_mpi_early_request_count(void);
@@ -39,6 +46,8 @@ void _starpu_mpi_early_request_check_termination(void);
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
 struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 
+struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
+
 #ifdef __cplusplus
 }
 #endif

+ 52 - 52
mpi/src/mpi/starpu_mpi_mpi.c

@@ -152,7 +152,7 @@ void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, in
 	{
 		if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data)
 		{
-			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.rank);
+			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.node.rank);
 			_starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]);
 		}
 		/* TODO: handle redirect requests */
@@ -166,7 +166,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 
 	_STARPU_MPI_INC_POSTED_REQUESTS(-1);
 
-	_STARPU_MPI_DEBUG(0, "new req %p srcdst %d tag %"PRIi64" and type %s %d\n", req, req->node_tag.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req);
+	_STARPU_MPI_DEBUG(0, "new req %p srcdst %d tag %"PRIi64" and type %s %d\n", req, req->node_tag.node.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 
@@ -193,7 +193,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 			}
 
 			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-					  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
+					  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr,
 					  req->datatype_name, (int)req->count, req->registered_datatype);
 			_starpu_mpi_req_list_push_front(&ready_recv_requests, req);
 			_STARPU_MPI_INC_READY_REQUESTS(+1);
@@ -245,8 +245,8 @@ void _starpu_mpi_submit_ready_request(void *arg)
 			/* Case: no matching data has been received. Store the receive request as an early_request. */
 			else
 			{
-				struct _starpu_mpi_req *sync_req = _starpu_mpi_sync_data_find(req->node_tag.data_tag, req->node_tag.rank, req->node_tag.comm);
-				_STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %"PRIi64" and src %d = %p\n", req->node_tag.data_tag, req->node_tag.rank, sync_req);
+				struct _starpu_mpi_req *sync_req = _starpu_mpi_sync_data_find(req->node_tag.data_tag, req->node_tag.node.rank, req->node_tag.node.comm);
+				_STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %"PRIi64" and src %d = %p\n", req->node_tag.data_tag, req->node_tag.node.rank, sync_req);
 				if (sync_req)
 				{
 					req->sync = 1;
@@ -268,7 +268,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 				}
 				else
 				{
-					_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %"PRIi64") into the request hashmap\n", req, req->node_tag.rank, req->node_tag.data_tag);
+					_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %"PRIi64") into the request hashmap\n", req, req->node_tag.node.rank, req->node_tag.data_tag);
 					_starpu_mpi_early_request_enqueue(req);
 				}
 			}
@@ -282,7 +282,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 			_starpu_mpi_req_list_push_front(&ready_recv_requests, req);
 		_STARPU_MPI_INC_READY_REQUESTS(+1);
 		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-				  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
+				  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr,
 				  req->datatype_name, (int)req->count, req->registered_datatype);
 	}
 
@@ -359,22 +359,22 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(0, "post MPI isend request %p type %s tag %"PRIi64" src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
+	_STARPU_MPI_DEBUG(0, "post MPI isend request %p type %s tag %"PRIi64" src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
 
-	_starpu_mpi_comm_amounts_inc(req->node_tag.comm, req->node_tag.rank, req->datatype, req->count);
+	_starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node_tag.node.rank, req->datatype, req->count);
 
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag, 0);
+	_STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0);
 
 	if (req->sync == 0)
 	{
-		_STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->backend->data_request);
+		_STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
+		req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
 	else
 	{
-		_STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Issend(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.comm, &req->backend->data_request);
+		_STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
+		req->ret = MPI_Issend(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.node.comm, &req->backend->data_request);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Issend returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
 
@@ -382,7 +382,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 	_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
 #endif
 
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle), req->pre_sync_jobid);
+	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle), req->pre_sync_jobid);
 
 	/* somebody is perhaps waiting for the MPI request to be posted */
 	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
@@ -412,9 +412,9 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 
 		MPI_Type_size(req->datatype, &size);
 		req->backend->envelope->size = (starpu_ssize_t)req->count * size;
-		_STARPU_MPI_DEBUG(20, "Post MPI isend count (%ld) datatype_size %ld request to %d\n",req->count,starpu_data_get_size(req->data_handle), req->node_tag.rank);
-		_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.comm);
-		ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->backend->size_req);
+		_STARPU_MPI_DEBUG(20, "Post MPI isend count (%ld) datatype_size %ld request to %d\n",req->count,starpu_data_get_size(req->data_handle), req->node_tag.node.rank);
+		_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm);
+		ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req);
 		STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending envelope, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret));
 	}
 	else
@@ -427,10 +427,10 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 		if (req->backend->envelope->size != -1)
  		{
  			// We already know the size of the data, let's send it to overlap with the packing of the data
-			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
+			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank);
 			req->count = req->backend->envelope->size;
-			_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.comm);
-			ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->backend->size_req);
+			_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm);
+			ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req);
 			STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret));
  		}
 
@@ -439,9 +439,9 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 		if (req->backend->envelope->size == -1)
  		{
  			// We know the size now, let's send it
-			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (second call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
-			_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.comm);
-			ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->backend->size_req);
+			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (second call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank);
+			_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm);
+			ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req);
 			STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret));
  		}
  		else
@@ -474,9 +474,9 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(0, "post MPI irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+	_STARPU_MPI_DEBUG(0, "post MPI irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
-	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	if (req->sync)
 	{
@@ -484,9 +484,9 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 		_STARPU_MPI_CALLOC(_envelope, 1, sizeof(struct _starpu_mpi_envelope));
 		_envelope->mode = _STARPU_MPI_ENVELOPE_SYNC_READY;
 		_envelope->data_tag = req->node_tag.data_tag;
-		_STARPU_MPI_DEBUG(20, "Telling node %d it can send the data and waiting for the data back ...\n", req->node_tag.rank);
-		_STARPU_MPI_COMM_TO_DEBUG(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, _envelope->data_tag, req->node_tag.comm);
-		req->ret = MPI_Send(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm);
+		_STARPU_MPI_DEBUG(20, "Telling node %d it can send the data and waiting for the data back ...\n", req->node_tag.node.rank);
+		_STARPU_MPI_COMM_TO_DEBUG(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, _envelope->data_tag, req->node_tag.node.comm);
+		req->ret = MPI_Send(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Send returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 		free(_envelope);
 		_envelope = NULL;
@@ -494,20 +494,20 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 
 	if (req->sync)
 	{
-		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.comm, &req->backend->data_request);
+		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
+		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.node.comm, &req->backend->data_request);
 	}
 	else
 	{
-		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->backend->data_request);
+		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm);
+		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request);
 #ifdef STARPU_SIMGRID
 		_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
 #endif
 	}
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 
-	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	/* somebody is perhaps waiting for the MPI request to be posted */
 	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
@@ -532,7 +532,7 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 	/* Which is the mpi request we are waiting for ? */
 	struct _starpu_mpi_req *req = waiting_req->backend->other_request;
 
-	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 	if (req->backend->data_request != MPI_REQUEST_NULL)
 	{
 		// TODO: Fix for STARPU_SIMGRID
@@ -542,7 +542,7 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 		req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
-	_STARPU_MPI_TRACE_UWAIT_END(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	_starpu_mpi_handle_request_termination(req);
 
@@ -608,10 +608,10 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 	struct _starpu_mpi_req *req = testing_req->backend->other_request;
 
 	_STARPU_MPI_DEBUG(0, "Test request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
+			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr,
 			  req->datatype_name, (int)req->count, req->registered_datatype);
 
-	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 
 #ifdef STARPU_SIMGRID
 	req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, testing_req->flag);
@@ -622,7 +622,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 
-	_STARPU_MPI_TRACE_UTESTING_END(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_UTESTING_END(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	if (*testing_req->flag)
 	{
@@ -710,7 +710,7 @@ static void _starpu_mpi_barrier_func(struct _starpu_mpi_req *barrier_req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	barrier_req->ret = MPI_Barrier(barrier_req->node_tag.comm);
+	barrier_req->ret = MPI_Barrier(barrier_req->node_tag.node.comm);
 	STARPU_MPI_ASSERT_MSG(barrier_req->ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(barrier_req->ret));
 
 	_starpu_mpi_handle_request_termination(barrier_req);
@@ -753,7 +753,7 @@ int _starpu_mpi_barrier(MPI_Comm comm)
 	barrier_req->prio = INT_MAX;
 	barrier_req->func = _starpu_mpi_barrier_func;
 	barrier_req->request_type = BARRIER_REQ;
-	barrier_req->node_tag.comm = comm;
+	barrier_req->node_tag.node.comm = comm;
 
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 	_starpu_mpi_submit_ready_request(barrier_req);
@@ -797,7 +797,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 	_STARPU_MPI_LOG_IN();
 
 	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
+			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr,
 			  req->datatype_name, (int)req->count, req->registered_datatype, req->backend->internal_req);
 
 	if (req->backend->internal_req)
@@ -834,7 +834,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 				_starpu_mpi_datatype_free(req->data_handle, &req->datatype);
 			}
 		}
-		_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.rank, req->node_tag.data_tag);
+		_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.node.rank, req->node_tag.data_tag);
 	}
 
 	_starpu_mpi_release_req_data(req);
@@ -958,8 +958,8 @@ static void _starpu_mpi_test_detached_requests(void)
 	{
 		STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
 
-		_STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
-		//_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.rank);
+		_STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
+		//_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.node.rank);
 #ifdef STARPU_SIMGRID
 		req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag);
 #else
@@ -968,7 +968,7 @@ static void _starpu_mpi_test_detached_requests(void)
 #endif
 
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
-		_STARPU_MPI_TRACE_TEST_END(req->node_tag.rank, req->node_tag.data_tag);
+		_STARPU_MPI_TRACE_TEST_END(req->node_tag.node.rank, req->node_tag.data_tag);
 
 		if (!flag)
 		{
@@ -980,7 +980,7 @@ static void _starpu_mpi_test_detached_requests(void)
 		     	struct _starpu_mpi_req *next_req;
 			next_req = _starpu_mpi_req_list_next(req);
 
-			_STARPU_MPI_TRACE_COMPLETE_BEGIN(req->request_type, req->node_tag.rank, req->node_tag.data_tag);
+			_STARPU_MPI_TRACE_COMPLETE_BEGIN(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag);
 
 			STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
 			if (req->request_type == SEND_REQ)
@@ -989,7 +989,7 @@ static void _starpu_mpi_test_detached_requests(void)
 			STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
 			_starpu_mpi_handle_request_termination(req);
 
-			_STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.rank, req->node_tag.data_tag);
+			_STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag);
 
 			STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 			/* We don't want to free internal non-detached
@@ -1046,7 +1046,7 @@ static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req)
 
 	/* submit the request to MPI */
 	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle,
+			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle,
 			  req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 	req->func(req);
 
@@ -1327,7 +1327,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					{
 						if (envelope->sync)
 						{
-							_STARPU_MPI_DEBUG(2000, "-------------------------> adding request for tag %l"PRIi64"\n", envelope->data_tag);
+							_STARPU_MPI_DEBUG(2000, "-------------------------> adding request for tag %"PRIi64"\n", envelope->data_tag);
 							struct _starpu_mpi_req *new_req;
 #ifdef STARPU_DEVEL
 #warning creating a request is not really useful.
@@ -1336,9 +1336,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 							_starpu_mpi_request_init(&new_req);
 							new_req->request_type = RECV_REQ;
 							new_req->data_handle = NULL;
-							new_req->node_tag.rank = envelope_status.MPI_SOURCE;
+							new_req->node_tag.node.rank = envelope_status.MPI_SOURCE;
 							new_req->node_tag.data_tag = envelope->data_tag;
-							new_req->node_tag.comm = envelope_comm;
+							new_req->node_tag.node.comm = envelope_comm;
 							new_req->detached = 1;
 							new_req->sync = 1;
 							new_req->callback = NULL;

+ 6 - 6
mpi/src/mpi/starpu_mpi_sync_data.c

@@ -63,11 +63,11 @@ void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node
 
 	if (hashlist == NULL)
 	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld does not exist\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld does not exist\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
 	}
 	else if (_starpu_mpi_req_list_empty(&hashlist->list))
 	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld is empty\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld is empty\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag);
 	}
 	else
 	{
@@ -76,7 +76,7 @@ void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node
 		     cur != _starpu_mpi_req_list_end(&hashlist->list);
 		     cur = _starpu_mpi_req_list_next(cur))
 		{
-			_STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %ld: %p\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag, cur);
+			_STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %ld: %p\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag, cur);
 		}
 	}
 }
@@ -99,8 +99,8 @@ struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, in
 	struct _starpu_mpi_sync_data_handle_hashlist *found;
 
 	memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
-	node_tag.comm = comm;
-	node_tag.rank = source;
+	node_tag.node.comm = comm;
+	node_tag.node.rank = source;
 	node_tag.data_tag = data_tag;
 
 	_STARPU_MPI_DEBUG(60, "Looking for sync_data_handle with comm %ld source %d tag %ld in the hashmap\n", (long int)comm, source, data_tag);
@@ -132,7 +132,7 @@ void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *sync_req)
 {
 	struct _starpu_mpi_sync_data_handle_hashlist *hashlist;
 
-	_STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %ld in the hashmap\n", sync_req, (long int)sync_req->node_tag.comm, sync_req->node_tag.rank, sync_req->node_tag.data_tag);
+	_STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %ld in the hashmap\n", sync_req, (long int)sync_req->node_tag.node.comm, sync_req->node_tag.node.rank, sync_req->node_tag.data_tag);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
 	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &sync_req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);

+ 19 - 19
mpi/src/nmad/starpu_mpi_nmad.c

@@ -88,11 +88,11 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(30, "post NM isend request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
+	_STARPU_MPI_DEBUG(30, "post NM isend request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
 
-	_starpu_mpi_comm_amounts_inc(req->node_tag.comm, req->node_tag.rank, req->datatype, req->count);
+	_starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node_tag.node.rank, req->datatype, req->count);
 
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag, 0);
+	_STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0);
 
 	struct nm_data_s data;
 	nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count);
@@ -111,7 +111,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 		STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Issend returning %d", req->ret);
 	}
 
-	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle), req->pre_sync_jobid);
+	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle), req->pre_sync_jobid);
 
 	_starpu_mpi_handle_pending_request(req);
 
@@ -140,7 +140,7 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 		if (psize != -1)
 		{
 			// We already know the size of the data, let's send it to overlap with the packing of the data
-			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", psize, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
+			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", psize, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank);
 			req->count = psize;
 			//ret = nm_sr_isend(nm_mpi_communicator_get_session(p_req->p_comm),nm_mpi_communicator_get_gate(p_comm,req->srcdst), req->mpi_tag,&req->count, sizeof(req->count), &req->backend->size_req);
 			ret = nm_sr_isend(req->backend->session,req->backend->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->backend->size_req);
@@ -154,7 +154,7 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 		if (psize == -1)
 		{
 			// We know the size now, let's send it
-			_STARPU_MPI_DEBUG(1, "Sending size %ld (%ld %s) with tag %ld to node %d (second call to pack)\n", req->count, sizeof(req->count), "MPI_BYTE", req->node_tag.data_tag, req->node_tag.rank);
+			_STARPU_MPI_DEBUG(1, "Sending size %ld (%ld %s) with tag %ld to node %d (second call to pack)\n", req->count, sizeof(req->count), "MPI_BYTE", req->node_tag.data_tag, req->node_tag.node.rank);
 			ret = nm_sr_isend(req->backend->session,req->backend->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->backend->size_req);
 			STARPU_ASSERT_MSG(ret == NM_ESUCCESS, "when sending size, nm_sr_isend returning %d", ret);
 		}
@@ -179,9 +179,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(20, "post NM irecv request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+	_STARPU_MPI_DEBUG(20, "post NM irecv request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
-	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	//req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
 	struct nm_data_s data;
@@ -190,7 +190,7 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 	nm_sr_recv_unpack_data(req->backend->session, &(req->backend->data_request), &data);
 	nm_sr_recv_irecv(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL);
 
-	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	_starpu_mpi_handle_pending_request(req);
 
@@ -230,8 +230,8 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 		struct _starpu_mpi_irecv_size_callback *callback = malloc(sizeof(struct _starpu_mpi_irecv_size_callback));
 		callback->req = req;
 		starpu_variable_data_register(&callback->handle, 0, (uintptr_t)&(callback->req->count), sizeof(callback->req->count));
-		_STARPU_MPI_DEBUG(4, "Receiving size with tag %ld from node %d\n", req->node_tag.data_tag, req->node_tag.rank);
-		_starpu_mpi_irecv_common(callback->handle, req->node_tag.rank, req->node_tag.data_tag, req->node_tag.comm, 1, 0, _starpu_mpi_irecv_size_callback, callback,1,0,0);
+		_STARPU_MPI_DEBUG(4, "Receiving size with tag %ld from node %d\n", req->node_tag.data_tag, req->node_tag.node.rank);
+		_starpu_mpi_irecv_common(callback->handle, req->node_tag.node.rank, req->node_tag.data_tag, req->node_tag.node.comm, 1, 0, _starpu_mpi_irecv_size_callback, callback,1,0,0);
 	}
 
 }
@@ -243,7 +243,7 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 /********************************************************/
 
 #define _starpu_mpi_req_status(PUBLIC_REQ,STATUS) do {			\
-	STATUS->MPI_SOURCE=PUBLIC_REQ->node_tag.rank; /**< field name mandatory by spec */ \
+	STATUS->MPI_SOURCE=PUBLIC_REQ->node_tag.node.rank; /**< field name mandatory by spec */ \
 	STATUS->MPI_TAG=PUBLIC_REQ->node_tag.data_tag;    /**< field name mandatory by spec */ \
 	STATUS->MPI_ERROR=PUBLIC_REQ->ret;  /**< field name mandatory by spec */ \
 	STATUS->size=PUBLIC_REQ->count;       /**< size of data received */ \
@@ -286,9 +286,9 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 	struct _starpu_mpi_req *req = *public_req;
 	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
 	_STARPU_MPI_DEBUG(2, "Test request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
-	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	/* we must do a test_locked to avoid a race condition:
 	 * without it, req_cond could still be in use and couldn't be freed */
@@ -296,7 +296,7 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 	if (*flag && status!=MPI_STATUS_IGNORE)
 		_starpu_mpi_req_status(req,status);
 
-	_STARPU_MPI_TRACE_UTESTING_END(req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_UTESTING_END(req->node_tag.node.rank, req->node_tag.data_tag);
 
 	if(*flag)
 	{
@@ -352,7 +352,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,n
 	_STARPU_MPI_LOG_IN();
 
 	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
 	if (req->request_type == RECV_REQ || req->request_type == SEND_REQ)
 	{
@@ -384,7 +384,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,n
 			_starpu_mpi_datatype_free(req->data_handle, &req->datatype);
 		}
 	}
-	_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.node.rank, req->node_tag.data_tag);
 	_starpu_mpi_release_req_data(req);
 
 	/* Execute the specified callback, if any */
@@ -453,7 +453,7 @@ void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, in
 	{
 		if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data)
 		{
-			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.rank);
+			_STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.node.rank);
 			_starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]);
 		}
 		/* TODO: handle redirect requests */
@@ -468,7 +468,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 
 	/* submit the request to MPI directly from submitter */
 	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 	req->func(req);
 
 	_STARPU_MPI_LOG_OUT();

+ 1 - 1
mpi/src/nmad/starpu_mpi_nmad_backend.c

@@ -46,7 +46,7 @@ void _starpu_mpi_nmad_backend_request_init(struct _starpu_mpi_req *req)
 
 void _starpu_mpi_nmad_backend_request_fill(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req)
 {
-	nm_mpi_nmad_dest(&req->backend->session, &req->backend->gate, comm, req->node_tag.rank);
+	nm_mpi_nmad_dest(&req->backend->session, &req->backend->gate, comm, req->node_tag.node.rank);
 }
 
 void _starpu_mpi_nmad_backend_request_destroy(struct _starpu_mpi_req *req)

+ 5 - 5
mpi/src/starpu_mpi.c

@@ -261,8 +261,8 @@ struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle)
 		_STARPU_CALLOC(mpi_data, 1, sizeof(struct _starpu_mpi_data));
 		mpi_data->magic = 42;
 		mpi_data->node_tag.data_tag = -1;
-		mpi_data->node_tag.rank = -1;
-		mpi_data->node_tag.comm = MPI_COMM_WORLD;
+		mpi_data->node_tag.node.rank = -1;
+		mpi_data->node_tag.node.comm = MPI_COMM_WORLD;
 		_starpu_spin_init(&mpi_data->coop_lock);
 		data_handle->mpi_data = mpi_data;
 		_starpu_mpi_cache_data_init(data_handle);
@@ -283,8 +283,8 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_
 	if (rank != -1)
 	{
 		_STARPU_MPI_TRACE_DATA_SET_RANK(data_handle, rank);
-		mpi_data->node_tag.rank = rank;
-		mpi_data->node_tag.comm = comm;
+		mpi_data->node_tag.node.rank = rank;
+		mpi_data->node_tag.node.comm = comm;
 	}
 }
 
@@ -301,7 +301,7 @@ void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_
 int starpu_mpi_data_get_rank(starpu_data_handle_t data)
 {
 	STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data);
-	return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.rank;
+	return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.node.rank;
 }
 
 starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t data)

+ 2 - 2
mpi/src/starpu_mpi_cache.c

@@ -260,7 +260,7 @@ void _starpu_mpi_cache_sent_data_clear(starpu_data_handle_t data_handle)
 		return;
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
-	starpu_mpi_comm_size(mpi_data->node_tag.comm, &size);
+	starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &size);
 	for(n=0 ; n<size ; n++)
 	{
 		if (mpi_data->cache_sent[n] == 1)
@@ -326,7 +326,7 @@ static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle)
 	if (_starpu_cache_enabled == 0)
 		return;
 
-	starpu_mpi_comm_size(mpi_data->node_tag.comm, &nb_nodes);
+	starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &nb_nodes);
 	for(i=0 ; i<nb_nodes ; i++)
 	{
 		if (mpi_data->cache_sent[i] == 1)

+ 5 - 5
mpi/src/starpu_mpi_coop_sends.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2018                                Université de Bordeaux
  * Copyright (C) 2012,2013,2016,2017                      Inria
  *
@@ -184,7 +184,7 @@ static int _starpu_mpi_coop_send_compatible(struct _starpu_mpi_req *req, struct
 
 	prevreq = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs);
 	return /* we can cope with tag being different */
-	          prevreq->node_tag.comm == req->node_tag.comm
+	          prevreq->node_tag.node.comm == req->node_tag.node.comm
 	       && prevreq->sequential_consistency == req->sequential_consistency;
 }
 
@@ -212,7 +212,7 @@ void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_
 					tofree = coop_sends;
 				}
 				coop_sends = mpi_data->coop_sends;
-				_STARPU_MPI_DEBUG(0, "%p: add to cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+				_STARPU_MPI_DEBUG(0, "%p: add to cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.node.rank);
 				_starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req);
 				coop_sends->n++;
 				req->coop_sends_head = coop_sends;
@@ -222,7 +222,7 @@ void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_
 			else
 			{
 				/* Nope, incompatible, put ours instead */
-				_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+				_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.node.rank);
 				mpi_data->coop_sends = coop_sends;
 				first = 1;
 				_starpu_spin_unlock(&mpi_data->coop_lock);
@@ -234,7 +234,7 @@ void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_
 		else if (coop_sends)
 		{
 			/* Nobody else and we have allocated one, we're first! */
-			_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.rank);
+			_STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.node.rank);
 			mpi_data->coop_sends = coop_sends;
 			first = 1;
 			done = 1;

+ 6 - 1
mpi/src/starpu_mpi_private.h

@@ -169,10 +169,15 @@ enum _starpu_mpi_request_type
 	UNKNOWN_REQ=6,
 };
 
-struct _starpu_mpi_node_tag
+struct _starpu_mpi_node
 {
 	MPI_Comm comm;
 	int rank;
+};
+
+struct _starpu_mpi_node_tag
+{
+	struct _starpu_mpi_node node;
 	starpu_mpi_tag_t data_tag;
 };
 

+ 4 - 4
mpi/src/starpu_mpi_req.c

@@ -34,9 +34,9 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 	(*req)->count = -1;
 	(*req)->registered_datatype = -1;
 
-	(*req)->node_tag.rank = -1;
+	(*req)->node_tag.node.rank = -1;
 	(*req)->node_tag.data_tag = -1;
-	(*req)->node_tag.comm = 0;
+	(*req)->node_tag.node.comm = 0;
 
 	(*req)->func = NULL;
 
@@ -86,9 +86,9 @@ struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handl
 	if (_starpu_mpi_use_prio)
 		req->prio = prio;
 	req->data_handle = data_handle;
-	req->node_tag.rank = srcdst;
+	req->node_tag.node.rank = srcdst;
 	req->node_tag.data_tag = data_tag;
-	req->node_tag.comm = comm;
+	req->node_tag.node.comm = comm;
 	req->detached = detached;
 	req->sync = sync;
 	req->callback = callback;

+ 3 - 1
mpi/tests/Makefile.am

@@ -118,7 +118,8 @@ starpu_mpi_TESTS +=				\
 	policy_selection			\
 	policy_selection2			\
 	ring_async_implicit			\
-	temporary
+	temporary				\
+	early_stuff
 
 if !STARPU_SIMGRID
 starpu_mpi_TESTS +=				\
@@ -183,6 +184,7 @@ noinst_PROGRAMS =				\
 	ring_async				\
 	ring_async_implicit			\
 	temporary				\
+	early_stuff				\
 	block_interface				\
 	block_interface_pinned			\
 	attr					\

+ 119 - 0
mpi/tests/early_stuff.c

@@ -0,0 +1,119 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <math.h>
+#include "helper.h"
+
+#ifndef STARPU_USE_MPI_MPI
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED; /* this branch is compiled when STARPU_USE_MPI_MPI is not defined: nothing is run, the skip status is returned */
+}
+
+#else
+
+#include <mpi/starpu_mpi_early_data.h>
+#include <mpi/starpu_mpi_early_request.h>
+#include <mpi/starpu_mpi_mpi_backend.h>
+
+/* Store two early data handles, then check that extract and find each hand back the expected one. */
+void early_data(void)
+{
+	struct _starpu_mpi_early_data_handle *edh[2];
+	struct _starpu_mpi_envelope envelope[2];
+	struct _starpu_mpi_node_tag node_tag[2];
+	struct _starpu_mpi_early_data_handle *early;
+	struct _starpu_mpi_early_data_handle_tag_hashlist *hash;
+
+	memset(&node_tag[0], 0, sizeof(struct _starpu_mpi_node_tag));
+	node_tag[0].node.rank = 1;
+	node_tag[0].node.comm = MPI_COMM_WORLD;
+	node_tag[0].data_tag = 42;
+
+	memset(&node_tag[1], 0, sizeof(struct _starpu_mpi_node_tag));
+	node_tag[1].node.rank = 2;
+	node_tag[1].node.comm = MPI_COMM_WORLD;
+	node_tag[1].data_tag = 84;
+	/* NOTE(review): only data_tag is set in each envelope; confirm the create call reads no other field. */
+	envelope[0].data_tag = node_tag[0].data_tag;
+	edh[0] = _starpu_mpi_early_data_create(&envelope[0], node_tag[0].node.rank, node_tag[0].node.comm);
+	envelope[1].data_tag = node_tag[1].data_tag;
+	edh[1] = _starpu_mpi_early_data_create(&envelope[1], node_tag[1].node.rank, node_tag[1].node.comm);
+
+	_starpu_mpi_early_data_add(edh[0]);
+	_starpu_mpi_early_data_add(edh[1]);
+	/* Extracting with node_tag[1] must give a list of exactly one handle, carrying that same (comm, rank, tag). */
+	hash = _starpu_mpi_early_data_extract(&node_tag[1]);
+	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 1);
+	early = _starpu_mpi_early_data_handle_list_pop_front(&hash->list);
+	STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag);
+	STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 0);
+	/* Look up the handle added for node_tag[0] and check it carries the same (comm, rank, tag). */
+	early = _starpu_mpi_early_data_find(&node_tag[0]);
+	STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag);
+}
+
+/* Enqueue two early requests, then check that dequeue and extract each hand back the expected one. */
+void early_request(void)
+{
+	struct _starpu_mpi_req req[2];
+	struct _starpu_mpi_req *early;
+	struct _starpu_mpi_early_request_tag_hashlist *hash;
+
+	memset(&req[0].node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
+	req[0].node_tag.node.rank = 1;
+	req[0].node_tag.node.comm = MPI_COMM_WORLD;
+	req[0].node_tag.data_tag = 42;
+	memset(&req[1].node_tag, 0, sizeof(struct _starpu_mpi_node_tag));
+	req[1].node_tag.node.rank = 2;
+	req[1].node_tag.node.comm = MPI_COMM_WORLD;
+	req[1].node_tag.data_tag = 84;
+	/* NOTE(review): only node_tag is initialised in these stack requests; confirm enqueue reads no other field. */
+	_starpu_mpi_early_request_enqueue(&req[1]);
+	_starpu_mpi_early_request_enqueue(&req[0]);
+	/* Dequeuing with req[0]'s (tag, rank, comm) must return a request carrying the same triple. */
+	early = _starpu_mpi_early_request_dequeue(req[0].node_tag.data_tag, req[0].node_tag.node.rank, req[0].node_tag.node.comm);
+	STARPU_ASSERT(early->node_tag.data_tag == req[0].node_tag.data_tag && early->node_tag.node.rank == req[0].node_tag.node.rank && early->node_tag.node.comm == req[0].node_tag.node.comm);
+	/* Extracting with req[1]'s triple must give a list of exactly one request, which is then popped and compared. */
+	hash = _starpu_mpi_early_request_extract(req[1].node_tag.data_tag, req[1].node_tag.node.rank, req[1].node_tag.node.comm);
+	STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 1);
+	early = _starpu_mpi_req_list_pop_front(&hash->list);
+	STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 0);
+	STARPU_ASSERT(early->node_tag.data_tag == req[1].node_tag.data_tag && early->node_tag.node.rank == req[1].node_tag.node.rank && early->node_tag.node.comm == req[1].node_tag.node.comm);
+}
+
+/* Test driver: start MPI and StarPU-MPI, run both checks, shut down; the unused locals were dropped. */
+int main(int argc, char **argv)
+{
+	int ret;
+	int mpi_init;
+
+	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
+	ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");
+
+	early_data();
+	early_request();
+
+	starpu_mpi_shutdown();
+
+	if (!mpi_init)
+		MPI_Finalize();
+	return 0;
+}
+
+#endif

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -1390,7 +1390,7 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 			}
 			else
 			{
-				_STARPU_DEBUG("Performance model file %s does not exist or is not readable\n", path);
+				_STARPU_DEBUG("Performance model file %s does not exist or is not readable: %s\n", path, strerror(errno));
 			}
 		}
 

+ 2 - 0
src/core/task.c

@@ -1603,6 +1603,8 @@ struct starpu_task *starpu_task_ft_create_retry
 	new_task->regenerate = 0;
 	new_task->no_submitorder = 1;
 	new_task->failed = 0;
+	new_task->scheduled = 0;
+	new_task->prefetched = 0;
 	new_task->status = STARPU_TASK_INVALID;
 	new_task->profiling_info = NULL;
 	new_task->prev = NULL;

+ 2 - 1
src/core/workers.c

@@ -968,7 +968,8 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 		 * before starting another one, to make sure they appear in
 		 * order in the trace.
 		 */
-		if (workerarg->run_by_starpu == 1 && workerarg->arch != STARPU_MPI_MS_WORKER)
+		if ((!workerarg->set || workerarg->set->workers == workerarg)
+			&& workerarg->run_by_starpu == 1 && workerarg->arch != STARPU_MPI_MS_WORKER)
 		{
 			STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
 			while (!workerarg->worker_is_running)

+ 125 - 0
tools/Makefile.am

@@ -90,7 +90,32 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12.attila	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21.attila	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_atlas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_goto.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.attila	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_openblas.attila	\
 	perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila	\
+\
 	perfmodels/sampling/codelets/45/chol_model_11.hannibal	\
 	perfmodels/sampling/codelets/45/chol_model_21.hannibal	\
 	perfmodels/sampling/codelets/45/chol_model_22.hannibal	\
@@ -98,6 +123,19 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal	\
 	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal	\
 	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_atlas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_atlas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_atlas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_atlas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_goto.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_goto.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_goto.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_goto.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_openblas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_openblas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_openblas.hannibal	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_openblas.hannibal	\
+\
 	perfmodels/sampling/codelets/45/chol_model_11.hannibal-pitch	\
 	perfmodels/sampling/codelets/45/chol_model_21.hannibal-pitch	\
 	perfmodels/sampling/codelets/45/chol_model_22.hannibal-pitch	\
@@ -105,6 +143,19 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal-pitch	\
 	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal-pitch	\
 	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_atlas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_atlas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_atlas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_atlas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_goto.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_goto.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_goto.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_goto.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_openblas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_openblas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_openblas.hannibal-pitch	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_openblas.hannibal-pitch	\
+\
 	perfmodels/sampling/codelets/45/chol_model_11.idgraf	\
 	perfmodels/sampling/codelets/45/chol_model_21.idgraf	\
 	perfmodels/sampling/codelets/45/chol_model_22.idgraf	\
@@ -121,6 +172,31 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12.idgraf	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21.idgraf	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_atlas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_goto.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.idgraf	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_openblas.idgraf	\
+\
 	perfmodels/sampling/codelets/45/chol_model_11.mirage	\
 	perfmodels/sampling/codelets/45/chol_model_21.mirage	\
 	perfmodels/sampling/codelets/45/chol_model_22.mirage	\
@@ -137,7 +213,32 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12.mirage	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21.mirage	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_atlas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_goto.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.mirage	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_openblas.mirage	\
 	perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage	\
+\
 	perfmodels/sampling/codelets/45/chol_model_11.sirocco	\
 	perfmodels/sampling/codelets/45/chol_model_21.sirocco	\
 	perfmodels/sampling/codelets/45/chol_model_22.sirocco	\
@@ -154,6 +255,30 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12.sirocco	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21.sirocco	\
 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_atlas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_goto.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.sirocco	\
+	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22_openblas.sirocco	\
 	perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco
 
 EXTRA_DIST =				\