Просмотр исходного кода

mpi/src: move fortran code in dedicated files

Nathalie Furmento лет назад: 8
Родитель
Commit
33f2628107

+ 5 - 2
mpi/src/Makefile.am

@@ -68,7 +68,8 @@ noinst_HEADERS =					\
 	starpu_mpi_early_request.h			\
 	starpu_mpi_sync_data.h				\
 	starpu_mpi_comm.h				\
-	starpu_mpi_tag.h
+	starpu_mpi_tag.h				\
+	starpu_mpi_task_insert.h
 
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi.c					\
@@ -85,7 +86,9 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi_early_request.c			\
 	starpu_mpi_sync_data.c				\
 	starpu_mpi_comm.c				\
-	starpu_mpi_tag.c
+	starpu_mpi_tag.c				\
+	starpu_mpi_fortran.c				\
+	starpu_mpi_task_insert_fortran.c
 
 showcheck:
 	-cat /dev/null

+ 0 - 244
mpi/src/starpu_mpi.c

@@ -1171,16 +1171,6 @@ static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req)
 	_STARPU_MPI_LOG_OUT();
 }
 
-struct _starpu_mpi_argc_argv
-{
-	int initialize_mpi;
-	int *argc;
-	char ***argv;
-	MPI_Comm comm;
-	int fargc;	// Fortran argc
-	char **fargv;	// Fortran argv
-};
-
 static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg)
 {
 	switch (thread_level)
@@ -1829,237 +1819,3 @@ int starpu_mpi_wait_for_all(MPI_Comm comm)
 	return 0;
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-/* Fortran related functions */
-struct _starpu_mpi_argc_argv *fstarpu_mpi_argcv_alloc(int argc, int initialize_mpi, int comm_present, MPI_Fint comm)
-{
-	struct _starpu_mpi_argc_argv *argcv = calloc(1,sizeof(*argcv));
-	argcv->initialize_mpi = initialize_mpi;
-	if (comm_present) {
-		argcv->comm = MPI_Comm_f2c(comm);
-	} else {
-		argcv->comm = MPI_COMM_WORLD;
-	}
-	argcv->fargc = argc;
-	argcv->argc = &argcv->fargc;
-	argcv->fargv = calloc(argc, sizeof(char *));
-	argcv->argv = &argcv->fargv;
-	return argcv;
-}
-
-void fstarpu_mpi_argcv_set_arg(struct _starpu_mpi_argc_argv *argcv, int i, int len, char *_s)
-{
-	STARPU_ASSERT(len >= 0);
-	STARPU_ASSERT(i >= 0 && i < argcv->fargc);
-	char *s = malloc(len+1);
-	memcpy(s, _s, len);
-	s[len] = '\0';
-	argcv->fargv[i] = s;
-}
-
-void fstarpu_mpi_argcv_free(struct _starpu_mpi_argc_argv *argcv)
-{
-	if (argcv->fargv != NULL)
-	{
-		int i;
-		for (i=0; i<argcv->fargc; i++)
-		{
-			free(argcv->fargv[i]);
-		}
-		free(argcv->fargv);
-	}
-	free(argcv);
-}
-
-starpu_mpi_req *fstarpu_mpi_req_alloc(void)
-{
-	return calloc(1, sizeof(starpu_mpi_req));
-}
-
-void fstarpu_mpi_req_free(starpu_mpi_req *req)
-{
-	free(req);
-}
-
-MPI_Status *fstarpu_mpi_status_alloc(void)
-{
-	return calloc(1, sizeof(MPI_Status));
-}
-
-void fstarpu_mpi_status_free(MPI_Status *status)
-{
-	free(status);
-}
-
-int fstarpu_mpi_barrier(MPI_Fint comm)
-{
-	return starpu_mpi_barrier(MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
-{
-	return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), callback, arg, seq_const);
-}
-
-int fstarpu_mpi_init_c(struct _starpu_mpi_argc_argv *argcv)
-{
-	return starpu_mpi_init_comm(argcv->argc, argcv->argv, argcv->initialize_mpi, argcv->comm);
-}
-
-void fstarpu_mpi_get_data_on_node(MPI_Fint comm, starpu_data_handle_t data_handle, int node)
-{
-	starpu_mpi_get_data_on_node(MPI_Comm_f2c(comm), data_handle, node);
-}
-
-void fstarpu_mpi_get_data_on_node_detached(MPI_Fint comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg)
-{
-	starpu_mpi_get_data_on_node_detached(MPI_Comm_f2c(comm), data_handle, node, callback, arg);
-}
-
-void fstarpu_mpi_redux_data(MPI_Fint comm, starpu_data_handle_t data_handle)
-{
-	starpu_mpi_redux_data(MPI_Comm_f2c(comm), data_handle);
-}
-
-/* scatter/gather */
-int fstarpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-{
-	return starpu_mpi_scatter_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg);
-}
-
-int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
-{
-	return starpu_mpi_gather_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg);
-}
-
-/* isend/irecv detached unlock tag */
-int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
-{
-	return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), *starpu_tag);
-}
-
-int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
-{
-	return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), *starpu_tag);
-}
-
-/* isend/irecv array detached unlock tag */
-int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
-{
-	MPI_Comm comms[array_size];
-	int i;
-	for (i = 0; i < array_size; i++)
-	{
-		comms[i] = MPI_Comm_f2c(_comms[i]);
-	}
-	int ret = starpu_mpi_isend_array_detached_unlock_tag((unsigned)array_size, data_handles, dsts, mpi_tags, comms, *starpu_tag);
-	return ret;
-}
-
-int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
-{
-	MPI_Comm comms[array_size];
-	int i;
-	for (i = 0; i < array_size; i++)
-	{
-		comms[i] = MPI_Comm_f2c(_comms[i]);
-	}
-	int ret = starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, mpi_tags, comms, *starpu_tag);
-	return ret;
-}
-
-/* isend/irecv */
-int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_isend(data_handle, req, dst, mpi_tag, MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_irecv(data_handle, req, src, mpi_tag, MPI_Comm_f2c(comm));
-}
-
-/* send/recv */
-int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_send(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, MPI_Status *status)
-{
-	return starpu_mpi_recv(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), status);
-}
-
-/* isend/irecv detached */
-int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_isend_detached(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
-}
-
-int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_irecv_detached(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
-}
-
-/* issend / issend detached */
-int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
-{
-	return starpu_mpi_issend(data_handle, req, dst, mpi_tag, MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
-{
-	return starpu_mpi_issend_detached(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
-}
-
-/* cache */
-void fstarpu_mpi_cache_flush(MPI_Fint comm, starpu_data_handle_t data_handle)
-{
-	return starpu_mpi_cache_flush(MPI_Comm_f2c(comm), data_handle);
-}
-
-void fstarpu_mpi_cache_flush_all_data(MPI_Fint comm)
-{
-	return starpu_mpi_cache_flush_all_data(MPI_Comm_f2c(comm));
-}
-
-int fstarpu_mpi_comm_size(MPI_Fint comm, int *size)
-{
-	return starpu_mpi_comm_size(MPI_Comm_f2c(comm), size);
-}
-
-int fstarpu_mpi_comm_rank(MPI_Fint comm, int *rank)
-{
-	return starpu_mpi_comm_rank(MPI_Comm_f2c(comm), rank);
-}
-
-MPI_Fint fstarpu_mpi_world_comm()
-{
-	return MPI_Comm_c2f(MPI_COMM_WORLD);
-}
-
-void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, int tag, int rank, MPI_Fint comm)
-{
-	return starpu_mpi_data_register_comm(handle, tag, rank, MPI_Comm_f2c(comm));
-}
-
-void fstarpu_mpi_data_register(starpu_data_handle_t handle, int tag, int rank)
-{
-	return starpu_mpi_data_register_comm(handle, tag, rank, MPI_COMM_WORLD);
-}
-
-void fstarpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Fint comm)
-{
-	return starpu_mpi_data_set_rank_comm(handle, rank, MPI_Comm_f2c(comm));
-}
-
-void fstarpu_mpi_data_set_rank(starpu_data_handle_t handle, int rank)
-{
-	return starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD);
-}
-
-int fstarpu_mpi_wait_for_all(MPI_Fint comm)
-{
-	return starpu_mpi_wait_for_all(MPI_Comm_f2c(comm));
-}
-#endif

+ 256 - 0
mpi/src/starpu_mpi_fortran.c

@@ -0,0 +1,256 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  CNRS
+ * Copyright (C) 2016  Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <starpu_mpi.h>
+#include <common/config.h>
+#include "starpu_mpi_private.h"
+
+#ifdef HAVE_MPI_COMM_F2C
+/* Fortran related functions */
+/* Allocate the argc/argv bundle that the Fortran binding later passes to
+ * fstarpu_mpi_init_c().
+ *
+ * argc           number of argv slots to reserve; the slots start out NULL
+ *                and are filled by fstarpu_mpi_argcv_set_arg()
+ * initialize_mpi stored as-is in the bundle
+ * comm_present   when non-zero, comm is converted with MPI_Comm_f2c();
+ *                otherwise MPI_COMM_WORLD is used
+ * comm           Fortran communicator handle
+ *
+ * Returns the new bundle, to be released with fstarpu_mpi_argcv_free().
+ * Aborts when memory cannot be obtained instead of dereferencing NULL. */
+struct _starpu_mpi_argc_argv *fstarpu_mpi_argcv_alloc(int argc, int initialize_mpi, int comm_present, MPI_Fint comm)
+{
+	struct _starpu_mpi_argc_argv *argcv = calloc(1,sizeof(*argcv));
+	if (argcv == NULL)
+	{
+		STARPU_ABORT_MSG("Could not allocate the Fortran argc/argv descriptor\n");
+	}
+	argcv->initialize_mpi = initialize_mpi;
+	if (comm_present) {
+		argcv->comm = MPI_Comm_f2c(comm);
+	} else {
+		argcv->comm = MPI_COMM_WORLD;
+	}
+	argcv->fargc = argc;
+	argcv->argc = &argcv->fargc;
+	argcv->fargv = calloc(argc, sizeof(char *));
+	/* calloc(0, ...) may legitimately return NULL, so only a non-empty request counts as a failure */
+	if (argc > 0 && argcv->fargv == NULL)
+	{
+		STARPU_ABORT_MSG("Could not allocate the Fortran argv array (%d entries)\n", argc);
+	}
+	argcv->argv = &argcv->fargv;
+	return argcv;
+}
+
+/* Store a NUL-terminated copy of the len first bytes of _s in slot i of the
+ * Fortran argv array.
+ *
+ * argcv  bundle created by fstarpu_mpi_argcv_alloc()
+ * i      slot index, must satisfy 0 <= i < argcv->fargc
+ * len    number of bytes to copy from _s, must be >= 0
+ * _s     source bytes; no terminating NUL is read from it
+ *
+ * A string previously stored in the same slot is released first. Aborts when
+ * the copy cannot be allocated. */
+void fstarpu_mpi_argcv_set_arg(struct _starpu_mpi_argc_argv *argcv, int i, int len, char *_s)
+{
+	STARPU_ASSERT(len >= 0);
+	STARPU_ASSERT(i >= 0 && i < argcv->fargc);
+	/* widen before adding one so that len+1 cannot overflow an int */
+	char *s = malloc((size_t)len + 1);
+	if (s == NULL)
+	{
+		STARPU_ABORT_MSG("Could not allocate %d bytes for Fortran argument %d\n", len, i);
+	}
+	/* memcpy() must not be handed an invalid pointer, even for a zero length */
+	if (len > 0)
+	{
+		memcpy(s, _s, len);
+	}
+	s[len] = '\0';
+	/* slots are zero-initialised at allocation, so this is a no-op on first use */
+	free(argcv->fargv[i]);
+	argcv->fargv[i] = s;
+}
+
+/* Release a bundle created by fstarpu_mpi_argcv_alloc(): every stored
+ * argument string, then the argv array, then the bundle itself. */
+void fstarpu_mpi_argcv_free(struct _starpu_mpi_argc_argv *argcv)
+{
+	char **args = argcv->fargv;
+
+	if (args != NULL)
+	{
+		int idx = 0;
+		while (idx < argcv->fargc)
+		{
+			free(args[idx]);
+			idx++;
+		}
+		free(args);
+	}
+	free(argcv);
+}
+
+/* Allocate one zero-initialised starpu_mpi_req on the heap.
+ * Returns the result of calloc(), i.e. NULL on allocation failure. */
+starpu_mpi_req *fstarpu_mpi_req_alloc(void)
+{
+	starpu_mpi_req *req = calloc(1, sizeof(*req));
+
+	return req;
+}
+
+/* Release a request handle with free(); intended as the counterpart of
+ * fstarpu_mpi_req_alloc(). */
+void fstarpu_mpi_req_free(starpu_mpi_req *req)
+{
+	free(req);
+}
+
+/* Allocate one zero-initialised MPI_Status on the heap.
+ * Returns the result of calloc(), i.e. NULL on allocation failure. */
+MPI_Status *fstarpu_mpi_status_alloc(void)
+{
+	MPI_Status *status = calloc(1, sizeof(*status));
+
+	return status;
+}
+
+/* Release an MPI_Status with free(); intended as the counterpart of
+ * fstarpu_mpi_status_alloc(). */
+void fstarpu_mpi_status_free(MPI_Status *status)
+{
+	free(status);
+}
+
+/* Fortran entry point for starpu_mpi_barrier(): converts the Fortran
+ * communicator handle with MPI_Comm_f2c() and returns the callee's result. */
+int fstarpu_mpi_barrier(MPI_Fint comm)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_barrier(c_comm);
+}
+
+/* Fortran entry point for starpu_mpi_irecv_detached_sequential_consistency():
+ * the communicator is converted with MPI_Comm_f2c(), every other argument is
+ * forwarded unchanged and the callee's result is returned. */
+int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+	int ret = starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, mpi_tag, c_comm, callback, arg, seq_const);
+
+	return ret;
+}
+
+/* Call starpu_mpi_init_comm() with the argc, argv, initialize_mpi and comm
+ * values stored in the bundle, and return its result. */
+int fstarpu_mpi_init_c(struct _starpu_mpi_argc_argv *argcv)
+{
+	int ret = starpu_mpi_init_comm(argcv->argc, argcv->argv, argcv->initialize_mpi, argcv->comm);
+
+	return ret;
+}
+
+/* Fortran entry point for starpu_mpi_get_data_on_node(): converts the
+ * communicator with MPI_Comm_f2c() and forwards the handle and node. */
+void fstarpu_mpi_get_data_on_node(MPI_Fint comm, starpu_data_handle_t data_handle, int node)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	starpu_mpi_get_data_on_node(c_comm, data_handle, node);
+}
+
+/* Fortran entry point for starpu_mpi_get_data_on_node_detached(): converts
+ * the communicator with MPI_Comm_f2c() and forwards the handle, node,
+ * callback and callback argument unchanged. */
+void fstarpu_mpi_get_data_on_node_detached(MPI_Fint comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	starpu_mpi_get_data_on_node_detached(c_comm, data_handle, node, callback, arg);
+}
+
+/* Fortran entry point for starpu_mpi_redux_data(): converts the communicator
+ * with MPI_Comm_f2c() and forwards the data handle. */
+void fstarpu_mpi_redux_data(MPI_Fint comm, starpu_data_handle_t data_handle)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	starpu_mpi_redux_data(c_comm, data_handle);
+}
+
+/* scatter/gather */
+/* Fortran entry point for starpu_mpi_scatter_detached(): converts the
+ * communicator with MPI_Comm_f2c(), forwards every other argument unchanged
+ * and returns the callee's result. */
+int fstarpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+	int ret = starpu_mpi_scatter_detached(data_handles, cnt, root, c_comm, scallback, sarg, rcallback, rarg);
+
+	return ret;
+}
+
+/* Fortran entry point for starpu_mpi_gather_detached(): converts the
+ * communicator with MPI_Comm_f2c(), forwards every other argument unchanged
+ * and returns the callee's result. */
+int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+	int ret = starpu_mpi_gather_detached(data_handles, cnt, root, c_comm, scallback, sarg, rcallback, rarg);
+
+	return ret;
+}
+
+/* isend/irecv detached unlock tag */
+/* Fortran entry point for starpu_mpi_isend_detached_unlock_tag(): the
+ * communicator is converted with MPI_Comm_f2c() and the tag, received by
+ * reference, is dereferenced before being passed on by value. */
+int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+	starpu_tag_t tag_value = *starpu_tag;
+
+	return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, mpi_tag, c_comm, tag_value);
+}
+
+/* Fortran entry point for starpu_mpi_irecv_detached_unlock_tag(): the
+ * communicator is converted with MPI_Comm_f2c() and the tag, received by
+ * reference, is dereferenced before being passed on by value. */
+int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+	starpu_tag_t tag_value = *starpu_tag;
+
+	return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, mpi_tag, c_comm, tag_value);
+}
+
+/* isend/irecv array detached unlock tag */
+/* Fortran entry point for starpu_mpi_isend_array_detached_unlock_tag().
+ *
+ * Each of the array_size Fortran communicator handles in _comms is converted
+ * with MPI_Comm_f2c() into a temporary C array that only lives for the
+ * duration of the call; the tag is received by reference and passed on by
+ * value. The callee's result is returned.
+ *
+ * The temporary array is heap-allocated: a variable-length array has
+ * undefined behaviour for array_size <= 0 and can exhaust the stack for
+ * large counts. Aborts when the temporary cannot be allocated. */
+int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+{
+	STARPU_ASSERT(array_size >= 0);
+	MPI_Comm *comms = NULL;
+	if (array_size > 0)
+	{
+		comms = malloc(array_size * sizeof(*comms));
+		if (comms == NULL)
+		{
+			STARPU_ABORT_MSG("Could not allocate %d communicators\n", array_size);
+		}
+	}
+	int i;
+	for (i = 0; i < array_size; i++)
+	{
+		comms[i] = MPI_Comm_f2c(_comms[i]);
+	}
+	int ret = starpu_mpi_isend_array_detached_unlock_tag((unsigned)array_size, data_handles, dsts, mpi_tags, comms, *starpu_tag);
+	/* the former stack array also stopped existing here, so the callee cannot rely on it afterwards */
+	free(comms);
+	return ret;
+}
+
+/* Fortran entry point for starpu_mpi_irecv_array_detached_unlock_tag().
+ *
+ * Each of the array_size Fortran communicator handles in _comms is converted
+ * with MPI_Comm_f2c() into a temporary C array that only lives for the
+ * duration of the call; the tag is received by reference and passed on by
+ * value. The callee's result is returned.
+ *
+ * The temporary array is heap-allocated: a variable-length array has
+ * undefined behaviour for array_size <= 0 and can exhaust the stack for
+ * large counts. Aborts when the temporary cannot be allocated. */
+int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+{
+	STARPU_ASSERT(array_size >= 0);
+	MPI_Comm *comms = NULL;
+	if (array_size > 0)
+	{
+		comms = malloc(array_size * sizeof(*comms));
+		if (comms == NULL)
+		{
+			STARPU_ABORT_MSG("Could not allocate %d communicators\n", array_size);
+		}
+	}
+	int i;
+	for (i = 0; i < array_size; i++)
+	{
+		comms[i] = MPI_Comm_f2c(_comms[i]);
+	}
+	int ret = starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, mpi_tags, comms, *starpu_tag);
+	/* the former stack array also stopped existing here, so the callee cannot rely on it afterwards */
+	free(comms);
+	return ret;
+}
+
+/* isend/irecv */
+/* Fortran entry point for starpu_mpi_isend(): converts the communicator with
+ * MPI_Comm_f2c(), forwards the remaining arguments and returns the callee's
+ * result. */
+int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_isend(data_handle, req, dst, mpi_tag, c_comm);
+}
+
+/* Fortran entry point for starpu_mpi_irecv(): converts the communicator with
+ * MPI_Comm_f2c(), forwards the remaining arguments and returns the callee's
+ * result. */
+int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, int mpi_tag, MPI_Fint comm)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_irecv(data_handle, req, src, mpi_tag, c_comm);
+}
+
+/* send/recv */
+/* Fortran entry point for starpu_mpi_send(): converts the communicator with
+ * MPI_Comm_f2c(), forwards the remaining arguments and returns the callee's
+ * result. */
+int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_send(data_handle, dst, mpi_tag, c_comm);
+}
+
+/* Fortran entry point for starpu_mpi_recv(): converts the communicator with
+ * MPI_Comm_f2c(), forwards the remaining arguments (including the status
+ * pointer) and returns the callee's result. */
+int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, MPI_Status *status)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_recv(data_handle, src, mpi_tag, c_comm, status);
+}
+
+/* isend/irecv detached */
+/* Fortran entry point for starpu_mpi_isend_detached(): converts the
+ * communicator with MPI_Comm_f2c(), forwards the remaining arguments and
+ * returns the callee's result. */
+int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_isend_detached(data_handle, dst, mpi_tag, c_comm, callback, arg);
+}
+
+/* Fortran entry point for starpu_mpi_irecv_detached(): converts the
+ * communicator with MPI_Comm_f2c(), forwards the remaining arguments and
+ * returns the callee's result. */
+int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_irecv_detached(data_handle, src, mpi_tag, c_comm, callback, arg);
+}
+
+/* issend / issend detached */
+/* Fortran entry point for starpu_mpi_issend(): converts the communicator
+ * with MPI_Comm_f2c(), forwards the remaining arguments and returns the
+ * callee's result. */
+int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_issend(data_handle, req, dst, mpi_tag, c_comm);
+}
+
+/* Fortran entry point for starpu_mpi_issend_detached(): converts the
+ * communicator with MPI_Comm_f2c(), forwards the remaining arguments and
+ * returns the callee's result. */
+int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_issend_detached(data_handle, dst, mpi_tag, c_comm, callback, arg);
+}
+
+/* cache */
+/* Fortran entry point for starpu_mpi_cache_flush(): converts the
+ * communicator with MPI_Comm_f2c() and forwards the data handle.
+ * A void function must not return an expression in ISO C, so the callee is
+ * invoked as a plain statement. */
+void fstarpu_mpi_cache_flush(MPI_Fint comm, starpu_data_handle_t data_handle)
+{
+	starpu_mpi_cache_flush(MPI_Comm_f2c(comm), data_handle);
+}
+
+/* Fortran entry point for starpu_mpi_cache_flush_all_data(): converts the
+ * communicator with MPI_Comm_f2c() and forwards it.
+ * A void function must not return an expression in ISO C, so the callee is
+ * invoked as a plain statement. */
+void fstarpu_mpi_cache_flush_all_data(MPI_Fint comm)
+{
+	starpu_mpi_cache_flush_all_data(MPI_Comm_f2c(comm));
+}
+
+/* Fortran entry point for starpu_mpi_comm_size(): converts the communicator
+ * with MPI_Comm_f2c(), passes the size output pointer through and returns
+ * the callee's result. */
+int fstarpu_mpi_comm_size(MPI_Fint comm, int *size)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_comm_size(c_comm, size);
+}
+
+/* Fortran entry point for starpu_mpi_comm_rank(): converts the communicator
+ * with MPI_Comm_f2c(), passes the rank output pointer through and returns
+ * the callee's result. */
+int fstarpu_mpi_comm_rank(MPI_Fint comm, int *rank)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_comm_rank(c_comm, rank);
+}
+
+/* Return the Fortran handle of MPI_COMM_WORLD, obtained with
+ * MPI_Comm_c2f(). Declared with (void) so that the definition is a real
+ * prototype rather than an old-style unspecified parameter list. */
+MPI_Fint fstarpu_mpi_world_comm(void)
+{
+	return MPI_Comm_c2f(MPI_COMM_WORLD);
+}
+
+/* Fortran entry point for starpu_mpi_data_register_comm(): converts the
+ * communicator with MPI_Comm_f2c() and forwards handle, tag and rank.
+ * A void function must not return an expression in ISO C, so the callee is
+ * invoked as a plain statement. */
+void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, int tag, int rank, MPI_Fint comm)
+{
+	starpu_mpi_data_register_comm(handle, tag, rank, MPI_Comm_f2c(comm));
+}
+
+/* Fortran entry point that registers handle with the given tag and rank on
+ * MPI_COMM_WORLD by calling starpu_mpi_data_register_comm().
+ * A void function must not return an expression in ISO C, so the callee is
+ * invoked as a plain statement. */
+void fstarpu_mpi_data_register(starpu_data_handle_t handle, int tag, int rank)
+{
+	starpu_mpi_data_register_comm(handle, tag, rank, MPI_COMM_WORLD);
+}
+
+/* Fortran entry point for starpu_mpi_data_set_rank_comm(): converts the
+ * communicator with MPI_Comm_f2c() and forwards handle and rank.
+ * A void function must not return an expression in ISO C, so the callee is
+ * invoked as a plain statement. */
+void fstarpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Fint comm)
+{
+	starpu_mpi_data_set_rank_comm(handle, rank, MPI_Comm_f2c(comm));
+}
+
+/* Fortran entry point that sets the rank of handle on MPI_COMM_WORLD by
+ * calling starpu_mpi_data_set_rank_comm().
+ * A void function must not return an expression in ISO C, so the callee is
+ * invoked as a plain statement. */
+void fstarpu_mpi_data_set_rank(starpu_data_handle_t handle, int rank)
+{
+	starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD);
+}
+
+/* Fortran entry point for starpu_mpi_wait_for_all(): converts the
+ * communicator with MPI_Comm_f2c() and returns the callee's result. */
+int fstarpu_mpi_wait_for_all(MPI_Fint comm)
+{
+	MPI_Comm c_comm = MPI_Comm_f2c(comm);
+
+	return starpu_mpi_wait_for_all(c_comm);
+}
+#endif

+ 11 - 0
mpi/src/starpu_mpi_private.h

@@ -220,6 +220,17 @@ LIST_TYPE(_starpu_mpi_req,
      	UT_hash_handle hh;
 );
 
+/* Initialisation arguments gathered for the Fortran binding:
+ * fstarpu_mpi_argcv_alloc() fills the structure and fstarpu_mpi_init_c()
+ * passes argc, argv, initialize_mpi and comm to starpu_mpi_init_comm(). */
+struct _starpu_mpi_argc_argv
+{
+	int initialize_mpi;	// passed as-is to starpu_mpi_init_comm()
+	int *argc;		// set by the Fortran allocator to point at fargc below
+	char ***argv;		// set by the Fortran allocator to point at fargv below
+	MPI_Comm comm;		// communicator passed to starpu_mpi_init_comm()
+	int fargc;	// Fortran argc: number of slots in fargv
+	char **fargv;	// Fortran argv: heap-allocated array of heap-allocated strings
+};
+
+
 #ifdef __cplusplus
 }
 #endif

+ 1 - 429
mpi/src/starpu_mpi_task_insert.c

@@ -2,8 +2,7 @@
  *
  * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
  * Copyright (C) 2011-2016  Université de Bordeaux
- * Copyright (C) 2014 INRIA
- * Copyright (C) 2016 Inria
+ * Copyright (C) 2014, 2016 Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -37,7 +36,6 @@
 	else								\
 		starpu_mpi_isend_detached(data, dest, data_tag, comm, callback, arg);
 
-static
 int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank)
 {
 	if (mode & STARPU_W)
@@ -78,7 +76,6 @@ int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_a
 	return 0;
 }
 
-static
 void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, MPI_Comm comm)
 {
 	if (data && mode & STARPU_R)
@@ -434,285 +431,6 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 	return 0;
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-static
-int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nodes, int *xrank, int *do_execute, struct starpu_data_descr **descrs_p, int *nb_data_p, void **arglist)
-{
-	int arg_i = 0;
-	int inconsistent_execute = 0;
-	int arg_type, arg_type_nocommute;
-	int node_selected = 0;
-	int nb_allocated_data = 16;
-	struct starpu_data_descr *descrs;
-	int nb_data;
-	int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY;
-
-	_STARPU_TRACE_TASK_MPI_DECODE_START();
-
-	descrs = (struct starpu_data_descr *)malloc(nb_allocated_data * sizeof(struct starpu_data_descr));
-	nb_data = 0;
-	*do_execute = -1;
-	*xrank = -1;
-
-	while (arglist[arg_i] != NULL)
-	{
-		arg_type = (int)(intptr_t)arglist[arg_i];
-		arg_type_nocommute = arg_type & ~STARPU_COMMUTE;
-
-		if (arg_type==STARPU_EXECUTE_ON_NODE)
-		{
-			arg_i++;
-			*xrank = *(int *)arglist[arg_i];
-			if (node_selected == 0)
-			{
-				_STARPU_MPI_DEBUG(100, "Executing on node %d\n", *xrank);
-				*do_execute = 1;
-				node_selected = 1;
-				inconsistent_execute = 0;
-			}
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_DATA)
-		{
-			arg_i++;
-			starpu_data_handle_t data = arglist[arg_i];
-			if (node_selected == 0)
-			{
-				*xrank = starpu_mpi_data_get_rank(data);
-				STARPU_ASSERT_MSG(*xrank != -1, "Rank of the data must be set using starpu_mpi_data_register() or starpu_data_set_rank()");
-				_STARPU_MPI_DEBUG(100, "Executing on data node %d\n", *xrank);
-				STARPU_ASSERT_MSG(*xrank <= nb_nodes, "Node %d to execute codelet is not a valid node (%d)", *xrank, nb_nodes);
-				*do_execute = 1;
-				node_selected = 1;
-				inconsistent_execute = 0;
-			}
-		}
-		else if (arg_type_nocommute & STARPU_R || arg_type_nocommute & STARPU_W || arg_type_nocommute & STARPU_RW || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
-		{
-			arg_i++;
-			starpu_data_handle_t data = arglist[arg_i];
-			enum starpu_data_access_mode mode = (enum starpu_data_access_mode) arg_type;
-			if (node_selected == 0)
-			{
-				int ret = _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank);
-				if (ret == -EINVAL)
-				{
-					free(descrs);
-					_STARPU_TRACE_TASK_MPI_DECODE_END();
-					return ret;
-				}
-			}
-			if (nb_data >= nb_allocated_data)
-			{
-				nb_allocated_data *= 2;
-				descrs = (struct starpu_data_descr *)realloc(descrs, nb_allocated_data * sizeof(struct starpu_data_descr));
-			}
-			descrs[nb_data].handle = data;
-			descrs[nb_data].mode = mode;
-			nb_data ++;
-		}
-		else if (arg_type == STARPU_DATA_ARRAY)
-		{
-			arg_i++;
-			starpu_data_handle_t *datas = arglist[arg_i];
-			arg_i++;
-			int nb_handles = *(int *)arglist[arg_i];
-			int i;
-
-			for(i=0 ; i<nb_handles ; i++)
-			{
-				STARPU_ASSERT_MSG(codelet->nbuffers == STARPU_VARIABLE_NBUFFERS || nb_data < codelet->nbuffers, "Too many data passed to starpu_mpi_task_insert");
-				enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(codelet, nb_data);
-				if (node_selected == 0)
-				{
-					int ret = _starpu_mpi_find_executee_node(datas[i], mode, me, do_execute, &inconsistent_execute, xrank);
-					if (ret == -EINVAL)
-					{
-						free(descrs);
-						_STARPU_TRACE_TASK_MPI_DECODE_END();
-						return ret;
-					}
-				}
-				if (nb_data >= nb_allocated_data)
-				{
-					nb_allocated_data *= 2;
-					descrs = (struct starpu_data_descr *)realloc(descrs, nb_allocated_data * sizeof(struct starpu_data_descr));
-				}
-				descrs[nb_data].handle = datas[i];
-				descrs[nb_data].mode = mode;
-				nb_data ++;
-			}
-		}
-		else if (arg_type == STARPU_DATA_MODE_ARRAY)
-		{
-			arg_i++;
-			struct starpu_data_descr *_descrs = arglist[arg_i];
-			arg_i++;
-			int nb_handles = *(int *)arglist[arg_i];
-			int i;
-
-			for(i=0 ; i<nb_handles ; i++)
-			{
-				enum starpu_data_access_mode mode = _descrs[i].mode;
-				if (node_selected == 0)
-				{
-					int ret = _starpu_mpi_find_executee_node(_descrs[i].handle, mode, me, do_execute, &inconsistent_execute, xrank);
-					if (ret == -EINVAL)
-					{
-						free(descrs);
-						_STARPU_TRACE_TASK_MPI_DECODE_END();
-						return ret;
-					}
-				}
-				if (nb_data >= nb_allocated_data)
-				{
-					nb_allocated_data *= 2;
-					descrs = (struct starpu_data_descr *)realloc(descrs, nb_allocated_data * sizeof(struct starpu_data_descr));
-				}
-				descrs[nb_data].handle = _descrs[i].handle;
-				descrs[nb_data].mode = mode;
-				nb_data ++;
-			}
-		}
-		else if (arg_type==STARPU_VALUE)
-		{
-			arg_i++;
-			/* void* */
-			arg_i++;
-			/* size_t */
-		}
-		else if (arg_type==STARPU_CL_ARGS)
-		{
-			arg_i++;
-			/* void* */
-			arg_i++;
-			/* size_t */
-		}
-		else if (arg_type==STARPU_CALLBACK)
-		{
-			arg_i++;
-			/* _starpu_callback_func_t */
-		}
-		else if (arg_type==STARPU_CALLBACK_WITH_ARG)
-		{
-			arg_i++;
-			/* _starpu_callback_func_t */
-			arg_i++;
-			/* void* */
-		}
-		else if (arg_type==STARPU_CALLBACK_ARG)
-		{
-			arg_i++;
-			/* void* */
-		}
-		else if (arg_type==STARPU_PRIORITY)
-		{
-			arg_i++;
-			/* int* */
-		}
-		/* STARPU_EXECUTE_ON_NODE handled above */
-		/* STARPU_EXECUTE_ON_DATA handled above */
-		/* STARPU_DATA_ARRAY handled above */
-		/* STARPU_DATA_MODE_ARRAY handled above */
-		else if (arg_type==STARPU_TAG)
-		{
-			arg_i++;
-			/* starpu_tag_t* */
-		}
-		else if (arg_type==STARPU_HYPERVISOR_TAG)
-		{
-			arg_i++;
-			/* int* */
-		}
-		else if (arg_type==STARPU_FLOPS)
-		{
-			arg_i++;
-			/* double* */
-		}
-		else if (arg_type==STARPU_SCHED_CTX)
-		{
-			arg_i++;
-			/* unsigned* */
-		}
-		else if (arg_type==STARPU_PROLOGUE_CALLBACK)
-                {
-			arg_i++;
-			/* _starpu_callback_func_t */
-		}
-                else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG)
-                {
-			arg_i++;
-			/* void* */
-                }
-                else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP)
-                {
-			arg_i++;
-			/* _starpu_callback_func_t */
-                }
-                else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG)
-                {
-			arg_i++;
-			/* void* */
-		}
-		else if (arg_type==STARPU_EXECUTE_ON_WORKER)
-		{
-			arg_i++;
-			/* int* */
-		}
-		else if (arg_type==STARPU_TAG_ONLY)
-		{
-			arg_i++;
-			/* starpu_tag_t* */
-		}
-		else if (arg_type==STARPU_NAME)
-		{
-			arg_i++;
-			/* char* */
-		}
-		else if (arg_type==STARPU_POSSIBLY_PARALLEL)
-		{
-			arg_i++;
-			/* unsigned* */
-		}
-		else if (arg_type==STARPU_WORKER_ORDER)
-		{
-			arg_i++;
-			/* unsigned* */
-		}
-		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
-		{
-			arg_i++;
-			/* int* */
-		}
-		else
-		{
-			STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type);
-		}
-
-		arg_i++;
-	}
-
-	if (inconsistent_execute == 1 || *xrank == -1)
-	{
-		// We need to find out which node is going to execute the codelet.
-		_STARPU_MPI_DISP("Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n");
-		*xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy);
-		*do_execute = (me == *xrank);
-	}
-	else
-	{
-		_STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, *xrank);
-		*do_execute = (me == *xrank);
-	}
-	_STARPU_MPI_DEBUG(100, "do_execute=%d\n", *do_execute);
-
-	*descrs_p = descrs;
-	*nb_data_p = nb_data;
-
-	_STARPU_TRACE_TASK_MPI_DECODE_END();
-	return 0;
-}
-#endif
-
 static
 int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, va_list varg_list)
 {
@@ -762,55 +480,6 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 	}
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-static
-int _fstarpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, void **arglist)
-{
-	int me, do_execute, xrank, nb_nodes;
-	int ret;
-	int i;
-	struct starpu_data_descr *descrs;
-	int nb_data;
-
-	_STARPU_MPI_LOG_IN();
-
-	starpu_mpi_comm_rank(comm, &me);
-	starpu_mpi_comm_size(comm, &nb_nodes);
-
-	/* Find out whether we are to execute the data because we own the data to be written to. */
-	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, arglist);
-	if (ret < 0) return ret;
-
-	_STARPU_TRACE_TASK_MPI_PRE_START();
-	/* Send and receive data as requested */
-	for(i=0 ; i<nb_data ; i++)
-	{
-		_starpu_mpi_exchange_data_before_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, comm);
-	}
-
-	if (xrank_p) *xrank_p = xrank;
-	if (nb_data_p) *nb_data_p = nb_data;
-	if (descrs_p)
-		*descrs_p = descrs;
-	else
-		free(descrs);
-	_STARPU_TRACE_TASK_MPI_PRE_END();
-
-	if (do_execute == 0) return 1;
-	else
-	{
-		_STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL);
-
-		*task = starpu_task_create();
-		(*task)->cl_arg_free = 1;
-
-		_fstarpu_task_insert_create(codelet, task, arglist);
-		return 0;
-	}
-}
-#endif
-
-static
 int _starpu_mpi_task_postbuild_v(MPI_Comm comm, int xrank, int do_execute, struct starpu_data_descr *descrs, int nb_data)
 {
 	int me, i;
@@ -864,41 +533,6 @@ int _starpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, va_
 	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-static
-int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, void **arglist)
-{
-	struct starpu_task *task;
-	int ret;
-	int xrank;
-	int do_execute = 0;
-	struct starpu_data_descr *descrs;
-	int nb_data;
-
-	ret = _fstarpu_mpi_task_build_v(comm, codelet, &task, &xrank, &descrs, &nb_data, arglist);
-	if (ret < 0) return ret;
-
-	if (ret == 0)
-	{
-		do_execute = 1;
-		ret = starpu_task_submit(task);
-
-		if (STARPU_UNLIKELY(ret == -ENODEV))
-		{
-			fprintf(stderr, "submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
-				task, task->cl,
-				(codelet == NULL) ? "none" :
-				task->cl->name ? task->cl->name :
-				(task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
-
-			task->destroy = 0;
-			starpu_task_destroy(task);
-		}
-	}
-	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
-}
-#endif
-
 int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 {
 	va_list varg_list;
@@ -910,22 +544,6 @@ int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 	return ret;
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-int fstarpu_mpi_task_insert(MPI_Fint comm, void ***_arglist)
-{
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
-	if (codelet == NULL)
-	{
-		STARPU_ABORT_MSG("task without codelet");
-	}
-	int ret;
-
-	ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+1);
-	return ret;
-}
-#endif
-
 int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 {
 	va_list varg_list;
@@ -937,8 +555,6 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 	return ret;
 }
 
-/* fstarpu_mpi_insert_task: aliased to fstarpu_mpi_task_insert in fstarpu_mpi_mod.f90 */
-
 struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 {
 	va_list varg_list;
@@ -952,24 +568,6 @@ struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *
 	if (ret > 0) return NULL; else return task;
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-struct starpu_task *fstarpu_mpi_task_build(MPI_Fint comm, void ***_arglist)
-{
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
-	if (codelet == NULL)
-	{
-		STARPU_ABORT_MSG("task without codelet");
-	}
-	struct starpu_task *task;
-	int ret;
-
-	ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, arglist+1);
-	STARPU_ASSERT(ret >= 0);
-	if (ret > 0) return NULL; else return task;
-}
-#endif
-
 int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 {
 	int xrank, do_execute;
@@ -990,32 +588,6 @@ int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ..
 	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
 }
 
-#ifdef HAVE_MPI_COMM_F2C
-int fstarpu_mpi_task_post_build(MPI_Fint _comm, void ***_arglist)
-{
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
-	if (codelet == NULL)
-	{
-		STARPU_ABORT_MSG("task without codelet");
-	}
-	MPI_Comm comm = MPI_Comm_f2c(_comm);
-	int xrank, do_execute;
-	int ret, me, nb_nodes;
-	struct starpu_data_descr *descrs;
-	int nb_data;
-
-	starpu_mpi_comm_rank(comm, &me);
-	starpu_mpi_comm_size(comm, &nb_nodes);
-
-	/* Find out whether we are to execute the data because we own the data to be written to. */
-	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, arglist);
-	if (ret < 0) return ret;
-
-	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
-}
-#endif
-
 void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
 {
 	int me, rank, tag;

+ 31 - 0
mpi/src/starpu_mpi_task_insert.h

@@ -0,0 +1,31 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_TASK_INSERT_H__
+#define __STARPU_MPI_TASK_INSERT_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Helpers shared between the C and the Fortran flavours of the MPI task
+ * insertion code.
+ */
+
+/* Called for a data handle and its access mode while the task arguments are
+ * decoded and no executing node has been explicitly selected yet. Callers
+ * abort the decoding when -EINVAL is returned.
+ * NOTE(review): do_execute, inconsistent_execute and xrank are presumably
+ * updated in place - confirm against the definition. */
+int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank);
+/* Called once per data descriptor before the task is created, to send and
+ * receive data as requested. me is the rank of the calling node and xrank the
+ * rank selected to execute the codelet. */
+void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, MPI_Comm comm);
+/* Called after the task has been built (and possibly submitted); its return
+ * value is returned as-is by the task insertion entry points.
+ * NOTE(review): presumably takes ownership of descrs, callers do not free the
+ * array afterwards - confirm against the definition. */
+int _starpu_mpi_task_postbuild_v(MPI_Comm comm, int xrank, int do_execute, struct starpu_data_descr *descrs, int nb_data);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __STARPU_MPI_TASK_INSERT_H__ */

+ 442 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -0,0 +1,442 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2016  CNRS
+ * Copyright (C) 2016 Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <starpu_mpi.h>
+#include <common/config.h>
+#include <starpu_mpi_private.h>
+#include <starpu_mpi_task_insert.h>
+#include <starpu_mpi_select_node.h>
+#include <util/starpu_task_insert_utils.h>
+
+#ifdef HAVE_MPI_COMM_F2C
+/*
+ * Append the pair (handle, mode) at the end of the descriptor array *descrs_p.
+ * The array capacity (*nb_allocated_data_p) is doubled when the array is full;
+ * the program is aborted if the array cannot be enlarged.
+ */
+static
+void _fstarpu_mpi_task_decode_add_descr(struct starpu_data_descr **descrs_p, int *nb_data_p, int *nb_allocated_data_p, starpu_data_handle_t handle, enum starpu_data_access_mode mode)
+{
+	if (*nb_data_p >= *nb_allocated_data_p)
+	{
+		/* Keep the previous pointer until realloc() is known to have succeeded */
+		int new_nb_allocated_data = *nb_allocated_data_p * 2;
+		struct starpu_data_descr *new_descrs = (struct starpu_data_descr *)realloc(*descrs_p, new_nb_allocated_data * sizeof(struct starpu_data_descr));
+		if (new_descrs == NULL)
+		{
+			STARPU_ABORT_MSG("Cannot allocate memory for %d data descriptors\n", new_nb_allocated_data);
+		}
+		*descrs_p = new_descrs;
+		*nb_allocated_data_p = new_nb_allocated_data;
+	}
+	(*descrs_p)[*nb_data_p].handle = handle;
+	(*descrs_p)[*nb_data_p].mode = mode;
+	(*nb_data_p)++;
+}
+
+/*
+ * Walk the NULL-terminated Fortran argument list arglist (an argument type
+ * followed by its value(s), repeated) in order to
+ *  - collect every data handle and its access mode in a malloc'ed array
+ *    returned through descrs_p / nb_data_p,
+ *  - find the rank of the node which executes the codelet (*xrank) and whether
+ *    the calling node, of rank me, is that node (*do_execute).
+ *
+ * Arguments which do not take part in the node selection are skipped.
+ *
+ * Returns 0 on success. Returns -EINVAL when _starpu_mpi_find_executee_node()
+ * does; the descriptor array is then freed and descrs_p / nb_data_p are left
+ * untouched.
+ */
+static
+int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nodes, int *xrank, int *do_execute, struct starpu_data_descr **descrs_p, int *nb_data_p, void **arglist)
+{
+	int arg_i = 0;
+	int inconsistent_execute = 0;
+	int arg_type, arg_type_nocommute;
+	int node_selected = 0;
+	int nb_allocated_data = 16;
+	struct starpu_data_descr *descrs;
+	int nb_data = 0;
+	int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY;
+
+	_STARPU_TRACE_TASK_MPI_DECODE_START();
+
+	descrs = (struct starpu_data_descr *)malloc(nb_allocated_data * sizeof(struct starpu_data_descr));
+	if (descrs == NULL)
+	{
+		STARPU_ABORT_MSG("Cannot allocate memory for %d data descriptors\n", nb_allocated_data);
+	}
+	*do_execute = -1;
+	*xrank = -1;
+
+	while (arglist[arg_i] != NULL)
+	{
+		/* The argument type is stored directly in the pointer slot */
+		arg_type = (int)(intptr_t)arglist[arg_i];
+		arg_type_nocommute = arg_type & ~STARPU_COMMUTE;
+
+		if (arg_type==STARPU_EXECUTE_ON_NODE)
+		{
+			arg_i++;
+			/* int* */
+			*xrank = *(int *)arglist[arg_i];
+			if (node_selected == 0)
+			{
+				_STARPU_MPI_DEBUG(100, "Executing on node %d\n", *xrank);
+				*do_execute = 1;
+				node_selected = 1;
+				inconsistent_execute = 0;
+			}
+		}
+		else if (arg_type==STARPU_EXECUTE_ON_DATA)
+		{
+			arg_i++;
+			starpu_data_handle_t data = arglist[arg_i];
+			if (node_selected == 0)
+			{
+				*xrank = starpu_mpi_data_get_rank(data);
+				STARPU_ASSERT_MSG(*xrank != -1, "Rank of the data must be set using starpu_mpi_data_register() or starpu_data_set_rank()");
+				_STARPU_MPI_DEBUG(100, "Executing on data node %d\n", *xrank);
+				/* Valid ranks are 0 .. nb_nodes-1 */
+				STARPU_ASSERT_MSG(*xrank < nb_nodes, "Node %d to execute codelet is not a valid node (%d)", *xrank, nb_nodes);
+				*do_execute = 1;
+				node_selected = 1;
+				inconsistent_execute = 0;
+			}
+		}
+		else if (arg_type_nocommute & STARPU_R || arg_type_nocommute & STARPU_W || arg_type_nocommute & STARPU_RW || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX)
+		{
+			/* A single data handle, the argument type being its access mode */
+			arg_i++;
+			starpu_data_handle_t data = arglist[arg_i];
+			enum starpu_data_access_mode mode = (enum starpu_data_access_mode) arg_type;
+			if (node_selected == 0 && _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank) == -EINVAL)
+				goto invalid;
+			_fstarpu_mpi_task_decode_add_descr(&descrs, &nb_data, &nb_allocated_data, data, mode);
+		}
+		else if (arg_type == STARPU_DATA_ARRAY)
+		{
+			/* An array of handles, the access modes being given by the codelet */
+			arg_i++;
+			starpu_data_handle_t *datas = arglist[arg_i];
+			arg_i++;
+			int nb_handles = *(int *)arglist[arg_i];
+			int i;
+
+			for(i=0 ; i<nb_handles ; i++)
+			{
+				STARPU_ASSERT_MSG(codelet->nbuffers == STARPU_VARIABLE_NBUFFERS || nb_data < codelet->nbuffers, "Too many data passed to starpu_mpi_task_insert");
+				enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(codelet, nb_data);
+				if (node_selected == 0 && _starpu_mpi_find_executee_node(datas[i], mode, me, do_execute, &inconsistent_execute, xrank) == -EINVAL)
+					goto invalid;
+				_fstarpu_mpi_task_decode_add_descr(&descrs, &nb_data, &nb_allocated_data, datas[i], mode);
+			}
+		}
+		else if (arg_type == STARPU_DATA_MODE_ARRAY)
+		{
+			/* An array of (handle, mode) descriptors */
+			arg_i++;
+			struct starpu_data_descr *_descrs = arglist[arg_i];
+			arg_i++;
+			int nb_handles = *(int *)arglist[arg_i];
+			int i;
+
+			for(i=0 ; i<nb_handles ; i++)
+			{
+				enum starpu_data_access_mode mode = _descrs[i].mode;
+				if (node_selected == 0 && _starpu_mpi_find_executee_node(_descrs[i].handle, mode, me, do_execute, &inconsistent_execute, xrank) == -EINVAL)
+					goto invalid;
+				_fstarpu_mpi_task_decode_add_descr(&descrs, &nb_data, &nb_allocated_data, _descrs[i].handle, mode);
+			}
+		}
+		else if (arg_type==STARPU_VALUE || arg_type==STARPU_CL_ARGS)
+		{
+			/* Skip two values: void*, size_t */
+			arg_i += 2;
+		}
+		else if (arg_type==STARPU_CALLBACK_WITH_ARG)
+		{
+			/* Skip two values: _starpu_callback_func_t, void* */
+			arg_i += 2;
+		}
+		else if (arg_type==STARPU_CALLBACK			/* _starpu_callback_func_t */
+			 || arg_type==STARPU_CALLBACK_ARG		/* void* */
+			 || arg_type==STARPU_PRIORITY			/* int* */
+			 || arg_type==STARPU_TAG			/* starpu_tag_t* */
+			 || arg_type==STARPU_HYPERVISOR_TAG		/* int* */
+			 || arg_type==STARPU_FLOPS			/* double* */
+			 || arg_type==STARPU_SCHED_CTX			/* unsigned* */
+			 || arg_type==STARPU_PROLOGUE_CALLBACK		/* _starpu_callback_func_t */
+			 || arg_type==STARPU_PROLOGUE_CALLBACK_ARG	/* void* */
+			 || arg_type==STARPU_PROLOGUE_CALLBACK_POP	/* _starpu_callback_func_t */
+			 || arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG	/* void* */
+			 || arg_type==STARPU_EXECUTE_ON_WORKER		/* int* */
+			 || arg_type==STARPU_TAG_ONLY			/* starpu_tag_t* */
+			 || arg_type==STARPU_NAME			/* char* */
+			 || arg_type==STARPU_POSSIBLY_PARALLEL		/* unsigned* */
+			 || arg_type==STARPU_WORKER_ORDER)		/* unsigned* */
+		{
+			/* Skip the single value of the argument */
+			arg_i++;
+		}
+		else if (arg_type==STARPU_NODE_SELECTION_POLICY)
+		{
+			arg_i++;
+			/* int* */
+			/* NOTE(review): the value is skipped, select_node_policy
+			 * keeps its default - confirm whether the policy given
+			 * by the caller should be read here. */
+		}
+		else
+		{
+			STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type);
+		}
+
+		/* Move on to the next argument type */
+		arg_i++;
+	}
+
+	if (inconsistent_execute == 1 || *xrank == -1)
+	{
+		// We need to find out which node is going to execute the codelet.
+		_STARPU_MPI_DISP("Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n");
+		*xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy);
+	}
+	else
+	{
+		_STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, *xrank);
+	}
+	*do_execute = (me == *xrank);
+	_STARPU_MPI_DEBUG(100, "do_execute=%d\n", *do_execute);
+
+	*descrs_p = descrs;
+	*nb_data_p = nb_data;
+
+	_STARPU_TRACE_TASK_MPI_DECODE_END();
+	return 0;
+
+invalid:
+	/* The executing node could not be found for one of the data handles */
+	free(descrs);
+	_STARPU_TRACE_TASK_MPI_DECODE_END();
+	return -EINVAL;
+}
+
+/*
+ * Decode the Fortran argument list, exchange the data the task needs and, when
+ * the calling node is the one executing the codelet, create the task.
+ *
+ * xrank_p, descrs_p and nb_data_p are optional outputs (they may be NULL); the
+ * descriptor array is freed here when descrs_p is NULL.
+ *
+ * Returns a negative value when the decoding fails, 1 when the calling node
+ * does not execute the codelet (*task is then not set), and 0 when *task has
+ * been created.
+ */
+static
+int _fstarpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, void **arglist)
+{
+	int my_rank, world_size;
+	int exec_rank, must_execute;
+	int status;
+	int idx;
+	struct starpu_data_descr *data_descrs;
+	int data_count;
+
+	_STARPU_MPI_LOG_IN();
+
+	starpu_mpi_comm_rank(comm, &my_rank);
+	starpu_mpi_comm_size(comm, &world_size);
+
+	/* Find out which node executes the codelet, and which data are involved */
+	status = _fstarpu_mpi_task_decode_v(codelet, my_rank, world_size, &exec_rank, &must_execute, &data_descrs, &data_count, arglist);
+	if (status < 0)
+		return status;
+
+	_STARPU_TRACE_TASK_MPI_PRE_START();
+	/* Send and receive data as requested */
+	for (idx = 0; idx < data_count; idx++)
+	{
+		struct starpu_data_descr *descr = &data_descrs[idx];
+		_starpu_mpi_exchange_data_before_execution(descr->handle, descr->mode, my_rank, exec_rank, must_execute, comm);
+	}
+
+	/* Hand the results over to the caller, for those it asked for */
+	if (xrank_p != NULL)
+		*xrank_p = exec_rank;
+	if (nb_data_p != NULL)
+		*nb_data_p = data_count;
+	if (descrs_p == NULL)
+		free(data_descrs);
+	else
+		*descrs_p = data_descrs;
+	_STARPU_TRACE_TASK_MPI_PRE_END();
+
+	/* Another node executes the codelet: there is no task to create here */
+	if (must_execute == 0)
+		return 1;
+
+	_STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL);
+
+	*task = starpu_task_create();
+	(*task)->cl_arg_free = 1;
+
+	_fstarpu_task_insert_create(codelet, task, arglist);
+	return 0;
+}
+
+/*
+ * Build the task described by the Fortran argument list, submit it when the
+ * calling node is the one executing the codelet, then run the post-build step.
+ *
+ * Returns the negative value of _fstarpu_mpi_task_build_v() when the build
+ * fails, and the return value of _starpu_mpi_task_postbuild_v() otherwise.
+ */
+static
+int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, void **arglist)
+{
+	struct starpu_task *new_task;
+	struct starpu_data_descr *data_descrs;
+	int data_count;
+	int exec_rank;
+	int executed_here;
+	int status;
+
+	status = _fstarpu_mpi_task_build_v(comm, codelet, &new_task, &exec_rank, &data_descrs, &data_count, arglist);
+	if (status < 0)
+		return status;
+
+	/* A task has been created only when the build step returned 0 */
+	executed_here = (status == 0);
+	if (executed_here)
+	{
+		status = starpu_task_submit(new_task);
+
+		if (STARPU_UNLIKELY(status == -ENODEV))
+		{
+			/* Best name available for the codelet, to be reported in the message */
+			const char *symbol;
+			if (codelet == NULL)
+				symbol = "none";
+			else if (new_task->cl->name)
+				symbol = new_task->cl->name;
+			else if (new_task->cl->model && new_task->cl->model->symbol)
+				symbol = new_task->cl->model->symbol;
+			else
+				symbol = "none";
+
+			fprintf(stderr, "submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
+				new_task, new_task->cl, symbol);
+
+			new_task->destroy = 0;
+			starpu_task_destroy(new_task);
+		}
+	}
+	return _starpu_mpi_task_postbuild_v(comm, exec_rank, executed_here, data_descrs, data_count);
+}
+
+/*
+ * Fortran entry point to insert a task. *_arglist is a NULL-terminated array
+ * whose first element is the codelet, followed by the task arguments. The
+ * Fortran communicator handle is converted to its C counterpart.
+ */
+int fstarpu_mpi_task_insert(MPI_Fint comm, void ***_arglist)
+{
+	void **args = *_arglist;
+	struct starpu_codelet *cl = args[0];
+
+	if (!cl)
+	{
+		STARPU_ABORT_MSG("task without codelet");
+	}
+
+	/* The task arguments start right after the codelet */
+	return _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), cl, &args[1]);
+}
+
+/* fstarpu_mpi_insert_task: aliased to fstarpu_mpi_task_insert in fstarpu_mpi_mod.f90 */
+
+/*
+ * Fortran entry point to build a task without submitting it. *_arglist is a
+ * NULL-terminated array whose first element is the codelet, followed by the
+ * task arguments.
+ *
+ * Returns the task, or NULL when the calling node does not execute the
+ * codelet.
+ */
+struct starpu_task *fstarpu_mpi_task_build(MPI_Fint comm, void ***_arglist)
+{
+	void **args = *_arglist;
+	struct starpu_codelet *cl = args[0];
+
+	if (!cl)
+	{
+		STARPU_ABORT_MSG("task without codelet");
+	}
+
+	struct starpu_task *built_task;
+	/* The task arguments start right after the codelet */
+	int status = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), cl, &built_task, NULL, NULL, NULL, &args[1]);
+
+	STARPU_ASSERT(status >= 0);
+	return (status > 0) ? NULL : built_task;
+}
+
+/*
+ * Fortran entry point for the post-build step of a task built with
+ * fstarpu_mpi_task_build(). *_arglist is a NULL-terminated array whose first
+ * element is the codelet, followed by the task arguments.
+ *
+ * Returns the negative value of _fstarpu_mpi_task_decode_v() when the
+ * decoding fails, and the return value of _starpu_mpi_task_postbuild_v()
+ * otherwise.
+ */
+int fstarpu_mpi_task_post_build(MPI_Fint _comm, void ***_arglist)
+{
+	void **arglist = *_arglist;
+	struct starpu_codelet *codelet = arglist[0];
+	if (codelet == NULL)
+	{
+		STARPU_ABORT_MSG("task without codelet");
+	}
+	MPI_Comm comm = MPI_Comm_f2c(_comm);
+	int xrank, do_execute;
+	int ret, me, nb_nodes;
+	struct starpu_data_descr *descrs;
+	int nb_data;
+
+	starpu_mpi_comm_rank(comm, &me);
+	starpu_mpi_comm_size(comm, &nb_nodes);
+
+	/* Find out whether we are to execute the codelet because we own the data to be written to.
+	 * arglist[0] is the codelet: the arguments to decode start at arglist+1,
+	 * as in fstarpu_mpi_task_insert() and fstarpu_mpi_task_build(). */
+	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, arglist+1);
+	if (ret < 0) return ret;
+
+	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data);
+}
+
+#endif /* HAVE_MPI_COMM_F2C */
+
+
+