Browse Source

merge trunk

Nathalie Furmento 7 years ago
parent
commit
a5b9f794da

+ 5 - 0
ChangeLog

@@ -61,6 +61,11 @@ Small features:
     tag value.
   * New configure option enable-mpi-verbose to manage the display of
     extra MPI debug messages.
+  * New starpu_task_insert parameter STARPU_CL_ARGS_NFREE which allows
+    setting codelet parameters without freeing them.
+  * New starpu_task_insert parameter STARPU_TASK_DEPS_ARRAY which
+    allows declaring task dependencies similarly to
+    starpu_task_declare_deps_array()
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 7 - 7
doc/doxygen/chapters/410_mpi_support.doxy

@@ -883,13 +883,13 @@ the configure option \ref enable-mpi-master-slave
 "--enable-mpi-master-slave". However, you should not activate both MPI
 support and MPI Master-Slave support.
 
-If a codelet contains a kernel for CPU devices, it is automatically eligible to be executed
-on a MPI Slave device. Moreover, you can force the execution on a MPI Slave by setting
-the field \ref starpu_codelet::mpi_ms_funcs. Functions have to be
-globally-visible (i.e. not static) for StarPU to be able to look them
-up, and <c>-rdynamic</c> must be passed to gcc (or
-<c>-export-dynamic</c> to ld) so that symbols of the main program are
-visible.
+The existing kernels for CPU devices can be used as is. They only have to be
+exposed by giving the name of the function in the \ref starpu_codelet::cpu_funcs_name field.
+Functions have to be globally-visible (i.e. not static) for StarPU to
+be able to look them up, and <c>-rdynamic</c> must be passed to gcc (or
+<c>-export-dynamic</c> to ld) so that symbols of the main program are visible.
+Optionally, you can select a different function to run on slaves by setting
+the field \ref starpu_codelet::mpi_ms_funcs.
 
 By default, one core is dedicated on the master node to manage the
 entire set of slaves. If the implementation of MPI you are using has a

+ 17 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -31,7 +31,8 @@ specifying the worker on which to execute the task (as specified by
 starpu_task::execute_on_a_specific_worker)
 <li> the specific values ::STARPU_VALUE, ::STARPU_CALLBACK,
 ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG, ::STARPU_PRIORITY,
-::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, ::STARPU_SCHED_CTX, ::STARPU_CL_ARGS
+::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, ::STARPU_SCHED_CTX, ::STARPU_CL_ARGS, ::STARPU_CL_ARGS_NFREE,
+::STARPU_TASK_DEPS_ARRAY,
 followed by the appropriate objects as defined elsewhere.
 </ul>
 
@@ -59,6 +60,14 @@ the task, and by the size of the arguments. The memory buffer should
 be the result of a previous call to starpu_codelet_pack_args(), and will be
 freed (i.e. starpu_task::cl_arg_free will be set to 1)
 
+\def STARPU_CL_ARGS_NFREE
+\ingroup API_Insert_Task
+Used when calling starpu_task_insert(), similarly to ::STARPU_CL_ARGS, must
+be followed by a memory buffer containing the arguments to be given to
+the task, and by the size of the arguments. The memory buffer should
+be the result of a previous call to starpu_codelet_pack_args(), and will NOT be
+freed (i.e. starpu_task::cl_arg_free will be set to 0)
+
 \def STARPU_CALLBACK
 \ingroup API_Insert_Task
 Used when calling starpu_task_insert(), must
@@ -130,6 +139,13 @@ Used when calling starpu_task_insert(), must
 be followed by the id of the scheduling context to which to submit the
 task.
 
+\def STARPU_TASK_DEPS_ARRAY
+\ingroup API_Insert_Task
+Used when calling starpu_task_insert(), must
+be followed by a number of tasks, and an array containing these tasks.
+The function starpu_task_declare_deps_array() will be called with the
+given values.
+
 \fn void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
 \ingroup API_Insert_Task
 Pack arguments of type ::STARPU_VALUE into a buffer which can be

+ 22 - 22
doc/doxygen/chapters/api/mpi.doxy

@@ -76,41 +76,41 @@ Return the size of the communicator \c MPI_COMM_WORLD
 \anchor MPIPtpCommunication
 \ingroup API_MPI_Support
 
-\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm)
+\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Perform a standard-mode, blocking send of \p data_handle to the node
 \p dest using the message tag \p data_tag within the communicator \p
 comm.
 
-\fn int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm)
+\fn int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_send, but takes a priority \p prio.
 
-\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status)
+\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status)
 \ingroup API_MPI_Support
 Perform a standard-mode, blocking receive in \p data_handle from the
 node \p source using the message tag \p data_tag within the
 communicator \p comm.
 
-\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm)
+\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Post a standard-mode, non blocking send of \p data_handle to the node
 \p dest using the message tag \p data_tag within the communicator \p
 comm. After the call, the pointer to the request \p req can be used to
 test or to wait for the completion of the communication.
 
-\fn int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
+\fn int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_isend, but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int64_t data_tag, MPI_Comm comm)
+\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
 using the message tag \p data_tag within the communicator \p comm.
 After the call, the pointer to the request \p req can be used to test
 or to wait for the completion of the communication.
 
-\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Post a standard-mode, non blocking send of \p data_handle to the node
 \p dest using the message tag \p data_tag within the communicator \p
@@ -121,11 +121,11 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
-\fn int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_isend_detached, but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
 using the message tag \p data_tag within the communicator \p comm. On
@@ -136,7 +136,7 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
-\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
+\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
 using the message tag \p data_tag within the communicator \p comm. On
@@ -152,17 +152,17 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
-\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm)
+\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Perform a synchronous-mode, non-blocking send of \p data_handle to the node
 \p dest using the message tag \p data_tag within the communicator \p
 comm.
 
-\fn int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
+\fn int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_issend, but takes a priority \p prio.
 
-\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Perform a synchronous-mode, non-blocking send of \p data_handle to the node
 \p dest using the message tag \p data_tag within the communicator \p
@@ -192,23 +192,23 @@ have called it.
 \ingroup API_MPI_Support
 Wait until all StarPU tasks and communications for the given communicator are completed.
 
-\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post a standard-mode, non blocking send of \p data_handle to the node
 \p dest using the message tag \p data_tag within the communicator \p
 comm. On completion, \p tag is unlocked.
 
-\fn int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_isend_detached_unlock_tag(), but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
+\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
 using the message tag \p data_tag within the communicator \p comm. On
 completion, \p tag is unlocked.
 
-\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post \p array_size standard-mode, non blocking send. Each post sends
 the n-th data of the array \p data_handle to the n-th node of the
@@ -216,11 +216,11 @@ array \p dest using the n-th message tag of the array \p data_tag
 within the n-th communicator of the array \p comm. On completion of
 all the requests, \p tag is unlocked.
 
-\fn int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_isend_array_detached_unlock_tag(), but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
+\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post \p array_size nonblocking receive. Each post receives in the n-th
 data of the array \p data_handle from the n-th node of the array \p
@@ -297,7 +297,7 @@ value was previously sent to \p dest, and not flushed since then.
 \anchor MPIInsertTask
 \ingroup API_MPI_Support
 
-\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int64_t data_tag, int rank, MPI_Comm comm)
+\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
 \ingroup API_MPI_Support
 Register to MPI a StarPU data handle with the given tag, rank and MPI communicator.
 It also automatically clears the MPI communication cache when unregistering the data.
@@ -307,7 +307,7 @@ It also automatically clears the MPI communication cache when unregistering the
 Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator \c MPI_COMM_WORLD.
 It also automatically clears the MPI communication cache when unregistering the data.
 
-\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int64_t data_tag)
+\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag)
 \ingroup API_MPI_Support
 Register to MPI a StarPU data handle with the given tag. No rank will be defined.
 It also automatically clears the MPI communication cache when unregistering the data.
@@ -342,7 +342,7 @@ Return the rank of the given data.
 Return the rank of the given data.
 Symbol kept for backward compatibility. Calling function starpu_mpi_data_get_rank()
 
-\fn int64_t starpu_mpi_data_get_tag(starpu_data_handle_t handle)
+\fn starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle)
 \ingroup API_MPI_Support
 Return the tag of the given data.
 

+ 7 - 0
examples/stencil/Makefile.am

@@ -30,6 +30,13 @@ AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFL
 if USE_MPI
 LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
 AM_CPPFLAGS += -I$(top_srcdir)/mpi/include
+if STARPU_SIMGRID
+MPI			=	$(abs_top_builddir)/tools/starpu_smpirun -np 4 -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile
+else
+MPI			=	$(MPIEXEC) $(MPIEXEC_ARGS) -np 4
+endif
+else
+MPI			=
 endif
 
 CC = $(CC_OR_MPICC)

+ 4 - 0
include/fstarpu_mod.f90

@@ -32,6 +32,8 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY
         type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY
         type(c_ptr), bind(C) :: FSTARPU_CL_ARGS
+        type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE
+        type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY
         type(c_ptr), bind(C) :: FSTARPU_CALLBACK
         type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG
         type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG
@@ -2259,6 +2261,8 @@ module fstarpu_mod
                         FSTARPU_DATA_ARRAY      = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR)
                         FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR)
                         FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR)
+                        FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR)
+                        FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR)
                         FSTARPU_CALLBACK        = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR)
                         FSTARPU_CALLBACK_WITH_ARG       = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR)
                         FSTARPU_CALLBACK_ARG    = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR)

+ 3 - 1
include/starpu_task_util.h

@@ -61,7 +61,9 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_NODE_SELECTION_POLICY (23<<STARPU_MODE_SHIFT)
 #define STARPU_NAME		 (24<<STARPU_MODE_SHIFT)
 #define STARPU_CL_ARGS		(25<<STARPU_MODE_SHIFT)
-#define STARPU_SHIFTED_MODE_MAX (26<<STARPU_MODE_SHIFT)
+#define STARPU_CL_ARGS_NFREE	(26<<STARPU_MODE_SHIFT)
+#define STARPU_TASK_DEPS_ARRAY	(27<<STARPU_MODE_SHIFT)
+#define STARPU_SHIFTED_MODE_MAX (28<<STARPU_MODE_SHIFT)
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 1 - 1
mpi/examples/mpi_lu/pxlu.c

@@ -99,7 +99,7 @@ static void send_data_to_mask(starpu_data_handle_t handle, int *rank_mask, int m
 
 	int rank_array[world_size];
 	MPI_Comm comm_array[world_size];
-	int mpi_tag_array[world_size];
+	starpu_mpi_tag_t mpi_tag_array[world_size];
 	starpu_data_handle_t handle_array[world_size];
 
 	int r;

+ 17 - 17
mpi/include/fstarpu_mpi_mod.f90

@@ -21,7 +21,7 @@ module fstarpu_mpi_mod
 
         interface
                 ! == mpi/include/starpu_mpi.h ==
-                ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
+                ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
                 function fstarpu_mpi_isend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -34,7 +34,7 @@ module fstarpu_mpi_mod
                 end function fstarpu_mpi_isend
 
                 ! == mpi/include/starpu_mpi.h ==
-                ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+                ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
                 function fstarpu_mpi_isend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -47,7 +47,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_isend_prio
 
-                ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int64_t data_tag, MPI_Comm comm);
+                ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm);
                 function fstarpu_mpi_irecv (dh, mpi_req, src, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -59,7 +59,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_irecv
 
-                ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm);
+                ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
                 function fstarpu_mpi_send (dh, dst, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -70,7 +70,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_send
 
-                ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+                ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
                 function fstarpu_mpi_send_prio (dh, dst, data_tag, prio, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -82,7 +82,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_send_prio
 
-                ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status);
+                ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status);
                 function fstarpu_mpi_recv (dh, src, data_tag, mpi_comm, mpi_status) bind(C)
                         use iso_c_binding
                         implicit none
@@ -94,7 +94,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: mpi_status
                 end function fstarpu_mpi_recv
 
-                ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
                 function fstarpu_mpi_isend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
@@ -107,7 +107,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_isend_detached
 
-                ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+                ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
                 function fstarpu_mpi_isend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
@@ -121,7 +121,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_isend_detached_prio
 
-                ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
                 function fstarpu_mpi_recv_detached (dh, src, data_tag, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
@@ -134,7 +134,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_recv_detached
 
-                ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
+                ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
                 function fstarpu_mpi_issend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -146,7 +146,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_issend
 
-                ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+                ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
                 function fstarpu_mpi_issend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
@@ -159,7 +159,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_issend_prio
 
-                ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
                 function fstarpu_mpi_issend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
@@ -172,7 +172,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_issend_detached
 
-                ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+                ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
                 function fstarpu_mpi_issend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
@@ -213,7 +213,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_barrier
 
-                ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+                ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
                 function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, data_tag, mpi_comm, callback, arg, seq_const) &
                                 bind(C)
                         use iso_c_binding
@@ -343,7 +343,7 @@ module fstarpu_mpi_mod
                 end function fstarpu_mpi_gather_detached
 
 
-                ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+                ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
                 function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, data_tag, mpi_comm, starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none
@@ -355,7 +355,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_isend_detached_unlock_tag
 
-                ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
+                ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
                 function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, data_tag, prio, mpi_comm, starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none
@@ -368,7 +368,7 @@ module fstarpu_mpi_mod
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_isend_detached_unlock_tag_prio
 
-                ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+                ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
                 function fstarpu_mpi_recv_detached_unlock_tag (dh, src, data_tag, mpi_comm, starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none

+ 25 - 23
mpi/include/starpu_mpi.h

@@ -33,24 +33,26 @@ extern "C"
 
 typedef void *starpu_mpi_req;
 
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
-int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int64_t data_tag, MPI_Comm comm);
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm);
-int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm);
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status);
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
-int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
-int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+typedef int64_t starpu_mpi_tag_t;
+
+int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status);
+int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm);
+int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm);
+int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
 int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
 int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
 int starpu_mpi_barrier(MPI_Comm comm);
 
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
 
 int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
 int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
@@ -73,13 +75,13 @@ void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle,
 int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
 int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
 
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
+int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag);
 
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
+int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
 
 void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts);
 
@@ -97,19 +99,19 @@ int starpu_mpi_world_size(void);
 int starpu_mpi_get_communication_tag(void);
 void starpu_mpi_set_communication_tag(int tag);
 
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int64_t data_tag, int rank, MPI_Comm comm);
+void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm);
 #define starpu_mpi_data_register(data_handle, data_tag, rank) starpu_mpi_data_register_comm(data_handle, data_tag, rank, MPI_COMM_WORLD)
 
 #define STARPU_MPI_PER_NODE -2
 
 void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm);
 #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD)
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int64_t data_tag);
+void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag);
 #define starpu_data_set_rank starpu_mpi_data_set_rank
 #define starpu_data_set_tag starpu_mpi_data_set_tag
 
 int starpu_mpi_data_get_rank(starpu_data_handle_t handle);
-int64_t starpu_mpi_data_get_tag(starpu_data_handle_t handle);
+starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle);
 #define starpu_data_get_rank starpu_mpi_data_get_rank
 #define starpu_data_get_tag starpu_mpi_data_get_tag
 

+ 1 - 1
mpi/src/mpi/starpu_mpi_early_request.c

@@ -64,7 +64,7 @@ void _starpu_mpi_early_request_check_termination()
 	STARPU_ASSERT_MSG(_starpu_mpi_early_request_count() == 0, "Number of early requests left is not zero");
 }
 
-struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int64_t data_tag, int source, MPI_Comm comm)
+struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm)
 {
 	struct _starpu_mpi_node_tag node_tag;
 	struct _starpu_mpi_req *found;

+ 1 - 1
mpi/src/mpi/starpu_mpi_early_request.h

@@ -37,7 +37,7 @@ int _starpu_mpi_early_request_count(void);
 void _starpu_mpi_early_request_check_termination(void);
 
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
-struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int64_t data_tag, int source, MPI_Comm comm);
+struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 
 #ifdef __cplusplus
 }

+ 2 - 2
mpi/src/mpi/starpu_mpi_mpi.c

@@ -96,7 +96,7 @@ static int posted_requests = 0, ready_requests = 0, newer_requests, barrier_runn
 #define _STARPU_MPI_INC_POSTED_REQUESTS(value) { STARPU_PTHREAD_MUTEX_LOCK(&mutex_posted_requests); posted_requests += value; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_posted_requests); }
 #define _STARPU_MPI_INC_READY_REQUESTS(value) { STARPU_PTHREAD_MUTEX_LOCK(&mutex_ready_requests); ready_requests += value; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_ready_requests); }
 
-extern struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count);
+extern struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count);
 
 #pragma weak smpi_simulated_main_
 extern int smpi_simulated_main_(int argc, char *argv[]);
@@ -343,7 +343,7 @@ static void nop_acquire_cb(void *arg)
 }
 
 struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, int64_t data_tag, MPI_Comm comm,
+						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
 						       enum starpu_data_access_mode mode,

+ 1 - 1
mpi/src/mpi/starpu_mpi_sync_data.c

@@ -91,7 +91,7 @@ int _starpu_mpi_sync_data_count(void)
 	return _starpu_mpi_sync_data_handle_hashmap_count;
 }
 
-struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int64_t data_tag, int source, MPI_Comm comm)
+struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm)
 {
 	struct _starpu_mpi_req *req;
 	struct _starpu_mpi_node_tag node_tag;

+ 1 - 1
mpi/src/mpi/starpu_mpi_sync_data.h

@@ -34,7 +34,7 @@ void _starpu_mpi_sync_data_init(void);
 void _starpu_mpi_sync_data_check_termination(void);
 void _starpu_mpi_sync_data_shutdown(void);
 
-struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int64_t data_tag, int source, MPI_Comm comm);
+struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm);
 void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *req);
 int _starpu_mpi_sync_data_count();
 

+ 4 - 4
mpi/src/mpi/starpu_mpi_tag.c

@@ -29,7 +29,7 @@
 struct handle_tag_entry
 {
 	UT_hash_handle hh;
-	int64_t data_tag;
+	starpu_mpi_tag_t data_tag;
 	starpu_data_handle_t handle;
 };
 
@@ -57,7 +57,7 @@ void _starpu_mpi_tag_shutdown(void)
 	registered_tag_handles = NULL;
 }
 
-starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int64_t data_tag)
+starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t data_tag)
 {
 	struct handle_tag_entry *ret;
 
@@ -75,7 +75,7 @@ starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int64_t data_tag)
 	}
 }
 
-void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int64_t data_tag)
+void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag)
 {
 	if (data_tag == -1)
 	{
@@ -101,7 +101,7 @@ void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int64_t data_tag
 
 int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
 {
-	int64_t data_tag = starpu_mpi_data_get_tag(handle);
+	starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(handle);
 
 	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %"PRIi64"d from hashtable\n", handle, data_tag);
 

+ 2 - 2
mpi/src/mpi/starpu_mpi_tag.h

@@ -31,9 +31,9 @@ extern "C"
 void _starpu_mpi_tag_init(void);
 void _starpu_mpi_tag_shutdown(void);
 
-void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int64_t data_tag);
+void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag);
 int _starpu_mpi_tag_data_release(starpu_data_handle_t handle);
-starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int64_t data_tag);
+starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t data_tag);
 
 #ifdef __cplusplus
 }

+ 19 - 19
mpi/src/starpu_mpi.c

@@ -42,7 +42,7 @@
 #endif
 
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
-							int dest, int64_t data_tag, MPI_Comm comm,
+							int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm,
 							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 							int sequential_consistency)
 {
@@ -55,7 +55,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 					      sequential_consistency, 0, 0);
 }
 
-int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
+int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
 	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req");
@@ -72,12 +72,12 @@ int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *publ
 	return 0;
 }
 
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, MPI_Comm comm)
+int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 {
 	return starpu_mpi_isend_prio(data_handle, public_req, dest, data_tag, 0, comm);
 }
 
-int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 	_starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 1, 0, prio, callback, arg, 1);
@@ -85,12 +85,12 @@ int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, i
 	return 0;
 }
 
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg);
 }
 
-int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm)
+int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 {
 	starpu_mpi_req req;
 	MPI_Status status;
@@ -105,12 +105,12 @@ int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t dat
 	return 0;
 }
 
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm)
+int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 {
 	return starpu_mpi_send_prio(data_handle, dest, data_tag, 0, comm);
 }
 
-int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
+int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
 	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_issend needs a valid starpu_mpi_req");
@@ -125,12 +125,12 @@ int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *pub
 	return 0;
 }
 
-int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, MPI_Comm comm)
+int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 {
 	return starpu_mpi_issend_prio(data_handle, public_req, dest, data_tag, 0, comm);
 }
 
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 
@@ -140,17 +140,17 @@ int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest,
 	return 0;
 }
 
-int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_issend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg);
 }
 
-struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
+struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
 {
 	return _starpu_mpi_isend_irecv_common(data_handle, source, data_tag, comm, detached, sync, 0, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W, sequential_consistency, is_internal_req, count);
 }
 
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int64_t data_tag, MPI_Comm comm)
+int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
 	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_irecv needs a valid starpu_mpi_req");
@@ -167,7 +167,7 @@ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 	return 0;
 }
 
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 
@@ -176,7 +176,7 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int6
 	return 0;
 }
 
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
 {
 	_STARPU_MPI_LOG_IN();
 
@@ -186,7 +186,7 @@ int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_h
 	return 0;
 }
 
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status)
+int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status)
 {
 	starpu_mpi_req req;
 
@@ -223,7 +223,7 @@ void _starpu_mpi_data_clear(starpu_data_handle_t data_handle)
 	free(data_handle->mpi_data);
 }
 
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int64_t data_tag, int rank, MPI_Comm comm)
+void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm)
 {
 	struct _starpu_mpi_data *mpi_data;
 	if (data_handle->mpi_data)
@@ -265,7 +265,7 @@ void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Co
 	starpu_mpi_data_register_comm(handle, -1, rank, comm);
 }
 
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int64_t data_tag)
+void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag)
 {
 	starpu_mpi_data_register_comm(handle, data_tag, -1, MPI_COMM_WORLD);
 }
@@ -276,7 +276,7 @@ int starpu_mpi_data_get_rank(starpu_data_handle_t data)
 	return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.rank;
 }
 
-int64_t starpu_mpi_data_get_tag(starpu_data_handle_t data)
+starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t data)
 {
 	STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data);
 	return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.data_tag;

+ 3 - 3
mpi/src/starpu_mpi_collective.c

@@ -62,7 +62,7 @@ int _callback_set(int rank, starpu_data_handle_t *data_handles, int count, int r
 			if (data_handles[x])
 			{
 				int owner = starpu_mpi_data_get_rank(data_handles[x]);
-				int64_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
+				starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
 				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 				if ((rank == root) && (owner != root))
 				{
@@ -103,7 +103,7 @@ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, i
 		if (data_handles[x])
 		{
 			int owner = starpu_mpi_data_get_rank(data_handles[x]);
-			int64_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
+			starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
 			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 			if ((rank == root) && (owner != root))
 			{
@@ -138,7 +138,7 @@ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, in
 		if (data_handles[x])
 		{
 			int owner = starpu_mpi_data_get_rank(data_handles[x]);
-			int64_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
+			starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
 			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 			if ((rank == root) && (owner != root))
 			{

+ 22 - 22
mpi/src/starpu_mpi_fortran.c

@@ -97,7 +97,7 @@ int fstarpu_mpi_barrier(MPI_Fint comm)
 	return starpu_mpi_barrier(MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
+int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
 {
 	return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg, seq_const);
 }
@@ -138,22 +138,22 @@ int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int
 }
 
 /* isend/irecv detached unlock tag */
-int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
 {
 	return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, data_tag, MPI_Comm_f2c(comm), *starpu_tag);
 }
-int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag)
 {
 	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), *starpu_tag);
 }
 
-int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
 {
 	return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, data_tag, MPI_Comm_f2c(comm), *starpu_tag);
 }
 
 /* isend/irecv array detached unlock tag */
-int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, int64_t *data_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, starpu_mpi_tag_t *data_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
 {
 	MPI_Comm comms[array_size];
 	int i;
@@ -165,12 +165,12 @@ int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data
 	return ret;
 }
 
-int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, int64_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, starpu_mpi_tag_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
 {
 	return fstarpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handles, dsts, data_tags, NULL, _comms, starpu_tag);
 }
 
-int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, int64_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, starpu_mpi_tag_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
 {
 	MPI_Comm comms[array_size];
 	int i;
@@ -183,69 +183,69 @@ int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_hand
 }
 
 /* isend/irecv */
-int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, MPI_Fint comm)
+int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm)
 {
 	return starpu_mpi_isend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, int prio, MPI_Fint comm)
+int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm)
 {
 	return starpu_mpi_isend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, int64_t data_tag, MPI_Fint comm)
+int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm)
 {
 	return starpu_mpi_irecv(data_handle, req, src, data_tag, MPI_Comm_f2c(comm));
 }
 
 /* send/recv */
-int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm)
+int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm)
 {
 	return starpu_mpi_send(data_handle, dst, data_tag, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm)
+int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm)
 {
 	return starpu_mpi_send_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, MPI_Status *status)
+int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, MPI_Status *status)
 {
 	return starpu_mpi_recv(data_handle, src, data_tag, MPI_Comm_f2c(comm), status);
 }
 
 /* isend/irecv detached */
-int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_isend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg);
 }
 
-int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_isend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg);
 }
 
-int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_irecv_detached(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg);
 }
 
 /* issend / issend detached */
-int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, MPI_Fint comm)
+int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm)
 {
 	return starpu_mpi_issend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm));
 }
-int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, int prio, MPI_Fint comm)
+int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm)
 {
 	return starpu_mpi_issend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_issend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg);
 }
 
-int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_issend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg);
 }
@@ -276,12 +276,12 @@ MPI_Fint fstarpu_mpi_world_comm()
 	return MPI_Comm_c2f(MPI_COMM_WORLD);
 }
 
-void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, int64_t data_tag, int rank, MPI_Fint comm)
+void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag, int rank, MPI_Fint comm)
 {
 	starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_Comm_f2c(comm)); /* plain call: ISO C forbids "return expr;" in a void function */
 }
 
-void fstarpu_mpi_data_register(starpu_data_handle_t handle, int64_t data_tag, int rank)
+void fstarpu_mpi_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag, int rank)
 {
 	starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_COMM_WORLD); /* plain call: ISO C forbids "return expr;" in a void function */
 }

+ 6 - 6
mpi/src/starpu_mpi_helper.c

@@ -27,7 +27,7 @@ static void starpu_mpi_unlock_tag_callback(void *arg)
 	free(tagptr);
 }
 
-int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
+int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
 {
 	starpu_tag_t *tagptr;
 	_STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t));
@@ -36,12 +36,12 @@ int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle,
 	return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, prio, comm, starpu_mpi_unlock_tag_callback, tagptr);
 }
 
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
+int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 {
 	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dest, data_tag, 0, comm, tag);
 }
 
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
+int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 {
 	starpu_tag_t *tagptr;
 	_STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t));
@@ -69,7 +69,7 @@ static void starpu_mpi_array_unlock_callback(void *_arg)
 	}
 }
 
-int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
+int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
 {
 	if (!array_size)
 		return 0;
@@ -91,12 +91,12 @@ int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_
 	return 0;
 }
 
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
+int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 {
 	return starpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handle, dest, data_tag, NULL, comm, tag);
 }
 
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
+int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 {
 	if (!array_size)
 		return 0;

+ 1 - 1
mpi/src/starpu_mpi_init.c

@@ -48,7 +48,7 @@ static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg)
 	{
 		case MPI_THREAD_SERIALIZED:
 		{
-			_STARPU_DEBUG("MPI%s MPI_THREAD_SERIALIZED; Multiple threads may make MPI calls, but only one at a time.\n", msg);
+			_STARPU_DISP("MPI%s MPI_THREAD_SERIALIZED; Multiple threads may make MPI calls, but only one at a time.\n", msg);
 			break;
 		}
 		case MPI_THREAD_FUNNELED:

+ 2 - 2
mpi/src/starpu_mpi_private.h

@@ -170,7 +170,7 @@ struct _starpu_mpi_envelope
 {
 	int mode;
 	starpu_ssize_t size;
-	int64_t data_tag;
+	starpu_mpi_tag_t data_tag;
 	unsigned sync;
 };
 #endif /* STARPU_USE_MPI_MPI */
@@ -190,7 +190,7 @@ struct _starpu_mpi_node_tag
 {
 	MPI_Comm comm;
 	int rank;
-	int64_t data_tag;
+	starpu_mpi_tag_t data_tag;
 };
 
 struct _starpu_mpi_data

+ 16 - 6
mpi/src/starpu_mpi_task_insert.c

@@ -105,7 +105,7 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 	if (data && mode & STARPU_R)
 	{
 		int mpi_rank = starpu_mpi_data_get_rank(data);
-		int64_t data_tag = starpu_mpi_data_get_tag(data);
+		starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data);
 		if (mpi_rank == -1)
 		{
 			_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
@@ -147,7 +147,7 @@ void _starpu_mpi_exchange_data_after_execution(starpu_data_handle_t data, enum s
 	if (mode & STARPU_W)
 	{
 		int mpi_rank = starpu_mpi_data_get_rank(data);
-		int64_t data_tag = starpu_mpi_data_get_tag(data);
+		starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data);
 		if(mpi_rank == -1)
 		{
 			_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
@@ -348,8 +348,18 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 		}
 		else if (arg_type==STARPU_CL_ARGS)
 		{
-			(void)va_arg(varg_list, void *);
-			(void)va_arg(varg_list, size_t);
+			(void)va_arg(varg_list_copy, void *);
+			(void)va_arg(varg_list_copy, size_t);
+		}
+		else if (arg_type==STARPU_CL_ARGS_NFREE)
+		{
+			(void)va_arg(varg_list_copy, void *);
+			(void)va_arg(varg_list_copy, size_t);
+		}
+		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
+		{
+			(void)va_arg(varg_list_copy, unsigned);
+			(void)va_arg(varg_list_copy, struct starpu_task **);
 		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
@@ -652,7 +662,7 @@ struct _starpu_mpi_redux_data_args
 {
 	starpu_data_handle_t data_handle;
 	starpu_data_handle_t new_handle;
-	int64_t data_tag;
+	starpu_mpi_tag_t data_tag;
 	int node;
 	MPI_Comm comm;
 	struct starpu_task *taskB;
@@ -723,7 +733,7 @@ void _starpu_mpi_redux_data_recv_callback(void *callback_arg)
 void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio)
 {
 	int me, rank, nb_nodes;
-	int64_t tag;
+	starpu_mpi_tag_t tag;
 
 	rank = starpu_mpi_data_get_rank(data_handle);
 	tag = starpu_mpi_data_get_tag(data_handle);

+ 14 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -176,6 +176,20 @@ int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_no
 			arg_i++;
 			/* size_t */
 		}
+		else if (arg_type==STARPU_CL_ARGS_NFREE)
+		{
+			arg_i++;
+			/* void* */
+			arg_i++;
+			/* size_t */
+		}
+		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
+		{
+			arg_i++;
+			/* unsigned */
+			arg_i++;
+			/* struct starpu_task ** */
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			arg_i++;

+ 1 - 1
mpi/tests/Makefile.am

@@ -104,7 +104,6 @@ if BUILD_TESTS
 starpu_mpi_TESTS =
 
 starpu_mpi_TESTS +=				\
-	attr					\
 	cache					\
 	cache_disable				\
 	callback				\
@@ -132,6 +131,7 @@ starpu_mpi_TESTS +=				\
 
 if !STARPU_SIMGRID
 starpu_mpi_TESTS +=				\
+	attr					\
 	pingpong				\
 	mpi_test				\
 	mpi_isend				\

+ 2 - 1
src/common/barrier_counter.c

@@ -130,7 +130,7 @@ int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_
 	{
 		/* have those not happy enough tell us how much again */
 		barrier_c->min_threshold = 0;
-		STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
+		STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
 	}
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
@@ -144,6 +144,7 @@ int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c,
 
 	barrier->reached_start++;
 	barrier->reached_flops += flops;
+	STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 	return 0;
 }

+ 53 - 11
src/core/sched_ctx.c

@@ -54,8 +54,25 @@ static void set_priority_hierarchically_on_notified_workers(int* workers_to_add,
 static void fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx);
 static void add_notified_workers(int *workers_to_add, int nworkers_to_add, unsigned sched_ctx_id);
 
+/* Three-way comparison of two ints for qsort() (reused from combined_workers.c).
+ *
+ * pa and pb point to the two ints to compare. Returns a negative value,
+ * zero or a positive value when *pa is respectively lower than, equal to
+ * or greater than *pb.
+ *
+ * The result is built from two comparisons rather than from a subtraction,
+ * so that it cannot overflow whatever the values of the operands are. */
+static int compar_int(const void *pa, const void *pb)
+{
+	const int a = *(const int *)pa;
+	const int b = *(const int *)pb;
+
+	return (a > b) - (a < b);
+}
+
+/* Sort workerid_array[] by ascending worker id (reused from combined_workers.c).
+ *
+ * nworkers is the number of entries of workerid_array[]. An array of fewer
+ * than two entries is already sorted and is left untouched; this also
+ * keeps a null array or a negative count away from qsort(), where the
+ * count would be converted to a huge unsigned value. */
+static void sort_workerid_array(int nworkers, int workerid_array[])
+{
+	if (nworkers < 2)
+		return;
+
+	qsort(workerid_array, (size_t)nworkers, sizeof(workerid_array[0]), compar_int);
+}
+
 /* notify workers that a ctx change operation is about to proceed.
  *
+ * workerids must be sorted by ascending id
+ *
  * Once this function returns, the notified workers must not start a new
  * scheduling operation until they are notified that the ctx change op is
  * done.
@@ -67,6 +84,8 @@ static void notify_workers_about_changing_ctx_pending(const unsigned nworkers, c
 	unsigned i;
 	for (i=0; i<nworkers; i++)
 	{
+		/* check that workerids[] is sorted to prevent multi-lock acquisition deadlocks */
+		STARPU_ASSERT(i == 0 || (workerids[i] > workerids[i-1]));
 		if (starpu_worker_is_combined_worker(workerids[i]))
 			continue;
 		if (workerids[i] == cur_workerid)
@@ -80,6 +99,8 @@ static void notify_workers_about_changing_ctx_pending(const unsigned nworkers, c
 
 /* notify workers that a ctx change operation is complete.
  *
+ * workerids must be sorted by ascending id
+ *
  * Once this function returns, the workers may proceed with scheduling operations again.
  */
 static void notify_workers_about_changing_ctx_done(const unsigned nworkers, const int * const workerids)
@@ -89,6 +110,8 @@ static void notify_workers_about_changing_ctx_done(const unsigned nworkers, cons
 	unsigned i;
 	for (i=0; i<nworkers; i++)
 	{
+		/* check that workerids[] is sorted to prevent multi-lock acquisition deadlocks */
+		STARPU_ASSERT(i == 0 || (workerids[i] > workerids[i-1]));
 		if (starpu_worker_is_combined_worker(workerids[i]))
 			continue;
 		if (workerids[i] == cur_workerid)
@@ -368,6 +391,7 @@ static void _starpu_add_workers_to_new_sched_ctx(struct _starpu_sched_ctx *sched
 		STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 	}
 
+	sort_workerid_array(nworkers, workerids);
 	notify_workers_about_changing_ctx_pending(nworkers, workerids);
 	_do_add_notified_workers(sched_ctx, workerids, nworkers);
 	if(sched_ctx->sched_policy && sched_ctx->sched_policy->add_workers)
@@ -498,6 +522,7 @@ static void _starpu_sched_ctx_create_hwloc_tree(struct _starpu_sched_ctx *sched_
 }
 #endif
 
+/* Must be called with sched_ctx_manag mutex held */
 struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerids,
 						   int nworkers_ctx, unsigned is_initial_sched,
 						   const char *sched_ctx_name,
@@ -510,7 +535,6 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 {
 	struct _starpu_machine_config *config = _starpu_get_machine_config();
 
-	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
 	STARPU_ASSERT(config->topology.nsched_ctxs < STARPU_NMAX_SCHED_CTXS);
 
 	unsigned id = _starpu_get_first_free_sched_ctx(config);
@@ -624,7 +648,6 @@ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *p
 	}
 
 	(void)STARPU_ATOMIC_ADD(&config->topology.nsched_ctxs,1);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 
 	return sched_ctx;
 }
@@ -670,6 +693,7 @@ int starpu_sched_ctx_get_stream_worker(unsigned sub_ctx)
 
 unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, ...)
 {
+	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
 	va_list varg_list;
 	int arg_type;
 	int min_prio_set = 0;
@@ -754,6 +778,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 		if (workerids[i] < 0 || workerids[i] >= num_workers)
 		{
 			_STARPU_ERROR("Invalid worker ID (%d) specified!\n", workerids[i]);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 			return STARPU_NMAX_SCHED_CTXS;
 		}
 	}
@@ -765,6 +790,7 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 
 	int *added_workerids;
 	unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids);
+	sort_workerid_array(nw_ctx, added_workerids);
 	notify_workers_about_changing_ctx_pending(nw_ctx, added_workerids);
 	_starpu_sched_ctx_lock_write(sched_ctx->id);
 	_starpu_update_notified_workers_with_ctx(added_workerids, nw_ctx, sched_ctx->id);
@@ -774,11 +800,13 @@ unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched
 #ifdef STARPU_USE_SC_HYPERVISOR
 	sched_ctx->perf_counters = NULL;
 #endif
+	STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 	return sched_ctx->id;
 }
 
 int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, void **arglist)
 {
+	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
 	int arg_i = 0;
 	int min_prio_set = 0;
 	int max_prio_set = 0;
@@ -879,6 +907,7 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 			if (workerids[i] < 0 || workerids[i] >= num_workers)
 			{
 				_STARPU_ERROR("Invalid worker ID (%d) specified!\n", workerids[i]);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 				return STARPU_NMAX_SCHED_CTXS;
 			}
 		}
@@ -891,6 +920,7 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 
 	int *added_workerids;
 	unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids);
+	sort_workerid_array(nw_ctx, added_workerids);
 	notify_workers_about_changing_ctx_pending(nw_ctx, added_workerids);
 	_starpu_sched_ctx_lock_write(sched_ctx->id);
 	_starpu_update_notified_workers_with_ctx(added_workerids, nw_ctx, sched_ctx->id);
@@ -900,6 +930,7 @@ int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx
 #ifdef STARPU_USE_SC_HYPERVISOR
 	sched_ctx->perf_counters = NULL;
 #endif
+	STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 	return (int)sched_ctx->id;
 }
 
@@ -920,7 +951,8 @@ void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void* perf_counte
 }
 #endif
 
-/* free all structures for the context */
+/* free all structures for the context
+   Must be called with sched_ctx_manag mutex held */
 static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 {
 	STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS);
@@ -949,13 +981,13 @@ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx)
 	hwloc_bitmap_free(sched_ctx->hwloc_workers_set);
 #endif //STARPU_HAVE_HWLOC
 
-	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
+	sched_ctx->id = STARPU_NMAX_SCHED_CTXS;
 	config->topology.nsched_ctxs--;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 }
 
 void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 {
+	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	STARPU_ASSERT(sched_ctx);
 
@@ -978,6 +1010,7 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 	unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids);
 	int backup_workerids[nworkers_ctx];
 	memcpy(backup_workerids, workerids, nworkers_ctx*sizeof(backup_workerids[0]));
+	sort_workerid_array(nworkers_ctx, backup_workerids);
 	notify_workers_about_changing_ctx_pending(nworkers_ctx, backup_workerids);
 
 	/*if both of them have all the ressources is pointless*/
@@ -1006,21 +1039,28 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 		 * complete before altering the sched_ctx under sched_mutex protection */
 		_starpu_update_notified_workers_without_ctx(workerids, nworkers_ctx, sched_ctx_id, 1);
 		_starpu_sched_ctx_free_scheduling_data(sched_ctx);
+		notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
+		occupied_sms -= sched_ctx->nsms;
+		_starpu_sched_ctx_unlock_write(sched_ctx_id);
+		STARPU_PTHREAD_RWLOCK_DESTROY(&sched_ctx->rwlock);
 		_starpu_delete_sched_ctx(sched_ctx);
 	}
-	notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
-
-	_starpu_sched_ctx_unlock_write(sched_ctx_id);
+	else
+	{
+		notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
+		occupied_sms -= sched_ctx->nsms;
+		_starpu_sched_ctx_unlock_write(sched_ctx_id);
+	}
 	/* workerids is malloc-ed in starpu_sched_ctx_get_workers_list, don't forget to free it when
 	   you don't use it anymore */
 	free(workerids);
-	occupied_sms -= sched_ctx->nsms;
-	return;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 }
 
 /* called after the workers are terminated so we don't have anything else to do but free the memory*/
 void _starpu_delete_all_sched_ctxs()
 {
+	STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag);
 	unsigned i;
 	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
 	{
@@ -1038,7 +1078,7 @@ void _starpu_delete_all_sched_ctxs()
 	}
 
 	STARPU_PTHREAD_KEY_DELETE(sched_ctx_key);
-	return;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag);
 }
 
 static void _starpu_check_workers(int *workerids, int nworkers)
@@ -1260,6 +1300,7 @@ void starpu_sched_ctx_add_workers(int *workers_to_add, unsigned nworkers_to_add,
 	}
 	else
 	{
+		sort_workerid_array(cumulated_nworkers, cumulated_workerids);
 		notify_workers_about_changing_ctx_pending(cumulated_nworkers, cumulated_workerids);
 		_starpu_sched_ctx_lock_write(sched_ctx_id);
 		add_notified_workers(workers_to_add, nworkers_to_add, sched_ctx_id);
@@ -1334,6 +1375,7 @@ void starpu_sched_ctx_remove_workers(int *workers_to_remove, unsigned nworkers_t
 		}
 		else
 		{
+			sort_workerid_array(cumulated_nworkers, cumulated_workerids);
 			notify_workers_about_changing_ctx_pending(cumulated_nworkers, cumulated_workerids);
 			_starpu_sched_ctx_lock_write(sched_ctx_id);
 			remove_notified_workers(workers_to_remove, nworkers_to_remove, sched_ctx_id);

+ 4 - 0
src/core/workers.c

@@ -1545,6 +1545,10 @@ unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, stru
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 	return 0;
 #else
+	/* do not block if a sched_ctx change operation is pending */
+	if (worker->state_changing_ctx_notice)
+		return 0;
+
 	unsigned can_block = 1;
 
 	struct starpu_driver driver;

+ 5 - 0
src/core/workers.h

@@ -116,6 +116,7 @@ LIST_TYPE(_starpu_worker,
 	  * - transition from 1 to 0 triggers a unblock_req
 	  */
 	unsigned block_in_parallel_ref_count;
+	starpu_pthread_t thread_changing_ctx; /* thread currently changing a sched_ctx containing the worker */
 	/* list of deferred context changes
 	 *
 	 * when the current thread is a worker, _and_ this worker is in a
@@ -900,6 +901,7 @@ static inline int _starpu_worker_sched_op_pending(void)
  */
 static inline void _starpu_worker_enter_changing_ctx_op(struct _starpu_worker * const worker)
 {
+	STARPU_ASSERT(!starpu_pthread_equal(worker->thread_changing_ctx, starpu_pthread_self()));
 	/* flush pending requests to start on a fresh transaction epoch */
 	while (worker->state_changing_ctx_notice)
 		STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
@@ -911,6 +913,8 @@ static inline void _starpu_worker_enter_changing_ctx_op(struct _starpu_worker *
 	 */
 	worker->state_changing_ctx_notice = 1;
 
+	worker->thread_changing_ctx = starpu_pthread_self();
+
 	/* allow for an already started sched_op to complete */
 	if (worker->state_sched_op_pending)
 	{
@@ -939,6 +943,7 @@ static inline void _starpu_worker_enter_changing_ctx_op(struct _starpu_worker *
  */
 static inline void _starpu_worker_leave_changing_ctx_op(struct _starpu_worker * const worker)
 {
 	/* forget the thread recorded by _starpu_worker_enter_changing_ctx_op(),
 	 * so that its assertion against re-entering from the same thread
 	 * does not trigger on the next ctx change */
+	worker->thread_changing_ctx = (starpu_pthread_t)0;
 	/* drop the notice flag, then wake up every thread waiting on
 	 * sched_cond, such as the ones looping in
 	 * _starpu_worker_enter_changing_ctx_op() until the flag is cleared.
 	 * NOTE(review): presumably called with worker->sched_mutex held, as
 	 * the enter side waits on sched_cond with that mutex -- confirm */
 	worker->state_changing_ctx_notice = 0;
 	STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
 }

+ 3 - 0
src/drivers/mp_common/sink_common.c

@@ -779,6 +779,9 @@ void _starpu_sink_common_execute(struct _starpu_mp_node *node,
 
 	_STARPU_MALLOC(task->interfaces, task->nb_interfaces * sizeof(*task->interfaces));
 
+#ifdef STARPU_DEVEL
+#warning TODO: use pack/unpack for user-defined interfaces
+#endif
 	/* The function needs an array pointing to each interface it needs
 	 * during execution. As in sink-side there is no mean to know which
 	 * kind of interface to expect, the array is composed of unions of

+ 4 - 0
src/util/fstarpu.c

@@ -34,6 +34,8 @@ static const intptr_t fstarpu_locality	= STARPU_LOCALITY;
 static const intptr_t fstarpu_data_array	= STARPU_DATA_ARRAY;
 static const intptr_t fstarpu_data_mode_array	= STARPU_DATA_MODE_ARRAY;
 static const intptr_t fstarpu_cl_args	= STARPU_CL_ARGS;
+static const intptr_t fstarpu_cl_args_nfree	= STARPU_CL_ARGS_NFREE;
+static const intptr_t fstarpu_task_deps_array	= STARPU_TASK_DEPS_ARRAY;
 static const intptr_t fstarpu_callback	= STARPU_CALLBACK;
 static const intptr_t fstarpu_callback_with_arg	= STARPU_CALLBACK_WITH_ARG;
 static const intptr_t fstarpu_callback_arg	= STARPU_CALLBACK_ARG;
@@ -105,6 +107,8 @@ intptr_t fstarpu_get_constant(char *s)
 	else if	(!strcmp(s, "FSTARPU_DATA_ARRAY"))	{ return fstarpu_data_array; }
 	else if	(!strcmp(s, "FSTARPU_DATA_MODE_ARRAY"))	{ return fstarpu_data_mode_array; }
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS"))	{ return fstarpu_cl_args; }
+	else if	(!strcmp(s, "FSTARPU_CL_ARGS_NFREE"))	{ return fstarpu_cl_args_nfree; }
+	else if	(!strcmp(s, "FSTARPU_TASK_DEPS_ARRAY"))	{ return fstarpu_task_deps_array; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK"))	{ return fstarpu_callback; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG"))	{ return fstarpu_callback_with_arg; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_ARG"))	{ return fstarpu_callback_arg; }

+ 52 - 0
src/util/starpu_task_insert_utils.c

@@ -78,6 +78,16 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 			(void)va_arg(varg_list, void *);
 			(void)va_arg(varg_list, size_t);
 		}
+		else if (arg_type==STARPU_CL_ARGS_NFREE)
+		{
+			(void)va_arg(varg_list, void *);
+			(void)va_arg(varg_list, size_t);
+		}
+		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
+		{
+			(void)va_arg(varg_list, unsigned);
+			(void)va_arg(varg_list, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			(void)va_arg(varg_list, _starpu_callback_func_t);
@@ -301,6 +311,8 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **t
 	int current_buffer;
 	int nargs = 0;
 	int allocated_buffers = 0;
+	unsigned ndeps = 0;
+	struct starpu_task **task_deps_array = NULL;
 
 	_STARPU_TRACE_TASK_BUILD_START();
 
@@ -343,6 +355,18 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **t
 			(*task)->cl_arg_size = va_arg(varg_list, size_t);
 			(*task)->cl_arg_free = 1;
 		}
+		else if (arg_type==STARPU_CL_ARGS_NFREE)
+		{
+			(*task)->cl_arg = va_arg(varg_list, void *);
+			(*task)->cl_arg_size = va_arg(varg_list, size_t);
+			(*task)->cl_arg_free = 0;
+		}
+		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
+		{
+			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' cannot be set twice");
+			ndeps = va_arg(varg_list, unsigned);
+			task_deps_array = va_arg(varg_list, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			(*task)->callback_func = va_arg(varg_list, _starpu_callback_func_t);
@@ -485,6 +509,11 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **t
 		arg_buffer_ = NULL;
 	}
 
+	if (task_deps_array)
+	{
+		starpu_task_declare_deps_array((*task), ndeps, task_deps_array);
+	}
+
 	_STARPU_TRACE_TASK_BUILD_END();
 	return 0;
 }
@@ -498,6 +527,8 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 	int current_buffer = 0;
 	int nargs = 0;
 	int allocated_buffers = 0;
+	unsigned ndeps = 0;
+	struct starpu_task **task_deps_array = NULL;
 
 	_STARPU_TRACE_TASK_BUILD_START();
 
@@ -549,6 +580,22 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 			(*task)->cl_arg_size = (size_t)(intptr_t)arglist[arg_i];
 			(*task)->cl_arg_free = 1;
 		}
+		else if (arg_type == STARPU_CL_ARGS_NFREE)
+		{
+			arg_i++;
+			(*task)->cl_arg = arglist[arg_i];
+			arg_i++;
+			(*task)->cl_arg_size = (size_t)(intptr_t)arglist[arg_i];
+			(*task)->cl_arg_free = 0;
+		}
+		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
+		{
+			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' cannot be set twice");
+			arg_i++;
+			ndeps = *(unsigned *)arglist[arg_i];
+			arg_i++;
+			task_deps_array = arglist[arg_i];
+		}
 		else if (arg_type == STARPU_CALLBACK)
 		{
 			arg_i++;
@@ -707,6 +754,11 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task **
 		arg_buffer_ = NULL;
 	}
 
+	if (task_deps_array)
+	{
+		starpu_task_declare_deps_array(*task, ndeps, task_deps_array);
+	}
+
 	_STARPU_TRACE_TASK_BUILD_END();
 
 	return 0;

+ 1 - 0
tests/Makefile.am

@@ -154,6 +154,7 @@ myPROGRAMS +=					\
 	main/codelet_null_callback		\
 	datawizard/allocate			\
 	datawizard/acquire_cb			\
+	datawizard/deps				\
 	datawizard/user_interaction_implicit	\
 	datawizard/interfaces/copy_interfaces	\
 	datawizard/locality			\

+ 111 - 0
tests/datawizard/deps.c

@@ -0,0 +1,111 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+#define N 10
+#define LOOPS 4
+
+/* CPU kernel doing nothing: both parameters are deliberately ignored */
+void null_cpu_func(void *buffers[], void *arg)
+{
+	(void)buffers;
+	(void)arg;
+}
+
+/* CPU kernel: multiply in place every element of the vector found in
+ * buffers[0] by the int factor unpacked from the codelet argument buffer */
+void prod_cpu_func(void *buffers[], void *arg)
+{
+	int factor;
+	int *values;
+	int count;
+	int idx;
+
+	starpu_codelet_unpack_args(arg, &factor);
+	values = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
+	count = STARPU_VECTOR_GET_NX(buffers[0]);
+
+	FPRINTF(stderr, "Multiplying by %d\n", factor);
+	for (idx = 0; idx < count; idx++)
+	{
+		values[idx] = values[idx] * factor;
+	}
+}
+
+/* Codelet named "null": declares a single CPU implementation,
+ * null_cpu_func(), and sets no data buffer */
+static struct starpu_codelet cl_null =
+{
+	.name = "null",
+	.cpu_funcs_name = {"null_cpu_func"},
+	.cpu_funcs = {null_cpu_func},
+};
+
+/* Codelet named "prod": declares a single CPU implementation,
+ * prod_cpu_func(), working on one buffer accessed in STARPU_RW mode */
+static struct starpu_codelet cl_prod =
+{
+	.name = "prod",
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.cpu_funcs_name = {"prod_cpu_func"},
+	.cpu_funcs = {prod_cpu_func},
+};
+
+/* Check the STARPU_TASK_DEPS_ARRAY parameter of starpu_task_insert().
+ *
+ * LOOPS multiplication tasks are inserted on the same vector, each of them
+ * being given motherTask through STARPU_TASK_DEPS_ARRAY; motherTask itself
+ * is only submitted once all of them have been inserted. The vector is
+ * then compared with data2[], on which the same products are applied
+ * directly by the main thread.
+ *
+ * Returns EXIT_SUCCESS when both arrays match, and STARPU_TEST_SKIPPED
+ * when StarPU reports -ENODEV at initialization or at task submission. */
+int main(int argc, char **argv)
+{
+	int i, j, ret;
+	int data[N];
+	int data2[N];
+	int factor[LOOPS];
+	starpu_data_handle_t data_handle;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* data[] is handed over to StarPU, data2[] holds the expected values */
+	for(i=0 ; i<N ; i++) data[i] = 12;
+	for(i=0 ; i<N ; i++) data2[i] = 12;
+	starpu_vector_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) data, N, sizeof(int));
+
+	/* built but not submitted yet: it is first given as a dependency to
+	 * the tasks inserted below */
+	struct starpu_task *motherTask = starpu_task_build(&cl_null, STARPU_NAME, "motherTask", 0);
+
+	for (i = 0; i < LOOPS; i++)
+	{
+		/* each task gets its own factor[] entry, and the same
+		 * product is applied to the reference array */
+		factor[i] = i+1;
+		for(j=0 ; j<N ; j++) data2[j] *= factor[i];
+		ret = starpu_task_insert(&cl_prod,
+					 STARPU_RW, data_handle,
+					 STARPU_VALUE, &factor[i], sizeof(factor[i]),
+					 STARPU_TASK_DEPS_ARRAY, 1, &motherTask,
+					 0);
+		if (ret == -ENODEV) goto enodev;
+		/* label the check with the function that was really called,
+		 * so that a failure here is not mistaken for a failure of
+		 * the starpu_task_submit() call below */
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	}
+
+	ret = starpu_task_submit(motherTask);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	starpu_task_wait_for_all();
+	starpu_data_unregister(data_handle);
+
+	for(i=0 ; i<N ; i++)
+	{
+		FPRINTF(stderr, "data[%d] = %d ==? %d \n", i, data[i], data2[i]);
+		STARPU_ASSERT_MSG(data[i] == data2[i], "Incorrect computation\n");
+	}
+
+	starpu_shutdown();
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+	 * could perform the kernel, so this is not an error from StarPU.
+	 * NOTE(review): data_handle is still registered and motherTask may
+	 * not have been submitted when this path is taken -- confirm that
+	 * starpu_shutdown() copes with it */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}

+ 1 - 1
tests/datawizard/variable_size.c

@@ -319,7 +319,7 @@ int main(int argc, char **argv)
 	starpu_task_wait_for_all();
 
 	/* Cholesky-like accesses */
-	for (i = 0; i < 100; i++)
+	for (i = 0; i < N; i++)
 		for (x = i; x < N; x++)
 			for (y = x; y < N; y++)
 				starpu_task_insert(&cl, STARPU_RW, handles[x][y], STARPU_PRIORITY, (2*N-x-y), 0);

+ 1 - 5
tools/dev/checker/starpu_check_copyright.sh

@@ -25,10 +25,6 @@ do
     if test -z "$copyright"
     then
 	echo "File $f does not include a proper copyright"
+	svn log $f | grep '|' | awk -F'|' '{print $2}' | sort | uniq
     fi
 done
-
-for f in $(cat /tmp/list_$$)
-do
-    svn log $f | grep '|' | awk -F'|' '{print $2}' | sort | uniq
-done