Nathalie Furmento 7 lat temu
rodzic
commit
a2f44d1c40
60 zmienionych plików z 577 dodań i 386 usunięć
  1. 3 0
      ChangeLog
  2. 0 4
      configure.ac
  3. 50 38
      doc/doxygen/chapters/api/mpi.doxy
  4. 60 60
      mpi/include/fstarpu_mpi_mod.f90
  5. 28 24
      mpi/include/starpu_mpi.h
  6. 2 2
      mpi/src/load_balancer/policy/load_heat_propagation.c
  7. 1 1
      mpi/src/mpi/starpu_mpi_comm.c
  8. 3 3
      mpi/src/mpi/starpu_mpi_early_data.c
  9. 4 4
      mpi/src/mpi/starpu_mpi_early_request.c
  10. 1 1
      mpi/src/mpi/starpu_mpi_early_request.h
  11. 19 19
      mpi/src/mpi/starpu_mpi_mpi.c
  12. 7 7
      mpi/src/mpi/starpu_mpi_sync_data.c
  13. 1 1
      mpi/src/mpi/starpu_mpi_sync_data.h
  14. 18 15
      mpi/src/mpi/starpu_mpi_tag.c
  15. 2 2
      mpi/src/mpi/starpu_mpi_tag.h
  16. 37 22
      mpi/src/starpu_mpi.c
  17. 3 3
      mpi/src/starpu_mpi_collective.c
  18. 49 44
      mpi/src/starpu_mpi_fortran.c
  19. 39 39
      mpi/src/starpu_mpi_fxt.h
  20. 9 13
      mpi/src/starpu_mpi_helper.c
  21. 3 3
      mpi/src/starpu_mpi_private.h
  22. 7 6
      mpi/src/starpu_mpi_task_insert.c
  23. 2 0
      mpi/tests/Makefile.am
  24. 37 0
      mpi/tests/attr.c
  25. 1 1
      mpi/tests/block_interface.c
  26. 1 1
      mpi/tests/block_interface_pinned.c
  27. 2 1
      mpi/tests/datatypes.c
  28. 1 1
      mpi/tests/early_request.c
  29. 1 1
      mpi/tests/gather.c
  30. 1 1
      mpi/tests/gather2.c
  31. 1 1
      mpi/tests/insert_task_count.c
  32. 1 1
      mpi/tests/insert_task_dyn_handles.c
  33. 1 1
      mpi/tests/load_balancer.c
  34. 1 1
      mpi/tests/mpi_detached_tag.c
  35. 1 1
      mpi/tests/mpi_earlyrecv.c
  36. 5 1
      mpi/tests/mpi_earlyrecv2.c
  37. 1 1
      mpi/tests/mpi_irecv.c
  38. 1 1
      mpi/tests/mpi_irecv_detached.c
  39. 1 1
      mpi/tests/mpi_isend.c
  40. 1 1
      mpi/tests/mpi_isend_detached.c
  41. 1 1
      mpi/tests/mpi_redux.c
  42. 1 1
      mpi/tests/mpi_test.c
  43. 1 1
      mpi/tests/pingpong.c
  44. 1 1
      mpi/tests/policy_register.c
  45. 1 1
      mpi/tests/ring.c
  46. 1 1
      mpi/tests/ring_async.c
  47. 1 1
      mpi/tests/ring_async_implicit.c
  48. 1 1
      mpi/tests/ring_sync.c
  49. 1 1
      mpi/tests/ring_sync_detached.c
  50. 3 0
      src/common/prio_list.h
  51. 29 29
      src/core/disk_ops/disk_hdf5.c
  52. 5 4
      src/core/disk_ops/unistd/disk_unistd_global.c
  53. 24 3
      src/core/sched_ctx.c
  54. 33 0
      src/core/simgrid.c
  55. 16 0
      src/core/simgrid.h
  56. 3 2
      src/datawizard/data_request.c
  57. 28 12
      src/drivers/driver_common/driver_common.c
  58. 7 0
      tests/Makefile.am
  59. 13 0
      tests/datawizard/variable_size.c
  60. 1 0
      tools/starpu_lp2paje.c

+ 3 - 0
ChangeLog

@@ -56,6 +56,9 @@ Small features:
     variables.
   * Add disk to disk copy functions and support asynchronous full read/write
     in disk backends.
+  * New function starpu_mpi_comm_get_attr() which returns the value
+    of the attribute STARPU_MPI_TAG_UB, i.e. the upper bound for
+    tag values.
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 0 - 4
configure.ac

@@ -3311,10 +3311,6 @@ LIBSTARPU_LDFLAGS="$HWLOC_LIBS $FXT_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAG
 AC_SUBST([LIBSTARPU_LDFLAGS])
 
 LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la
-if test x$enable_perf_debug = xyes; then
-	# For gperf to work, we need to link statically our tests
-	LIBSTARPU_LINK=".libs/libstarpu-$STARPU_EFFECTIVE_VERSION.a $LIBSTARPU_LDFLAGS $STARPU_CUDA_LDFLAGS $STARPU_OPENCL_LDFLAGS"
-fi
 AC_SUBST([LIBSTARPU_LINK])
 
 if test "x$enable_shared" = xno; then

+ 50 - 38
doc/doxygen/chapters/api/mpi.doxy

@@ -76,44 +76,44 @@ Return the size of the communicator \c MPI_COMM_WORLD
 \anchor MPIPtpCommunication
 \ingroup API_MPI_Support
 
-\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
+\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Perform a standard-mode, blocking send of \p data_handle to the node
-\p dest using the message tag \p mpi_tag within the communicator \p
+\p dest using the message tag \p data_tag within the communicator \p
 comm.
 
-\fn int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm)
+\fn int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_send, but takes a priority \p prio.
 
-\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
+\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status)
 \ingroup API_MPI_Support
 Perform a standard-mode, blocking receive in \p data_handle from the
-node \p source using the message tag \p mpi_tag within the
+node \p source using the message tag \p data_tag within the
 communicator \p comm.
 
-\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
+\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Post a standard-mode, non blocking send of \p data_handle to the node
-\p dest using the message tag \p mpi_tag within the communicator \p
+\p dest using the message tag \p data_tag within the communicator \p
 comm. After the call, the pointer to the request \p req can be used to
 test or to wait for the completion of the communication.
 
-\fn int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm)
+\fn int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_isend, but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm)
+\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int64_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p mpi_tag within the communicator \p comm.
+using the message tag \p data_tag within the communicator \p comm.
 After the call, the pointer to the request \p req can be used to test
 or to wait for the completion of the communication.
 
-\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Post a standard-mode, non blocking send of \p data_handle to the node
-\p dest using the message tag \p mpi_tag within the communicator \p
+\p dest using the message tag \p data_tag within the communicator \p
 comm. On completion, the \p callback function is called with the
 argument \p arg.
 Similarly to the pthread detached functionality, when a detached
@@ -121,14 +121,14 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
-\fn int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_isend_detached, but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p mpi_tag within the communicator \p comm. On
+using the message tag \p data_tag within the communicator \p comm. On
 completion, the \p callback function is called with the argument \p
 arg.
 Similarly to the pthread detached functionality, when a detached
@@ -136,10 +136,10 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
-\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
+\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p mpi_tag within the communicator \p comm. On
+using the message tag \p data_tag within the communicator \p comm. On
 completion, the \p callback function is called with the argument \p
 arg.
 The parameter \p sequential_consistency allows to enable or disable
@@ -152,20 +152,20 @@ communication completes, its resources are automatically released back
 to the system, there is no need to test or to wait for the completion
 of the request.
 
-\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
+\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm)
 \ingroup API_MPI_Support
 Perform a synchronous-mode, non-blocking send of \p data_handle to the node
-\p dest using the message tag \p mpi_tag within the communicator \p
+\p dest using the message tag \p data_tag within the communicator \p
 comm.
 
-\fn int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm)
+\fn int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
 \ingroup API_MPI_Support
 Similar to starpu_mpi_issend, but takes a priority \p prio.
 
-\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 \ingroup API_MPI_Support
 Perform a synchronous-mode, non-blocking send of \p data_handle to the node
-\p dest using the message tag \p mpi_tag within the communicator \p
+\p dest using the message tag \p data_tag within the communicator \p
 comm. On completion, the \p callback function is called with the argument \p
 arg.
 Similarly to the pthread detached functionality, when a detached
@@ -192,39 +192,39 @@ have called it.
 \ingroup API_MPI_Support
 Wait until all StarPU tasks and communications for the given communicator are completed.
 
-\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post a standard-mode, non blocking send of \p data_handle to the node
-\p dest using the message tag \p mpi_tag within the communicator \p
+\p dest using the message tag \p data_tag within the communicator \p
 comm. On completion, \p tag is unlocked.
 
-\fn int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
-Similar to starpu_mpi_isend_detached_unlock_tag, but takes a priority \p prio.
+Similar to starpu_mpi_isend_detached_unlock_tag(), but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
+\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post a nonblocking receive in \p data_handle from the node \p source
-using the message tag \p mpi_tag within the communicator \p comm. On
+using the message tag \p data_tag within the communicator \p comm. On
 completion, \p tag is unlocked.
 
-\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post \p array_size standard-mode, non blocking send. Each post sends
 the n-th data of the array \p data_handle to the n-th node of the
-array \p dest using the n-th message tag of the array \p mpi_tag
+array \p dest using the n-th message tag of the array \p data_tag
 within the n-th communicator of the array \p comm. On completion of
 all the requests, \p tag is unlocked.
 
-\fn int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
+\fn int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
-Similar to starpu_mpi_isend_array_detached_unlock_tag, but takes a priority \p prio.
+Similar to starpu_mpi_isend_array_detached_unlock_tag(), but takes a priority \p prio.
 
-\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
+\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 \ingroup API_MPI_Support
 Post \p array_size nonblocking receive. Each post receives in the n-th
 data of the array \p data_handle from the n-th node of the array \p
-source using the n-th message tag of the array \p mpi_tag within the
+source using the n-th message tag of the array \p data_tag within the
 n-th communicator of the array \p comm. On completion of all the
 requests, \p tag is unlocked.
 
@@ -245,6 +245,18 @@ It is important that the function is called before any communication can take pl
 \ingroup API_MPI_Support
 Unregister the MPI datatype functions stored for the interface of the given handle.
 
+\def STARPU_MPI_TAG_UB
+\ingroup API_MPI_Support
+When given to the function starpu_mpi_comm_get_attr(), retrieve the
+upper bound for tag values.
+
+\fn int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag);
+\ingroup API_MPI_Support
+Retrieve an attribute value by key, similarly to the MPI function \c MPI_Comm_get_attr().
+If an attribute is attached on \p comm to \p keyval, then the call
+returns \p flag equal to \c 1, and the attribute value in \p
+attribute_val. Otherwise, \p flag is set to \c 0.
+
 @name Communication Cache
 \ingroup API_MPI_Support
 
@@ -285,17 +297,17 @@ value was previously sent to \p dest, and not flushed since then.
 \anchor MPIInsertTask
 \ingroup API_MPI_Support
 
-\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm)
+\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int64_t data_tag, int rank, MPI_Comm comm)
 \ingroup API_MPI_Support
 Register to MPI a StarPU data handle with the given tag, rank and MPI communicator.
 It also automatically clears the MPI communication cache when unregistering the data.
 
-\def starpu_mpi_data_register(data_handle, tag, rank)
+\def starpu_mpi_data_register(data_handle, data_tag, rank)
 \ingroup API_MPI_Support
 Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator \c MPI_COMM_WORLD.
 It also automatically clears the MPI communication cache when unregistering the data.
 
-\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag)
+\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int64_t data_tag)
 \ingroup API_MPI_Support
 Register to MPI a StarPU data handle with the given tag. No rank will be defined.
 It also automatically clears the MPI communication cache when unregistering the data.
@@ -330,7 +342,7 @@ Return the rank of the given data.
 Return the rank of the given data.
 Symbol kept for backward compatibility. Calling function starpu_mpi_data_get_rank()
 
-\fn int starpu_mpi_data_get_tag(starpu_data_handle_t handle)
+\fn int64_t starpu_mpi_data_get_tag(starpu_data_handle_t handle)
 \ingroup API_MPI_Support
 Return the tag of the given data.
 

+ 60 - 60
mpi/include/fstarpu_mpi_mod.f90

@@ -21,165 +21,165 @@ module fstarpu_mpi_mod
 
         interface
                 ! == mpi/include/starpu_mpi.h ==
-                ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-                function fstarpu_mpi_isend (dh, mpi_req, dst, mpi_tag, mpi_comm) bind(C)
+                ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_isend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_isend
                         type(c_ptr), value, intent(in) :: dh
                         type(c_ptr), value, intent(in) :: mpi_req
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_isend
 
                 ! == mpi/include/starpu_mpi.h ==
-                ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-                function fstarpu_mpi_isend_prio (dh, mpi_req, dst, mpi_tag, prio, mpi_comm) bind(C)
+                ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+                function fstarpu_mpi_isend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_isend_prio
                         type(c_ptr), value, intent(in) :: dh
                         type(c_ptr), value, intent(in) :: mpi_req
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_isend_prio
 
-                ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm);
-                function fstarpu_mpi_irecv (dh, mpi_req, src, mpi_tag, mpi_comm) bind(C)
+                ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int64_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_irecv (dh, mpi_req, src, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_irecv
                         type(c_ptr), value, intent(in) :: dh
                         type(c_ptr), value, intent(in) :: mpi_req
                         integer(c_int), value, intent(in) :: src
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_irecv
 
-                ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm);
-                function fstarpu_mpi_send (dh, dst, mpi_tag, mpi_comm) bind(C)
+                ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_send (dh, dst, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_send
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_send
 
-                ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm);
-                function fstarpu_mpi_send_prio (dh, dst, mpi_tag, prio, mpi_comm) bind(C)
+                ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+                function fstarpu_mpi_send_prio (dh, dst, data_tag, prio, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_send_prio
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_send_prio
 
-                ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status);
-                function fstarpu_mpi_recv (dh, src, mpi_tag, mpi_comm, mpi_status) bind(C)
+                ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status);
+                function fstarpu_mpi_recv (dh, src, data_tag, mpi_comm, mpi_status) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_recv
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: src
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_ptr), value, intent(in) :: mpi_status
                 end function fstarpu_mpi_recv
 
-                ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-                function fstarpu_mpi_isend_detached (dh, dst, mpi_tag, mpi_comm, callback, arg) bind(C)
+                ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_isend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_isend_detached
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_funptr), value, intent(in) :: callback
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_isend_detached
 
-                ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-                function fstarpu_mpi_isend_detached_prio (dh, dst, mpi_tag, prio, mpi_comm, callback, arg) bind(C)
+                ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_isend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_isend_detached_prio
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_funptr), value, intent(in) :: callback
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_isend_detached_prio
 
-                ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-                function fstarpu_mpi_recv_detached (dh, src, mpi_tag, mpi_comm, callback, arg) bind(C)
+                ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_recv_detached (dh, src, data_tag, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_recv_detached
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: src
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_funptr), value, intent(in) :: callback
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_recv_detached
 
-                ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-                function fstarpu_mpi_issend (dh, mpi_req, dst, mpi_tag, mpi_comm) bind(C)
+                ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
+                function fstarpu_mpi_issend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_issend
                         type(c_ptr), value, intent(in) :: dh
                         type(c_ptr), value, intent(in) :: mpi_req
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_issend
 
-                ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-                function fstarpu_mpi_issend_prio (dh, mpi_req, dst, mpi_tag, prio, mpi_comm) bind(C)
+                ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+                function fstarpu_mpi_issend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_issend_prio
                         type(c_ptr), value, intent(in) :: dh
                         type(c_ptr), value, intent(in) :: mpi_req
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_issend_prio
 
-                ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-                function fstarpu_mpi_issend_detached (dh, dst, mpi_tag, mpi_comm, callback, arg) bind(C)
+                ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_issend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_issend_detached
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_funptr), value, intent(in) :: callback
                         type(c_ptr), value, intent(in) :: arg
                 end function fstarpu_mpi_issend_detached
 
-                ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-                function fstarpu_mpi_issend_detached_prio (dh, dst, mpi_tag, prio, mpi_comm, callback, arg) bind(C)
+                ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+                function fstarpu_mpi_issend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_issend_detached_prio
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_funptr), value, intent(in) :: callback
@@ -213,15 +213,15 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: mpi_comm
                 end function fstarpu_mpi_barrier
 
-                ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
-                function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, mpi_tag, mpi_comm, callback, arg, seq_const) &
+                ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+                function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, data_tag, mpi_comm, callback, arg, seq_const) &
                                 bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_recv_detached_sequential_consistency
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: src
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_funptr), value, intent(in) :: callback
                         type(c_ptr), value, intent(in) :: arg
@@ -343,45 +343,45 @@ module fstarpu_mpi_mod
                 end function fstarpu_mpi_gather_detached
 
 
-                ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, mpi_tag, mpi_comm, starpu_tag) bind(C)
+                ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, data_tag, mpi_comm, starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_isend_detached_unlock_tag
 
-                ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, mpi_tag, prio, mpi_comm, starpu_tag) bind(C)
+                ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, data_tag, prio, mpi_comm, starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag_prio
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: dst
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: prio
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_isend_detached_unlock_tag_prio
 
-                ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-                function fstarpu_mpi_recv_detached_unlock_tag (dh, src, mpi_tag, mpi_comm, starpu_tag) bind(C)
+                ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+                function fstarpu_mpi_recv_detached_unlock_tag (dh, src, data_tag, mpi_comm, starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none
                         integer(c_int) :: fstarpu_mpi_recv_detached_unlock_tag
                         type(c_ptr), value, intent(in) :: dh
                         integer(c_int), value, intent(in) :: src
-                        integer(c_int), value, intent(in) :: mpi_tag
+                        integer(c_int), value, intent(in) :: data_tag
                         integer(c_int), value, intent(in) :: mpi_comm
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_recv_detached_unlock_tag
 
-                ! int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, mpi_tags, mpi_comms, starpu_tag) &
+                ! int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, data_tags, mpi_comms, starpu_tag) &
                                 bind(C)
                         use iso_c_binding
                         implicit none
@@ -389,13 +389,13 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: array_size
                         type(c_ptr), intent(in) :: dhs(*)
                         integer(c_int), intent(in) :: dsts(*)
-                        integer(c_int), intent(in) :: mpi_tags(*)
+                        integer(c_int), intent(in) :: data_tags(*)
                         integer(c_int), intent(in) :: mpi_comms(*)
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_isend_array_detached_unlock_tag
 
-                ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
-                function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, mpi_tags, prio, mpi_comms, &
+                ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
+                function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, data_tags, prio, mpi_comms, &
                                 starpu_tag) bind(C)
                         use iso_c_binding
                         implicit none
@@ -403,14 +403,14 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: array_size
                         type(c_ptr), intent(in) :: dhs(*)
                         integer(c_int), intent(in) :: dsts(*)
-                        integer(c_int), intent(in) :: mpi_tags(*)
+                        integer(c_int), intent(in) :: data_tags(*)
                         integer(c_int), intent(in) :: prio(*)
                         integer(c_int), intent(in) :: mpi_comms(*)
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_isend_array_detached_unlock_tag_prio
 
-                ! int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-                function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, mpi_tags, mpi_comms, starpu_tag) &
+                ! int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+                function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, data_tags, mpi_comms, starpu_tag) &
                                 bind(C)
                         use iso_c_binding
                         implicit none
@@ -418,7 +418,7 @@ module fstarpu_mpi_mod
                         integer(c_int), value, intent(in) :: array_size
                         type(c_ptr), intent(in) :: dhs(*)
                         integer(c_int), intent(in) :: srcs(*)
-                        integer(c_int), intent(in) :: mpi_tags(*)
+                        integer(c_int), intent(in) :: data_tags(*)
                         integer(c_int), intent(in) :: mpi_comms(*)
                         type(c_ptr), value, intent(in) :: starpu_tag
                 end function fstarpu_mpi_recv_array_detached_unlock_tag

+ 28 - 24
mpi/include/starpu_mpi.h

@@ -24,6 +24,7 @@
 #if defined(STARPU_USE_MPI)
 
 #include <mpi.h>
+#include <stdint.h>
 
 #ifdef __cplusplus
 extern "C"
@@ -32,24 +33,24 @@ extern "C"
 
 typedef void *starpu_mpi_req;
 
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm);
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status);
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm);
-int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, int prio, MPI_Comm comm);
-int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
+int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int64_t data_tag, MPI_Comm comm);
+int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm);
+int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status);
+int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, MPI_Comm comm);
+int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int64_t data_tag, int prio, MPI_Comm comm);
+int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg);
+int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg);
 int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status);
 int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status);
 int starpu_mpi_barrier(MPI_Comm comm);
 
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency);
 
 int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm);
 int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi);
@@ -72,13 +73,13 @@ void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle,
 int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
 int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg);
 
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int mpi_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag);
+int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
+int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag);
+int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag);
 
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag);
+int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
+int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag);
+int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag);
 
 void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts);
 
@@ -96,19 +97,19 @@ int starpu_mpi_world_size(void);
 int starpu_mpi_get_communication_tag(void);
 void starpu_mpi_set_communication_tag(int tag);
 
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm);
-#define starpu_mpi_data_register(data_handle, tag, rank) starpu_mpi_data_register_comm(data_handle, tag, rank, MPI_COMM_WORLD)
+void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int64_t data_tag, int rank, MPI_Comm comm);
+#define starpu_mpi_data_register(data_handle, data_tag, rank) starpu_mpi_data_register_comm(data_handle, data_tag, rank, MPI_COMM_WORLD)
 
 #define STARPU_MPI_PER_NODE -2
 
 void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm);
 #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD)
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag);
+void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int64_t data_tag);
 #define starpu_data_set_rank starpu_mpi_data_set_rank
 #define starpu_data_set_tag starpu_mpi_data_set_tag
 
 int starpu_mpi_data_get_rank(starpu_data_handle_t handle);
-int starpu_mpi_data_get_tag(starpu_data_handle_t handle);
+int64_t starpu_mpi_data_get_tag(starpu_data_handle_t handle);
 #define starpu_data_get_rank starpu_mpi_data_get_rank
 #define starpu_data_get_tag starpu_mpi_data_get_tag
 
@@ -137,6 +138,9 @@ int starpu_mpi_datatype_unregister(starpu_data_handle_t handle);
 int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *));
 int starpu_mpi_pre_submit_hook_unregister();
 
+#define STARPU_MPI_TAG_UB 1
+int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag);
+
 #ifdef __cplusplus
 }
 #endif

+ 2 - 2
mpi/src/load_balancer/policy/load_heat_propagation.c

@@ -289,12 +289,12 @@ static void update_data_ranks()
 				//        fprintf(stderr,"Bring back data %p (tag %d) from node %d on node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], starpu_mpi_data_get_rank(handle), my_rank);
 				//}
 
-				_STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
+				_STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%ld,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
 
 				/* Migrate the data handle */
 				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL);
 
-				_STARPU_DEBUG("New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
+				_STARPU_DEBUG("New rank (%d) of data %ld upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
 				starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD);
 			}
 		}

+ 1 - 1
mpi/src/mpi/starpu_mpi_comm.c

@@ -139,7 +139,7 @@ void _starpu_mpi_comm_post_recv()
 		if (_comm->posted == 0)
 		{
 			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop on comm %d %ld\n", i, (long int)_comm->comm);
-			_STARPU_MPI_COMM_FROM_DEBUG(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm);
+			_STARPU_MPI_COMM_FROM_DEBUG(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, (int64_t)_STARPU_MPI_TAG_ENVELOPE, _comm->comm);
 			MPI_Irecv(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm, &_comm->request);
 #ifdef STARPU_SIMGRID
 			_starpu_mpi_simgrid_wait_req(&_comm->request, &_comm->status, &_comm->queue, &_comm->done);

+ 3 - 3
mpi/src/mpi/starpu_mpi_early_data.c

@@ -78,7 +78,7 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu
 	struct _starpu_mpi_early_data_handle *early_data_handle;
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %d\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %ld\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
 	HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
 	if (hashlist == NULL)
 	{
@@ -96,7 +96,7 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu
 			early_data_handle = _starpu_mpi_early_data_handle_list_pop_front(&hashlist->list);
 		}
 	}
-	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %d\n", early_data_handle, (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %ld\n", early_data_handle, (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex);
 	return early_data_handle;
 }
@@ -104,7 +104,7 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu
 void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle)
 {
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Trying to add early_data_handle %p with comm %ld source %d tag %d\n", early_data_handle, (long int)early_data_handle->node_tag.comm,
+	_STARPU_MPI_DEBUG(60, "Trying to add early_data_handle %p with comm %ld source %d tag %ld\n", early_data_handle, (long int)early_data_handle->node_tag.comm,
 			  early_data_handle->node_tag.rank, early_data_handle->node_tag.data_tag);
 
 	struct _starpu_mpi_early_data_handle_hashlist *hashlist;

+ 4 - 4
mpi/src/mpi/starpu_mpi_early_request.c

@@ -64,7 +64,7 @@ void _starpu_mpi_early_request_check_termination()
 	STARPU_ASSERT_MSG(_starpu_mpi_early_request_count() == 0, "Number of early requests left is not zero");
 }
 
-struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int source, MPI_Comm comm)
+struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int64_t data_tag, int source, MPI_Comm comm)
 {
 	struct _starpu_mpi_node_tag node_tag;
 	struct _starpu_mpi_req *found;
@@ -76,7 +76,7 @@ struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int sour
 	node_tag.rank = source;
 	node_tag.data_tag = data_tag;
 
-	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %d\n", (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
+	_STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %ld\n", (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
 	HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);
 	if (hashlist == NULL)
 	{
@@ -94,7 +94,7 @@ struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int sour
 			_starpu_mpi_early_request_hash_count --;
 		}
 	}
-	_STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %d\n", found, (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
+	_STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %ld\n", found, (long int)node_tag.comm, node_tag.rank, node_tag.data_tag);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex);
 	return found;
 }
@@ -102,7 +102,7 @@ struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int sour
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req)
 {
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex);
-	_STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %d in the application request hashmap\n", req, (long int)req->node_tag.comm, req->node_tag.rank, req->node_tag.data_tag);
+	_STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %ld in the application request hashmap\n", req, (long int)req->node_tag.comm, req->node_tag.rank, req->node_tag.data_tag);
 
 	struct _starpu_mpi_early_request_hashlist *hashlist;
 	HASH_FIND(hh, _starpu_mpi_early_request_hash, &req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);

+ 1 - 1
mpi/src/mpi/starpu_mpi_early_request.h

@@ -37,7 +37,7 @@ int _starpu_mpi_early_request_count(void);
 void _starpu_mpi_early_request_check_termination(void);
 
 void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req);
-struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int data_tag, int source, MPI_Comm comm);
+struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(int64_t data_tag, int source, MPI_Comm comm);
 
 #ifdef __cplusplus
 }

+ 19 - 19
mpi/src/mpi/starpu_mpi_mpi.c

@@ -96,7 +96,7 @@ static int posted_requests = 0, ready_requests = 0, newer_requests, barrier_runn
 #define _STARPU_MPI_INC_POSTED_REQUESTS(value) { STARPU_PTHREAD_MUTEX_LOCK(&mutex_posted_requests); posted_requests += value; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_posted_requests); }
 #define _STARPU_MPI_INC_READY_REQUESTS(value) { STARPU_PTHREAD_MUTEX_LOCK(&mutex_ready_requests); ready_requests += value; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_ready_requests); }
 
-extern struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count);
+extern struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count);
 
 #pragma weak smpi_simulated_main_
 extern int smpi_simulated_main_(int argc, char *argv[]);
@@ -208,7 +208,7 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 
 	_STARPU_MPI_INC_POSTED_REQUESTS(-1);
 
-	_STARPU_MPI_DEBUG(3, "new req %p srcdst %d tag %d and type %s %d\n", req, req->node_tag.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->is_internal_req);
+	_STARPU_MPI_DEBUG(3, "new req %p srcdst %d tag %ld and type %s %d\n", req, req->node_tag.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->is_internal_req);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 
@@ -234,7 +234,7 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 				_STARPU_MPI_MALLOC(req->ptr, req->count);
 			}
 
-			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 					  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 					  req->datatype_name, (int)req->count, req->registered_datatype);
 			_starpu_mpi_req_list_push_front(&ready_recv_requests, req);
@@ -266,7 +266,7 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 				STARPU_PTHREAD_MUTEX_UNLOCK(&(early_data_handle->req_mutex));
 				STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 
-				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->node_tag.data_tag);
+				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %ld has already been received, copying previously received data into handle's pointer..\n", req, req->node_tag.data_tag);
 				STARPU_ASSERT(req->data_handle != early_data_handle->handle);
 
 				req->internal_req = early_data_handle->req;
@@ -288,7 +288,7 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 			else
 			{
 				struct _starpu_mpi_req *sync_req = _starpu_mpi_sync_data_find(req->node_tag.data_tag, req->node_tag.rank, req->node_tag.comm);
-				_STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %d and src %d = %p\n", req->node_tag.data_tag, req->node_tag.rank, sync_req);
+				_STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %ld and src %d = %p\n", req->node_tag.data_tag, req->node_tag.rank, sync_req);
 				if (sync_req)
 				{
 					req->sync = 1;
@@ -310,7 +310,7 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 				}
 				else
 				{
-					_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %d) into the request hashmap\n", req, req->node_tag.rank, req->node_tag.data_tag);
+					_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %ld) into the request hashmap\n", req, req->node_tag.rank, req->node_tag.data_tag);
 					_starpu_mpi_early_request_enqueue(req);
 				}
 			}
@@ -323,7 +323,7 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 		else
 			_starpu_mpi_req_list_push_front(&ready_recv_requests, req);
 		_STARPU_MPI_INC_READY_REQUESTS(+1);
-		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 				  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 				  req->datatype_name, (int)req->count, req->registered_datatype);
 	}
@@ -343,7 +343,7 @@ static void nop_acquire_cb(void *arg)
 }
 
 struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
-						       int srcdst, int data_tag, MPI_Comm comm,
+						       int srcdst, int64_t data_tag, MPI_Comm comm,
 						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
 						       enum starpu_data_access_mode mode,
@@ -453,7 +453,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(30, "post MPI isend request %p type %s tag %d src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
+	_STARPU_MPI_DEBUG(30, "post MPI isend request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
 
 	_starpu_mpi_comm_amounts_inc(req->node_tag.comm, req->node_tag.rank, req->datatype, req->count);
 
@@ -567,7 +567,7 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(20, "post MPI irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+	_STARPU_MPI_DEBUG(20, "post MPI irecv request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
 
@@ -700,7 +700,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 	/* Which is the mpi request we are testing for ? */
 	struct _starpu_mpi_req *req = testing_req->other_request;
 
-	_STARPU_MPI_DEBUG(2, "Test request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+	_STARPU_MPI_DEBUG(2, "Test request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 			  req->datatype_name, (int)req->count, req->registered_datatype);
 
@@ -889,7 +889,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n",
+	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 			  req->datatype_name, (int)req->count, req->registered_datatype, req->internal_req);
 
@@ -1138,7 +1138,7 @@ static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req)
 	STARPU_MPI_ASSERT_MSG(req, "Invalid request");
 
 	/* submit the request to MPI */
-	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle,
 			  req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 	req->func(req);
@@ -1148,7 +1148,7 @@ static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req)
 
 static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope, MPI_Status status, MPI_Comm comm)
 {
-	_STARPU_MPI_DEBUG(20, "Request with tag %d and source %d not found, creating a early_data_handle to receive incoming data..\n", envelope->data_tag, status.MPI_SOURCE);
+	_STARPU_MPI_DEBUG(20, "Request with tag %ld and source %d not found, creating a early_data_handle to receive incoming data..\n", envelope->data_tag, status.MPI_SOURCE);
 	_STARPU_MPI_DEBUG(20, "Request sync %d\n", envelope->sync);
 
 	struct _starpu_mpi_early_data_handle* early_data_handle = _starpu_mpi_early_data_create(envelope, status.MPI_SOURCE, comm);
@@ -1178,7 +1178,7 @@ static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope
 		//_starpu_mpi_early_data_add(early_data_handle);
 	}
 
-	_STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_data_handle with tag %d from comm %ld src %d ..\n",
+	_STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_data_handle with tag %ld from comm %ld src %d ..\n",
 			  early_data_handle->node_tag.data_tag, (long int)comm, status.MPI_SOURCE);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
 	early_data_handle->req = _starpu_mpi_irecv_common(early_data_handle->handle, status.MPI_SOURCE,
@@ -1384,14 +1384,14 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 				{
 					struct _starpu_mpi_req *_sync_req = _starpu_mpi_sync_data_find(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm);
-					_STARPU_MPI_DEBUG(20, "Sending data with tag %d to node %d\n", _sync_req->node_tag.data_tag, envelope_status.MPI_SOURCE);
-					STARPU_MPI_ASSERT_MSG(envelope->data_tag == _sync_req->node_tag.data_tag, "Tag mismatch (envelope %d != req %d)\n", envelope->data_tag, _sync_req->node_tag.data_tag);
+					_STARPU_MPI_DEBUG(20, "Sending data with tag %ld to node %d\n", _sync_req->node_tag.data_tag, envelope_status.MPI_SOURCE);
+					STARPU_MPI_ASSERT_MSG(envelope->data_tag == _sync_req->node_tag.data_tag, "Tag mismatch (envelope %ld != req %ld)\n", envelope->data_tag, _sync_req->node_tag.data_tag);
 					STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
 					_starpu_mpi_isend_data_func(_sync_req);
 					STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 				}
 				else
 				{
-					_STARPU_MPI_DEBUG(3, "Searching for application request with tag %d and source %d (size %ld)\n", envelope->data_tag, envelope_status.MPI_SOURCE, envelope->size);
+					_STARPU_MPI_DEBUG(3, "Searching for application request with tag %ld and source %d (size %ld)\n", envelope->data_tag, envelope_status.MPI_SOURCE, envelope->size);
 
 					struct _starpu_mpi_req *early_request = _starpu_mpi_early_request_dequeue(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm);
 
@@ -1404,7 +1404,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					{
 						if (envelope->sync)
 						{
-							_STARPU_MPI_DEBUG(2000, "-------------------------> adding request for tag %d\n", envelope->data_tag);
+							_STARPU_MPI_DEBUG(2000, "-------------------------> adding request for tag %ld\n", envelope->data_tag);
 							struct _starpu_mpi_req *new_req;
 #ifdef STARPU_DEVEL
 #warning creating a request is not really useful.
@@ -1438,7 +1438,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					 * _starpu_mpi_handle_ready_request. */
 					else
 					{
-						_STARPU_MPI_DEBUG(2000, "A matching application request has been found for the incoming data with tag %d\n", envelope->data_tag);
+						_STARPU_MPI_DEBUG(2000, "A matching application request has been found for the incoming data with tag %ld\n", envelope->data_tag);
 						_STARPU_MPI_DEBUG(2000, "Request sync %d\n", envelope->sync);
 
 						early_request->sync = envelope->sync;

+ 7 - 7
mpi/src/mpi/starpu_mpi_sync_data.c

@@ -62,11 +62,11 @@ void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node
 
 	if (hashlist == NULL)
 	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %d does not exist\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld does not exist\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
 	}
 	else if (_starpu_mpi_req_list_empty(&hashlist->list))
 	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %d is empty\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
+		_STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld is empty\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag);
 	}
 	else
 	{
@@ -75,7 +75,7 @@ void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node
 		     cur != _starpu_mpi_req_list_end(&hashlist->list);
 		     cur = _starpu_mpi_req_list_next(cur))
 		{
-			_STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %d: %p\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag, cur);
+			_STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %ld: %p\n", (long int)node_tag->comm, node_tag->rank, node_tag->data_tag, cur);
 		}
 	}
 }
@@ -91,7 +91,7 @@ int _starpu_mpi_sync_data_count(void)
 	return _starpu_mpi_sync_data_handle_hashmap_count;
 }
 
-struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int data_tag, int source, MPI_Comm comm)
+struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int64_t data_tag, int source, MPI_Comm comm)
 {
 	struct _starpu_mpi_req *req;
 	struct _starpu_mpi_node_tag node_tag;
@@ -102,7 +102,7 @@ struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int data_tag, int source, MPI
 	node_tag.rank = source;
 	node_tag.data_tag = data_tag;
 
-	_STARPU_MPI_DEBUG(60, "Looking for sync_data_handle with comm %ld source %d tag %d in the hashmap\n", (long int)comm, source, data_tag);
+	_STARPU_MPI_DEBUG(60, "Looking for sync_data_handle with comm %ld source %d tag %ld in the hashmap\n", (long int)comm, source, data_tag);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
 	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &node_tag, sizeof(struct _starpu_mpi_node_tag), found);
@@ -123,7 +123,7 @@ struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int data_tag, int source, MPI
 		}
 	}
 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex);
-	_STARPU_MPI_DEBUG(60, "Found sync_data_handle %p with comm %ld source %d tag %d in the hashmap\n", req, (long int)comm, source, data_tag);
+	_STARPU_MPI_DEBUG(60, "Found sync_data_handle %p with comm %ld source %d tag %ld in the hashmap\n", req, (long int)comm, source, data_tag);
 	return req;
 }
 
@@ -131,7 +131,7 @@ void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *sync_req)
 {
 	struct _starpu_mpi_sync_data_handle_hashlist *hashlist;
 
-	_STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %d in the hashmap\n", sync_req, (long int)sync_req->node_tag.comm, sync_req->node_tag.rank, sync_req->node_tag.data_tag);
+	_STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %ld in the hashmap\n", sync_req, (long int)sync_req->node_tag.comm, sync_req->node_tag.rank, sync_req->node_tag.data_tag);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex);
 	HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &sync_req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist);

+ 1 - 1
mpi/src/mpi/starpu_mpi_sync_data.h

@@ -34,7 +34,7 @@ void _starpu_mpi_sync_data_init(void);
 void _starpu_mpi_sync_data_check_termination(void);
 void _starpu_mpi_sync_data_shutdown(void);
 
-struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int data_tag, int source, MPI_Comm comm);
+struct _starpu_mpi_req *_starpu_mpi_sync_data_find(int64_t data_tag, int source, MPI_Comm comm);
 void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *req);
 int _starpu_mpi_sync_data_count();
 

+ 19 - 16
mpi/src/mpi/starpu_mpi_tag.c

@@ -29,7 +29,7 @@
 struct handle_tag_entry
 {
 	UT_hash_handle hh;
-	int tag;
+	int64_t data_tag; /* hash key: must be hashed over its full 8 bytes, so the *_INT uthash macros (sizeof(int)) cannot be used */
 	starpu_data_handle_t handle;
 };
 
@@ -57,12 +57,12 @@ void _starpu_mpi_tag_shutdown(void)
 	registered_tag_handles = NULL;
 }
 
-starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int tag)
+starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int64_t data_tag)
 {
 	struct handle_tag_entry *ret;
 
 	_starpu_spin_lock(&registered_tag_handles_lock);
-	HASH_FIND_INT(registered_tag_handles, &tag, ret);
+	HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(data_tag), ret);
 	_starpu_spin_unlock(&registered_tag_handles_lock);
 
 	if (ret)
@@ -75,40 +75,43 @@ starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int tag)
 	}
 }
 
-void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int tag)
+void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int64_t data_tag)
 {
-	struct handle_tag_entry *entry;
-	if (tag == -1)
+	if (data_tag == -1)
+	{
 		/* No tag for this data, probably a temporary data not to be communicated */
 		return;
+	}
+
+	struct handle_tag_entry *entry;
 	_STARPU_MPI_MALLOC(entry, sizeof(*entry));
 
-	STARPU_ASSERT_MSG(!(_starpu_mpi_tag_get_data_handle_from_tag(tag)),
-			  "There is already a data handle %p registered with the tag %d\n", _starpu_mpi_tag_get_data_handle_from_tag(tag), tag);
+	STARPU_ASSERT_MSG(!(_starpu_mpi_tag_get_data_handle_from_tag(data_tag)),
+			  "There is already a data handle %p registered with the tag %ld\n", _starpu_mpi_tag_get_data_handle_from_tag(data_tag), data_tag);
 
-	_STARPU_MPI_DEBUG(42, "Adding handle %p with tag %d in hashtable\n", handle, tag);
+	_STARPU_MPI_DEBUG(42, "Adding handle %p with tag %ld in hashtable\n", handle, data_tag);
 
 	entry->handle = handle;
-	entry->tag = tag;
+	entry->data_tag = data_tag;
 
 	_starpu_spin_lock(&registered_tag_handles_lock);
-	HASH_ADD_INT(registered_tag_handles, tag, entry);
+	HASH_ADD(hh, registered_tag_handles, data_tag, sizeof(entry->data_tag), entry);
 	_starpu_spin_unlock(&registered_tag_handles_lock);
 }
 
 int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
 {
-	int tag = starpu_mpi_data_get_tag(handle);
+	int64_t data_tag = starpu_mpi_data_get_tag(handle);
 
-	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %d from hashtable\n", handle, tag);
+	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %ld from hashtable\n", handle, data_tag);
 
-	if (tag != -1)
+	if (data_tag != -1)
 	{
 		struct handle_tag_entry *tag_entry;
 
 		_starpu_spin_lock(&registered_tag_handles_lock);
-		HASH_FIND_INT(registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), tag_entry);
-		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %d isn't in the hashmap !",handle,tag);
+		HASH_FIND(hh, registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), sizeof(data_tag), tag_entry);
+		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %ld isn't in the hashmap !", handle, data_tag);
 
 		HASH_DEL(registered_tag_handles, tag_entry);
 

+ 2 - 2
mpi/src/mpi/starpu_mpi_tag.h

@@ -31,9 +31,9 @@ extern "C"
 void _starpu_mpi_tag_init(void);
 void _starpu_mpi_tag_shutdown(void);
 
-void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int tag);
+void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, int64_t data_tag);
 int _starpu_mpi_tag_data_release(starpu_data_handle_t handle);
-starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int tag);
+starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(int64_t data_tag);
 
 #ifdef __cplusplus
 }

+ 37 - 22
mpi/src/starpu_mpi.c

@@ -42,7 +42,7 @@
 #endif
 
 static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
-							int dest, int data_tag, MPI_Comm comm,
+							int dest, int64_t data_tag, MPI_Comm comm,
 							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
 							int sequential_consistency)
 {
@@ -55,7 +55,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 					      sequential_consistency, 0, 0);
 }
 
-int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, int prio, MPI_Comm comm)
+int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
 	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req");
@@ -72,12 +72,12 @@ int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *publ
 	return 0;
 }
 
-int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, MPI_Comm comm)
+int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, MPI_Comm comm)
 {
 	return starpu_mpi_isend_prio(data_handle, public_req, dest, data_tag, 0, comm);
 }
 
-int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 	_starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 1, 0, prio, callback, arg, 1);
@@ -85,12 +85,12 @@ int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, i
 	return 0;
 }
 
-int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg);
 }
 
-int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm)
+int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm)
 {
 	starpu_mpi_req req;
 	MPI_Status status;
@@ -105,12 +105,12 @@ int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, int data_ta
 	return 0;
 }
 
-int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm)
+int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm)
 {
 	return starpu_mpi_send_prio(data_handle, dest, data_tag, 0, comm);
 }
 
-int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, int prio, MPI_Comm comm)
+int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, int prio, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
 	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_issend needs a valid starpu_mpi_req");
@@ -125,12 +125,12 @@ int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *pub
 	return 0;
 }
 
-int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int data_tag, MPI_Comm comm)
+int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int64_t data_tag, MPI_Comm comm)
 {
 	return starpu_mpi_issend_prio(data_handle, public_req, dest, data_tag, 0, comm);
 }
 
-int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 
@@ -140,17 +140,17 @@ int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest,
 	return 0;
 }
 
-int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	return starpu_mpi_issend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg);
 }
 
-struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
+struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count)
 {
 	return _starpu_mpi_isend_irecv_common(data_handle, source, data_tag, comm, detached, sync, 0, callback, arg, RECV_REQ, _starpu_mpi_irecv_size_func, STARPU_W, sequential_consistency, is_internal_req, count);
 }
 
-int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int data_tag, MPI_Comm comm)
+int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int64_t data_tag, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
 	STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_irecv needs a valid starpu_mpi_req");
@@ -167,7 +167,7 @@ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 	return 0;
 }
 
-int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
+int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
 {
 	_STARPU_MPI_LOG_IN();
 
@@ -176,7 +176,7 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 	return 0;
 }
 
-int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
+int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
 {
 	_STARPU_MPI_LOG_IN();
 
@@ -186,7 +186,7 @@ int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_h
 	return 0;
 }
 
-int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, MPI_Status *status)
+int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, MPI_Status *status)
 {
 	starpu_mpi_req req;
 
@@ -223,7 +223,7 @@ void _starpu_mpi_data_clear(starpu_data_handle_t data_handle)
 	free(data_handle->mpi_data);
 }
 
-void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm)
+void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int64_t data_tag, int rank, MPI_Comm comm)
 {
 	struct _starpu_mpi_data *mpi_data;
 	if (data_handle->mpi_data)
@@ -245,9 +245,9 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, in
 		_starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear);
 	}
 
-	if (tag != -1)
+	if (data_tag != -1)
 	{
-		mpi_data->node_tag.data_tag = tag;
+		mpi_data->node_tag.data_tag = data_tag;
 	}
 	if (rank != -1)
 	{
@@ -265,9 +265,9 @@ void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Co
 	starpu_mpi_data_register_comm(handle, -1, rank, comm);
 }
 
-void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag)
+void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int64_t data_tag)
 {
-	starpu_mpi_data_register_comm(handle, tag, -1, MPI_COMM_WORLD);
+	starpu_mpi_data_register_comm(handle, data_tag, -1, MPI_COMM_WORLD);
 }
 
 int starpu_mpi_data_get_rank(starpu_data_handle_t data)
@@ -276,7 +276,7 @@ int starpu_mpi_data_get_rank(starpu_data_handle_t data)
 	return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.rank;
 }
 
-int starpu_mpi_data_get_tag(starpu_data_handle_t data)
+int64_t starpu_mpi_data_get_tag(starpu_data_handle_t data)
 {
 	STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data);
 	return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.data_tag;
@@ -408,3 +408,18 @@ int starpu_mpi_wait_for_all(MPI_Comm comm)
 	}
 	return 0;
 }
+
+int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag)
+{
+	(void) comm;
+	if (keyval == STARPU_MPI_TAG_UB)
+	{
+		*flag = 1;
+		*(int64_t *)attribute_val = INT64_MAX;
+	}
+	else
+	{
+		*flag = 0;
+	}
+	return 0;
+}

+ 3 - 3
mpi/src/starpu_mpi_collective.c

@@ -62,7 +62,7 @@ int _callback_set(int rank, starpu_data_handle_t *data_handles, int count, int r
 			if (data_handles[x])
 			{
 				int owner = starpu_mpi_data_get_rank(data_handles[x]);
-				int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
+				int64_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
 				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 				if ((rank == root) && (owner != root))
 				{
@@ -103,7 +103,7 @@ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, i
 		if (data_handles[x])
 		{
 			int owner = starpu_mpi_data_get_rank(data_handles[x]);
-			int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
+			int64_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
 			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 			if ((rank == root) && (owner != root))
 			{
@@ -138,7 +138,7 @@ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, in
 		if (data_handles[x])
 		{
 			int owner = starpu_mpi_data_get_rank(data_handles[x]);
-			int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
+			int64_t data_tag = starpu_mpi_data_get_tag(data_handles[x]);
 			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
 			if ((rank == root) && (owner != root))
 			{

+ 49 - 44
mpi/src/starpu_mpi_fortran.c

@@ -97,9 +97,9 @@ int fstarpu_mpi_barrier(MPI_Fint comm)
 	return starpu_mpi_barrier(MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
+int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const)
 {
-	return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), callback, arg, seq_const);
+	return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg, seq_const);
 }
 
 int fstarpu_mpi_init_c(struct _starpu_mpi_argc_argv *argcv)
@@ -138,22 +138,22 @@ int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int
 }
 
 /* isend/irecv detached unlock tag */
-int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
 {
-	return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), *starpu_tag);
+	return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, data_tag, MPI_Comm_f2c(comm), *starpu_tag);
 }
-int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag)
 {
-	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm), *starpu_tag);
+	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), *starpu_tag);
 }
 
-int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag)
 {
-	return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), *starpu_tag);
+	return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, data_tag, MPI_Comm_f2c(comm), *starpu_tag);
 }
 
 /* isend/irecv array detached unlock tag */
-int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, int *mpi_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, int64_t *data_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
 {
 	MPI_Comm comms[array_size];
 	int i;
@@ -161,15 +161,16 @@ int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data
 	{
 		comms[i] = MPI_Comm_f2c(_comms[i]);
 	}
-	int ret = starpu_mpi_isend_array_detached_unlock_tag_prio((unsigned)array_size, data_handles, dsts, mpi_tags, prio, comms, *starpu_tag);
+	int ret = starpu_mpi_isend_array_detached_unlock_tag_prio((unsigned)array_size, data_handles, dsts, data_tags, prio, comms, *starpu_tag);
 	return ret;
 }
-int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+
+int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, int64_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
 {
-	return fstarpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handles, dsts, mpi_tags, NULL, _comms, starpu_tag);
+	return fstarpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handles, dsts, data_tags, NULL, _comms, starpu_tag);
 }
 
-int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, int *mpi_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
+int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, int64_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag)
 {
 	MPI_Comm comms[array_size];
 	int i;
@@ -177,72 +178,76 @@ int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_hand
 	{
 		comms[i] = MPI_Comm_f2c(_comms[i]);
 	}
-	int ret = starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, mpi_tags, comms, *starpu_tag);
+	int ret = starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, data_tags, comms, *starpu_tag);
 	return ret;
 }
 
 /* isend/irecv */
-int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
+int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, MPI_Fint comm)
 {
-	return starpu_mpi_isend(data_handle, req, dst, mpi_tag, MPI_Comm_f2c(comm));
+	return starpu_mpi_isend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm));
 }
-int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, int prio, MPI_Fint comm)
+
+int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, int prio, MPI_Fint comm)
 {
-	return starpu_mpi_isend_prio(data_handle, req, dst, mpi_tag, prio, MPI_Comm_f2c(comm));
+	return starpu_mpi_isend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, int mpi_tag, MPI_Fint comm)
+int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, int64_t data_tag, MPI_Fint comm)
 {
-	return starpu_mpi_irecv(data_handle, req, src, mpi_tag, MPI_Comm_f2c(comm));
+	return starpu_mpi_irecv(data_handle, req, src, data_tag, MPI_Comm_f2c(comm));
 }
 
 /* send/recv */
-int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm)
+int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm)
 {
-	return starpu_mpi_send(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm));
+	return starpu_mpi_send(data_handle, dst, data_tag, MPI_Comm_f2c(comm));
 }
-int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm)
+
+int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm)
 {
-	return starpu_mpi_send_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm));
+	return starpu_mpi_send_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, MPI_Status *status)
+int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, MPI_Status *status)
 {
-	return starpu_mpi_recv(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), status);
+	return starpu_mpi_recv(data_handle, src, data_tag, MPI_Comm_f2c(comm), status);
 }
 
 /* isend/irecv detached */
-int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
-	return starpu_mpi_isend_detached(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
+	return starpu_mpi_isend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg);
 }
-int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
+
+int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
-	return starpu_mpi_isend_detached_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm), callback, arg);
+	return starpu_mpi_isend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg);
 }
 
-int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
-	return starpu_mpi_irecv_detached(data_handle, src, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
+	return starpu_mpi_irecv_detached(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg);
 }
 
 /* issend / issend detached */
-int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, MPI_Fint comm)
+int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, MPI_Fint comm)
 {
-	return starpu_mpi_issend(data_handle, req, dst, mpi_tag, MPI_Comm_f2c(comm));
+	return starpu_mpi_issend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm));
 }
-int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int mpi_tag, int prio, MPI_Fint comm)
+int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, int64_t data_tag, int prio, MPI_Fint comm)
 {
-	return starpu_mpi_issend_prio(data_handle, req, dst, mpi_tag, prio, MPI_Comm_f2c(comm));
+	return starpu_mpi_issend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm));
 }
 
-int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, int mpi_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
+int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, int64_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
-	return starpu_mpi_issend_detached(data_handle, dst, mpi_tag, MPI_Comm_f2c(comm), callback, arg);
+	return starpu_mpi_issend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg);
 }
-int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, int mpi_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
+
+int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, int64_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg)
 {
-	return starpu_mpi_issend_detached_prio(data_handle, dst, mpi_tag, prio, MPI_Comm_f2c(comm), callback, arg);
+	return starpu_mpi_issend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg);
 }
 
 /* cache */
@@ -271,14 +276,14 @@ MPI_Fint fstarpu_mpi_world_comm()
 	return MPI_Comm_c2f(MPI_COMM_WORLD);
 }
 
-void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, int tag, int rank, MPI_Fint comm)
+void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, int64_t data_tag, int rank, MPI_Fint comm)
 {
-	return starpu_mpi_data_register_comm(handle, tag, rank, MPI_Comm_f2c(comm));
+	starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_Comm_f2c(comm)); /* plain call: a void function must not return an expression */
 }
 
-void fstarpu_mpi_data_register(starpu_data_handle_t handle, int tag, int rank)
+void fstarpu_mpi_data_register(starpu_data_handle_t handle, int64_t data_tag, int rank)
 {
-	return starpu_mpi_data_register_comm(handle, tag, rank, MPI_COMM_WORLD);
+	starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_COMM_WORLD); /* plain call: a void function must not return an expression */
 }
 
 void fstarpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Fint comm)

+ 39 - 39
mpi/src/starpu_mpi_fxt.h

@@ -61,29 +61,29 @@ extern "C"
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid());
 #define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key)	\
 	FUT_DO_PROBE4(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), _starpu_gettid());
-#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (mpi_tag), (size), _starpu_gettid());
-#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(dest, mpi_tag, size, jobid)	\
-	FUT_DO_PROBE5(_STARPU_MPI_FUT_ISEND_SUBMIT_END, (dest), (mpi_tag), (size), (jobid), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (mpi_tag), (size), _starpu_gettid());
-#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, mpi_tag)		\
-	if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (mpi_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), (mpi_tag), 0); }
-#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, mpi_tag, size)	\
-	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (mpi_tag), (size), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, mpi_tag)		\
-	if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (mpi_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (mpi_tag), 0); }
-#define _STARPU_MPI_TRACE_TERMINATED(req, rank, mpi_tag)		\
-	if ((req)->request_type == RECV_REQ) FUT_DO_PROBE4(_STARPU_MPI_FUT_IRECV_TERMINATED, (rank), (mpi_tag), (req)->post_sync_jobid, _starpu_gettid()); else \
-	if ((req)->request_type == SEND_REQ) FUT_DO_PROBE3(_STARPU_MPI_FUT_ISEND_TERMINATED, (rank), (mpi_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, data_tag, size)	\
+	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (data_tag), (size), _starpu_gettid());
+#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(dest, data_tag, size, jobid)	\
+	FUT_DO_PROBE5(_STARPU_MPI_FUT_ISEND_SUBMIT_END, (dest), (data_tag), (size), (jobid), _starpu_gettid());
+#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (data_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (data_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, size)	\
+	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (data_tag), (size), _starpu_gettid());
+#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, data_tag)		\
+	if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (data_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), (data_tag), 0); }
+#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, size)	\
+	FUT_DO_PROBE4(_STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (data_tag), (size), _starpu_gettid());
+#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (data_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (data_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, data_tag)		\
+	if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (data_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (data_tag), 0); }
+#define _STARPU_MPI_TRACE_TERMINATED(req, rank, data_tag)		\
+	if ((req)->request_type == RECV_REQ) FUT_DO_PROBE4(_STARPU_MPI_FUT_IRECV_TERMINATED, (rank), (data_tag), (req)->post_sync_jobid, _starpu_gettid()); else \
+	if ((req)->request_type == SEND_REQ) FUT_DO_PROBE3(_STARPU_MPI_FUT_ISEND_TERMINATED, (rank), (data_tag), _starpu_gettid());
 #define _STARPU_MPI_TRACE_SLEEP_BEGIN()	\
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_SLEEP_BEGIN, _starpu_gettid());
 #define _STARPU_MPI_TRACE_SLEEP_END()	\
@@ -92,14 +92,14 @@ extern "C"
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_DTESTING_BEGIN,  _starpu_gettid());
 #define _STARPU_MPI_TRACE_DTESTING_END()	\
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_DTESTING_END, _starpu_gettid());
-#define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UTESTING_BEGIN, (src), (mpi_tag),  _starpu_gettid());
-#define _STARPU_MPI_TRACE_UTESTING_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UTESTING_END, (src), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_BEGIN, (src), (mpi_tag),  _starpu_gettid());
-#define _STARPU_MPI_TRACE_UWAIT_END(src, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_END, (src), (mpi_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_UTESTING_BEGIN, (src), (data_tag),  _starpu_gettid());
+#define _STARPU_MPI_TRACE_UTESTING_END(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_UTESTING_END, (src), (data_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_BEGIN, (src), (data_tag),  _starpu_gettid());
+#define _STARPU_MPI_TRACE_UWAIT_END(src, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid());
 #define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank)	\
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid());
 #if 0
@@ -108,15 +108,15 @@ extern "C"
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_BEGIN, _starpu_gettid());
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_END()	\
 	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_END, _starpu_gettid());
-#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (mpi_tag), _starpu_gettid());
-#define _STARPU_MPI_TRACE_TEST_END(peer, mpi_tag)	\
-	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (mpi_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (data_tag), _starpu_gettid());
+#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (data_tag), _starpu_gettid());
 #else
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while(0)
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, mpi_tag)		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_END(peer, mpi_tag)		do {} while(0)
+#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while(0)
+#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)		do {} while(0)
 #endif
 #define TRACE
 #else
@@ -145,8 +145,8 @@ extern "C"
 #define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while(0)
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, mpi_tag)		do {} while(0)
-#define _STARPU_MPI_TRACE_TEST_END(peer, mpi_tag)		do {} while(0)
+#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while(0)
+#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)		do {} while(0)
 #endif
 
 #ifdef __cplusplus

+ 9 - 13
mpi/src/starpu_mpi_helper.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2015, 2017  Université de Bordeaux
- * Copyright (C) 2010, 2012, 2014, 2016  CNRS
+ * Copyright (C) 2010, 2012, 2014, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,7 +27,7 @@ static void starpu_mpi_unlock_tag_callback(void *arg)
 	free(tagptr);
 }
 
-int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
+int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, int64_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag)
 {
 	starpu_tag_t *tagptr;
 	_STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t));
@@ -35,13 +35,13 @@ int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle,
 
 	return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, prio, comm, starpu_mpi_unlock_tag_callback, tagptr);
 }
-int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int data_tag, MPI_Comm comm, starpu_tag_t tag)
+
+int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 {
 	return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dest, data_tag, 0, comm, tag);
 }
 
-
-int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, starpu_tag_t tag)
+int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int64_t data_tag, MPI_Comm comm, starpu_tag_t tag)
 {
 	starpu_tag_t *tagptr;
 	_STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t));
@@ -69,9 +69,7 @@ static void starpu_mpi_array_unlock_callback(void *_arg)
 	}
 }
 
-int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size,
-		starpu_data_handle_t *data_handle, int *dest, int *data_tag, int *prio,
-		MPI_Comm *comm, starpu_tag_t tag)
+int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag)
 {
 	if (!array_size)
 		return 0;
@@ -92,15 +90,13 @@ int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size,
 
 	return 0;
 }
-int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size,
-		starpu_data_handle_t *data_handle, int *dest, int *data_tag,
-		MPI_Comm *comm, starpu_tag_t tag)
+
+int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 {
 	return starpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handle, dest, data_tag, NULL, comm, tag);
 }
 
-
-int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *data_tag, MPI_Comm *comm, starpu_tag_t tag)
+int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int64_t *data_tag, MPI_Comm *comm, starpu_tag_t tag)
 {
 	if (!array_size)
 		return 0;

+ 3 - 3
mpi/src/starpu_mpi_private.h

@@ -112,7 +112,7 @@ int _starpu_debug_rank;
 			starpu_mpi_comm_rank(comm, &_rank); \
 			MPI_Type_size(datatype, &__size);		\
 			MPI_Comm_get_name(comm, _comm_name, &_comm_name_len); \
-			fprintf(stderr, "[%d][starpu_mpi] :%d:%s:%d:%d:%d:%s:%p:%ld:%d:%s:%d\n", _rank, _rank, way, node, tag, utag, _comm_name, ptr, count, __size, __starpu_func__ , __LINE__); \
+			fprintf(stderr, "[%d][starpu_mpi] :%d:%s:%d:%d:%ld:%s:%p:%ld:%d:%s:%d\n", _rank, _rank, way, node, tag, utag, _comm_name, ptr, count, __size, __starpu_func__ , __LINE__); \
 			fflush(stderr);					\
 		}							\
 	} while(0);
@@ -170,7 +170,7 @@ struct _starpu_mpi_envelope
 {
 	int mode;
 	starpu_ssize_t size;
-	int data_tag;
+	int64_t data_tag;
 	unsigned sync;
 };
 #endif /* STARPU_USE_MPI_MPI */
@@ -190,7 +190,7 @@ struct _starpu_mpi_node_tag
 {
 	MPI_Comm comm;
 	int rank;
-	int data_tag;
+	int64_t data_tag;
 };
 
 struct _starpu_mpi_data

+ 7 - 6
mpi/src/starpu_mpi_task_insert.c

@@ -105,7 +105,7 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 	if (data && mode & STARPU_R)
 	{
 		int mpi_rank = starpu_mpi_data_get_rank(data);
-		int data_tag = starpu_mpi_data_get_tag(data);
+		int64_t data_tag = starpu_mpi_data_get_tag(data);
 		if (mpi_rank == -1)
 		{
 			_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
@@ -147,7 +147,7 @@ void _starpu_mpi_exchange_data_after_execution(starpu_data_handle_t data, enum s
 	if (mode & STARPU_W)
 	{
 		int mpi_rank = starpu_mpi_data_get_rank(data);
-		int data_tag = starpu_mpi_data_get_tag(data);
+		int64_t data_tag = starpu_mpi_data_get_tag(data);
 		if(mpi_rank == -1)
 		{
 			_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
@@ -652,7 +652,7 @@ struct _starpu_mpi_redux_data_args
 {
 	starpu_data_handle_t data_handle;
 	starpu_data_handle_t new_handle;
-	int tag;
+	int64_t data_tag;
 	int node;
 	MPI_Comm comm;
 	struct starpu_task *taskB;
@@ -715,14 +715,15 @@ void _starpu_mpi_redux_data_recv_callback(void *callback_arg)
 	struct _starpu_mpi_redux_data_args *args = (struct _starpu_mpi_redux_data_args *) callback_arg;
 	starpu_data_register_same(&args->new_handle, args->data_handle);
 
-	starpu_mpi_irecv_detached_sequential_consistency(args->new_handle, args->node, args->tag, args->comm, _starpu_mpi_redux_data_detached_callback, args, 0);
+	starpu_mpi_irecv_detached_sequential_consistency(args->new_handle, args->node, args->data_tag, args->comm, _starpu_mpi_redux_data_detached_callback, args, 0);
 }
 
 /* TODO: this should rather be implicitly called by starpu_mpi_task_insert when
  * a data previously accessed in REDUX mode gets accessed in R mode. */
 void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio)
 {
-	int me, rank, tag, nb_nodes;
+	int me, rank, nb_nodes;
+	int64_t tag;
 
 	rank = starpu_mpi_data_get_rank(data_handle);
 	tag = starpu_mpi_data_get_tag(data_handle);
@@ -772,7 +773,7 @@ void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle,
 				struct _starpu_mpi_redux_data_args *args;
 				_STARPU_MPI_MALLOC(args, sizeof(struct _starpu_mpi_redux_data_args));
 				args->data_handle = data_handle;
-				args->tag = tag;
+				args->data_tag = tag;
 				args->node = i;
 				args->comm = comm;
 

+ 2 - 0
mpi/tests/Makefile.am

@@ -104,6 +104,7 @@ if BUILD_TESTS
 starpu_mpi_TESTS =
 
 starpu_mpi_TESTS +=				\
+	attr					\
 	cache					\
 	cache_disable				\
 	callback				\
@@ -190,6 +191,7 @@ noinst_PROGRAMS =				\
 	temporary				\
 	block_interface				\
 	block_interface_pinned			\
+	attr					\
 	cache					\
 	cache_disable				\
 	callback				\

+ 37 - 0
mpi/tests/attr.c

@@ -0,0 +1,37 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+int main(int argc, char **argv)
+{
+	int flag;
+	int64_t value;
+
+	(void) argc;
+	(void) argv;
+	/* Key 42 is expected to be reported as unknown, i.e. flag == 0. */
+	starpu_mpi_comm_get_attr(MPI_COMM_WORLD, 42, NULL, &flag);
+	STARPU_ASSERT_MSG(flag == 0, "starpu_mpi_comm_get_attr was called with invalid argument\n");
+	/* STARPU_MPI_TAG_UB is expected to be reported as known, i.e. flag == 1. */
+	starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &value, &flag);
+	STARPU_ASSERT_MSG(flag == 1, "starpu_mpi_comm_get_attr was called with valid argument\n");
+	/* int64_t is not necessarily long: cast so the argument matches the conversion. */
+	FPRINTF(stderr, "Value: %lld\n", (long long) value);
+
+	return 0;
+}

+ 1 - 1
mpi/tests/block_interface.c

@@ -33,7 +33,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/block_interface_pinned.c

@@ -33,7 +33,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 2 - 1
mpi/tests/datatypes.c

@@ -49,6 +49,7 @@ void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int
  */
 void check_void(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error)
 {
+	(void)error;
 	FPRINTF_MPI(stderr, "Success with void value\n");
 }
 
@@ -580,7 +581,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/early_request.c

@@ -197,7 +197,7 @@ int main(int argc, char * argv[])
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank);

+ 1 - 1
mpi/tests/gather.c

@@ -28,7 +28,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/gather2.c

@@ -26,7 +26,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/insert_task_count.c

@@ -69,7 +69,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/insert_task_dyn_handles.c

@@ -79,7 +79,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/load_balancer.c

@@ -54,7 +54,7 @@ int main(int argc, char **argv)
 	MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init);
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	unsetenv("STARPU_MPI_LB");

+ 1 - 1
mpi/tests/mpi_detached_tag.c

@@ -39,7 +39,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_earlyrecv.c

@@ -31,7 +31,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 5 - 1
mpi/tests/mpi_earlyrecv2.c

@@ -142,6 +142,10 @@ int exchange_variable(int rank, int detached)
 
 void check_void(starpu_data_handle_t handle, int i, int rank, int *error)
 {
+	(void)handle;
+	(void)i;
+	(void)rank;
+	(void)error;
 }
 
 int exchange_void(int rank, int detached)
@@ -213,7 +217,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_irecv.c

@@ -37,7 +37,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_irecv_detached.c

@@ -54,7 +54,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_isend.c

@@ -37,7 +37,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_isend_detached.c

@@ -53,7 +53,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_redux.c

@@ -42,7 +42,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/mpi_test.c

@@ -37,7 +37,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/pingpong.c

@@ -38,7 +38,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/policy_register.c

@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/ring.c

@@ -81,7 +81,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/ring_async.c

@@ -81,7 +81,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/ring_async_implicit.c

@@ -77,7 +77,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, 1);
+	ret = starpu_mpi_init(&argc, &argv, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

+ 1 - 1
mpi/tests/ring_sync.c

@@ -81,7 +81,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 1 - 1
mpi/tests/ring_sync_detached.c

@@ -94,7 +94,7 @@ int main(int argc, char **argv)
 
 	ret = starpu_init(NULL);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
-	ret = starpu_mpi_init(NULL, NULL, mpi_init);
+	ret = starpu_mpi_init(&argc, &argv, mpi_init);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);

+ 3 - 0
src/common/prio_list.h

@@ -35,6 +35,9 @@
  * * Initialize a new priority list
  * void FOO_prio_list_init(struct FOO_prio_list*)
  *
+ * * Free an empty priority list
+ * void FOO_prio_list_deinit(struct FOO_prio_list*)
+ *
  * * Add a new cell at the end of the list of the priority of the cell (O(log2 p))
  * void FOO_prio_list_push_back(struct FOO_prio_list*, struct FOO*)
  *

+ 29 - 29
src/core/disk_ops/disk_hdf5.c

@@ -46,7 +46,7 @@ static starpu_pthread_cond_t global_cond;
 static struct _starpu_hdf5_work_list global_work_list;        /* This list contains the work for the hdf5 thread */
 #endif
 
-#ifdef H5_HAVE_THREADSAFE						
+#ifdef H5_HAVE_THREADSAFE
 
 #define HDF5_VAR_THREAD fileBase->thread
 #define HDF5_VAR_RUN fileBase->run
@@ -54,7 +54,7 @@ static struct _starpu_hdf5_work_list global_work_list;        /* This list conta
 #define HDF5_VAR_COND fileBase->cond
 #define HDF5_VAR_WORK_LIST fileBase->work_list
 
-#else									
+#else
 
 #define HDF5_VAR_THREAD global_thread
 #define HDF5_VAR_RUN global_run
@@ -62,7 +62,7 @@ static struct _starpu_hdf5_work_list global_work_list;        /* This list conta
 #define HDF5_VAR_COND global_cond
 #define HDF5_VAR_WORK_LIST global_work_list
 
-#endif									
+#endif
 
 enum hdf5_work_type { READ, WRITE, FULL_READ, FULL_WRITE, COPY };
 
@@ -140,7 +140,7 @@ static void starpu_hdf5_full_write_internal(struct _starpu_hdf5_work * work)
 		/* Get official datatype */
 		hid_t datatype = H5Dget_type(work->obj_dst->dataset);
 		hsize_t sizeDatatype = H5Tget_size(datatype);
-		
+
 		/* Count in number of elements */
 		hsize_t extendsdim[1] = {work->size/sizeDatatype};
 		status = H5Dset_extent (work->obj_dst->dataset, extendsdim);
@@ -263,10 +263,10 @@ static void starpu_hdf5_copy_internal(struct _starpu_hdf5_work * work)
 		/* Dirty : Delete dataspace because H5Ocopy only works if destination does not exist */
 		H5Ldelete(work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT);
 
-		status = H5Ocopy(work->base_src->fileID, work->obj_src->path, work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT, H5P_DEFAULT); 
+		status = H5Ocopy(work->base_src->fileID, work->obj_src->path, work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT, H5P_DEFAULT);
 		STARPU_ASSERT_MSG(status >= 0, "Can not copy data (%s) associed to this disk (%s) to the data (%s) on this disk (%s)\n", work->obj_src->path, work->base_src->path, work->obj_dst->path, work->base_dst->path);
 
-		work->obj_dst->dataset = H5Dopen2(work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT);				
+		work->obj_dst->dataset = H5Dopen2(work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT);
 	}
 	else
 	{
@@ -278,11 +278,11 @@ static void starpu_hdf5_copy_internal(struct _starpu_hdf5_work * work)
 
 		void * ptr;
 		int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, &ptr, work->size, 0);
-		STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %lu bytes to perform disk to disk operation", work->size);
+		STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %lu bytes to perform disk to disk operation", (unsigned long)work->size);
 
 		/* buffer is only used internally to store intermediate data */
 		work->ptr = ptr;
-		
+
 		starpu_hdf5_read_internal(work);
 		starpu_hdf5_write_internal(work);
 
@@ -342,7 +342,7 @@ static void * _starpu_hdf5_internal_thread(void * arg)
                                 case FULL_WRITE:
                                         starpu_hdf5_full_write_internal(work);
                                         break;
-				
+
 				case COPY:
 					starpu_hdf5_copy_internal(work);
 					break;
@@ -383,7 +383,7 @@ static void _starpu_hdf5_create_thread(struct starpu_hdf5_base * fileBase)
         HDF5_VAR_RUN = 1;
 
         STARPU_PTHREAD_COND_INIT(&HDF5_VAR_COND, NULL);
-        STARPU_PTHREAD_CREATE(&HDF5_VAR_THREAD, NULL, _starpu_hdf5_internal_thread, (void *) fileBase); 
+        STARPU_PTHREAD_CREATE(&HDF5_VAR_THREAD, NULL, _starpu_hdf5_internal_thread, (void *) fileBase);
 }
 
 /* returns the size in BYTES */
@@ -438,7 +438,7 @@ static void starpu_hdf5_send_work(void *base_src, void *obj_src, off_t offset_sr
         struct starpu_hdf5_base * fileBase;
 	if (fileBase_src != NULL)
 		fileBase = fileBase_src;
-	else	
+	else
 		fileBase = fileBase_dst;
 #endif
 
@@ -466,14 +466,14 @@ static struct starpu_hdf5_obj * _starpu_hdf5_data_alloc(struct starpu_hdf5_base
                 free(obj);
                 return NULL;
         }
-	
+
 	hsize_t chunkdim[1] = {STARPU_CHUNK_DIM};
 	hid_t prop = H5Pcreate (H5P_DATASET_CREATE);
 	herr_t status = H5Pset_chunk (prop, 1, chunkdim);
         STARPU_ASSERT_MSG(status >= 0, "Error when setting HDF5 property \n");
 
         /* create a dataset at location name, with data described by the dataspace.
-         * Each element are like char in C (expected one byte) 
+         * Each element are like char in C (expected one byte)
          */
         obj->dataset = H5Dcreate2(fileBase->fileID, name, H5T_NATIVE_CHAR, dataspace, H5P_DEFAULT, prop, H5P_DEFAULT);
 
@@ -490,7 +490,7 @@ static struct starpu_hdf5_obj * _starpu_hdf5_data_alloc(struct starpu_hdf5_base
 	obj->size = size;
 
         _starpu_hdf5_protect_stop((void *) fileBase);
-        
+
         return obj;
 }
 
@@ -502,7 +502,7 @@ static struct starpu_hdf5_obj * _starpu_hdf5_data_open(struct starpu_hdf5_base *
         _starpu_hdf5_protect_start((void *) fileBase);
 
         /* create a dataset at location name, with data described by the dataspace.
-         * Each element are like char in C (expected one byte) 
+         * Each element are like char in C (expected one byte)
          */
         obj->dataset = H5Dopen2(fileBase->fileID, name, H5P_DEFAULT);
 
@@ -516,7 +516,7 @@ static struct starpu_hdf5_obj * _starpu_hdf5_data_open(struct starpu_hdf5_base *
 
         obj->path = name;
 	obj->size = size;
-        
+
         return obj;
 }
 
@@ -528,7 +528,7 @@ static void *starpu_hdf5_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIB
 #ifndef H5_HAVE_THREADSAFE
 	int actual_nb_disk = STARPU_ATOMIC_ADD(&nb_disk_open, 1);
 	if (actual_nb_disk == 1)
-	{	
+	{
 #endif
 		STARPU_PTHREAD_MUTEX_INIT(&HDF5_VAR_MUTEX, NULL);
 #ifndef H5_HAVE_THREADSAFE
@@ -561,14 +561,14 @@ static void *starpu_hdf5_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIB
 
                 /* Truncate it */
                 fileBase->fileID = H5Fcreate((char *)fileBase->path, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
-                if (fileBase->fileID < 0) 
+                if (fileBase->fileID < 0)
                 {
-                        free(fileBase); 
+                        free(fileBase);
                         _STARPU_ERROR("Can not create the HDF5 file (%s)", (char *) parameter);
 			return NULL;
                 }
                 fileBase->created = 1;
-        } 
+        }
         else
         {
                 /* Well, open it ! */
@@ -576,7 +576,7 @@ static void *starpu_hdf5_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIB
 		STARPU_ASSERT(path);
 
                 fileBase->fileID = H5Fopen((char *)parameter, H5F_ACC_RDWR, H5P_DEFAULT);
-                if (fileBase->fileID < 0) 
+                if (fileBase->fileID < 0)
                 {
                         free(fileBase);
 			free(path);
@@ -597,7 +597,7 @@ static void *starpu_hdf5_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIB
 	}
 #endif
 
-#if H5_VERS_MAJOR > 1 || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR > 10) || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 10 && H5_VERS_RELEASE > 0) 
+#if H5_VERS_MAJOR > 1 || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR > 10) || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 10 && H5_VERS_RELEASE > 0)
 	H5Pset_file_space_strategy(fileBase->fileID, H5F_FSPACE_STRATEGY_FSM_AGGR, 0, 0);
 #endif
 
@@ -653,7 +653,7 @@ static void starpu_hdf5_unplug(void *base)
         STARPU_ASSERT_MSG(status >= 0, "Can not unplug this HDF5 disk (%s)\n", fileBase->path);
         if (fileBase->created)
         {
-                unlink(fileBase->path);        
+                unlink(fileBase->path);
         }
         else
         {
@@ -703,7 +703,7 @@ static void starpu_hdf5_free(void *base, void *obj, size_t size STARPU_ATTRIBUTE
         status = H5Dclose(dataObj->dataset);
         STARPU_ASSERT_MSG(status >= 0, "Can not free this HDF5 dataset (%s)\n", dataObj->path);
 
-        /* remove the dataset link in the HDF5 
+        /* remove the dataset link in the HDF5
          * But it doesn't delete the space in the file */
         status = H5Ldelete(fileBase->fileID, dataObj->path, H5P_DEFAULT);
         STARPU_ASSERT_MSG(status >= 0, "Can not delete the link associed to this dataset (%s)\n", dataObj->path);
@@ -774,14 +774,14 @@ static int starpu_hdf5_full_read(void *base, void *obj, void **ptr, size_t *size
         *size = _starpu_get_size_obj(dataObj);
         _starpu_hdf5_protect_stop(base);
 
-        _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); 
+        _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0);
 
         starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) &finished, FULL_READ);
-        
+
         starpu_hdf5_wait(&finished);
 
         starpu_sem_destroy(&finished);
-        
+
         return 0;
 }
 
@@ -861,10 +861,10 @@ void * starpu_hdf5_async_full_read (void * base, void * obj, void ** ptr, size_t
         *size = _starpu_get_size_obj(dataObj);
         _starpu_hdf5_protect_stop(base);
 
-        _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); 
+        _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0);
 
         starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) finished, FULL_READ);
-        
+
         return finished;
 }
 

+ 5 - 4
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -177,7 +177,7 @@ static void _starpu_unistd_init(struct starpu_unistd_global_obj *obj, int descri
 static int _starpu_unistd_reopen(struct starpu_unistd_global_obj *obj)
 {
 	int id = open(obj->path, obj->flags);
-	STARPU_ASSERT(id >= 0);
+	STARPU_ASSERT_MSG(id >= 0, "Reopening file %s failed: errno %d", obj->path, errno);
 	return id;
 }
 
@@ -202,6 +202,7 @@ static void _starpu_unistd_fini(struct starpu_unistd_global_obj *obj)
 	STARPU_PTHREAD_MUTEX_DESTROY(&obj->mutex);
 
 	free(obj->path);
+	obj->path = NULL;
 	free(obj);
 }
 
@@ -608,12 +609,12 @@ static void * starpu_unistd_internal_thread(void * arg)
 			if (starpu_unistd_copy_failed == INIT && ret == -1 && errno == ENOSYS)
 			{
 				starpu_unistd_copy_failed = FAILED;
-			} 
+			}
 			else
 			{
 #endif
 				STARPU_ASSERT_MSG(ret >= 0, "Copy_file_range failed (errno %d)", errno);
-				STARPU_ASSERT_MSG((size_t) ret == work->len, "Copy_file_range failed (value %zd instead of %zd)", ret, work->len);
+				STARPU_ASSERT_MSG((size_t) ret == work->len, "Copy_file_range failed (value %ld instead of %ld)", (long)ret, (long)work->len);
 #if !defined(HAVE_COPY_FILE_RANGE) && defined( __NR_copy_file_range)
 				starpu_unistd_copy_failed = CHECKED;
 			}
@@ -1064,7 +1065,7 @@ void *  starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_sr
 		starpu_unistd_global_wait_request((void *) event);
 		/* add token when StarPU will test/wait the request */
 		starpu_sem_post(&work->finished);
-	
+
 		STARPU_PTHREAD_MUTEX_LOCK(&thread->mutex);
 		/* here copy_file_range does not work */
 		if (starpu_unistd_copy_failed == FAILED)

+ 24 - 3
src/core/sched_ctx.c

@@ -2071,6 +2071,14 @@ unsigned starpu_sched_ctx_get_priority(int workerid, unsigned sched_ctx_id)
 
 unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker)
 {
+	/* The worker being checked must have its status set to sleeping during
+	 * the check, to allow for another worker being checked concurrently
+	 * to make the safe, pessimistic assumption that it is the last worker
+	 * awake. In the worst case, both workers will follow this pessimistic
+	 * path and perform one more scheduling loop */
+	STARPU_HG_DISABLE_CHECKING(_starpu_config.workers[worker->workerid].status);
+	STARPU_ASSERT(_starpu_config.workers[worker->workerid].status == STATUS_SLEEPING);
+	STARPU_HG_ENABLE_CHECKING(_starpu_config.workers[worker->workerid].status);
 	struct _starpu_sched_ctx_list_iterator list_it;
 
 	_starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it);
@@ -2087,10 +2095,23 @@ unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker)
 		while(workers->has_next(workers, &it))
 		{
 			int workerid = workers->get_next(workers, &it);
-			if(workerid != worker->workerid && _starpu_worker_get_status(workerid) != STATUS_SLEEPING)
+			if(workerid != worker->workerid)
 			{
-				last_worker_awake = 0;
-				break;
+				/* The worker status is intentionally checked
+				 * without taking locks. If multiple workers
+				 * are concurrently assessing whether they are
+				 * the last worker awake, they will follow the
+				 * pessimistic path and assume that they are
+				 * the last worker awake */
+				STARPU_HG_DISABLE_CHECKING(_starpu_config.workers[workerid].status);
+				const int cond = _starpu_config.workers[workerid].status != STATUS_SLEEPING;
+				STARPU_HG_ENABLE_CHECKING(_starpu_config.workers[workerid].status);
+
+				if (cond)
+				{
+					last_worker_awake = 0;
+					break;
+				}
 			}
 		}
 		if(last_worker_awake)

+ 33 - 0
src/core/simgrid.c

@@ -1155,4 +1155,37 @@ void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, vo
 #endif
 				 );
 }
+
+static size_t used;
+
+void _starpu_simgrid_data_new(size_t size)
+{
+	// Note: this is just declarative: the `used` counter is not updated here
+	//_STARPU_DISP("data new: %zd, now %zd\n", size, used);
+}
+
+/* Add `size` bytes to the `used` counter and report the new total through
+ * _STARPU_DISP. */
+void _starpu_simgrid_data_increase(size_t size)
+{
+	used += size;
+	/* size_t is unsigned, hence %zu rather than %zd */
+	_STARPU_DISP("data increase: %zu, now %zu\n", size, used);
+}
+
+/* Add `size` bytes to the `used` counter and report the new total through
+ * _STARPU_DISP. */
+void _starpu_simgrid_data_alloc(size_t size)
+{
+	used += size;
+	/* size_t is unsigned, hence %zu rather than %zd */
+	_STARPU_DISP("data alloc: %zu, now %zu\n", size, used);
+}
+
+/* Subtract `size` bytes from the `used` counter and report the new total
+ * through _STARPU_DISP. */
+void _starpu_simgrid_data_free(size_t size)
+{
+	used -= size;
+	/* size_t is unsigned, hence %zu rather than %zd */
+	_STARPU_DISP("data free: %zu, now %zu\n", size, used);
+}
+
+/* Report a transfer of `size` bytes from memory node `src_node` to memory
+ * node `dst_node` through _STARPU_DISP; the `used` counter is not touched. */
+void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node)
+{
+	/* size_t is unsigned, hence %zu rather than %zd */
+	_STARPU_DISP("data transfer %zu from %u to %u\n", size, src_node, dst_node);
+}
+
+
 #endif

+ 16 - 0
src/core/simgrid.h

@@ -100,4 +100,20 @@ void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code,
 #define _SIMGRID_TIMER_END }
 #endif
 
+/* Experimental functions for OOC stochastic analysis */
+/* disk <-> MAIN_RAM only */
+#if defined(STARPU_SIMGRID) && 0
+void _starpu_simgrid_data_new(size_t size);
+void _starpu_simgrid_data_increase(size_t size);
+void _starpu_simgrid_data_alloc(size_t size);
+void _starpu_simgrid_data_free(size_t size);
+void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node);
+#else
+#define _starpu_simgrid_data_new(size) (void)0
+#define _starpu_simgrid_data_increase(size) (void)0
+#define _starpu_simgrid_data_alloc(size) (void)0
+#define _starpu_simgrid_data_free(size) (void)0
+#define _starpu_simgrid_data_transfer(size, src_node, dst_node) (void)0
+#endif
+
 #endif // __SIMGRID_H__

+ 3 - 2
src/datawizard/data_request.c

@@ -71,6 +71,7 @@ void _starpu_deinit_data_request_lists(void)
 		_starpu_data_request_prio_list_deinit(&prefetch_requests[i]);
 		_starpu_data_request_prio_list_deinit(&idle_requests[i]);
 		STARPU_PTHREAD_MUTEX_DESTROY(&data_requests_pending_list_mutex[i]);
+		_starpu_data_request_prio_list_deinit(&data_requests_pending[i]);
 		STARPU_PTHREAD_MUTEX_DESTROY(&data_requests_list_mutex[i]);
 	}
 }
@@ -624,6 +625,7 @@ static int __starpu_handle_node_data_requests(struct _starpu_data_request_prio_l
 		/* Prefetch requests might have gotten promoted while in tmp list */
 		_starpu_data_request_prio_list_push_back(&new_data_requests[r->prefetch], r);
 	}
+	_starpu_data_request_prio_list_deinit(&local_list);
 
 	for (i = 0; i <= prefetch; i++)
 		if (!_starpu_data_request_prio_list_empty(&new_data_requests[i]))
@@ -689,7 +691,6 @@ static int _handle_pending_node_data_requests(unsigned src_node, unsigned force)
 //	_STARPU_DEBUG("_starpu_handle_pending_node_data_requests ...\n");
 //
 	struct _starpu_data_request_prio_list new_data_requests_pending;
-	struct _starpu_data_request_prio_list empty_list;
 	unsigned taken, kept;
 
 #ifdef STARPU_NON_BLOCKING_DRIVERS
@@ -700,7 +701,6 @@ static int _handle_pending_node_data_requests(unsigned src_node, unsigned force)
 		return 0;
 #endif
 
-	_starpu_data_request_prio_list_init(&empty_list);
 #ifdef STARPU_NON_BLOCKING_DRIVERS
 	if (!force)
 	{
@@ -787,6 +787,7 @@ static int _handle_pending_node_data_requests(unsigned src_node, unsigned force)
 			}
 		}
 	}
+	_starpu_data_request_prio_list_deinit(&local_list);
 	STARPU_PTHREAD_MUTEX_LOCK(&data_requests_pending_list_mutex[src_node]);
 	data_requests_npending[src_node] -= taken - kept;
 	if (kept)

+ 28 - 12
src/drivers/driver_common/driver_common.c

@@ -390,6 +390,7 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int w
 		_starpu_worker_leave_sched_op(worker);
 		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
 
+#ifndef STARPU_NON_BLOCKING_DRIVERS
 		int cond_no_keep_awake = !worker->state_keep_awake;
 		int cond_can_block = _starpu_worker_can_block(memnode, worker);
 		int cond_no_last_awake = !_starpu_sched_ctx_last_worker_awake(worker);
@@ -408,23 +409,30 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int w
 
 				cond_no_keep_awake = !worker->state_keep_awake;
 				cond_can_block = _starpu_worker_can_block(memnode, worker);
-				cond_no_last_awake = !_starpu_sched_ctx_last_worker_awake(worker);
 				cond_no_block_in_parallel_rq = !worker->state_block_in_parallel_req;
 				cond_no_unblock_in_parallel_rq = !worker->state_unblock_in_parallel_req;
-			}
-			/* do not check status != SLEEPING here since status is
-			 * not changed by other threads/workers */
-			while (cond_no_keep_awake
+				if (cond_no_keep_awake
 					&& cond_can_block
-					&& cond_no_last_awake
 					&& cond_no_block_in_parallel_rq
-					&& cond_no_unblock_in_parallel_rq);
+					&& cond_no_unblock_in_parallel_rq)
+				{
+					_starpu_worker_set_status_sleeping(workerid);
+					cond_no_last_awake = !_starpu_sched_ctx_last_worker_awake(worker);
+				}
+				else
+				{
+					cond_no_last_awake = 0;
+					break;
+				}
+			}
+			while (cond_no_last_awake);
 			//_STARPU_DEBUG("worker %u waking up: %d|%d|%d|%d|%d\n", worker->workerid, cond_no_keep_awake, cond_can_block, cond_no_last_awake, cond_no_block_in_parallel_rq, cond_no_unblock_in_parallel_rq);
 			worker->state_keep_awake = 0;
 			_starpu_worker_set_status_scheduling_done(workerid);
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 		}
 		else
+#endif
 		{
 			//_STARPU_DEBUG("worker %u wont sleep: %d|%d|%d|%d|%d\n", worker->workerid, cond_no_keep_awake, cond_can_block, cond_no_last_awake, cond_no_block_in_parallel_rq, cond_no_unblock_in_parallel_rq);
 			_starpu_worker_set_status_scheduling_done(workerid);
@@ -608,15 +616,23 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 
 				cond_no_keep_awake = !worker->state_keep_awake;
 				cond_can_block = _starpu_worker_can_block(memnode, worker);
-				cond_no_last_awake = !_starpu_sched_ctx_last_worker_awake(worker);
 				cond_no_block_in_parallel_rq = !worker->state_block_in_parallel_req;
 				cond_no_unblock_in_parallel_rq = !worker->state_unblock_in_parallel_req;
-			}
-			while (cond_no_keep_awake
+				if (cond_no_keep_awake
 					&& cond_can_block
-					&& cond_no_last_awake
 					&& cond_no_block_in_parallel_rq
-					&& cond_no_unblock_in_parallel_rq);
+					&& cond_no_unblock_in_parallel_rq)
+				{
+					_starpu_worker_set_status_sleeping(workerid);
+					cond_no_last_awake = !_starpu_sched_ctx_last_worker_awake(worker);
+				}
+				else
+				{
+					cond_no_last_awake = 0;
+					break;
+				}
+			}
+			while (cond_no_last_awake);
 			//_STARPU_DEBUG("worker %u waking up: %d|%d|%d|%d|%d\n", worker->workerid, cond_no_keep_awake, cond_can_block, cond_no_last_awake, cond_no_block_in_parallel_rq, cond_no_unblock_in_parallel_rq);
 			worker->state_keep_awake = 0;
 			_starpu_worker_set_status_scheduling_done(workerid);

+ 7 - 0
tests/Makefile.am

@@ -42,6 +42,11 @@ EXTRA_DIST =					\
 	microbenchs/tasks_size_overhead_sched.sh	\
 	microbenchs/tasks_size_overhead_scheds.sh	\
 	microbenchs/tasks_size_overhead.gp	\
+	microbenchs/parallel_dependent_homogeneous_tasks_data.sh	\
+	microbenchs/parallel_independent_heterogeneous_tasks_data.sh	\
+	microbenchs/parallel_independent_heterogeneous_tasks.sh	\
+	microbenchs/parallel_independent_homogeneous_tasks_data.sh	\
+	microbenchs/parallel_independent_homogeneous_tasks.sh	\
 	datawizard/scratch_opencl_kernel.cl     \
 	datawizard/sync_and_notify_data_opencl_codelet.cl\
 	datawizard/opencl_codelet_unsigned_inc_kernel.cl \
@@ -62,6 +67,8 @@ EXTRA_DIST =					\
 	perfmodels/opencl_memset_kernel.cl \
 	$(MICROBENCHS:=.sh) \
 	microbenchs/microbench.sh \
+	model-checking/prio_list.sh \
+	model-checking/barrier.sh \
 	model-checking/starpu-mc.sh.in
 
 CLEANFILES = 					\

+ 13 - 0
tests/datawizard/variable_size.c

@@ -29,6 +29,8 @@
 #define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(addr, size) (void)0
 #endif
 
+#include <core/simgrid.h>
+
 #define FULLSIZE (5*1024*1024ULL)
 #define INCREASE 0.80
 #ifdef STARPU_QUICK_CHECK
@@ -99,6 +101,8 @@ void variable_size_data_register(starpu_data_handle_t *handleptr, unsigned x, un
 	/* Round to page size */
 	interface.size -= interface.size & (65536-1);
 
+	_starpu_simgrid_data_new(interface.size);
+
 	starpu_data_register(handleptr, -1, &interface, &starpu_interface_variable_size_ops);
 }
 
@@ -143,6 +147,8 @@ static starpu_ssize_t allocate_variable_size_on_node(void *data_interface,
 {
 	struct variable_size_interface *variable_interface = data_interface;
 	variable_interface->ptr = starpu_malloc_on_node_flags(dst_node, variable_interface->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW);
+	if (dst_node == STARPU_MAIN_RAM)
+		_starpu_simgrid_data_alloc(variable_interface->size);
 	STARPU_ASSERT(variable_interface->ptr);
 	return 0;
 }
@@ -152,6 +158,8 @@ static void free_variable_size_on_node(void *data_interface,
 {
 	struct variable_size_interface *variable_interface = data_interface;
 	starpu_free_on_node(node, variable_interface->ptr, variable_interface->size);
+	if (node == STARPU_MAIN_RAM)
+		_starpu_simgrid_data_free(variable_interface->size);
 }
 
 static int variable_size_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)
@@ -192,6 +200,9 @@ static struct starpu_data_interface_ops starpu_interface_variable_size_ops =
 	.pack_data = NULL,
 	.unpack_data = NULL,
 	.describe = describe_variable_size,
+
+	/* We want to observe actual allocations/deallocations */
+	.dontcache = 1,
 };
 
 
@@ -213,6 +224,8 @@ static void kernel(void *descr[], void *cl_arg)
 	/* fprintf(stderr,"increase from %lu by %lu\n", variable_interface->size, increase); */
 	starpu_free_on_node_flags(dst_node, old, variable_interface->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW);
 	variable_interface->size += increase;
+	if (increase)
+		_starpu_simgrid_data_increase(increase);
 	starpu_sleep(0.010);
 }
 

+ 1 - 0
tools/starpu_lp2paje.c

@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <common/config.h>
 
 #define PROGNAME "starpu_lp2paje"