|
@@ -130,13 +130,12 @@ void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle)
|
|
|
return;
|
|
|
|
|
|
STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
|
|
|
- mpi_data->cache_received.in_cache = 0;
|
|
|
- mpi_data->cache_received.ft_induced_cache = 0;
|
|
|
+ mpi_data->cache_received = 0;
|
|
|
+ mpi_data->ft_induced_cache_received = 0;
|
|
|
_STARPU_MALLOC(mpi_data->cache_sent, _starpu_cache_comm_size*sizeof(mpi_data->cache_sent[0]));
|
|
|
for(i=0 ; i<_starpu_cache_comm_size ; i++)
|
|
|
{
|
|
|
- mpi_data->cache_sent[i].in_cache = 0;
|
|
|
- mpi_data->cache_sent[i].ft_induced_cache = 0;
|
|
|
+ mpi_data->cache_sent[i] = 0;
|
|
|
}
|
|
|
STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
|
|
|
}
|
|
@@ -187,14 +186,14 @@ void starpu_mpi_cached_receive_clear(starpu_data_handle_t data_handle)
|
|
|
STARPU_ASSERT(mpi_data->magic == 42);
|
|
|
STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size);
|
|
|
|
|
|
- if (mpi_data->cache_received.in_cache == 1)
|
|
|
+ if (mpi_data->cache_received == 1)
|
|
|
{
|
|
|
#ifdef STARPU_DEVEL
|
|
|
# warning TODO: Somebody else will write to the data, so discard our cached copy if any. starpu_mpi could just remember itself.
|
|
|
#endif
|
|
|
_STARPU_MPI_DEBUG(2, "Clearing receive cache for data %p\n", data_handle);
|
|
|
- mpi_data->cache_received.in_cache = 0;
|
|
|
- mpi_data->cache_received.ft_induced_cache = 0;
|
|
|
+ mpi_data->cache_received = 0;
|
|
|
+ mpi_data->ft_induced_cache_received = 0;
|
|
|
starpu_data_invalidate_submit(data_handle);
|
|
|
_starpu_mpi_cache_data_remove_nolock(data_handle);
|
|
|
_starpu_mpi_cache_stats_dec(mpi_rank, data_handle);
|
|
@@ -202,6 +201,12 @@ void starpu_mpi_cached_receive_clear(starpu_data_handle_t data_handle)
|
|
|
STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
|
|
|
}
|
|
|
|
|
|
+/* Set the ft_induced_cache_received flag of data_handle's MPI data to 1.
|
|
|
+ * Always returns 0.
|
|
|
+ * NOTE(review): the other visible writes to this flag happen while _cache_mutex is held; confirm whether this setter needs the lock too. */
|
|
|
+int starpu_mpi_cache_set_ft_induced_cache_receive(starpu_data_handle_t data_handle)
|
|
|
+{
|
|
|
+ struct _starpu_mpi_data *mpi_data = data_handle->mpi_data;
|
|
|
+ mpi_data->ft_induced_cache_received = 1;
|
|
|
+ /* Declared int: return a defined value instead of falling off the end of the function. */
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
int starpu_mpi_cached_receive_set(starpu_data_handle_t data_handle)
|
|
|
{
|
|
|
int mpi_rank = starpu_mpi_data_get_rank(data_handle);
|
|
@@ -214,22 +219,24 @@ int starpu_mpi_cached_receive_set(starpu_data_handle_t data_handle)
|
|
|
STARPU_ASSERT(mpi_data->magic == 42);
|
|
|
STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size);
|
|
|
|
|
|
- int already_received = mpi_data->cache_received.in_cache;
|
|
|
+ int already_received = mpi_data->cache_received;
|
|
|
if (already_received == 0)
|
|
|
{
|
|
|
_STARPU_MPI_DEBUG(2, "Noting that data %p has already been received by %d\n", data_handle, mpi_rank);
|
|
|
- mpi_data->cache_received.in_cache = 1;
|
|
|
+ mpi_data->cache_received = 1;
|
|
|
_starpu_mpi_cache_data_add_nolock(data_handle);
|
|
|
_starpu_mpi_cache_stats_inc(mpi_rank, data_handle);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- if (mpi_data->cache_received.ft_induced_cache == 1)
|
|
|
- {
|
|
|
- _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(data_handle));
|
|
|
- _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(starpu_data_get_size(data_handle));
|
|
|
- mpi_data->cache_received.ft_induced_cache = 0;
|
|
|
- }
|
|
|
+ #ifdef STARPU_USE_MPI_FT_STATS
|
|
|
+ if (mpi_data->ft_induced_cache_received == 1)
|
|
|
+ {
|
|
|
+ _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(data_handle));
|
|
|
+ _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(starpu_data_get_size(data_handle));
|
|
|
+ mpi_data->ft_induced_cache_received = 0;
|
|
|
+ }
|
|
|
+ #endif //STARPU_USE_MPI_FT_STATS
|
|
|
_STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data_handle, mpi_rank);
|
|
|
}
|
|
|
STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
|
|
@@ -246,7 +253,7 @@ int starpu_mpi_cached_receive(starpu_data_handle_t data_handle)
|
|
|
|
|
|
STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
|
|
|
STARPU_ASSERT(mpi_data->magic == 42);
|
|
|
- already_received = mpi_data->cache_received.in_cache;
|
|
|
+ already_received = mpi_data->cache_received;
|
|
|
STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
|
|
|
return already_received;
|
|
|
}
|
|
@@ -266,11 +273,10 @@ void starpu_mpi_cached_send_clear(starpu_data_handle_t data_handle)
|
|
|
starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &size);
|
|
|
for(n=0 ; n<size ; n++)
|
|
|
{
|
|
|
- if (mpi_data->cache_sent[n].in_cache == 1)
|
|
|
+ if (mpi_data->cache_sent[n] == 1)
|
|
|
{
|
|
|
_STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle);
|
|
|
- mpi_data->cache_sent[n].in_cache = 0;
|
|
|
- mpi_data->cache_sent[n].ft_induced_cache = 0;
|
|
|
+ mpi_data->cache_sent[n] = 0;
|
|
|
_starpu_mpi_cache_data_remove_nolock(data_handle);
|
|
|
}
|
|
|
}
|
|
@@ -287,21 +293,15 @@ int starpu_mpi_cached_send_set(starpu_data_handle_t data_handle, int dest)
|
|
|
STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size);
|
|
|
|
|
|
STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
|
|
|
- int already_sent = mpi_data->cache_sent[dest].in_cache;
|
|
|
- if (mpi_data->cache_sent[dest].in_cache == 0)
|
|
|
+ int already_sent = mpi_data->cache_sent[dest];
|
|
|
+ if (mpi_data->cache_sent[dest] == 0)
|
|
|
{
|
|
|
- mpi_data->cache_sent[dest].in_cache = 1;
|
|
|
+ mpi_data->cache_sent[dest] = 1;
|
|
|
_starpu_mpi_cache_data_add_nolock(data_handle);
|
|
|
_STARPU_MPI_DEBUG(2, "Noting that data %p has already been sent to %d\n", data_handle, dest);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- if (mpi_data->cache_sent[dest].ft_induced_cache == 1)
|
|
|
- {
|
|
|
- _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(starpu_data_get_size(data_handle));
|
|
|
- _STARPU_MPI_FT_STATS_CANCEL_SEND_CP_DATA(starpu_data_get_size(data_handle));
|
|
|
- mpi_data->cache_sent[dest].ft_induced_cache = 0;
|
|
|
- }
|
|
|
_STARPU_MPI_DEBUG(2, "Do not send data %p to node %d as it has already been sent\n", data_handle, dest);
|
|
|
}
|
|
|
STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
|
|
@@ -318,7 +318,7 @@ int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest)
|
|
|
|
|
|
STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex);
|
|
|
STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size);
|
|
|
- already_sent = mpi_data->cache_sent[dest].in_cache;
|
|
|
+ already_sent = mpi_data->cache_sent[dest];
|
|
|
STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
|
|
|
return already_sent;
|
|
|
}
|
|
@@ -334,21 +334,20 @@ static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle)
|
|
|
starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &nb_nodes);
|
|
|
for(i=0 ; i<nb_nodes ; i++)
|
|
|
{
|
|
|
- if (mpi_data->cache_sent[i].in_cache == 1)
|
|
|
+ if (mpi_data->cache_sent[i] == 1)
|
|
|
{
|
|
|
_STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle);
|
|
|
- mpi_data->cache_sent[i].in_cache = 0;
|
|
|
- mpi_data->cache_sent[i].ft_induced_cache = 0;
|
|
|
+ mpi_data->cache_sent[i] = 0;
|
|
|
_starpu_mpi_cache_stats_dec(i, data_handle);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (mpi_data->cache_received.in_cache == 1)
|
|
|
+ if (mpi_data->cache_received == 1)
|
|
|
{
|
|
|
int mpi_rank = starpu_mpi_data_get_rank(data_handle);
|
|
|
_STARPU_MPI_DEBUG(2, "Clearing received cache for data %p\n", data_handle);
|
|
|
- mpi_data->cache_received.in_cache = 0;
|
|
|
- mpi_data->cache_received.ft_induced_cache = 0;
|
|
|
+ mpi_data->cache_received = 0;
|
|
|
+ mpi_data->ft_induced_cache_received = 0;
|
|
|
_starpu_mpi_cache_stats_dec(mpi_rank, data_handle);
|
|
|
}
|
|
|
}
|