15 anni fa · 066a3c3236
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -55,7 +55,8 @@ check_PROGRAMS +=					\
 
				 	tests/mpi_irecv					\
			
 
				 	tests/ring					\
			
 
				 	tests/ring_async				\
			
 
				-	tests/block_interface
			
 
				+	tests/block_interface				\
			
 
				+	tests/block_interface_pinned
			
 
				 
			
 
				 tests_mpi_isend_LDADD =					\
			
 
				 	libstarpumpi.la
			
@@ -81,10 +82,10 @@ tests_ring_LDADD =					\
 
				 tests_ring_SOURCES =					\
			
 
				 	tests/ring.c
			
 
				 
			
 
				-tests_ring_async_LDADD =					\
			
 
				+tests_ring_async_LDADD =				\
			
 
				 	libstarpumpi.la
			
 
				 
			
 
				-tests_ring_async_SOURCES =					\
			
 
				+tests_ring_async_SOURCES =				\
			
 
				 	tests/ring_async.c
			
 
				 
			
 
				 tests_block_interface_LDADD =				\
			
@@ -93,6 +94,12 @@ tests_block_interface_LDADD =				\
 
				 tests_block_interface_SOURCES =				\
			
 
				 	tests/block_interface.c
			
 
				 
			
 
				+tests_block_interface_pinned_LDADD =			\
			
 
				+	libstarpumpi.la
			
 
				+
			
 
				+tests_block_interface_pinned_SOURCES =			\
			
 
				+	tests/block_interface_pinned.c
			
 
				+
			
 
				 if USE_CUDA
			
 
				 tests_ring_SOURCES += tests/ring_kernel.cu
			
 
				 tests_ring_async_SOURCES += tests/ring_kernel.cu
			
--- a/mpi/starpu_mpi.c
+++ b/mpi/starpu_mpi.c
@@ -30,11 +30,6 @@ static int running = 0;
 
				 
			
 
				 static void _handle_new_mpi_isend(struct starpu_mpi_req_s *req)
			
 
				 {
			
 
				-	//int rank;
			
 
				-	//MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				-	//fprintf(stdout, "Rank %d _handle_new_mpi_isend\n", rank);
			
 
				-	//fflush(stdout);
			
 
				-
			
 
				 	void *ptr = starpu_mpi_handle_to_ptr(req->data_handle);
			
 
				 	starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
			
 
				 
			
@@ -131,18 +126,37 @@ int starpu_mpi_send(starpu_data_handle data_handle,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void _handle_new_mpi_wait(struct starpu_mpi_req_s *req)
			
 
				+{
			
 
				+	req->ret = MPI_Wait(&req->request, req->status);
			
 
				+	handle_request_termination(req);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				 int starpu_mpi_wait(struct starpu_mpi_req_s *req, MPI_Status *status)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				 	pthread_mutex_lock(&req->req_mutex);
			
 
				 
			
 
				+	req->status = status;
			
 
				+
			
 
				+	/* we don't submit a wait request until the previous mpi request was
			
 
				+	 * actually submitted */
			
 
				 	while (!req->submitted)
			
 
				 		pthread_cond_wait(&req->req_cond, &req->req_mutex);
			
 
				 
			
 
				-	ret = MPI_Wait(&req->request, status);
			
 
				+	req->submitted = 0;
			
 
				+	req->handle_new = _handle_new_mpi_wait;
			
 
				+	req->status = status;
			
 
				 
			
 
				-	handle_request_termination(req);
			
 
				+	submit_mpi_req(req);
			
 
				+
			
 
				+	while (!req->submitted)
			
 
				+		pthread_cond_wait(&req->req_cond, &req->req_mutex);
			
 
				+
			
 
				+	ret = req->ret;
			
 
				 
			
 
				 	pthread_mutex_unlock(&req->req_mutex);
			
 
				 
			
--- a/mpi/starpu_mpi.h
+++ b/mpi/starpu_mpi.h
@@ -28,12 +28,14 @@ LIST_TYPE(starpu_mpi_req,
 
				 	starpu_access_mode mode;
			
 
				 	MPI_Datatype datatype;
			
 
				 	MPI_Request request;
			
 
				+	MPI_Status *status;
			
 
				 	void (*handle_new)(struct starpu_mpi_req_s *);
			
 
				 	void (*handle_pending)(struct starpu_mpi_req_s *);
			
 
				 	unsigned submitted;
			
 
				 	int dst;
			
 
				 	int src;
			
 
				 	int mpi_tag;
			
 
				+	int ret;
			
 
				 	MPI_Comm comm;
			
 
				 	pthread_mutex_t req_mutex;
			
 
				 	pthread_cond_t req_cond;
			
--- a/mpi/tests/block_interface_pinned.c
+++ b/mpi/tests/block_interface_pinned.c
@@ -0,0 +1,139 @@
 
				+/*
			
 
				+ * StarPU
			
 
				+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu_mpi.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#define NITER	2048
			
 
				+
			
 
				+#define BIGSIZE	64
			
 
				+#define SIZE	64
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	MPI_Init(NULL, NULL);
			
 
				+
			
 
				+	int rank, size;
			
 
				+
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	MPI_Comm_size(MPI_COMM_WORLD, &size);
			
 
				+
			
 
				+	if (size < 2)
			
 
				+	{
			
 
				+		if (rank == 0)
			
 
				+			fprintf(stderr, "We need at least processes.\n");
			
 
				+
			
 
				+		MPI_Finalize();
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	/* We only use 2 nodes for that test */
			
 
				+	if (rank >= 2)
			
 
				+	{
			
 
				+		MPI_Finalize();
			
 
				+		return 0;
			
 
				+	}
			
 
				+		
			
 
				+	starpu_init(NULL);
			
 
				+	starpu_mpi_initialize();
			
 
				+
			
 
				+	/* Node 0 will allocate a big block and only register an inner part of
			
 
				+	 * it as the block data, Node 1 will allocate a block of small size and
			
 
				+	 * register it directly. Node 0 and 1 will then exchange the content of
			
 
				+	 * their blocks. */
			
 
				+
			
 
				+	float *block;
			
 
				+	starpu_data_handle block_handle;
			
 
				+
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				+		starpu_malloc_pinned_if_possible((void **)&block,
			
 
				+				BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float));
			
 
				+		memset(block, 0, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float));
			
 
				+
			
 
				+		/* fill the inner block */
			
 
				+		unsigned i, j, k;
			
 
				+		for (k = 0; k < SIZE; k++)
			
 
				+		for (j = 0; j < SIZE; j++)
			
 
				+		for (i = 0; i < SIZE; i++)
			
 
				+		{
			
 
				+			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
			
 
				+		}
			
 
				+
			
 
				+		starpu_register_block_data(&block_handle, 0,
			
 
				+			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
			
 
				+			SIZE, SIZE, SIZE, sizeof(float));
			
 
				+	}
			
 
				+	else /* rank == 1 */
			
 
				+	{
			
 
				+		starpu_malloc_pinned_if_possible((void **)&block,
			
 
				+			SIZE*SIZE*SIZE*sizeof(float));
			
 
				+		memset(block, 0, SIZE*SIZE*SIZE*sizeof(float));
			
 
				+
			
 
				+		starpu_register_block_data(&block_handle, 0,
			
 
				+			(uintptr_t)block, SIZE, SIZE*SIZE,
			
 
				+			SIZE, SIZE, SIZE, sizeof(float));
			
 
				+	}
			
 
				+
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				+		starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD);
			
 
				+
			
 
				+		MPI_Status status;
			
 
				+		starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status);
			
 
				+
			
 
				+		/* check the content of the block */
			
 
				+		starpu_sync_data_with_mem(block_handle, STARPU_R);
			
 
				+		unsigned i, j, k;
			
 
				+		for (k = 0; k < SIZE; k++)
			
 
				+		for (j = 0; j < SIZE; j++)
			
 
				+		for (i = 0; i < SIZE; i++)
			
 
				+		{
			
 
				+			assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f);
			
 
				+		}
			
 
				+		starpu_release_data_from_mem(block_handle);
			
 
				+		
			
 
				+	}
			
 
				+	else /* rank == 1 */
			
 
				+	{
			
 
				+		MPI_Status status;
			
 
				+		starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status);
			
 
				+
			
 
				+		/* check the content of the block and modify it */
			
 
				+		starpu_sync_data_with_mem(block_handle, STARPU_RW);
			
 
				+		unsigned i, j, k;
			
 
				+		for (k = 0; k < SIZE; k++)
			
 
				+		for (j = 0; j < SIZE; j++)
			
 
				+		for (i = 0; i < SIZE; i++)
			
 
				+		{
			
 
				+			assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f);
			
 
				+			block[i + j*SIZE + k*SIZE*SIZE] = 33.0f;
			
 
				+		}
			
 
				+		starpu_release_data_from_mem(block_handle);
			
 
				+
			
 
				+		starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD);
			
 
				+	}
			
 
				+
			
 
				+	fprintf(stdout, "Rank %d is done\n", rank);
			
 
				+	fflush(stdout);
			
 
				+
			
 
				+	starpu_mpi_shutdown();
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	MPI_Finalize();
			
 
				+
			
 
				+	return 0;
			
 
				+}