
Modify the Native Fortran interface of the functions fstarpu_mpi_task_insert, fstarpu_mpi_task_build and fstarpu_mpi_task_post_build so that they take a single parameter: an argument array holding the MPI communicator, the codelet and the various parameters for the task.

- the functions used to take 2 parameters: an MPI communicator and a
  list of parameters starting with the codelet. They now take a single
  argument array holding the MPI communicator, the codelet and the
  various parameters for the task (see the call sketch after these
  notes)

- the previous code failed with a SIGFPE when built with gcc-9 and
  related compilers

- this could be related to the following change in GCC 9: "C
  descriptors and the ISO_Fortran_binding.h source file have been
  implemented."
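
For reference, a minimal sketch of the new calling convention, modelled on the nf_mm.f90 changes below; cl_mm and dh are placeholders for a codelet and a registered data handle set up elsewhere, inside a program unit that uses iso_c_binding, fstarpu_mod and fstarpu_mpi_mod:

        integer(c_int), target :: comm_world   ! target, so c_loc() can take its address
        type(c_ptr) :: cl_mm, dh, task
        integer(c_int) :: ret

        comm_world = fstarpu_mpi_world_comm()

        ! before: ret = fstarpu_mpi_task_insert(comm_world, (/ cl_mm, FSTARPU_RW, dh, C_NULL_PTR /))
        ! after:  a single argument array whose first element is the address of the communicator
        call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
                FSTARPU_RW, dh, C_NULL_PTR /))

        ! fstarpu_mpi_task_build and fstarpu_mpi_task_post_build follow the same
        ! convention; fstarpu_mpi_task_build now returns the built task directly
        task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
                FSTARPU_RW, dh, C_NULL_PTR /))
        if (c_associated(task)) then
                ret = fstarpu_task_submit(task)
        end if
        call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
                FSTARPU_RW, dh, C_NULL_PTR /))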
Nathalie Furmento, 6 years ago
Parent
Commit 45b7e3e596

+ 6 - 0
ChangeLog

@@ -42,6 +42,12 @@ New features:
     fields of starpu_task to 0.
   * starpufft: Support 3D.
 
+Changes:
+  * Modification in the Native Fortran interface of the functions
+    fstarpu_mpi_task_insert, fstarpu_mpi_task_build and
+    fstarpu_mpi_task_post_build to only take 1 parameter being the MPI
+    communicator, the codelet and the various parameters for the task.
+
 Small features:
   * New starpu_task_insert() and alike parameter STARPU_TASK_WORKERIDS
     allowing to set the fields starpu_task::workerids_len and

+ 10 - 0
mpi/examples/Makefile.am

@@ -283,6 +283,7 @@ if BUILD_EXAMPLES
 if !STARPU_SANITIZE
 examplebin_PROGRAMS +=		\
 	native_fortran/nf_mm	\
+	native_fortran/nf_mm_task_build	\
 	native_fortran/nf_basic_ring
 
 native_fortran_nf_mm_SOURCES	=			\
@@ -294,6 +295,15 @@ native_fortran_nf_mm_SOURCES	=			\
 native_fortran_nf_mm_LDADD =					\
 	-lm
 
+native_fortran_nf_mm_task_build_SOURCES	=			\
+	native_fortran/nf_mm_cl.f90			\
+	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90	\
+	$(top_srcdir)/include/fstarpu_mod.f90		\
+	native_fortran/nf_mm_task_build.f90
+
+native_fortran_nf_mm_task_build_LDADD =					\
+	-lm
+
 native_fortran_nf_basic_ring_SOURCES	=			\
 	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90	\
 	$(top_srcdir)/include/fstarpu_mod.f90		\

+ 10 - 9
mpi/examples/native_fortran/nf_mm.f90

@@ -1,6 +1,6 @@
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 !
-! Copyright (C) 2017                                     CNRS
+! Copyright (C) 2017, 2019                               CNRS
 ! Copyright (C) 2016                                     Inria
 ! Copyright (C) 2016                                     Université de Bordeaux
 !
@@ -23,7 +23,8 @@ program nf_mm
         implicit none
 
         logical, parameter :: verbose = .false.
-        integer(c_int) :: comm_rank, comm_size, comm_world
+        integer(c_int) :: comm_size, comm_rank
+        integer(c_int), target :: comm_world
         integer(c_int) :: N = 16, BS = 4, NB
         real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:)
         type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:)
@@ -166,13 +167,13 @@ program nf_mm
         end do
 
         do b_col=1,NB
-        do b_row=1,NB
-                ret = fstarpu_mpi_task_insert(comm_world, (/ cl_mm, &
-                        FSTARPU_R,  dh_A(b_row), &
-                        FSTARPU_R,  dh_B(b_col), &
-                        FSTARPU_RW, dh_C(b_row,b_col), &
-                        C_NULL_PTR /))
-        end do
+           do b_row=1,NB
+              call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
+                   FSTARPU_R,  dh_A(b_row), &
+                   FSTARPU_R,  dh_B(b_col), &
+                   FSTARPU_RW, dh_C(b_row,b_col), &
+                   C_NULL_PTR /))
+           end do
         end do
 
         call fstarpu_task_wait_for_all()

+ 248 - 0
mpi/examples/native_fortran/nf_mm_task_build.f90

@@ -0,0 +1,248 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2017, 2019                               CNRS
+! Copyright (C) 2016                                     Inria
+! Copyright (C) 2016                                     Université de Bordeaux
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+program nf_mm
+        use iso_c_binding       ! C interfacing module
+        use fstarpu_mod         ! StarPU interfacing module
+        use fstarpu_mpi_mod     ! StarPU-MPI interfacing module
+        use nf_mm_cl
+        implicit none
+
+        logical, parameter :: verbose = .false.
+        integer(c_int) :: comm_size, comm_rank
+        integer(c_int), target :: comm_world
+        integer(c_int) :: N = 16, BS = 4, NB
+        real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:)
+        type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:)
+        type(c_ptr) :: cl_mm
+        type(c_ptr) :: task
+        integer(c_int) :: ncpu
+        integer(c_int) :: ret
+        integer(c_int) :: row, col
+        integer(c_int) :: b_row, b_col
+        integer(c_int) :: mr, tag, rank
+
+        ret = fstarpu_init(C_NULL_PTR)
+        if (ret == -19) then
+                stop 77
+        else if (ret /= 0) then
+                stop 1
+        end if
+
+        ret = fstarpu_mpi_init(1)
+        print *,"fstarpu_mpi_init status:", ret
+        if (ret /= 0) then
+                stop 1
+        end if
+
+        ! stop there if no CPU worker available
+        ncpu = fstarpu_cpu_worker_get_count()
+        if (ncpu == 0) then
+                call fstarpu_shutdown()
+                stop 77
+        end if
+
+        comm_world = fstarpu_mpi_world_comm()
+        comm_size = fstarpu_mpi_world_size()
+        comm_rank = fstarpu_mpi_world_rank()
+
+        if (comm_size < 2) then
+                call fstarpu_shutdown()
+                ret = fstarpu_mpi_shutdown()
+                stop 77
+        end if
+
+        ! TODO: process app's argc/argv
+        NB = N/BS
+
+        ! allocate and initialize codelet
+        cl_mm = fstarpu_codelet_allocate()
+        call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char)
+        call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult))
+        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
+        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
+        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW)
+
+        ! allocate matrices
+        if (comm_rank == 0) then
+                allocate(A(N,N))
+                allocate(B(N,N))
+                allocate(C(N,N))
+        end if
+
+        ! init matrices
+        if (comm_rank == 0) then
+                do col=1,N
+                do row=1,N
+                if (row == col) then
+                        A(row,col) = 2
+                else
+                        A(row,col) = 0
+                end if
+                B(row,col) = row*N+col
+                C(row,col) = 0
+                end do
+                end do
+
+                if (verbose) then
+                        print *,"A"
+                        call mat_disp(A)
+                        print *,"B"
+                        call mat_disp(B)
+                        print *,"C"
+                        call mat_disp(C)
+                end if
+        end if
+
+        ! allocate data handles
+        allocate(dh_A(NB))
+        allocate(dh_B(NB))
+        allocate(dh_C(NB,NB))
+
+        ! register matrices
+        if (comm_rank == 0) then
+                mr = 0 ! TODO: use STARPU_MAIN_RAM constant
+        else
+                mr = -1
+        end if
+        tag = 0
+
+        do b_row=1,NB
+                if (comm_rank == 0) then
+                        call fstarpu_matrix_data_register(dh_A(b_row), mr, &
+                                c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1)))
+                else
+                        call fstarpu_matrix_data_register(dh_A(b_row), mr, &
+                                c_null_ptr, N, BS, N, c_sizeof(A(1,1)))
+                end if
+                call fstarpu_mpi_data_register(dh_A(b_row), tag, 0)
+                tag = tag+1
+        end do
+
+        do b_col=1,NB
+                if (comm_rank == 0) then
+                        call fstarpu_matrix_data_register(dh_B(b_col), mr, &
+                                c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1)))
+                else
+                        call fstarpu_matrix_data_register(dh_B(b_col), mr, &
+                                c_null_ptr, N, N, BS, c_sizeof(B(1,1)))
+                end if
+                call fstarpu_mpi_data_register(dh_B(b_col), tag, 0)
+                tag = tag+1
+        end do
+
+        do b_col=1,NB
+        do b_row=1,NB
+                if (comm_rank == 0) then
+                        call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
+                                c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1)))
+                else
+                        call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
+                                c_null_ptr, N, BS, BS, c_sizeof(C(1,1)))
+                end if
+                call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0)
+                tag = tag+1
+        end do
+        end do
+
+        ! distribute matrix C
+        do b_col=1,NB
+        do b_row=1,NB
+        rank = modulo(b_row+b_col, comm_size)
+        call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank)
+        end do
+        end do
+
+        do b_col=1,NB
+           do b_row=1,NB
+              task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
+                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_B(b_col), &
+                                                FSTARPU_RW, dh_C(b_row,b_col), &
+                                                C_NULL_PTR /))
+              if (c_associated(task)) then
+                 ret = fstarpu_task_submit(task)
+              endif
+              call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
+                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_B(b_col), &
+                                                FSTARPU_RW, dh_C(b_row,b_col), &
+                                                C_NULL_PTR /))
+           end do
+        end do
+
+        call fstarpu_task_wait_for_all()
+
+        ! undistribute matrix C
+        do b_col=1,NB
+        do b_row=1,NB
+        call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0)
+        end do
+        end do
+
+        ! unregister matrices
+        do b_row=1,NB
+                call fstarpu_data_unregister(dh_A(b_row))
+        end do
+
+        do b_col=1,NB
+                call fstarpu_data_unregister(dh_B(b_col))
+        end do
+
+        do b_col=1,NB
+        do b_row=1,NB
+                call fstarpu_data_unregister(dh_C(b_row,b_col))
+        end do
+        end do
+
+        ! check result
+        if (comm_rank == 0) then
+                if (verbose) then
+                        print *,"final C"
+                        call mat_disp(C)
+                end if
+
+                do col=1,N
+                do row=1,N
+                if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then
+                        print *, "check failed"
+                        stop 1
+                end if
+                end do
+                end do
+        end if
+
+        ! free handles
+        deallocate(dh_A)
+        deallocate(dh_B)
+        deallocate(dh_C)
+
+        ! free matrices
+        if (comm_rank == 0) then
+                deallocate(A)
+                deallocate(B)
+                deallocate(C)
+        end if
+        call fstarpu_codelet_free(cl_mm)
+        call fstarpu_shutdown()
+
+        ret = fstarpu_mpi_shutdown()
+        print *,"fstarpu_mpi_shutdown status:", ret
+        if (ret /= 0) then
+                stop 1
+        end if
+end program nf_mm

+ 16 - 23
mpi/include/fstarpu_mpi_mod.f90

@@ -1,6 +1,6 @@
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 !
-! Copyright (C) 2017                                     CNRS
+! Copyright (C) 2017,2019                                CNRS
 ! Copyright (C) 2016                                     Inria
 ! Copyright (C) 2016,2017                                Université de Bordeaux
 !
@@ -247,34 +247,27 @@ module fstarpu_mpi_mod
                 end function fstarpu_mpi_shutdown
 
                 ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                function fstarpu_mpi_task_build(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
+                function fstarpu_mpi_task_build(arglist) bind(C)
+                        use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_mpi_task_build
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
+                        type(c_ptr), dimension(*), intent(in) :: arglist
                 end function fstarpu_mpi_task_build
 
                 ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                function fstarpu_mpi_task_post_build(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        integer(c_int) :: fstarpu_mpi_task_post_build
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_post_build
+                subroutine fstarpu_mpi_task_post_build(arglist) bind(C)
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_task_post_build
 
                 ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                function fstarpu_mpi_task_insert(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        integer(c_int) :: fstarpu_mpi_task_insert
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_insert
-                function fstarpu_mpi_insert_task(mpi_comm,arglist) bind(C,name="fstarpu_mpi_task_insert")
-                        use iso_c_binding, only: c_ptr,c_int
-                        integer(c_int) :: fstarpu_mpi_insert_task
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_insert_task
+                subroutine fstarpu_mpi_task_insert(arglist) bind(C)
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_task_insert
+                subroutine fstarpu_mpi_insert_task(arglist) bind(C,name="fstarpu_mpi_task_insert")
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_insert_task
 
                 ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
                 subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C)

+ 20 - 24
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -488,26 +488,26 @@ int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, vo
 	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio);
 }
 
-int fstarpu_mpi_task_insert(MPI_Fint comm, void ***_arglist)
+void fstarpu_mpi_task_insert(void **arglist)
 {
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
+	MPI_Fint comm = *((MPI_Fint *)arglist[0]);
+	struct starpu_codelet *codelet = arglist[1];
 	if (codelet == NULL)
 	{
 		STARPU_ABORT_MSG("task without codelet");
 	}
-	int ret;
 
-	ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+1);
-	return ret;
+	int ret;
+	ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+2);
+	STARPU_ASSERT(ret >= 0);
 }
 
 /* fstarpu_mpi_insert_task: aliased to fstarpu_mpi_task_insert in fstarpu_mpi_mod.f90 */
 
-struct starpu_task *fstarpu_mpi_task_build(MPI_Fint comm, void ***_arglist)
+struct starpu_task *fstarpu_mpi_task_build(void **arglist)
 {
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
+	MPI_Fint comm = *((MPI_Fint *)arglist[0]);
+	struct starpu_codelet *codelet = arglist[1];
 	if (codelet == NULL)
 	{
 		STARPU_ABORT_MSG("task without codelet");
@@ -515,38 +515,34 @@ struct starpu_task *fstarpu_mpi_task_build(MPI_Fint comm, void ***_arglist)
 	struct starpu_task *task;
 	int ret;
 
-	ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, NULL, arglist+1);
+	ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, NULL, arglist+2);
 	STARPU_ASSERT(ret >= 0);
-	return (ret > 0) ? NULL : task;
+	return task;
 }
 
-int fstarpu_mpi_task_post_build(MPI_Fint _comm, void ***_arglist)
+void fstarpu_mpi_task_post_build(void **arglist)
 {
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
+	MPI_Fint comm = *((MPI_Fint *)arglist[0]);
+	struct starpu_codelet *codelet = arglist[1];
 	if (codelet == NULL)
 	{
 		STARPU_ABORT_MSG("task without codelet");
 	}
-	MPI_Comm comm = MPI_Comm_f2c(_comm);
 	int xrank, do_execute;
 	int ret, me, nb_nodes;
 	struct starpu_data_descr *descrs;
 	int nb_data;
 	int prio;
 
-	starpu_mpi_comm_rank(comm, &me);
-	starpu_mpi_comm_size(comm, &nb_nodes);
+	starpu_mpi_comm_rank(MPI_Comm_f2c(comm), &me);
+	starpu_mpi_comm_size(MPI_Comm_f2c(comm), &nb_nodes);
 
 	/* Find out whether we are to execute the data because we own the data to be written to. */
-	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist);
-	if (ret < 0)
-		return ret;
+	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist+2);
+	STARPU_ASSERT(ret >= 0);
 
-	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio);
+	ret = _starpu_mpi_task_postbuild_v(MPI_Comm_f2c(comm), xrank, do_execute, descrs, nb_data, prio);
+	STARPU_ASSERT(ret >= 0);
 }
 
 #endif /* HAVE_MPI_COMM_F2C */
-
-
-