| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 | 
							- ! StarPU --- Runtime system for heterogeneous multicore architectures.
 
- !
 
- ! Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 
- !
 
- ! StarPU is free software; you can redistribute it and/or modify
 
- ! it under the terms of the GNU Lesser General Public License as published by
 
- ! the Free Software Foundation; either version 2.1 of the License, or (at
 
- ! your option) any later version.
 
- !
 
- ! StarPU is distributed in the hope that it will be useful, but
 
- ! WITHOUT ANY WARRANTY; without even the implied warranty of
 
- ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
- !
 
- ! See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
- !
 
- program nf_mm
 
-         use iso_c_binding       ! C interfacing module
 
-         use fstarpu_mod         ! StarPU interfacing module
 
-         use fstarpu_mpi_mod     ! StarPU-MPI interfacing module
 
-         use nf_mm_cl
 
-         implicit none
 
-         logical, parameter :: verbose = .false.
 
-         integer(c_int) :: comm_size, comm_rank
 
-         integer(c_int), target :: comm_world
 
-         integer(c_int) :: N = 16, BS = 4, NB
 
-         real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:)
 
-         type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:)
 
-         type(c_ptr) :: cl_mm
 
-         integer(c_int) :: ncpu
 
-         integer(c_int) :: ret
 
-         integer(c_int) :: row, col
 
-         integer(c_int) :: b_row, b_col
 
-         integer(c_int) :: mr, rank
 
-         integer(c_int64_t) :: tag
 
-         ret = fstarpu_init(C_NULL_PTR)
 
-         if (ret == -19) then
 
-                 stop 77
 
-         else if (ret /= 0) then
 
-                 stop 1
 
-         end if
 
-         ret = fstarpu_mpi_init(1)
 
-         print *,"fstarpu_mpi_init status:", ret
 
-         if (ret /= 0) then
 
-                 stop 1
 
-         end if
 
-         ! stop there if no CPU worker available
 
-         ncpu = fstarpu_cpu_worker_get_count()
 
-         if (ncpu == 0) then
 
-                 call fstarpu_shutdown()
 
-                 stop 77
 
-         end if
 
-         comm_world = fstarpu_mpi_world_comm()
 
-         comm_size = fstarpu_mpi_world_size()
 
-         comm_rank = fstarpu_mpi_world_rank()
 
-         if (comm_size < 2) then
 
-                 call fstarpu_shutdown()
 
-                 ret = fstarpu_mpi_shutdown()
 
-                 stop 77
 
-         end if
 
-         ! TODO: process app's argc/argv
 
-         NB = N/BS
 
-         ! allocate and initialize codelet
 
-         cl_mm = fstarpu_codelet_allocate()
 
-         call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char)
 
-         call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult))
 
-         call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
 
-         call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
 
-         call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW)
 
-         ! allocate matrices
 
-         if (comm_rank == 0) then
 
-                 allocate(A(N,N))
 
-                 allocate(B(N,N))
 
-                 allocate(C(N,N))
 
-         end if
 
-         ! init matrices
 
-         if (comm_rank == 0) then
 
-                 do col=1,N
 
-                 do row=1,N
 
-                 if (row == col) then
 
-                         A(row,col) = 2
 
-                 else
 
-                         A(row,col) = 0
 
-                 end if
 
-                 B(row,col) = row*N+col
 
-                 C(row,col) = 0
 
-                 end do
 
-                 end do
 
-                 if (verbose) then
 
-                         print *,"A"
 
-                         call mat_disp(A)
 
-                         print *,"B"
 
-                         call mat_disp(B)
 
-                         print *,"C"
 
-                         call mat_disp(C)
 
-                 end if
 
-         end if
 
-         ! allocate data handles
 
-         allocate(dh_A(NB))
 
-         allocate(dh_B(NB))
 
-         allocate(dh_C(NB,NB))
 
-         ! register matrices
 
-         if (comm_rank == 0) then
 
-                 mr = 0 ! TODO: use STARPU_MAIN_RAM constant
 
-         else
 
-                 mr = -1
 
-         end if
 
-         tag = 0
 
-         do b_row=1,NB
 
-                 if (comm_rank == 0) then
 
-                         call fstarpu_matrix_data_register(dh_A(b_row), mr, &
 
-                                 c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1)))
 
-                 else
 
-                         call fstarpu_matrix_data_register(dh_A(b_row), mr, &
 
-                                 c_null_ptr, N, BS, N, c_sizeof(A(1,1)))
 
-                 end if
 
-                 call fstarpu_mpi_data_register(dh_A(b_row), tag, 0)
 
-                 tag = tag+1
 
-         end do
 
-         do b_col=1,NB
 
-                 if (comm_rank == 0) then
 
-                         call fstarpu_matrix_data_register(dh_B(b_col), mr, &
 
-                                 c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1)))
 
-                 else
 
-                         call fstarpu_matrix_data_register(dh_B(b_col), mr, &
 
-                                 c_null_ptr, N, N, BS, c_sizeof(B(1,1)))
 
-                 end if
 
-                 call fstarpu_mpi_data_register(dh_B(b_col), tag, 0)
 
-                 tag = tag+1
 
-         end do
 
-         do b_col=1,NB
 
-         do b_row=1,NB
 
-                 if (comm_rank == 0) then
 
-                         call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
 
-                                 c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1)))
 
-                 else
 
-                         call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
 
-                                 c_null_ptr, N, BS, BS, c_sizeof(C(1,1)))
 
-                 end if
 
-                 call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0)
 
-                 tag = tag+1
 
-         end do
 
-         end do
 
-         ! distribute matrix C
 
-         do b_col=1,NB
 
-         do b_row=1,NB
 
-         rank = modulo(b_row+b_col, comm_size)
 
-         call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank)
 
-         end do
 
-         end do
 
-         do b_col=1,NB
 
-            do b_row=1,NB
 
-               call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
 
-                    FSTARPU_R,  dh_A(b_row), &
 
-                    FSTARPU_R,  dh_B(b_col), &
 
-                    FSTARPU_RW, dh_C(b_row,b_col), &
 
-                    C_NULL_PTR /))
 
-            end do
 
-         end do
 
-         call fstarpu_task_wait_for_all()
 
-         ! undistribute matrix C
 
-         do b_col=1,NB
 
-         do b_row=1,NB
 
-         call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0)
 
-         end do
 
-         end do
 
-         ! unregister matrices
 
-         do b_row=1,NB
 
-                 call fstarpu_data_unregister(dh_A(b_row))
 
-         end do
 
-         do b_col=1,NB
 
-                 call fstarpu_data_unregister(dh_B(b_col))
 
-         end do
 
-         do b_col=1,NB
 
-         do b_row=1,NB
 
-                 call fstarpu_data_unregister(dh_C(b_row,b_col))
 
-         end do
 
-         end do
 
-         ! check result
 
-         if (comm_rank == 0) then
 
-                 if (verbose) then
 
-                         print *,"final C"
 
-                         call mat_disp(C)
 
-                 end if
 
-                 do col=1,N
 
-                 do row=1,N
 
-                 if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then
 
-                         print *, "check failed"
 
-                         stop 1
 
-                 end if
 
-                 end do
 
-                 end do
 
-         end if
 
-         ! free handles
 
-         deallocate(dh_A)
 
-         deallocate(dh_B)
 
-         deallocate(dh_C)
 
-         ! free matrices
 
-         if (comm_rank == 0) then
 
-                 deallocate(A)
 
-                 deallocate(B)
 
-                 deallocate(C)
 
-         end if
 
-         call fstarpu_codelet_free(cl_mm)
 
-         call fstarpu_shutdown()
 
-         ret = fstarpu_mpi_shutdown()
 
-         print *,"fstarpu_mpi_shutdown status:", ret
 
-         if (ret /= 0) then
 
-                 stop 1
 
-         end if
 
- end program nf_mm
 
 
  |