module mpi_mapping_auxiliaries_m
   !! Auxiliary routines for MPI mapping across planes
   use MPI
   use precision_m, only: FP, MPI_FP
   use screen_io_m, only: get_stderr
   use csrmat_m, only: csrmat_t
   implicit none

   public :: get_mpipattern
   public :: getdata_fwdbwdplane
   public :: communicate_matrixmpi
   private :: getdata_fwdbwdplane_fp
   private :: getdata_fwdbwdplane_int
   private :: getdata_fwdbwdplane_csr

   interface getdata_fwdbwdplane
      !! Overloads getdata_fwdbwdplane for integer, real (FP), and CSR-matrix data
      module procedure getdata_fwdbwdplane_fp
      module procedure getdata_fwdbwdplane_int
      module procedure getdata_fwdbwdplane_csr
   end interface

contains

   subroutine get_mpipattern(comm, istart, iend, n, n_loc, &
                             iloc_start, iloc_end)
      !! Divides an integer range into chunks
      !! that can be worked on by separate MPI processes.
      ! n_loc is defined via integer division. All leftover points are
      ! assigned to the last process.
      integer, intent(in)  :: comm       !! MPI communicator
      integer, intent(in)  :: istart     !! Initial index of range
      integer, intent(in)  :: iend       !! Final index of range
      integer, intent(out) :: n          !! Global length of range
      integer, intent(out) :: n_loc      !! Local length of range
      integer, intent(out) :: iloc_start !! Local start of MPI chunk
      integer, intent(out) :: iloc_end   !! Local end of MPI chunk

      integer :: rank, nprocs, ierr

      call MPI_comm_rank(comm, rank, ierr)
      call MPI_comm_size(comm, nprocs, ierr)

      ! Global length of range (how many points in total)
      n = iend - istart + 1

      ! Divide into equal blocks for each of the processes
      n_loc = n / nprocs

      ! If the chunk is too small, only rank 0 should work on the data
      if (n_loc == 0) then
         if (rank == 0) then
            n_loc = n
            iloc_start = istart
            iloc_end = iend
         else
            n_loc = 0
            iloc_start = n + 1
            iloc_end = n
         endif
         return
      endif

      ! Determination of local chunks
      iloc_start = istart + (n_loc * rank)
      iloc_end = istart + (n_loc * rank) + n_loc - 1

      ! Assign remaining part to last process
      if (rank == nprocs - 1) then
         iloc_end = iend
         n_loc = iend - iloc_start + 1
      endif
   end subroutine

   subroutine getdata_fwdbwdplane_fp(comm, step, nsend, usend, nrecv, urecv)
      !! Communicates data (FP) between planes:
      !! receives data from plane rank+step
      !! and sends data to plane rank-step,
      !! with periodicity in the communication.
      integer, intent(in) :: comm   !! MPI communicator
      integer, intent(in) :: step   !! Step size of communication
      integer, intent(in) :: nsend  !! Dimension of the array to be sent
      real(FP), intent(in), dimension(nsend) :: usend !! Array to be sent
      integer, intent(out) :: nrecv !! Dimension of the received array
      real(FP), intent(out), allocatable, dimension(:) :: urecv !! Array to be received

      integer :: rank, nprocs, ierr
      integer :: dest, source, tag, request, status(MPI_STATUS_SIZE)

      ! Establish communication pattern
      call MPI_comm_rank(comm, rank, ierr)
      call MPI_comm_size(comm, nprocs, ierr)
      source = rank + step
      source = modulo(source, nprocs)
      dest = rank - step
      dest = modulo(dest, nprocs)

      ! Communicate dimension
      tag = 0
      call MPI_isend(nsend, 1, MPI_INTEGER, dest, tag, comm, request, ierr)
      call MPI_recv(nrecv, 1, MPI_INTEGER, source, tag, comm, status, ierr)
      call MPI_wait(request, status, ierr)

      if (allocated(urecv)) deallocate(urecv)
      allocate(urecv(nrecv))

      ! Communicate actual data
      tag = 1
      call MPI_isend(usend, nsend, MPI_FP, dest, tag, comm, request, ierr)
      call MPI_recv(urecv, nrecv, MPI_FP, source, tag, comm, status, ierr)
      call MPI_wait(request, status, ierr)
   end subroutine
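   ! Illustration of the exchange pattern shared by the getdata_fwdbwdplane_*
   ! routines (example values chosen only for illustration): with nprocs = 4
   ! and step = 1, rank 2 receives from rank 3 and sends to rank 1, while
   ! rank 0 receives from rank 1 and sends to rank 3, i.e. the planes form a
   ! periodic ring closed by the modulo operation on source and dest.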
   subroutine getdata_fwdbwdplane_int(comm, step, nsend, usend, nrecv, urecv)
      !! Communicates data (integer) between planes:
      !! receives data from plane rank+step
      !! and sends data to plane rank-step,
      !! with periodicity in the communication.
      integer, intent(in) :: comm   !! MPI communicator
      integer, intent(in) :: step   !! Step size of communication
      integer, intent(in) :: nsend  !! Dimension of the array to be sent
      integer, intent(in), dimension(nsend) :: usend !! Array to be sent
      integer, intent(out) :: nrecv !! Dimension of the received array
      integer, intent(out), allocatable, dimension(:) :: urecv !! Array to be received

      integer :: rank, nprocs, ierr
      integer :: dest, source, tag, request, status(MPI_STATUS_SIZE)

      ! Establish communication pattern
      call MPI_comm_rank(comm, rank, ierr)
      call MPI_comm_size(comm, nprocs, ierr)
      source = rank + step
      source = modulo(source, nprocs)
      dest = rank - step
      dest = modulo(dest, nprocs)

      ! Communicate dimension
      tag = 0
      call MPI_isend(nsend, 1, MPI_INTEGER, dest, tag, comm, request, ierr)
      call MPI_recv(nrecv, 1, MPI_INTEGER, source, tag, comm, status, ierr)
      call MPI_wait(request, status, ierr)

      if (allocated(urecv)) deallocate(urecv)
      allocate(urecv(nrecv))

      ! Communicate actual data
      tag = 1
      call MPI_isend(usend, nsend, MPI_INTEGER, dest, tag, comm, request, ierr)
      call MPI_recv(urecv, nrecv, MPI_INTEGER, source, tag, comm, status, ierr)
      call MPI_wait(request, status, ierr)
   end subroutine

   subroutine getdata_fwdbwdplane_csr(comm, step, acsr_send, acsr_recv)
      !! Communicates data (CSR matrix) between planes:
      !! receives data from plane rank+step
      !! and sends data to plane rank-step,
      !! with periodicity in the communication.
      integer, intent(in) :: comm !! MPI communicator
      integer, intent(in) :: step !! Step size of communication
      type(csrmat_t), intent(in) :: acsr_send !! CSR matrix to be sent
      type(csrmat_t), intent(out), allocatable :: acsr_recv !! CSR matrix to be received

      integer, dimension(3) :: iwrk1
      integer, allocatable, dimension(:) :: iwrk2
      integer :: nrecv

      if (allocated(acsr_recv)) deallocate(acsr_recv)
      allocate(acsr_recv)

      ! Exchange the matrix sizes (ndim, ncol, nnz) first
      iwrk1(1) = acsr_send%ndim
      iwrk1(2) = acsr_send%ncol
      iwrk1(3) = acsr_send%nnz
      call getdata_fwdbwdplane(comm, step, 3, iwrk1, nrecv, iwrk2)
      acsr_recv%ndim = iwrk2(1)
      acsr_recv%ncol = iwrk2(2)
      acsr_recv%nnz = iwrk2(3)

      ! Exchange the CSR arrays (row pointers, column indices, values)
      call getdata_fwdbwdplane(comm, step, acsr_send%ndim+1, acsr_send%i, nrecv, acsr_recv%i)
      call getdata_fwdbwdplane(comm, step, acsr_send%nnz, acsr_send%j, nrecv, acsr_recv%j)
      call getdata_fwdbwdplane(comm, step, acsr_send%nnz, acsr_send%val, nrecv, acsr_recv%val)
   end subroutine
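   ! Worked example for the assembly below (values assumed for illustration):
   ! with two ranks, where rank 0 owns rows 1..3 with 5 local nonzeros and
   ! rank 1 owns rows 4..6 with 7 local nonzeros, disp_nz_locs = (/0, 5/),
   ! so rank 1 shifts its row pointers by 5 and rotates its jcsr/val entries
   ! to positions 6..12 before the in-place MPI_allgatherv assembles the
   ! global CSR arrays on every rank.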
   subroutine communicate_matrixmpi(comm, ndim_glob, ndim_loc, &
                                    nz_al, icsr, jcsr, val)
      !! Assembles a global CSR matrix that was built in parts
      !! on the individual processes.
      integer, intent(in) :: comm      !! MPI communicator
      integer, intent(in) :: ndim_glob !! Dimension of global matrix
      integer, intent(in) :: ndim_loc  !! Local dimension of partial matrix
      integer, intent(in) :: nz_al     !! Dimension of jcsr and val;
                                       !! must be at least the number of non-zeros of the global matrix
      integer, intent(inout), dimension(ndim_glob+1) :: icsr
         !! On input: i-indices (CSR format) of partial matrix
         !! On output: i-indices (CSR format) of global matrix
      integer, intent(inout), dimension(nz_al) :: jcsr
         !! On input: column indices (CSR format) of partial matrix
         !! On output: column indices (CSR format) of global matrix
      real(FP), intent(inout), dimension(nz_al) :: val
         !! On input: values (CSR format) of partial matrix
         !! On output: values (CSR format) of global matrix

      integer :: rank, nprocs, ierr
      integer :: disp, nz_loc, nz, i
      integer, allocatable, dimension(:) :: ndim_locs, disp_ndim_locs, &
                                            nz_locs, disp_nz_locs

      ! Establish information for communication
      call MPI_comm_rank(comm, rank, ierr)
      call MPI_comm_size(comm, nprocs, ierr)

      ! Communicate dimensions
      allocate(ndim_locs(0:nprocs-1), disp_ndim_locs(0:nprocs-1))
      call MPI_allgather(ndim_loc, 1, MPI_integer, &
                         ndim_locs, 1, MPI_integer, comm, ierr)
      call MPI_exscan(ndim_loc, disp, 1, MPI_integer, &
                      MPI_sum, comm, ierr)
      if (rank == 0) then
         disp = 0
      endif
      call MPI_allgather(disp, 1, MPI_integer, &
                         disp_ndim_locs, 1, MPI_integer, comm, ierr)

      ! Communicate numbers of nonzeros
      allocate(nz_locs(0:nprocs-1), disp_nz_locs(0:nprocs-1))
      nz_loc = icsr(ndim_glob+1) - 1
      call MPI_allgather(nz_loc, 1, MPI_integer, &
                         nz_locs, 1, MPI_integer, comm, ierr)
      call MPI_allreduce(nz_loc, nz, 1, MPI_integer, &
                         MPI_sum, comm, ierr)
      if (nz > nz_al) then
         write(get_stderr(),*) 'error(communicate_matrixmpi), nz > nz_al'
         error stop
      endif
      call MPI_exscan(nz_loc, disp, 1, MPI_integer, MPI_sum, comm, ierr)
      if (rank == 0) then
         disp = 0
      endif
      call MPI_allgather(disp, 1, MPI_integer, &
                         disp_nz_locs, 1, MPI_integer, comm, ierr)

      ! Bring matrix into global context
      do i = disp_ndim_locs(rank) + 1, disp_ndim_locs(rank) + ndim_loc
         icsr(i) = icsr(i) + disp_nz_locs(rank)
      enddo
      icsr(ndim_glob+1) = nz + 1
      jcsr = cshift(jcsr, -disp_nz_locs(rank))
      val = cshift(val, -disp_nz_locs(rank))

      ! Communicate matrix
      call MPI_allgatherv(MPI_in_place, ndim_locs(rank), MPI_integer, &
                          icsr, ndim_locs, disp_ndim_locs, MPI_integer, &
                          comm, ierr)
      call MPI_allgatherv(MPI_in_place, nz_locs(rank), MPI_integer, &
                          jcsr, nz_locs, disp_nz_locs, MPI_integer, &
                          comm, ierr)
      call MPI_allgatherv(MPI_in_place, nz_locs(rank), MPI_FP, &
                          val, nz_locs, disp_nz_locs, MPI_FP, &
                          comm, ierr)
   end subroutine

end module
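! ---------------------------------------------------------------------------
! Minimal usage sketch (not part of the original module; a hypothetical
! driver that would normally live in its own file). It splits the global
! index range 1..100 across the ranks with get_mpipattern and then sends
! each rank's chunk size one plane backwards / receives one plane forwards
! via the generic getdata_fwdbwdplane. Assumes MPI and the project's
! precision_m module (providing FP) are available, as used above.
! ---------------------------------------------------------------------------
program demo_mpi_mapping
   use MPI
   use precision_m, only: FP
   use mpi_mapping_auxiliaries_m, only: get_mpipattern, getdata_fwdbwdplane
   implicit none

   integer :: ierr, rank, n, n_loc, iloc_start, iloc_end, nrecv
   real(FP) :: usend(1)
   real(FP), allocatable :: urecv(:)

   call MPI_init(ierr)
   call MPI_comm_rank(MPI_COMM_WORLD, rank, ierr)

   ! Split the global range 1..100 into per-rank chunks
   call get_mpipattern(MPI_COMM_WORLD, 1, 100, n, n_loc, iloc_start, iloc_end)

   ! Exchange the local chunk size with the neighbouring planes (step = 1):
   ! send to rank-1, receive from rank+1, with periodic wrap-around
   usend(1) = real(n_loc, FP)
   call getdata_fwdbwdplane(MPI_COMM_WORLD, 1, 1, usend, nrecv, urecv)

   write(*,'(a,i0,a,i0,a,i0,a,i0)') 'rank ', rank, ': rows ', iloc_start, &
      '..', iloc_end, ', forward neighbour holds ', int(urecv(1))

   call MPI_finalize(ierr)
end program demo_mpi_mapping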