module mpi_mapping_auxiliaries_m
    !! Auxiliary routines for MPI mapping across planes
    use MPI
    use precision_m, only: FP, MPI_FP
    use screen_io_m, only : get_stderr
    use csrmat_m, only : csrmat_t
    implicit none

    public :: get_mpipattern
    public :: getdata_fwdbwdplane
    public :: communicate_matrixmpi

    private :: getdata_fwdbwdplane_fp
    private :: getdata_fwdbwdplane_int
    private :: getdata_fwdbwdplane_csr

    interface getdata_fwdbwdplane
        !! Generic interface getdata_fwdbwdplane, overloaded for
        !! real(FP), integer and CSR-matrix data
        module procedure getdata_fwdbwdplane_fp
        module procedure getdata_fwdbwdplane_int
        module procedure getdata_fwdbwdplane_csr
    end interface

contains

    subroutine get_mpipattern(comm, istart, iend, n, n_loc, &
                              iloc_start, iloc_end)
        !! Divides an integer range into chunks that can be
        !! worked on by separate MPI processes
        ! n_loc is defined via integer division. All leftover points are
        ! assigned to the last process.
        integer, intent(in) :: comm
        !! MPI communicator
        integer, intent(in) :: istart
        !! Initial index of range
        integer, intent(in) :: iend
        !! Final index of range
        integer, intent(out) :: n
        !! Global length of range
        integer, intent(out) :: n_loc
        !! Local length of range
        integer, intent(out) :: iloc_start
        !! Local start of MPI-chunk
        integer, intent(out) :: iloc_end
        !! Local end of MPI-chunk

        integer :: rank, nprocs, ierr

        call MPI_comm_rank(comm,rank,ierr)
        call MPI_comm_size(comm,nprocs,ierr)

        ! Global length of range (How many points in total)
        n = iend - istart + 1

        ! Divide into equal blocks for each of the processors
        n_loc = n / nprocs

        ! If there are fewer points than processes, only rank 0 works on the data
        if (n_loc == 0) then

            if (rank == 0) then
                n_loc      = n
                iloc_start = istart
                iloc_end   = iend
            else
                n_loc      = 0
                iloc_start = n+1
                iloc_end   = n
            endif

            return

        endif

        ! Determination of local chunks
        iloc_start = istart + (n_loc * rank)
        iloc_end   = istart + (n_loc * rank) + n_loc - 1

        ! Assign remaining part to last process
        if (rank == nprocs - 1) then
            iloc_end = iend
            n_loc    = iend - iloc_start + 1
        endif

    end subroutine
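
    ! Worked example (illustrative numbers, not taken from any particular run):
    ! for istart = 1, iend = 10 and 4 MPI processes, n = 10 and
    ! n_loc = 10/4 = 2, so the chunks are
    !   rank 0: iloc_start = 1, iloc_end = 2
    !   rank 1: iloc_start = 3, iloc_end = 4
    !   rank 2: iloc_start = 5, iloc_end = 6
    !   rank 3: iloc_start = 7, iloc_end = 10   (n_loc = 4, absorbs the remainder)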

    subroutine getdata_fwdbwdplane_fp(comm, step, nsend, usend, nrecv, urecv)
        !! Communicates real(FP) data between planes:
        !! receives data from plane rank+step
        !! and sends data to plane rank-step,
        !! with periodic wrap-around in the communication
        integer, intent(in) :: comm
        !! MPI Communicator
        integer, intent(in) :: step
        !! Step size of communication
        integer, intent(in) :: nsend
        !! Dimension of the array to be sent
        real(FP), intent(in), dimension(nsend) :: usend
        !! Array to be sent
        integer, intent(out) :: nrecv
        !! Dimension of received array
        real(FP), intent(out), allocatable, dimension(:) :: urecv
        !! Array to be received

        integer :: rank, nprocs, ierr
        integer :: dest, source, tag, request, status(MPI_STATUS_SIZE)

        ! establish communication pattern
        call MPI_comm_rank(comm, rank, ierr)
        call MPI_comm_size(comm, nprocs, ierr)

        source = rank+step
        source = modulo(source, nprocs)

        dest = rank-step
        dest = modulo(dest, nprocs)

        ! Communicate dimension
        tag = 0
        call MPI_isend(nsend, 1, MPI_INTEGER, dest, tag, comm, request, ierr)
        call MPI_recv(nrecv, 1, MPI_INTEGER, source, tag, comm, status, ierr)
        call MPI_wait(request, status, ierr)

        if (allocated(urecv)) deallocate(urecv)
        allocate(urecv(nrecv))

        ! Communicate actual data
        tag = 1
        call MPI_isend(usend, nsend, MPI_FP, dest, tag, comm, request, ierr)
        call MPI_recv(urecv, nrecv, MPI_FP, source, tag, comm, status, ierr)
        call MPI_wait(request, status, ierr)

    end subroutine
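
    ! Usage sketch (illustrative only; u_local, n_local, n_next and u_next are
    ! hypothetical names, not part of this module): with step = 1 each rank
    ! sends its plane data to rank-1 and receives the data of rank+1, with
    ! periodic wrap-around:
    !
    !   real(FP), allocatable :: u_next(:)
    !   integer :: n_next
    !   call getdata_fwdbwdplane(comm, 1, n_local, u_local, n_next, u_next)
    !
    ! Afterwards u_next(1:n_next) holds the data sent by rank modulo(rank+1, nprocs).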

    subroutine getdata_fwdbwdplane_int(comm, step, nsend, usend, nrecv, urecv)
        !! Communicates integer data between planes:
        !! receives data from plane rank+step
        !! and sends data to plane rank-step,
        !! with periodic wrap-around in the communication
        integer, intent(in) :: comm
        !! MPI Communicator
        integer, intent(in) :: step
        !! Step size of communication
        integer, intent(in) :: nsend
        !! Dimension of the array to be sent
        integer, intent(in), dimension(nsend) :: usend
        !! Array to be sent
        integer, intent(out) :: nrecv
        !! Dimension of received array
        integer, intent(out), allocatable, dimension(:) :: urecv
        !! Array to be received

        integer :: rank, nprocs, ierr
        integer :: dest, source, tag, request, status(MPI_STATUS_SIZE)

        ! establish communication pattern
        call MPI_comm_rank(comm, rank, ierr)
        call MPI_comm_size(comm, nprocs, ierr)

        source = rank+step
        source = modulo(source, nprocs)

        dest = rank-step
        dest = modulo(dest, nprocs)

        ! Communicate dimension
        tag = 0
        call MPI_isend(nsend, 1, MPI_INTEGER, dest, tag, comm, request, ierr)
        call MPI_recv(nrecv, 1, MPI_INTEGER, source, tag, comm, status, ierr)
        call MPI_wait(request, status, ierr)

        if (allocated(urecv)) deallocate(urecv)
        allocate(urecv(nrecv))

        ! Communicate actual data
        tag = 1
        call MPI_isend(usend, nsend, MPI_INTEGER, dest, tag, comm, request, ierr)
        call MPI_recv(urecv, nrecv, MPI_INTEGER, source, tag, comm, status, ierr)
        call MPI_wait(request, status, ierr)

    end subroutine

    subroutine getdata_fwdbwdplane_csr(comm, step, acsr_send, acsr_recv)
        !! Communicates a CSR matrix between planes:
        !! receives data from plane rank+step
        !! and sends data to plane rank-step,
        !! with periodic wrap-around in the communication
        integer, intent(in) :: comm
        !! MPI Communicator
        integer, intent(in) :: step
        !! Step size of communication
        type(csrmat_t), intent(in) :: acsr_send
        !! CSR matrix to be sent
        type(csrmat_t), intent(out), allocatable :: acsr_recv
        !! CSR matrix to be received

        integer, dimension(3) :: iwrk1
        integer, allocatable, dimension(:) :: iwrk2
        integer :: nrecv

        if (allocated(acsr_recv)) deallocate(acsr_recv)
        allocate(acsr_recv)

        iwrk1(1) = acsr_send%ndim
        iwrk1(2) = acsr_send%ncol
        iwrk1(3) = acsr_send%nnz
        call getdata_fwdbwdplane(comm, step, 3, iwrk1, nrecv, iwrk2)

        acsr_recv%ndim = iwrk2(1)
        acsr_recv%ncol = iwrk2(2)
        acsr_recv%nnz = iwrk2(3)

        call getdata_fwdbwdplane(comm, step, acsr_send%ndim+1, acsr_send%i, nrecv, acsr_recv%i)
        call getdata_fwdbwdplane(comm, step, acsr_send%nnz, acsr_send%j, nrecv, acsr_recv%j)
        call getdata_fwdbwdplane(comm, step, acsr_send%nnz, acsr_send%val, nrecv, acsr_recv%val)

    end subroutine
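
    ! Usage sketch (illustrative only; acsr_loc and acsr_nbr are hypothetical
    ! names): shift a locally stored CSR matrix by one plane, receiving the
    ! matrix of rank+1 while sending its own matrix to rank-1:
    !
    !   type(csrmat_t), allocatable :: acsr_nbr
    !   call getdata_fwdbwdplane(comm, 1, acsr_loc, acsr_nbr)
    !
    ! The header (ndim, ncol, nnz), the row pointers i, the column indices j and
    ! the values val are exchanged in four consecutive ring communications.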

    subroutine communicate_matrixmpi(comm, ndim_glob, ndim_loc, &
                                     nz_al, icsr, jcsr, val)
        !! Assembles a global CSR matrix from partial matrices
        !! built on the individual processes
        integer, intent(in) :: comm
        !! MPI communicator
        integer, intent(in) :: ndim_glob
        !! Dimension of global matrix
        integer, intent(in) :: ndim_loc
        !! Local dimension of partial matrix
        integer, intent(in) :: nz_al
        !! Allocated dimension of jcsr and val;
        !! must be at least the number of non-zeros of the global matrix
        integer, intent(inout), dimension(ndim_glob+1) :: icsr
        !! On input: i-indices (CSR format) of partial matrix
        !! On output: i-indices (CSR format) of global matrix
        integer, intent(inout), dimension(nz_al) :: jcsr
        !! On input: column indices (CSR format) of partial matrix
        !! On output: column indices (CSR format) of global matrix
        real(FP), intent(inout), dimension(nz_al) :: val
        !! On input: values (CSR format) of partial matrix
        !! On output: values (CSR format) of global matrix

        integer :: rank, nprocs, ierr
        integer :: disp, nz_loc, nz, i
        integer, allocatable, dimension(:) :: ndim_locs, disp_ndim_locs, &
                                              nz_locs, disp_nz_locs

        ! Establish information for communication
        call MPI_comm_rank(comm, rank, ierr)
        call MPI_comm_size(comm, nprocs, ierr)

        ! Communicate dimensions
        allocate(ndim_locs(0:nprocs-1), disp_ndim_locs(0:nprocs-1))
        call MPI_allgather(ndim_loc, 1, MPI_INTEGER, &
                           ndim_locs, 1, MPI_INTEGER, comm, ierr)
        call MPI_exscan(ndim_loc, disp, 1, MPI_INTEGER, &
                        MPI_SUM, comm, ierr)
        if (rank == 0) then
            disp = 0
        endif
        call MPI_allgather(disp, 1, MPI_INTEGER, &
                           disp_ndim_locs, 1, MPI_INTEGER, comm, ierr)

        ! Communicate numbers of nonzeros
        allocate(nz_locs(0:nprocs-1), disp_nz_locs(0:nprocs-1))

        nz_loc = icsr(ndim_glob+1) - 1
        call MPI_allgather(nz_loc, 1, MPI_INTEGER, &
                           nz_locs, 1, MPI_INTEGER, comm, ierr)
        call MPI_allreduce(nz_loc, nz, 1, MPI_INTEGER, &
                           MPI_SUM, comm, ierr)
        if (nz > nz_al) then
            write(get_stderr(),*) 'error(communicate_matrixmpi), nz > nz_al'
            error stop
        endif
        call MPI_exscan(nz_loc, disp, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
        if (rank == 0) then
            disp = 0
        endif
        call MPI_allgather(disp, 1, MPI_INTEGER, &
                           disp_nz_locs, 1, MPI_INTEGER, comm, ierr)

        ! Bring matrix into global context
        do i = disp_ndim_locs(rank) + 1, disp_ndim_locs(rank) + ndim_loc
            icsr(i) = icsr(i) + disp_nz_locs(rank)
        enddo
        icsr(ndim_glob+1) = nz + 1

        jcsr = cshift(jcsr, -disp_nz_locs(rank))
        val = cshift(val, -disp_nz_locs(rank))

        ! Communicate Matrix
        call MPI_allgatherv(MPI_IN_PLACE, ndim_locs(rank), MPI_INTEGER, &
                            icsr, ndim_locs, disp_ndim_locs, MPI_INTEGER, &
                            comm, ierr)
        call MPI_allgatherv(MPI_IN_PLACE, nz_locs(rank), MPI_INTEGER, &
                            jcsr, nz_locs, disp_nz_locs, MPI_INTEGER, &
                            comm, ierr)
        call MPI_allgatherv(MPI_IN_PLACE, nz_locs(rank), MPI_FP, &
                            val, nz_locs, disp_nz_locs, MPI_FP, &
                            comm, ierr)

    end subroutine
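
    ! Usage sketch (illustrative only; irow_start and irow_end are hypothetical
    ! names): assuming each rank has assembled its own rows into icsr, jcsr and
    ! val as described above, every rank holds the complete global matrix after
    ! this collective call:
    !
    !   call get_mpipattern(comm, 1, ndim_glob, n, ndim_loc, irow_start, irow_end)
    !   ! ... assemble rows irow_start:irow_end into icsr, jcsr, val ...
    !   call communicate_matrixmpi(comm, ndim_glob, ndim_loc, nz_al, icsr, jcsr, val)
    !
    ! All ranks of comm must participate, since MPI_allgather/MPI_allgatherv
    ! are collective operations.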

end module