csrmat_m.f90 Source File


Source Code

module csrmat_m
    !! Algebra routines for
    !! - vectors
    !! - sparse matrices in csr format
    use list_operations_m, only : sort_row
    use precision_m, only : FP, FP_EPS
    use netcdf
    use, intrinsic :: iso_c_binding, only: c_int32_t, c_ptr, c_loc
    use screen_io_m, only: get_stdout
    use error_handling_m, only: handle_error, handle_error_netcdf, error_info_t
    use status_codes_m, only: PARALLAX_ERR_CSRMAT
    implicit none

    public :: csr_times_vec
    public :: csr_times_vec_single
    public :: csr_residuum
    public :: sortsum
    public :: diffvecs_nrm
    public :: csr_transpose

    private :: sortsum_csrrow
    private :: transpose_csr
    type, public, bind(C) :: csrmat_data_t
        !! Data type used to expose `csrmat_t` data to C/C++
        !! Please see `csrmat_t` for descriptions of individual
        !! properties/members.
        !! Values are copied directly.
        !! Arrays are exposed with pointers.
        integer(c_int32_t) :: ndim
        integer(c_int32_t) :: ncol
        integer(c_int32_t) :: nnz
        type(c_ptr) :: i
        type(c_ptr) :: j
        type(c_ptr) :: val
    end type csrmat_data_t

    type, public :: csrmat_t
        !! Compressed-sparse-row (CSR) matrix format
        integer :: ndim
        !! Number of rows (dimension)
        integer :: ncol
        !! Number of columns
        integer :: nnz
        !! Number of non-zero elements
        integer, allocatable, dimension(:) :: i
        !dir$ attributes align:64 :: i
        !! i-array in CSR format, of dimension ndim+1
        integer, allocatable, dimension(:) :: j
        !dir$ attributes align:64 :: j
        !! j-array in CSR format, of dimension nnz
        real(FP), allocatable, dimension(:) :: val
        !dir$ attributes align:64 :: val
        !! val-array in CSR format, of dimension nnz
    contains
        procedure :: write_netcdf
        procedure :: read_netcdf
        procedure :: sortsum
        procedure :: create_copy
        final :: destructor
        procedure :: display
        procedure, public :: expose_data
    end type csrmat_t


contains

    subroutine read_netcdf(self, fgid)
        !! Reads csr matrix from file
        class(csrmat_t), intent(inout) :: self
        !! csr matrix
        integer, intent(in) :: fgid
        !! File or group id number of existing Netcdf4 file

        integer :: nf90_stat
        integer :: iddim_i, iddim_nnz, id_i, id_j, id_val
        integer :: dimi

        ! read global attributes (metadata)
        nf90_stat = nf90_get_att(fgid, NF90_GLOBAL,'ndim', self%ndim)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)
        nf90_stat = nf90_get_att(fgid, NF90_GLOBAL, 'ncol', self%ncol)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! get dimensions
        nf90_stat = nf90_inq_dimid(fgid, 'dimi', iddim_i) ! dimension of i is ndim+1
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)
        nf90_stat =  nf90_inquire_dimension(fgid, iddim_i, len = dimi)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_inq_dimid(fgid, 'nnz', iddim_nnz)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)
        nf90_stat =  nf90_inquire_dimension(fgid, iddim_nnz, len = self%nnz)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! allocate fields
        if (.not. allocated(self%i)) then
            allocate( self%i( dimi ) )
        else
            call handle_error("CSR-fields must not be allocated (i)!", &
                              PARALLAX_ERR_CSRMAT, __LINE__, __FILE__)
        endif

        if (.not. allocated(self%j)) then
            allocate( self%j( self%nnz ) )
        else
            call handle_error("CSR-fields must not be allocated (j)!", &
                              PARALLAX_ERR_CSRMAT, __LINE__, __FILE__)
        endif

        if (.not. allocated(self%val)) then
            allocate( self%val( self%nnz ) )
        else
            call handle_error("CSR-fields must not be allocated (nnz)!", &
                              PARALLAX_ERR_CSRMAT, __LINE__, __FILE__)
        endif

        ! read fields
        nf90_stat = nf90_inq_varid(fgid, 'i', id_i)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)
        nf90_stat = nf90_get_var(fgid, id_i, self%i)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_inq_varid(fgid, 'j', id_j)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)
        nf90_stat = nf90_get_var(fgid, id_j, self%j)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_inq_varid(fgid, 'val', id_val)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)
        nf90_stat = nf90_get_var(fgid, id_val, self%val)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

    end subroutine

    subroutine write_netcdf(self, fgid)
        !! writes csr matrix to file
        implicit none

        class(csrmat_t), intent(in) :: self
        !!csr matrix

        integer, intent(in) :: fgid
        !! file or group id number of existing Netcdf4 file

        integer :: nf90_stat
        integer :: iddim_i, iddim_nnz
        integer :: id_i, id_j, id_val

        ! write global attributes (metadata)
        nf90_stat = nf90_put_att(fgid, NF90_GLOBAL,'ndim', self%ndim)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_put_att(fgid, NF90_GLOBAL, 'ncol', self%ncol)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! define dimensions
        nf90_stat = nf90_def_dim(fgid, "dimi", self%ndim+1, iddim_i) ! dimension of i is ndim+1
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_def_dim(fgid, "nnz", self%nnz, iddim_nnz)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! define variables
        nf90_stat = nf90_def_var(fgid, "i", NF90_INT, iddim_i, id_i )
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_def_var(fgid, "j", NF90_INT, iddim_nnz, id_j )
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_def_var(fgid, "val", NF90_DOUBLE, iddim_nnz, id_val )
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! end of definition
        nf90_stat = nf90_enddef(fgid)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! put variables
        nf90_stat = nf90_put_var(fgid, id_i, self%i )
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_put_var(fgid, id_j, self%j(1 : self%nnz) )
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        nf90_stat = nf90_put_var(fgid, id_val, self%val(1 : self%nnz) )
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

        ! put into redefinition state
        nf90_stat = nf90_redef(fgid)
        call handle_error_netcdf(nf90_stat, __LINE__, __FILE__)

    end subroutine write_netcdf

    subroutine create_copy(self, res)
        !! Creates a copy of the matrix
        class(csrmat_t), intent(in) :: self
        !! Matrix
        type(csrmat_t), intent(out) :: res
        !! Copy of matrix

        integer :: row, k

        res%ndim = self%ndim
        res%ncol = self%ncol
        res%nnz = self%nnz

        allocate(res%i(res%ndim+1))
        allocate(res%j(res%nnz))
        allocate(res%val(res%nnz))

        !$omp parallel default(none) private(row, k) shared(self, res)
        !$omp do
        do row = 1, self%ndim+1
            res%i(row) = self%i(row)
        enddo
        !$omp end do
        !$omp do
        do k = 1, self%nnz
            res%j(k) = self%j(k)
            res%val(k) = self%val(k)
        enddo
        !$omp end do
        !$omp end parallel

    end subroutine

    subroutine sortsum(self)
        !! sorts and sums entries in ascending j-order for each row
        implicit none

        class(csrmat_t), intent(inout) :: self
        !!csr matrix

        integer, parameter :: wsize = 20
        integer, dimension( self%ndim+1 ) :: icsr2
        integer :: row, j, nz, nzt, js
        integer, dimension(wsize) :: workj
        real(FP), dimension(wsize) :: workr

        nz = 0
        do row = 1, self%ndim
            nzt = self%i(row+1) - self%i(row)
            js = self%i(row)
            do j = 1, nzt
                workj(j) = self%j(js+j-1)
                workr(j) = self%val(js+j-1)
            enddo
            call sortsum_csrrow(nzt, workj, workr)
            icsr2(row) = nz+1
            js = icsr2(row)
            do j = 1, nzt
                self%j(js+j-1) = workj(j)
                self%val(js+j-1) = workr(j)
            enddo
            nz = nz + nzt
        enddo
        icsr2( self%ndim+1 ) = nz+1

        self%i=icsr2

        self%nnz = nz

    end subroutine sortsum

    subroutine sortsum_csrrow(nz, j, val)
        !! uniques and sorts values of j in ascending order for single row
        !! corresponding values are summed up in val
        implicit none

        integer,intent(inout)::nz
        !! number of nonzeros in row (before and aafter)

        integer,intent(inout),dimension(nz)::j
        !! column indices, on output sorted

        real(FP),intent(inout),dimension(nz)::val
        !! values, on output sorted and summed

        logical :: mask(nz)

        integer :: jmin, k, r, jnew(nz), nzn, cnt
        real(FP), dimension(nz) :: valnew

        mask=.true.
        jnew = 0
        valnew = 0.0_FP
        cnt = 0
        r = 0
        nzn = nz

        do while (r < nzn)
            r = r+1
            valnew(r) = 0
            jmin = minval(j, mask)
            cnt = 0
            do k = 1,nz
                if (j(k) == jmin) then
                    mask(k) = .false.
                    valnew(r) = valnew(r) + val(k)
                    cnt = cnt+1
                endif
            enddo
            nzn = nzn - cnt + 1
            jnew(r) = jmin
        enddo

        nz = nzn
        j = jnew
        val = valnew

    end subroutine sortsum_csrrow

    subroutine display(self)
        !! displays some information on matrix
        implicit none

        class(csrmat_t), intent(in) :: self
        !!csr matrix

        write(get_stdout(), 101) self%ndim, self%ncol, self%nnz
101     format(1X,'ndim = ',I10,1X,'; ncol = ',I10,1X,' ; nnz = ',I10,1X,' ;')

    end subroutine display

    subroutine expose_data(self, data_object)
        !! Exposes csrmat data through csrmat_data_t object
        class(csrmat_t), intent(in), target :: self
        !! Instance of the type
        type(csrmat_data_t), intent(out) :: data_object
        !! Destination csrmat data object that contains the exposed data of the
        !! csrmat object.

        ! Copy values
        data_object%ndim = self%ndim
        data_object%ncol = self%ncol
        data_object%nnz  = self%nnz

        ! Now assign pointers
        data_object%i    = c_loc(self%i)
        data_object%j    = c_loc(self%j)
        data_object%val  = c_loc(self%val)

    end subroutine expose_data

    subroutine destructor(self)
        !! destructor, frees memory
        type(csrmat_t), intent(inout) :: self
        !!csr matrix

        self%ndim = 0
        self%ncol = 0
        self%nnz  = 0

        if (allocated(self%i)) deallocate(self%i)
        if (allocated(self%j)) deallocate(self%j)
        if (allocated(self%val)) deallocate(self%val)

    end subroutine destructor

    subroutine csr_times_vec(a, x, y)
        !! Multiplies matrix with vector y = A*x
        !! Thread safe, i.e. can be called from within parallel region
        implicit none
        type(csrmat_t), intent(in) :: a
        !!csr matrix
        real(FP), dimension(a%ncol), intent(in) :: x
        !! vector x
        real(FP), dimension(a%ndim), intent(out) :: y
        !! vector y (result)

        integer :: row

#ifdef __NVCOMPILER
        !$OMP SIMD
#else
        !$OMP DO SIMD
#endif
        do row = 1, a%ndim
            y(row) = dot_product(             &
                a%val(a%i(row):a%i(row+1)-1), &
                x(a%j(a%i(row):a%i(row+1)-1)))
        enddo
#ifdef __NVCOMPILER
        !$OMP END SIMD
#else
        !$OMP END DO SIMD
#endif

    end subroutine csr_times_vec

    pure real(FP) function csr_times_vec_single(a, x, row)
        !! Computes element `row` of vector y = A*x
        !! Thread safe, i.e. can be called from within parallel region
        implicit none
        type(csrmat_t), intent(in) :: a
        !!csr matrix
        real(FP), dimension(a%ncol), intent(in) :: x
        !! vector x
        integer, intent(in) :: row
        !! row of matrix

        csr_times_vec_single = dot_product(   &
                a%val(a%i(row):a%i(row+1)-1), &
                x(a%j(a%i(row):a%i(row+1)-1)))

    end function


    subroutine csr_residuum(a, x, b, res, resmax, epsb)
        !! Computes the residuum ||Ax-b|| / (||b||+eps)
        type(csrmat_t), intent(in) :: a
        !!csr matrix
        real(FP), dimension(a%ncol), intent(in) :: x
        !! Vector x
        real(FP), dimension(a%ndim), intent(in) :: b
        !! Vector b
        real(FP), intent(out) :: res
        !! Residuum (root mean square)
        real(FP), intent(out), optional :: resmax
        !! Residuum (maximum)
        real(FP), intent(in), optional :: epsb
        !! Tolerance for division of b in residual
        !! Dafault value FP_EPS

        integer :: row
        real(FP) :: y, bnrm, rmax, epsb_val

        if (present(epsb)) then
            epsb_val = epsb
        else
            epsb_val = FP_EPS
        endif

        bnrm = 0.0_FP
        res = 0.0_FP
        rmax = 0.0_FP
        !$OMP PARALLEL PRIVATE(row, y)
        !$OMP DO REDUCTION(+: res, bnrm) REDUCTION(max: rmax)
        do row = 1, a%ndim
            ! multiply y = a*x
            y = dot_product(a%val(a%i(row):a%i(row+1)-1), x(a%j(a%i(row):a%i(row+1)-1)))

            bnrm = bnrm + b(row)**2
            res  = res + (y - b(row))**2
            rmax = max(rmax, abs(y - b(row)))
        enddo
        !$OMP END DO
        !$OMP END PARALLEL

        res = res / (bnrm + epsb_val)
        res = sqrt(res)

        if (present(resmax)) then
            resmax = rmax
        endif

    end subroutine

    real(FP) function diffvecs_nrm(n, x, y)
        !! computes norm of difference of two vectors, i.e. ||x-y|| / n
        implicit none

        integer, intent(in) :: n
        !! dimension of vectors

        real(FP), dimension(n), intent(in) :: x
        !! vector x

        real(FP), dimension(n), intent(in) :: y
        !! vector y

        integer :: i
        real(FP) :: d

        d = 0.0_FP
        !$OMP PARALLEL DO PRIVATE(i) REDUCTION(+:d)
        do i = 1, n
            d = d + (x(i)-y(i))**2
        enddo
        !$OMP END PARALLEL DO

        diffvecs_nrm = sqrt(d / (1.0_FP*n) )

    end function diffvecs_nrm

#ifdef ENABLE_MKL
    subroutine csr_transpose(a, atransp)
        !! Transposes matrix a

        ! With MKL enabled, first it converts matrix into COO format 
        ! and transposes it in COO format, then uses MKL routine 
        ! to convert from COO to CSR format.
       type(csrmat_t), intent(in) :: a
        !! CSR matrix
        type(csrmat_t), allocatable, intent(out) :: atransp
        !! Transposed matrix in CSR format

        integer :: i, k, info
        integer, dimension(a%nnz) ::  atransp_i_coo,  atransp_j_coo
        integer, dimension(6) :: job_coo_to_csr


        if (allocated(atransp)) deallocate(atransp)
        allocate(atransp)

        atransp%ndim = a%ncol
        atransp%ncol = a%ndim
        atransp%nnz  = a%nnz
        allocate(atransp%i(atransp%ndim+1))
        allocate(atransp%j(atransp%nnz))
        allocate(atransp%val(atransp%nnz))
        
        ! atransp in coo format
        !$omp parallel default(none) &
        !$omp private(i, k) shared(a, atransp_i_coo, atransp_j_coo)
        !$omp do
        do i = 1, a%ndim
            do k = a%i(i), a%i(i+1)-1
                atransp_j_coo(k) = i
                atransp_i_coo(k) = a%j(k)
            enddo
        enddo
        !$omp end do
        !$omp end parallel

        ! Use MKL dcsrcoo routine to convert atransp from coo to sorted csr format
        job_coo_to_csr(1) = 2 ! coo -> csr (sorted)
        job_coo_to_csr(2) = 1 ! one based indexing csr
        job_coo_to_csr(3) = 1 ! one based indexing coo
        job_coo_to_csr(5) = atransp%nnz
        job_coo_to_csr(6) = 0
        call MKL_dcsrcoo(job_coo_to_csr, atransp%ndim, atransp%val, atransp%j, &
                         atransp%i, atransp%nnz, a%val, atransp_i_coo,  &
                         atransp_j_coo, info)

        if (info /= 0) then
            call handle_error("CSR-transpose failed!", &
                              PARALLAX_ERR_CSRMAT, __LINE__, __FILE__, &
                              additional_info=error_info_t("MKL info:", [info]))
        endif
    end subroutine
#else
    subroutine csr_transpose(a, atransp)
        !! Transposes matrix a
        
        ! When MKL is not enabled, the in-house routine transpose_csr 
        ! below is used instead
        type(csrmat_t), intent(in) :: a
        !! CSR matrix
        type(csrmat_t), allocatable, intent(out) :: atransp
        !! Transposed matrix in CSR format

        integer :: i, k, info
        integer, dimension(a%nnz) ::  atransp_i_coo,  atransp_j_coo
        integer, dimension(6) :: job_coo_to_csr


        if (allocated(atransp)) deallocate(atransp)
        allocate(atransp)

        atransp%ndim = a%ncol
        atransp%ncol = a%ndim
        atransp%nnz  = a%nnz
        allocate(atransp%i(atransp%ndim+1))
        allocate(atransp%j(atransp%nnz))
        allocate(atransp%val(atransp%nnz))
        
        ! MKL-free implementation of csr matrix transpose        
        call transpose_csr(a%ndim, a%ncol, a%nnz, &
                           a%i, a%j, a%val, &
                           atransp%i, atransp%j, atransp%val)
    end subroutine csr_transpose
#endif

    subroutine transpose_csr(nrow, ncol, nnz, & 
                             row_ptr, col_ind, values, &
                             trans_row_ptr, trans_col_ind, trans_values)
        !! MKL-free subroutine to transpose a sparse matrix in CSR format.
        implicit none
        ! Arguments
        integer, intent(in) :: nrow, ncol, nnz
        !! Dimensions of the original CSR matrix.

        integer, intent(in) :: row_ptr(nrow+1), col_ind(nnz)
        real(FP), intent(in) :: values(nnz)
        !! Row pointer, column index and element values of the original CSR matrix
        
        integer, intent(out) :: trans_row_ptr(ncol+1), trans_col_ind(nnz)
        real(FP), intent(out) :: trans_values(nnz)
        !! Row pointer, column index and element values of the transposed CSR matrix
        
        integer :: i, j, k, count(ncol)
        integer, allocatable :: temp_ind(:)
        real(FP), allocatable :: temp_values(:)
        integer :: start, endd
        ! Initialize transposed row pointer array
        trans_row_ptr = 0
        trans_col_ind = 0
        trans_values = 0.0

        ! Initialize count array to zero
        count = 0

        ! Count the number of entries in each column of the original matrix
        do i = 1, nnz
            count(col_ind(i)) = count(col_ind(i)) + 1
        end do
        ! Set up the row pointers for the transposed matrix
        trans_row_ptr(1) = 1
        do i = 2, ncol+1
            trans_row_ptr(i) = trans_row_ptr(i-1) + count(i-1)
        end do

        ! Reset the count array to use it for positioning in the transposed matrix
        count = 0

        !$omp parallel do private(i, j, k, start, endd) &
        !$omp& shared(nrow, ncol, row_ptr, col_ind, values, trans_row_ptr, trans_col_ind, trans_values, count)
        do i = 1, nrow
            start = row_ptr(i)
            endd = row_ptr(i+1) - 1

            do j = start, endd
                k = col_ind(j)
                !$omp critical
                trans_col_ind(trans_row_ptr(k) + count(k)) = i
                trans_values(trans_row_ptr(k) + count(k)) = values(j)
                count(k) = count(k) + 1
                !$omp end critical
            end do
        end do
        !$omp end parallel do

        ! Sort each row of the transposed matrix
        !$omp parallel do private(i, temp_ind, temp_values) shared(trans_row_ptr, trans_col_ind, trans_values)
        do i = 1, ncol
            if (trans_row_ptr(i) < trans_row_ptr(i+1)) then
                allocate(temp_ind(trans_row_ptr(i+1)-trans_row_ptr(i)))
                allocate(temp_values(trans_row_ptr(i+1)-trans_row_ptr(i)))

                temp_ind = trans_col_ind(trans_row_ptr(i):trans_row_ptr(i+1)-1)
                temp_values = trans_values(trans_row_ptr(i):trans_row_ptr(i+1)-1) 
                call sort_row(trans_row_ptr(i+1)-trans_row_ptr(i), temp_ind, temp_values)         
                trans_col_ind(trans_row_ptr(i):trans_row_ptr(i+1)-1) = temp_ind
                trans_values(trans_row_ptr(i):trans_row_ptr(i+1)-1) = temp_values
                
                deallocate(temp_ind)
                deallocate(temp_values)
            end if
        end do
        !$omp end parallel do

    end subroutine transpose_csr


end module