module csrmat_m !! Algebra routines for !! - vectors !! - sparse matrices in csr format use list_operations_m, only : sort_row use precision_m, only : FP, FP_EPS use netcdf use, intrinsic :: iso_c_binding, only: c_int32_t, c_ptr, c_loc use screen_io_m, only: get_stdout use error_handling_m, only: handle_error, handle_error_netcdf, error_info_t use status_codes_m, only: PARALLAX_ERR_CSRMAT implicit none public :: csr_times_vec public :: csr_times_vec_single public :: csr_residuum public :: sortsum public :: diffvecs_nrm public :: csr_transpose private :: sortsum_csrrow private :: transpose_csr type, public, bind(C) :: csrmat_data_t !! Data type used to expose `csrmat_t` data to C/C++ !! Please see `csrmat_t` for descriptions of individual !! properties/members. !! Values are copied directly. !! Arrays are exposed with pointers. integer(c_int32_t) :: ndim integer(c_int32_t) :: ncol integer(c_int32_t) :: nnz type(c_ptr) :: i type(c_ptr) :: j type(c_ptr) :: val end type csrmat_data_t type, public :: csrmat_t !! Compressed-sparse-row (CSR) matrix format integer :: ndim !! Number of rows (dimension) integer :: ncol !! Number of columns integer :: nnz !! Number of non-zero elements integer, allocatable, dimension(:) :: i !dir$ attributes align:64 :: i !! i-array in CSR format, of dimension ndim+1 integer, allocatable, dimension(:) :: j !dir$ attributes align:64 :: j !! j-array in CSR format, of dimension nnz real(FP), allocatable, dimension(:) :: val !dir$ attributes align:64 :: val !! val-array in CSR format, of dimension nnz contains procedure :: write_netcdf procedure :: read_netcdf procedure :: sortsum procedure :: create_copy final :: destructor procedure :: display procedure, public :: expose_data end type csrmat_t contains subroutine read_netcdf(self, fgid) !! Reads csr matrix from file class(csrmat_t), intent(inout) :: self !! csr matrix integer, intent(in) :: fgid !! File or group id number of existing Netcdf4 file integer :: nf90_stat integer :: iddim_i, iddim_nnz, id_i, id_j, id_val integer :: dimi ! read global attributes (metadata) nf90_stat = nf90_get_att(fgid, NF90_GLOBAL,'ndim', self%ndim) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_get_att(fgid, NF90_GLOBAL, 'ncol', self%ncol) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! get dimensions nf90_stat = nf90_inq_dimid(fgid, 'dimi', iddim_i) ! dimension of i is ndim+1 call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_inquire_dimension(fgid, iddim_i, len = dimi) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_inq_dimid(fgid, 'nnz', iddim_nnz) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_inquire_dimension(fgid, iddim_nnz, len = self%nnz) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! allocate fields if (.not. allocated(self%i)) then allocate( self%i( dimi ) ) else call handle_error("CSR-fields must not be allocated (i)!", & PARALLAX_ERR_CSRMAT, __LINE__, __FILE__) endif if (.not. allocated(self%j)) then allocate( self%j( self%nnz ) ) else call handle_error("CSR-fields must not be allocated (j)!", & PARALLAX_ERR_CSRMAT, __LINE__, __FILE__) endif if (.not. allocated(self%val)) then allocate( self%val( self%nnz ) ) else call handle_error("CSR-fields must not be allocated (nnz)!", & PARALLAX_ERR_CSRMAT, __LINE__, __FILE__) endif ! read fields nf90_stat = nf90_inq_varid(fgid, 'i', id_i) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_get_var(fgid, id_i, self%i) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_inq_varid(fgid, 'j', id_j) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_get_var(fgid, id_j, self%j) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_inq_varid(fgid, 'val', id_val) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_get_var(fgid, id_val, self%val) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) end subroutine subroutine write_netcdf(self, fgid) !! writes csr matrix to file implicit none class(csrmat_t), intent(in) :: self !!csr matrix integer, intent(in) :: fgid !! file or group id number of existing Netcdf4 file integer :: nf90_stat integer :: iddim_i, iddim_nnz integer :: id_i, id_j, id_val ! write global attributes (metadata) nf90_stat = nf90_put_att(fgid, NF90_GLOBAL,'ndim', self%ndim) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_put_att(fgid, NF90_GLOBAL, 'ncol', self%ncol) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! define dimensions nf90_stat = nf90_def_dim(fgid, "dimi", self%ndim+1, iddim_i) ! dimension of i is ndim+1 call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_def_dim(fgid, "nnz", self%nnz, iddim_nnz) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! define variables nf90_stat = nf90_def_var(fgid, "i", NF90_INT, iddim_i, id_i ) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_def_var(fgid, "j", NF90_INT, iddim_nnz, id_j ) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_def_var(fgid, "val", NF90_DOUBLE, iddim_nnz, id_val ) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! end of definition nf90_stat = nf90_enddef(fgid) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! put variables nf90_stat = nf90_put_var(fgid, id_i, self%i ) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_put_var(fgid, id_j, self%j(1 : self%nnz) ) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) nf90_stat = nf90_put_var(fgid, id_val, self%val(1 : self%nnz) ) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) ! put into redefinition state nf90_stat = nf90_redef(fgid) call handle_error_netcdf(nf90_stat, __LINE__, __FILE__) end subroutine write_netcdf subroutine create_copy(self, res) !! Creates a copy of the matrix class(csrmat_t), intent(in) :: self !! Matrix type(csrmat_t), intent(out) :: res !! Copy of matrix integer :: row, k res%ndim = self%ndim res%ncol = self%ncol res%nnz = self%nnz allocate(res%i(res%ndim+1)) allocate(res%j(res%nnz)) allocate(res%val(res%nnz)) !$omp parallel default(none) private(row, k) shared(self, res) !$omp do do row = 1, self%ndim+1 res%i(row) = self%i(row) enddo !$omp end do !$omp do do k = 1, self%nnz res%j(k) = self%j(k) res%val(k) = self%val(k) enddo !$omp end do !$omp end parallel end subroutine subroutine sortsum(self) !! sorts and sums entries in ascending j-order for each row implicit none class(csrmat_t), intent(inout) :: self !!csr matrix integer, parameter :: wsize = 20 integer, dimension( self%ndim+1 ) :: icsr2 integer :: row, j, nz, nzt, js integer, dimension(wsize) :: workj real(FP), dimension(wsize) :: workr nz = 0 do row = 1, self%ndim nzt = self%i(row+1) - self%i(row) js = self%i(row) do j = 1, nzt workj(j) = self%j(js+j-1) workr(j) = self%val(js+j-1) enddo call sortsum_csrrow(nzt, workj, workr) icsr2(row) = nz+1 js = icsr2(row) do j = 1, nzt self%j(js+j-1) = workj(j) self%val(js+j-1) = workr(j) enddo nz = nz + nzt enddo icsr2( self%ndim+1 ) = nz+1 self%i=icsr2 self%nnz = nz end subroutine sortsum subroutine sortsum_csrrow(nz, j, val) !! uniques and sorts values of j in ascending order for single row !! corresponding values are summed up in val implicit none integer,intent(inout)::nz !! number of nonzeros in row (before and aafter) integer,intent(inout),dimension(nz)::j !! column indices, on output sorted real(FP),intent(inout),dimension(nz)::val !! values, on output sorted and summed logical :: mask(nz) integer :: jmin, k, r, jnew(nz), nzn, cnt real(FP), dimension(nz) :: valnew mask=.true. jnew = 0 valnew = 0.0_FP cnt = 0 r = 0 nzn = nz do while (r < nzn) r = r+1 valnew(r) = 0 jmin = minval(j, mask) cnt = 0 do k = 1,nz if (j(k) == jmin) then mask(k) = .false. valnew(r) = valnew(r) + val(k) cnt = cnt+1 endif enddo nzn = nzn - cnt + 1 jnew(r) = jmin enddo nz = nzn j = jnew val = valnew end subroutine sortsum_csrrow subroutine display(self) !! displays some information on matrix implicit none class(csrmat_t), intent(in) :: self !!csr matrix write(get_stdout(), 101) self%ndim, self%ncol, self%nnz 101 format(1X,'ndim = ',I10,1X,'; ncol = ',I10,1X,' ; nnz = ',I10,1X,' ;') end subroutine display subroutine expose_data(self, data_object) !! Exposes csrmat data through csrmat_data_t object class(csrmat_t), intent(in), target :: self !! Instance of the type type(csrmat_data_t), intent(out) :: data_object !! Destination csrmat data object that contains the exposed data of the !! csrmat object. ! Copy values data_object%ndim = self%ndim data_object%ncol = self%ncol data_object%nnz = self%nnz ! Now assign pointers data_object%i = c_loc(self%i) data_object%j = c_loc(self%j) data_object%val = c_loc(self%val) end subroutine expose_data subroutine destructor(self) !! destructor, frees memory type(csrmat_t), intent(inout) :: self !!csr matrix self%ndim = 0 self%ncol = 0 self%nnz = 0 if (allocated(self%i)) deallocate(self%i) if (allocated(self%j)) deallocate(self%j) if (allocated(self%val)) deallocate(self%val) end subroutine destructor subroutine csr_times_vec(a, x, y) !! Multiplies matrix with vector y = A*x !! Thread safe, i.e. can be called from within parallel region implicit none type(csrmat_t), intent(in) :: a !!csr matrix real(FP), dimension(a%ncol), intent(in) :: x !! vector x real(FP), dimension(a%ndim), intent(out) :: y !! vector y (result) integer :: row #ifdef __NVCOMPILER !$OMP SIMD #else !$OMP DO SIMD #endif do row = 1, a%ndim y(row) = dot_product( & a%val(a%i(row):a%i(row+1)-1), & x(a%j(a%i(row):a%i(row+1)-1))) enddo #ifdef __NVCOMPILER !$OMP END SIMD #else !$OMP END DO SIMD #endif end subroutine csr_times_vec pure real(FP) function csr_times_vec_single(a, x, row) !! Computes element `row` of vector y = A*x !! Thread safe, i.e. can be called from within parallel region implicit none type(csrmat_t), intent(in) :: a !!csr matrix real(FP), dimension(a%ncol), intent(in) :: x !! vector x integer, intent(in) :: row !! row of matrix csr_times_vec_single = dot_product( & a%val(a%i(row):a%i(row+1)-1), & x(a%j(a%i(row):a%i(row+1)-1))) end function subroutine csr_residuum(a, x, b, res, resmax, epsb) !! Computes the residuum ||Ax-b|| / (||b||+eps) type(csrmat_t), intent(in) :: a !!csr matrix real(FP), dimension(a%ncol), intent(in) :: x !! Vector x real(FP), dimension(a%ndim), intent(in) :: b !! Vector b real(FP), intent(out) :: res !! Residuum (root mean square) real(FP), intent(out), optional :: resmax !! Residuum (maximum) real(FP), intent(in), optional :: epsb !! Tolerance for division of b in residual !! Dafault value FP_EPS integer :: row real(FP) :: y, bnrm, rmax, epsb_val if (present(epsb)) then epsb_val = epsb else epsb_val = FP_EPS endif bnrm = 0.0_FP res = 0.0_FP rmax = 0.0_FP !$OMP PARALLEL PRIVATE(row, y) !$OMP DO REDUCTION(+: res, bnrm) REDUCTION(max: rmax) do row = 1, a%ndim ! multiply y = a*x y = dot_product(a%val(a%i(row):a%i(row+1)-1), x(a%j(a%i(row):a%i(row+1)-1))) bnrm = bnrm + b(row)**2 res = res + (y - b(row))**2 rmax = max(rmax, abs(y - b(row))) enddo !$OMP END DO !$OMP END PARALLEL res = res / (bnrm + epsb_val) res = sqrt(res) if (present(resmax)) then resmax = rmax endif end subroutine real(FP) function diffvecs_nrm(n, x, y) !! computes norm of difference of two vectors, i.e. ||x-y|| / n implicit none integer, intent(in) :: n !! dimension of vectors real(FP), dimension(n), intent(in) :: x !! vector x real(FP), dimension(n), intent(in) :: y !! vector y integer :: i real(FP) :: d d = 0.0_FP !$OMP PARALLEL DO PRIVATE(i) REDUCTION(+:d) do i = 1, n d = d + (x(i)-y(i))**2 enddo !$OMP END PARALLEL DO diffvecs_nrm = sqrt(d / (1.0_FP*n) ) end function diffvecs_nrm #ifdef ENABLE_MKL subroutine csr_transpose(a, atransp) !! Transposes matrix a ! With MKL enabled, first it converts matrix into COO format ! and transposes it in COO format, then uses MKL routine ! to convert from COO to CSR format. type(csrmat_t), intent(in) :: a !! CSR matrix type(csrmat_t), allocatable, intent(out) :: atransp !! Transposed matrix in CSR format integer :: i, k, info integer, dimension(a%nnz) :: atransp_i_coo, atransp_j_coo integer, dimension(6) :: job_coo_to_csr if (allocated(atransp)) deallocate(atransp) allocate(atransp) atransp%ndim = a%ncol atransp%ncol = a%ndim atransp%nnz = a%nnz allocate(atransp%i(atransp%ndim+1)) allocate(atransp%j(atransp%nnz)) allocate(atransp%val(atransp%nnz)) ! atransp in coo format !$omp parallel default(none) & !$omp private(i, k) shared(a, atransp_i_coo, atransp_j_coo) !$omp do do i = 1, a%ndim do k = a%i(i), a%i(i+1)-1 atransp_j_coo(k) = i atransp_i_coo(k) = a%j(k) enddo enddo !$omp end do !$omp end parallel ! Use MKL dcsrcoo routine to convert atransp from coo to sorted csr format job_coo_to_csr(1) = 2 ! coo -> csr (sorted) job_coo_to_csr(2) = 1 ! one based indexing csr job_coo_to_csr(3) = 1 ! one based indexing coo job_coo_to_csr(5) = atransp%nnz job_coo_to_csr(6) = 0 call MKL_dcsrcoo(job_coo_to_csr, atransp%ndim, atransp%val, atransp%j, & atransp%i, atransp%nnz, a%val, atransp_i_coo, & atransp_j_coo, info) if (info /= 0) then call handle_error("CSR-transpose failed!", & PARALLAX_ERR_CSRMAT, __LINE__, __FILE__, & additional_info=error_info_t("MKL info:", [info])) endif end subroutine #else subroutine csr_transpose(a, atransp) !! Transposes matrix a ! When MKL is not enabled, the in-house routine transpose_csr ! below is used instead type(csrmat_t), intent(in) :: a !! CSR matrix type(csrmat_t), allocatable, intent(out) :: atransp !! Transposed matrix in CSR format integer :: i, k, info integer, dimension(a%nnz) :: atransp_i_coo, atransp_j_coo integer, dimension(6) :: job_coo_to_csr if (allocated(atransp)) deallocate(atransp) allocate(atransp) atransp%ndim = a%ncol atransp%ncol = a%ndim atransp%nnz = a%nnz allocate(atransp%i(atransp%ndim+1)) allocate(atransp%j(atransp%nnz)) allocate(atransp%val(atransp%nnz)) ! MKL-free implementation of csr matrix transpose call transpose_csr(a%ndim, a%ncol, a%nnz, & a%i, a%j, a%val, & atransp%i, atransp%j, atransp%val) end subroutine csr_transpose #endif subroutine transpose_csr(nrow, ncol, nnz, & row_ptr, col_ind, values, & trans_row_ptr, trans_col_ind, trans_values) !! MKL-free subroutine to transpose a sparse matrix in CSR format. implicit none ! Arguments integer, intent(in) :: nrow, ncol, nnz !! Dimensions of the original CSR matrix. integer, intent(in) :: row_ptr(nrow+1), col_ind(nnz) real(FP), intent(in) :: values(nnz) !! Row pointer, column index and element values of the original CSR matrix integer, intent(out) :: trans_row_ptr(ncol+1), trans_col_ind(nnz) real(FP), intent(out) :: trans_values(nnz) !! Row pointer, column index and element values of the transposed CSR matrix integer :: i, j, k, count(ncol) integer, allocatable :: temp_ind(:) real(FP), allocatable :: temp_values(:) integer :: start, endd ! Initialize transposed row pointer array trans_row_ptr = 0 trans_col_ind = 0 trans_values = 0.0 ! Initialize count array to zero count = 0 ! Count the number of entries in each column of the original matrix do i = 1, nnz count(col_ind(i)) = count(col_ind(i)) + 1 end do ! Set up the row pointers for the transposed matrix trans_row_ptr(1) = 1 do i = 2, ncol+1 trans_row_ptr(i) = trans_row_ptr(i-1) + count(i-1) end do ! Reset the count array to use it for positioning in the transposed matrix count = 0 !$omp parallel do private(i, j, k, start, endd) & !$omp& shared(nrow, ncol, row_ptr, col_ind, values, trans_row_ptr, trans_col_ind, trans_values, count) do i = 1, nrow start = row_ptr(i) endd = row_ptr(i+1) - 1 do j = start, endd k = col_ind(j) !$omp critical trans_col_ind(trans_row_ptr(k) + count(k)) = i trans_values(trans_row_ptr(k) + count(k)) = values(j) count(k) = count(k) + 1 !$omp end critical end do end do !$omp end parallel do ! Sort each row of the transposed matrix !$omp parallel do private(i, temp_ind, temp_values) shared(trans_row_ptr, trans_col_ind, trans_values) do i = 1, ncol if (trans_row_ptr(i) < trans_row_ptr(i+1)) then allocate(temp_ind(trans_row_ptr(i+1)-trans_row_ptr(i))) allocate(temp_values(trans_row_ptr(i+1)-trans_row_ptr(i))) temp_ind = trans_col_ind(trans_row_ptr(i):trans_row_ptr(i+1)-1) temp_values = trans_values(trans_row_ptr(i):trans_row_ptr(i+1)-1) call sort_row(trans_row_ptr(i+1)-trans_row_ptr(i), temp_ind, temp_values) trans_col_ind(trans_row_ptr(i):trans_row_ptr(i+1)-1) = temp_ind trans_values(trans_row_ptr(i):trans_row_ptr(i+1)-1) = temp_values deallocate(temp_ind) deallocate(temp_values) end if end do !$omp end parallel do end subroutine transpose_csr end module