perf_m.f90 Source File


Source Code

module perf_m
    !! Profiling
    use MPI
    use precision_m, only: DP
    use screen_io_m, only: get_stderr, get_stdout
    use comm_handling_m, only: is_master
    implicit none

    integer, private, parameter :: nperf_max=128
    !! Maximum number of profiling sections
    integer, private, parameter :: clen=128
    !! Maximum length of profiling description
    integer :: nperf = 0
    !! Number of profiling sections
    integer, dimension(nperf_max), private :: perf_ncalls = 0
    !! Number that a section was called
    logical, dimension(nperf_max), private :: perf_on = .false.
    !! Indicates if profiling is switched on/off
    real(DP), dimension(nperf_max), private :: perf_tstart = 0.0_DP
    !! Start times of profiling sections
    real(DP), dimension(nperf_max), private :: perf_time = 0.0_DP
    !! Elapsed time in profiling sections
    real(DP), dimension(nperf_max), private :: perf_time_last = 0.0_DP
    !! Elapsed time of last profiling sections
    character(len=clen), dimension(nperf_max), private :: perf_name = ''
    !! Names for profiling description

    public :: perf_reset
    public :: perf_start
    public :: perf_stop
    public :: perf_print
    public :: perf_get_time_last

    private :: find_perf_index

contains

    integer function find_perf_index(nm)
        !! Finds index of perf section, returns zero if section was not found
        character(len=*), intent(in) :: nm

        integer :: i
        character(len=clen) :: nmc

        nmc = nm

        find_perf_index = 0
        do i = 1, nperf
            if (nmc == perf_name(i)) then
                find_perf_index = i
            endif
        enddo

    end function


    subroutine perf_reset(nm)
        !! Resets profiling (if nm is not present, all profiling sections are reset)
        character(len=*), intent(in), optional ::nm
        !! Name of profiling section that shall be reset

        integer :: iperf, i

        if (present(nm)) then
            iperf = find_perf_index(nm)
            if (iperf /= 0) then
                do i = iperf + 1, nperf
                    perf_ncalls(i-1)    = perf_ncalls(i)
                    perf_on(i-1)        = perf_on(i)
                    perf_tstart(i-1)    = perf_tstart(i)
                    perf_time(i-1)      = perf_time(i)
                    perf_time_last(i-1) = perf_time_last(i)
                    perf_name(i-1)      = perf_name(i)
                enddo
                nperf = nperf - 1
            else
                write(get_stderr(), *) &
                    'error(perf_reset), section could not be found',nm
                error stop
            endif
        else
            do iperf = 1, nperf_max
                perf_ncalls(iperf)    = 0
                perf_on(iperf)        = .false.
                perf_tstart(iperf)    = 0.0_DP
                perf_time(iperf)      = 0.0_DP
                perf_time_last(iperf) = 0.0_DP
                perf_name(iperf)      = ''
            enddo
            nperf = 0
        endif

    end subroutine

    subroutine perf_start(nm)
        !! Starts profiling
        character(len=*), intent(in) :: nm
        !! Name of profiling section

        integer :: iperf

        iperf = find_perf_index(nm)
        if (iperf == 0) then
            nperf = nperf + 1
            if (nperf > nperf_max) then
                write(get_stderr(), *) &
                    'error(perf_start), maximum number of &
                    &profiling section reached'
                error stop
            endif
            iperf = nperf
            perf_name(iperf) = nm
        else
            if (perf_on(iperf)) then
                write(get_stderr(), *) &
                    'error(perf_start), section already on', nm
                error stop
            endif
        endif

        perf_on(iperf) = .true.
        perf_tstart(iperf) = MPI_wtime()

    end subroutine

    subroutine perf_stop(nm)
        !! Starts profiling
        character(len=*), intent(in) :: nm
        !! Name of profiling section

        integer :: iperf

        iperf = find_perf_index(nm)
        if (iperf == 0) then
            write(get_stderr(), *) &
                'error(perf_stop), section not available', nm
            error stop
        else
            if (.not.perf_on(iperf)) then
                write(get_stderr(), *) &
                    'error(perf_stop), section is not active', nm
                error stop
            endif
        endif

        perf_on(iperf)        = .false.
        perf_ncalls(iperf)    = perf_ncalls(iperf)+1
        perf_time_last(iperf) = MPI_wtime() - perf_tstart(iperf)
        perf_time(iperf)      = perf_time(iperf) + perf_time_last(iperf)

    end subroutine

    real(DP) function perf_get_time_last(nm)
        !! Returns time [s] spent in last profiling section
        character(len=*), intent(in) :: nm
        !! Name of profiling section

        integer :: iperf

        iperf = find_perf_index(nm)
        if (iperf == 0) then
            write(get_stderr(), *) &
                'error(perf_get_time_last), section not available', nm
            error stop
        endif

        perf_get_time_last =  perf_time_last(iperf)

    end function

    subroutine perf_print(nm, nm_ref, comm)
        !! Prints profiling statistics to screen
        character(len=*), intent(in), optional :: nm
        !! Name of profiling section that shall be printed,
        !! if not present all available profiling sections are printed
        character(len=*), intent(in), optional :: nm_ref
        !! Name of profiling section that is a reference section,
        !! if not present, section with maximum time is chosen as reference
        integer, intent(in), optional :: comm
        !! Prints profiling evaluated over processes in comm
        !! if not present only statistics for master is performed

        integer :: iperf, iref, inm, ierr, tmp(1)
        integer, dimension(nperf) :: ncalls
        real(DP), dimension(nperf) :: time_all
        real(DP) :: time_av, frac

        if (present(comm)) then
            call MPI_Allreduce(perf_ncalls, ncalls, nperf, MPI_INTEGER, MPI_SUM, comm, ierr)
            call MPI_Allreduce(perf_time, time_all, nperf, MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
        else
            ncalls(1:nperf) = perf_ncalls(1:nperf)
            time_all(1:nperf) = perf_time(1:nperf)
        endif
        if (present(nm)) then
            inm = find_perf_index(nm)
            if (inm == 0) then
                write(get_stderr(), *) &
                    'error(perf_print), section not available', nm
                error stop
            endif
        endif

        if (present(nm_ref)) then
            iref = find_perf_index(nm_ref)
            if (iref == 0) then
                write(get_stderr(), *) &
                    'error(perf_print), &
                    &section for reference not available', nm_ref
                error stop
            endif
        else
            tmp = maxloc(perf_time)
            iref = tmp(1)
        endif

        if (is_master()) then

            if (.not.present(nm)) then
                ! print header
                write(get_stdout(), *) ''
                write(get_stdout(), *) &
                    'Result of profiling -----------------------------------'
                write(get_stdout(), 107) &
                    '         Section name          ','    #calls    ',&
                    '   time [s]   ',' time/#calls  ','rel. time [%] '
                107 FORMAT(A32,A14,A14,A14,A14)

                do iperf = 1, nperf
                    time_av = time_all(iperf) / max(ncalls(iperf),1)
                    frac    = time_all(iperf) / time_all(iref) * 100
                    write(get_stdout(), 100) &
                        perf_name(iperf), ncalls(iperf), &
                        time_all(iperf), time_av, frac
                100 FORMAT(A32,3X,I6,7X,F0.6,7X,F0.6,7X,F7.3)
                enddo

                if (.not.present(nm_ref)) then
                    write(get_stdout(), *) &
                        '----------------------------&
                        &---------------------------'
                endif

            else

                time_av = time_all(inm) / max(ncalls(inm),1)
                frac    = time_all(inm) / time_all(iref) * 100
                write(get_stdout(), 100) &
                    perf_name(inm), ncalls(inm), time_all(inm), time_av, frac
                write(get_stdout(), *) ''
            endif

        endif

    end subroutine

end module perf_m