!! Copyright (C) 2016 X. Andrade
!!
!! This program is free software; you can redistribute it and/or modify
!! it under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 2, or (at your option)
!! any later version.
!!
!! This program is distributed in the hope that it will be useful,
!! but WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!! GNU General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with this program; if not, write to the Free Software
!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
!! 02110-1301, USA.
!!

#include "global.h"

module debug_oct_m
  use global_oct_m
  use namespace_oct_m
  use mpi_oct_m
  use mpi_debug_oct_m
  use loct_oct_m
  use parser_oct_m

  implicit none

  private
  public ::             &
    debug_t,            &
    debug_init,         &
    debug_enable,       &
    debug_disable,      &
    debug_delete_trace, &
    debug_open_trace,   &
    debug,              &
    epoch_time_diff,    &
#ifndef NDEBUG
    debug_push_sub,     &
    debug_pop_sub,      &
#endif
    debug_clean_path

  type debug_t
    private
    logical, public :: info
    logical, public :: trace
    logical, public :: trace_term
    logical, public :: trace_file
    logical :: extra_checks
    logical, public :: interaction_graph
    logical, public :: interaction_graph_full
    logical, public :: propagation_graph
    logical, public :: instrument
    integer :: bits
    character(len=MAX_PATH_LEN), public :: instr_sub_name
    integer, public :: instr_tool
  end type debug_t

  type(debug_t), save :: debug

  !> max_lun is currently 99, i.e. we can hardwire unit_offset above 1000
  integer, parameter :: unit_offset = 1000

  interface
    subroutine debug_verrou_start_instrumentation() bind(C)
    end subroutine debug_verrou_start_instrumentation

    subroutine debug_verrou_stop_instrumentation() bind(C)
    end subroutine debug_verrou_stop_instrumentation

    subroutine debug_fenv_start_instrumentation() bind(C)
    end subroutine debug_fenv_start_instrumentation

    subroutine debug_fenv_stop_instrumentation() bind(C)
    end subroutine debug_fenv_stop_instrumentation
  end interface

contains

  subroutine debug_init(this, namespace)
    type(debug_t),     intent(out)   :: this
    type(namespace_t), intent(in)    :: namespace

    character(len=256) :: node_hook
    logical :: file_exists, mpi_debug_hook
    integer :: sec, usec
    type(block_t) :: blk
    integer :: line

    !%Variable Debug
    !%Type flag
    !%Default no
    !%Section Execution::Debug
    !%Description
    !% This variable controls the amount of debugging information
    !% generated by Octopus. You can use include more than one option
    !% with the + operator.
    !%Option no 0
    !% (default) <tt>Octopus</tt> does not enter debug mode.
    !%Option info 1
    !% Octopus prints additional information to the terminal.
    !%Option trace 2
    !% Octopus generates a stack trace as it enters end exits
    !% subroutines. This information is reported if Octopus stops with
    !% an error.
    !%Option trace_term 4
    !% The trace is printed to the terminal as Octopus enters or exits subroutines. This slows down execution considerably.
    !%Option trace_file 8
    !% The trace is written to files in the <tt>debug</tt>
    !% directory. For each node (when running in parallel) there is a file called
    !% <tt>debug_trace.&lt;rank&gt;</tt>. Writing these files slows down the code by a huge factor and
    !% it is usually only necessary for parallel runs.
    !%Option extra_checks 16
    !% This enables Octopus to perform some extra checks, to ensure
    !% code correctness, that might be too costly for regular runs.
    !%Option interaction_graph 32
    !% Octopus generates a dot file containing the graph for a multisystem run.
    !%Option interaction_graph_full 64
    !% Octopus generates a dot file containing the graph for a multisystem run including ghost interactions.
    !%Option propagation_graph 128
    !% Octopus generates a file with information for the propagation diagram.
    !%Option instrument 256
    !% Octopus adds instrumentation to functions specified in an <tt>InstrumentFunctions</tt> block.
    !%End
    call parse_variable(namespace, 'Debug', OPTION__DEBUG__NO, this%bits)

    call from_bits(this)

    !%Variable InstrumentFunctions
    !%Type block
    !%Section Execution::Debug
    !%Description
    !% This input options controls which routines are going to be instrumented
    !% for the tools selected using the <tt>Debug=instrument</tt> option.
    !%
    !%  <br>%<tt>InstrumentFunctions
    !%   <br>&nbsp;&nbsp;"function_name" | instrumentation_tool
    !%  <br>%</tt>
    !%
    !% Here is an example to better understand how this works:
    !%
    !%  <br>%<tt>InstrumentFunctions
    !%   <br>&nbsp;&nbsp;"grid/grid.F90.grid_init_from_grid_stage_1" | verrou
    !%  <br>%</tt>
    !%
    !% NOTE: Currently only a single function can be instrumented!
    !%
    !% Available instrumentation tools:
    !%Option verrou 1
    !% Verrou helps you look for floating-point round-off errors.
    !%Option fenv 2
    !% Enable floating-point exceptions. Requires Octopus to be compiled against glibc.
    !%End
    if (parse_block(namespace, "InstrumentFunctions", blk) == 0) then
      ! TODO: Allow instrumentation of more than a single function
      if (parse_block_n(blk) .gt. 1) then
        write(stderr,'(a)') "Only single function can be instrumented!"
        call mpi_world%abort()
        call loct_exit_failure()
      end if

      do line = 0, parse_block_n(blk) - 1
        call parse_block_string(blk, line, 0, this%instr_sub_name)
        call parse_block_integer(blk, line, 1, this%instr_tool)
        select case (this%instr_tool)
        case (OPTION__INSTRUMENTFUNCTIONS__VERROU)
          write(stderr,'(a)') "Instrumenting " // trim(this%instr_sub_name) // " for Verrou"
#if !defined(HAVE_VERROU)
          write(stderr,'(a)') "requires VERROU but that library was not linked."
          call mpi_world%abort()
          call loct_exit_failure()
#endif
        case (OPTION__INSTRUMENTFUNCTIONS__FENV)
          write(stderr,'(a)') "Instrumenting " // trim(this%instr_sub_name) // " with floating-point exceptions"
        case default
          ASSERT(.false.) ! Should not happen
        end select
      end do
      call parse_block_end(blk)
    end if

    call mpi_debug_init(mpi_world%rank, this%info)

    if (this%info) then
      !%Variable MPIDebugHook
      !%Type logical
      !%Default no
      !%Section Execution::Debug
      !%Description
      !% When debugging the code in parallel it is usually difficult to find the origin
      !% of race conditions that appear in MPI communications. This variable introduces
      !% a facility to control separate MPI processes. If set to yes, all nodes will
      !% start up, but will get trapped in an endless loop. In every cycle of the loop
      !% each node is sleeping for one second and is then checking if a file with the
      !% name <tt>node_hook.xxx</tt> (where <tt>xxx</tt> denotes the node number) exists. A given node can
      !% only be released from the loop if the corresponding file is created. This allows
      !% to selectively run, <i>e.g.</i>, a compute node first followed by the master node. Or, by
      !% reversing the file creation of the node hooks, to run the master first followed
      !% by a compute node.
      !%End
      call parse_variable(global_namespace, 'MPIDebugHook', .false., mpi_debug_hook)
      if (mpi_debug_hook) then
        call loct_gettimeofday(sec, usec)
        call epoch_time_diff(sec,usec)
        write(stdout,'(a,i6,a,i6.6,20x,a)') '* I ',sec,'.',usec,' | MPI debug hook'

        write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' In debug hook'
        write(node_hook,'(i3.3)') mpi_world%rank
        file_exists = .false.

        do while (.not. file_exists)
          inquire(file='node_hook.'//node_hook, exist=file_exists)
          call loct_nanosleep(1,0)
          write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, &
            ' - still sleeping. To release me touch: node_hook.'//trim(node_hook)
        end do

        write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' Leaving debug hook'
        ! remove possible debug hooks
        call loct_rm('node_hook.'//trim(node_hook))

        call loct_gettimeofday(sec, usec)
        call epoch_time_diff(sec,usec)
        write(stdout,'(a,i6,a,i6.6,20x,a)') '* O ', sec, '.', usec,' | MPI debug hook'
      end if
    end if

  end subroutine debug_init

  !--------------------------------------------------

  subroutine debug_enable(this)
    type(debug_t), intent(inout) :: this

    this%info       = .true.
    this%trace      = .true.
    this%trace_term = .true.
    this%trace_file = .true.
    this%interaction_graph = .true.
    this%interaction_graph_full = .true.
    this%propagation_graph = .true.

  end subroutine debug_enable

  !--------------------------------------------------

  subroutine debug_disable(this)
    type(debug_t), intent(inout) :: this

    call from_bits(this)

  end subroutine debug_disable

  !--------------------------------------------------

  subroutine debug_delete_trace()

    integer :: iunit
    character(len=6) :: filenum

    iunit = mpi_world%rank + unit_offset
    write(filenum, '(i6.6)') iunit - unit_offset
    call loct_mkdir('debug')
    call loct_rm('debug/debug_trace.node.'//filenum)

  end subroutine debug_delete_trace

  ! ---------------------------------------------------------

  subroutine debug_open_trace(iunit)
    integer, intent(out) :: iunit

    character(len=6) :: filenum

    iunit = mpi_world%rank + unit_offset
    write(filenum, '(i6.6)') iunit - unit_offset
    call loct_mkdir('debug')
    open(iunit, file = 'debug/debug_trace.node.'//filenum, &
      action='write', status='unknown', position='append')

  end subroutine debug_open_trace

  ! ---------------------------------------------------------

  subroutine from_bits(this)
    type(debug_t), intent(inout) :: this

    this%info         = (bitand(this%bits, OPTION__DEBUG__INFO)         /= 0)
    this%trace_term   = (bitand(this%bits, OPTION__DEBUG__TRACE_TERM)   /= 0)
    this%trace_file   = (bitand(this%bits, OPTION__DEBUG__TRACE_FILE)   /= 0)
    this%trace        = (bitand(this%bits, OPTION__DEBUG__TRACE)        /= 0) .or. this%trace_term .or. this%trace_file
    this%extra_checks = (bitand(this%bits, OPTION__DEBUG__EXTRA_CHECKS) /= 0) .or. this%trace_term .or. this%trace_file
    this%interaction_graph      = (bitand(this%bits, OPTION__DEBUG__INTERACTION_GRAPH)      /= 0)
    this%interaction_graph_full = (bitand(this%bits, OPTION__DEBUG__INTERACTION_GRAPH_FULL) /= 0)
    this%propagation_graph      = (bitand(this%bits, OPTION__DEBUG__PROPAGATION_GRAPH)      /= 0)
    this%instrument = (bitand(this%bits, OPTION__DEBUG__INSTRUMENT) /= 0)

  end subroutine from_bits


  ! ---------------------------------------------------------
  subroutine epoch_time_diff(sec, usec)
    integer, intent(inout) :: sec
    integer, intent(inout) :: usec

    ! this is called by push/pop so there cannot be a push/pop in this routine

    call time_diff(s_epoch_sec, s_epoch_usec, sec, usec)
  end subroutine epoch_time_diff


  ! ---------------------------------------------------------
  !> Computes t2 <- t2-t1. sec1,2 and usec1,2 are
  !! seconds,microseconds of t1,2
  subroutine time_diff(sec1, usec1, sec2, usec2)
    integer, intent(in)    :: sec1
    integer, intent(in)    :: usec1
    integer, intent(inout) :: sec2
    integer, intent(inout) :: usec2

    ! this is called by push/pop so there cannot be a push/pop in this routine

    ! Correct overflow.
    if (usec2 - usec1  <  0) then
      usec2 = 1000000 + usec2
      if (sec2 >= sec1) then
        sec2 = sec2 - 1
      end if
    end if

    ! Replace values.
    if (sec2 >= sec1) then
      sec2 = sec2 - sec1
    end if
    usec2 = usec2 - usec1

  end subroutine time_diff


#ifndef NDEBUG
  ! ---------------------------------------------------------
  !> Push a routine to the debug trace.
  subroutine debug_push_sub(sub_name)
    character(len=*), intent(in) :: sub_name

    integer, parameter :: MAX_RECURSION_LEVEL = 50
    integer iunit, sec, usec

    if (debug%instrument) then
      if (debug_clean_path(sub_name) == trim(debug%instr_sub_name)) then
        select case (debug%instr_tool)
        case (OPTION__INSTRUMENTFUNCTIONS__VERROU)
          call debug_verrou_start_instrumentation()
        case (OPTION__INSTRUMENTFUNCTIONS__FENV)
          call debug_fenv_start_instrumentation()
        case default
          ASSERT(.false.) ! cannot happen
        end select
      end if
    end if

    if (.not. debug%trace) return

    call loct_gettimeofday(sec, usec)
    call epoch_time_diff(sec, usec)

    no_sub_stack = no_sub_stack + 1
    if (no_sub_stack >= MAX_RECURSION_LEVEL) then
      sub_stack(MAX_RECURSION_LEVEL) = 'debug_push_sub'
      write(stderr, '(a,i3,a)') 'Too many recursion levels in debug trace (max=', MAX_RECURSION_LEVEL, ')'
      call mpi_world%abort()
      stop
    end if

    sub_stack(no_sub_stack)  = trim(debug_clean_path(sub_name))
    time_stack(no_sub_stack) = loct_clock()

    if (debug%trace_file) then
      call debug_open_trace(iunit)
      call push_sub_write(iunit)
      ! close file to ensure flushing
      close(iunit)
    end if

    if (debug%trace_term .and. mpi_grp_is_root(mpi_world)) then
      ! write to stderr if we are node 0
      call push_sub_write(stderr)
    end if

  contains

    subroutine push_sub_write(iunit_out)
      integer,  intent(in) :: iunit_out

      integer :: ii
      character(len=1000) :: tmpstr

      write(tmpstr,'(a,i6,a,i6.6,f20.6,i8,a)') "* I ", &
        sec, '.', usec, &
        loct_clock(), &
        loct_get_memory_usage() / 1024, " | "
      do ii = no_sub_stack - 1, 1, -1
        write(tmpstr, '(2a)') trim(tmpstr), "..|"
      end do
      write(tmpstr, '(2a)') trim(tmpstr), trim(debug_clean_path(sub_name))
      write(iunit_out, '(a)') trim(tmpstr)

    end subroutine push_sub_write

  end subroutine debug_push_sub

  ! ---------------------------------------------------------
  !> Pop a routine from the debug trace.
  subroutine debug_pop_sub(sub_name)
    character(len=*), intent(in) :: sub_name

    character(len=80) :: sub_name_short
    integer iunit, sec, usec

    if (debug%instrument) then
      if (debug_clean_path(sub_name) == trim(debug%instr_sub_name)) then
        select case (debug%instr_tool)
        case (OPTION__INSTRUMENTFUNCTIONS__VERROU)
          call debug_verrou_stop_instrumentation()
        case (OPTION__INSTRUMENTFUNCTIONS__FENV)
          call debug_fenv_stop_instrumentation()
        case default
          ASSERT(.false.) ! cannot happen
        end select
      end if
    end if

    if (.not. debug%trace) return

    call loct_gettimeofday(sec, usec)
    call epoch_time_diff(sec, usec)

    if (no_sub_stack <= 0) then
      no_sub_stack = 1
      sub_stack(1) = 'pop_sub'
      write(stderr, '(a)') 'Too few recursion levels in debug trace'
      call mpi_world%abort()
      stop
    end if

    ! the name might be truncated in sub_stack, so we copy to a string
    ! of the same size
    sub_name_short = trim(debug_clean_path(sub_name))

    if (sub_name_short /= sub_stack(no_sub_stack)) then
      write(stderr, '(a)') 'Wrong sub name on pop_sub :'
      write(stderr, '(2a)') ' got      : ', sub_name_short
      write(stderr, '(2a)') ' expected : ', sub_stack(no_sub_stack)
      call mpi_world%abort()
      stop
    end if

    if (debug%trace_file) then
      call debug_open_trace(iunit)
      call pop_sub_write(iunit)
      ! close file to ensure flushing
      close(iunit)
    end if

    if (debug%trace_term .and. mpi_grp_is_root(mpi_world)) then
      ! write to stderr if we are node 0
      call pop_sub_write(stderr)
    end if

    no_sub_stack = no_sub_stack - 1

  contains

    subroutine pop_sub_write(iunit_out)
      integer, intent(in) :: iunit_out

      integer :: ii
      character(len=1000) :: tmpstr

      write(tmpstr,'(a,i6,a,i6.6,f20.6,i8, a)') "* O ", &
        sec, '.', usec, &
        loct_clock() - time_stack(no_sub_stack), &
        loct_get_memory_usage() / 1024, " | "
      do ii = no_sub_stack - 1, 1, -1
        write(tmpstr,'(2a)') trim(tmpstr), "..|"
      end do
      write(tmpstr,'(2a)') trim(tmpstr), trim(sub_stack(no_sub_stack))

      write(iunit_out, '(a)') trim(tmpstr)

    end subroutine pop_sub_write

  end subroutine debug_pop_sub
#endif

  ! -----------------------------------------------------------
  !> Prune a filename path to only include subdirectories of the "src" directory.
  character(len=MAX_PATH_LEN) function debug_clean_path(filename) result(clean_path)
    character(len=*), intent(in) :: filename

    integer :: pos

    pos = index(filename, 'src/', back = .true.)
    if (pos == 0) then
      ! 'src/' does not occur
      clean_path = filename
    else
      ! remove 'src/'
      clean_path = filename(pos+4:)
    end if

  end function debug_clean_path

end module debug_oct_m

!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End:
