!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
!! Copyright (C) 2021 N. Tancogne-Dejean
!!
!! This program is free software; you can redistribute it and/or modify
!! it under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 2, or (at your option)
!! any later version.
!!
!! This program is distributed in the hope that it will be useful,
!! but WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!! GNU General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with this program; if not, write to the Free Software
!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
!! 02110-1301, USA.
!!

#include "global.h"

module ion_interaction_oct_m
  use atom_oct_m
  use comm_oct_m
  use debug_oct_m
  use global_oct_m
  use jellium_oct_m
  use distributed_oct_m
  use kpoints_oct_m
  use lattice_vectors_oct_m
  use loct_math_oct_m
  use messages_oct_m
  use mpi_oct_m
  use multicomm_oct_m
  use namespace_oct_m
  use parser_oct_m
  use profiling_oct_m
  use ps_oct_m
  use pseudopotential_oct_m
  use space_oct_m
  use species_oct_m
  use species_factory_oct_m

  implicit none

  private
  public ::                           &
    ion_interaction_t,                &
    ion_interaction_init,             &
    ion_interaction_end,              &
    ion_interaction_calculate,        &
    ion_interaction_init_parallelization, &
    ion_interaction_test,             &
    ion_interaction_stress

  type ion_interaction_t
    real(real64) :: alpha
    type(distributed_t) :: dist
  end type ion_interaction_t

  integer, parameter ::            &
    ION_COMPONENT_REAL     = 1,    &
    ION_COMPONENT_SELF     = 2,    &
    ION_COMPONENT_FOURIER  = 3,    &
    ION_NUM_COMPONENTS     = 3

contains

  subroutine ion_interaction_init(this, namespace, space, natoms)
    type(ion_interaction_t),      intent(out)   :: this
    type(namespace_t),            intent(in)    :: namespace
    class(space_t),               intent(in)    :: space
    integer,                      intent(in)    :: natoms

    PUSH_SUB(ion_interaction_init)

    !%Variable EwaldAlpha
    !%Type float
    !%Default 0.21
    !%Section Hamiltonian
    !%Description
    !% The value 'Alpha' that controls the splitting of the Coulomb
    !% interaction in the Ewald sum used to calculation the ion-ion
    !% interaction for periodic systems. This value affects the speed
    !% of the calculation, normally users do not need to modify it.
    !%End
    call parse_variable(namespace, 'EwaldAlpha', 0.21_real64, this%alpha)

    call distributed_nullify(this%dist, natoms)

    if (space%periodic_dim == 1) then
      call messages_write('For systems that are periodic in 1D, the interaction between', new_line = .true.)
      call messages_write('ions is not implemented. This affects the calculation', new_line = .true.)
      call messages_write('of total energy and forces, so both are zeroed.')
      call messages_warning(namespace=namespace)
    end if

    POP_SUB(ion_interaction_init)
  end subroutine ion_interaction_init

  subroutine ion_interaction_init_parallelization(this, natoms, mc)
    type(ion_interaction_t),      intent(inout) :: this
    integer,                      intent(in)    :: natoms
    type(multicomm_t),            intent(in)    :: mc

    PUSH_SUB(ion_interaction_init_parallelization)

    !As the code below is not parallelized with any of k-point, states nor domain
    !we can safely parallelize it over atoms
    if (debug%info) then
      call distributed_init(this%dist, natoms, mc%master_comm, "Ions")
    else
      call distributed_init(this%dist, natoms, mc%master_comm)
    end if

    POP_SUB(ion_interaction_init_parallelization)
  end subroutine ion_interaction_init_parallelization

  subroutine ion_interaction_end(this)
    type(ion_interaction_t), intent(inout) :: this

    PUSH_SUB(ion_interaction_end)

    this%alpha = -M_ONE

    call distributed_end(this%dist)

    POP_SUB(ion_interaction_end)
  end subroutine ion_interaction_end

  !> @brief Top level routine for computing electrostatic energies and forces between ions.
  !!
  !! For additional details about this routine, see
  !! http://octopus-code.org/wiki/Developers:Ion-Ion_interaction
  subroutine ion_interaction_calculate(this, space, latt, atom, natoms, pos, lsize, energy, force, &
    energy_components, force_components)
    type(ion_interaction_t),  intent(inout) :: this                       !< Ion interaction instance
    class(space_t),           intent(in)    :: space                      !< System dimensions and boundary conditions
    type(lattice_vectors_t),  intent(in)    :: latt                       !< Crystal lattice
    type(atom_t),             intent(in)    :: atom(:)                    !< Atoms
    integer,                  intent(in)    :: natoms                     !< Number of atoms == size(atoms)
    real(real64),             intent(in)    :: pos(1:space%dim,1:natoms)  !< Atomic positions
    real(real64),             intent(in)    :: lsize(:)                   !< Box half-lengths
    real(real64),             intent(out)   :: energy                     !< Total ion-ion electrostatic energy
    real(real64),             intent(out)   :: force(:, :)                !< Total force on each ion
    real(real64), optional,          intent(out)   :: energy_components(:)       !< Energy contributions
    real(real64), optional,          intent(out)   :: force_components(:, :, :)  !< Force contributions


    PUSH_SUB(ion_interaction_calculate)
    call profiling_in("ION_ION_INTERACTION")

    if (present(energy_components)) then
      ASSERT(ubound(energy_components, dim = 1) == ION_NUM_COMPONENTS)
      energy_components = M_ZERO
    end if

    if (present(force_components)) then
      ASSERT(all(ubound(force_components) == (/space%dim, natoms, ION_NUM_COMPONENTS/)))
      force_components = M_ZERO
    end if

    if (space%is_periodic() .and. any_species_is_jellium_sphere(atom)) then
      call messages_not_implemented('No periodic implementation of ion-ion energy for the jellium sphere')
    end if

    if (space%is_periodic()) then
      if (all_species_are_jellium_slab(atom)) then
        energy = jellium_slab_energy_periodic(space, atom, lsize)
        force = 0._real64
      else
        call ion_interaction_periodic(this, space, latt, atom, natoms, pos, energy, force, energy_components, force_components)
      end if
    else
      call ion_interaction_finite(this%dist, space, atom, pos, lsize, energy, force)
      energy = energy + jellium_self_energy_finite(this%dist, latt, atom, lsize)
    end if

    call profiling_out("ION_ION_INTERACTION")
    POP_SUB(ion_interaction_calculate)

  end subroutine ion_interaction_calculate

  !> @brief Electrostatic energy of a periodic jellium slab.
  !!
  !! From the inner potential been \f$ v(z) = 4\pi \rho_0 ( z^2/2 + L^2/8)\f$
  !! we get the energy as
  !! \f$U = 1/2 \int \rho(r) v(r) d^3r = \pi / 3 \rho^2 A L^3\f$
  function jellium_slab_energy_periodic(space, atom, lsize) result(energy)
    class(space_t),           intent(in)    :: space           !< System dimensions and boundary conditions
    type(atom_t),             intent(in)    :: atom(:)         !< Atoms
    real(real64),             intent(in)    :: lsize(:)        !< Box half-lengths
    real(real64)                            :: energy          !< Energy of periodic jellium slab

    real(real64) :: area

    ! Implementation assumes a single atom
    ASSERT(size(atom) == 1)
    ! This is only allowed if periodic dim = 2. In that case the lattice volume is in fact an area.
    ASSERT(space%periodic_dim == 2)

    select type(spec => atom(1)%species)
    type is (jellium_slab_t)
      area = lsize(1) * lsize(2) * M_FOUR
      energy = M_PI * spec%get_density(lsize) **2 * area * spec%thickness()**3 / M_THREE
    class default
      ASSERT(.false.)
    end select

  end function jellium_slab_energy_periodic

  !>@brief Electrostatic self-interaction for jellium instances, with orthogonal cells.
  !!
  !! If no atoms are of species SPECIES_JELLIUM or SPECIES_JELLIUM_SLAB, the returned
  !! energy is zero.
  !!
  !! @note
  !! One could make the function more readable using pack:
  !! ```fortran
  !! jellium_indices = pack([(i, i=1,natoms)], species == SPECIES_JELLIUM)
  !! jellium_slab_indices = pack([(i, i=1,natoms)], species == SPECIES_JELLIUM_SLAB)
  !! ```
  !! but it means looping over natoms an additional two times with no parallelism.
  !! @endnote
  function jellium_self_energy_finite(dist, latt, atom, lsize) result(energy)
    type(distributed_t),      intent(in)      :: dist        !< Atom MPI distribution
    type(lattice_vectors_t),  intent(in)      :: latt        !< Crystal lattice
    type(atom_t),             intent(in)      :: atom(:)     !< Atoms
    real(real64),             intent(in)      :: lsize(:)    !< Box half-lengths
    real(real64)                              :: energy      !< Jellium contribution to finite electrostatic energy

    real(real64)              :: zi                           !< Ion charge
    integer                   :: iatom
    logical                   :: lattice_is_orthogonal
    class(species_t), pointer :: spec

    PUSH_SUB(jellium_self_energy_finite)

    energy = 0._real64
    lattice_is_orthogonal = .not. latt%nonorthogonal

    do iatom = dist%start, dist%end
      spec => atom(iatom)%species
      zi = spec%get_zval()

      select type(spec)
      type is (jellium_sphere_t)
        energy = energy + (M_THREE / M_FIVE) * zi**2 / spec%radius()
        ! The part depending on the simulation sphere is neglected

      type is (jellium_slab_t)
        ! Jellium slab energy only implemented for orthogonal cells.
        ! One would need to replace (lsize(1) * lsize(2)) * spec%thickness()) with the triple product
        ASSERT(lattice_is_orthogonal)
        energy = energy + M_PI * zi**2 / (M_FOUR * lsize(1)*lsize(2)) * spec%thickness() / M_THREE
        ! The part depending on the simulation box transverse dimension is neglected
      end select
      nullify(spec)
    enddo

    call comm_allreduce(dist%mpi_grp, energy)

    POP_SUB(jellium_self_energy_finite)

  end function jellium_self_energy_finite

  !> @brief Electrostatic Ewald energy and forces for finite systems.
  subroutine ion_interaction_finite(dist, space, atom, pos, lsize, energy, force)
    type(distributed_t),      intent(in)    :: dist        !< Atom distribution
    class(space_t),           intent(in)    :: space       !< System dimensions
    type(atom_t),             intent(in)    :: atom(:)     !< Atoms
    real(real64),             intent(in)    :: pos(:,:)    !< Ion positions
    real(real64),             intent(in)    :: lsize(:)    !< Box half-lengths
    real(real64),             intent(out)   :: energy      !< Total electrostatic energy
    real(real64),             intent(out)   :: force(:, :) !< Forces

    class(species_t), pointer :: species_i, species_j
    real(real64)              :: r(space%dim), f(space%dim)
    real(real64)              :: r_mag                      !< |r_i - r_j|
    real(real64)              :: u_e                        !< Electrostatic energy
    real(real64)              :: zi, zj                     !< Ion charges
    integer                   :: iatom, jatom, natoms

    PUSH_SUB(ion_interaction_finite)

    natoms = size(atom)
    energy = M_ZERO
    force(1:space%dim, 1:natoms) = M_ZERO

    do iatom = dist%start, dist%end
      species_i => atom(iatom)%species
      zi = species_i%get_zval()

      do jatom = iatom + 1, natoms
        species_j => atom(jatom)%species
        zj = species_j%get_zval()

        r = pos(:, iatom) - pos(:, jatom)
        r_mag = norm2(r)
        u_e = zi * zj / r_mag

        energy = energy + u_e
        f(1:space%dim) = (u_e / r_mag**2) * r(1:space%dim)
        force(1:space%dim, iatom) = force(1:space%dim, iatom) + f(1:space%dim)
        force(1:space%dim, jatom) = force(1:space%dim, jatom) - f(1:space%dim)
      end do
    end do

    call comm_allreduce(dist%mpi_grp, energy)
    call comm_allreduce(dist%mpi_grp, force)

    nullify(species_i, species_j)

    POP_SUB(ion_interaction_finite)

  end subroutine ion_interaction_finite

  !> @brief Total Ewald electrostatic energy and forces, for 1D, 2D and 3D systems.
  subroutine ion_interaction_periodic(this, space, latt, atom, natoms, pos, energy, force, &
    energy_components, force_components)
    type(ion_interaction_t),   intent(in)    :: this                        !< Ion interaction instance
    class(space_t),            intent(in)    :: space                       !< System dimensions and boundary conditions
    type(lattice_vectors_t),   intent(in)    :: latt                        !< Crystal lattice
    type(atom_t),              intent(in)    :: atom(:)                     !< Atoms
    integer,                   intent(in)    :: natoms                      !< Number of atoms in system
    real(real64),              intent(in)    :: pos(1:space%dim,1:natoms)   !< Atomic positions
    real(real64),              intent(out)   :: energy                      !< Total ion-ion electrostatic energy
    real(real64),              intent(out)   :: force(:, :)                 !< Total force on each ion
    real(real64), optional,           intent(out)   :: energy_components(:)        !< Ion-ion energy contributions
    real(real64), optional,           intent(out)   :: force_components(:, :, :)   !< Force contributions

    real(real64) :: ereal, efourier, epseudo, eself     !< Ewald energy terms
    real(real64) :: charge                              !< Total ion charge

    PUSH_SUB(ion_interaction_periodic)

    energy = M_ZERO
    force(1:space%dim, 1:natoms) = M_ZERO

    call Ewald_short(this%dist, space, latt, atom, pos, this%alpha, ereal, force)
    if (present(force_components)) then
      force_components(1:space%dim, 1:natoms, ION_COMPONENT_REAL) = force(1:space%dim, 1:natoms)
    end if

    call Ewald_self_interaction(this%dist, atom, this%alpha, eself, charge)

    call profiling_in("EWALD_LONG")
    select case (space%periodic_dim)
    case (1)
      ! Warning added in init routine, such that it is not displayed per SCF step
      efourier = M_ZERO
      ! Do not confuse the user and set to zero all the other components
      ereal = M_ZERO
      eself = M_ZERO
      force = M_ZERO
      epseudo = M_ZERO
    case (2)
      ! The energy contribution of the long range part of the pseudo is
      ! not correctly accounted for in systems periodic in 1D or 2D, however
      ! this term should not appear here anyway. See Issue #950.
      epseudo = M_ZERO
      call Ewald_long_2D(this, space, latt, atom, natoms, pos, efourier, force)
    case (3)
      call Ewald_long_3D(this, space, latt, atom, natoms, pos, efourier, force, charge)
      !TODO(Alex/Nicolas) Issue #950. Refactor: Move G=0 correction from ion-ion energy to pseudopotential energy
      call pseudopotential_correction_3D(this%dist, latt, atom, charge, epseudo)
    end select
    call profiling_out("EWALD_LONG")

    if (present(energy_components)) then
      energy_components(ION_COMPONENT_REAL) = ereal
      energy_components(ION_COMPONENT_SELF) = eself
      energy_components(ION_COMPONENT_FOURIER) = efourier
    end if

    if (present(force_components)) then
      ! This is dependent on the order in which the force terms are computed
      force_components(1:space%dim, 1:natoms, ION_COMPONENT_FOURIER) = &
        force(1:space%dim, 1:natoms) - force_components(1:space%dim, 1:natoms, ION_COMPONENT_REAL)
    end if

    energy = ereal + efourier + eself + epseudo

    POP_SUB(ion_interaction_periodic)
  end subroutine ion_interaction_periodic

  !> @brief Short range component of the Ewald electrostatic energy and force.
  !!
  !! Computes the energy:
  !! \f[
  !!   E_{\text{short}} = \frac{1}{2} \sum_{i \neq j}^{N_{atom}} \sum_{\mathbf{T}}
  !!        \frac{Z_i Z_j \operatorname{erfc}\left(\sqrt{\alpha} |\mathbf{r}_{i j} + \mathbf{T}|\right)}{|\mathbf{r}_{i j} + \mathbf{T}|},
  !! \f]
  !! and the force:
  !! \f[
  !!  \mathbf{f}_j = \sum_i \sum_{\mathbf{T}} -Z_i Z_j (\mathbf{r}_{ij} + \mathbf{T}) \frac{\operatorname{erfc}}{|\mathbf{r}_{ij} + \mathbf{T}|}
  !!             + \frac{2\alpha}{\sqrt{\pi}}\frac{e^{-\alpha |\mathbf{r}_{ij} + \mathbf{T}|^2}}{|\mathbf{r}_{ij} + \mathbf{T}|^2}
  !! \f]
  !!
  !! ## Note on the Implementation
  !! The middle loop over atoms only iterates over the upper triangle, resulting in a large load
  !! imbalance for MPI, however it should still be an improvement w.r.t. iterating over all atoms.
  !! OMP cannot be used to collapse non-rectangular loops.
  !!
  !! One could alternatively collapse the loops over the whole matrix and distribute a single index, so each process
  !! would get its own set of (ia, ja, icopy). The individual indices could be recovered algebraically.
  subroutine Ewald_short(dist, space, latt, atom, pos, alpha, ereal, force)
    type(distributed_t),      intent(in)      :: dist         !< Atomic distribution
    class(space_t),           intent(in)      :: space        !< System dims and boundary conditions
    type(lattice_vectors_t),  intent(in)      :: latt         !< Lattice
    type(atom_t),             intent(in)      :: atom(:)      !< Atom properties
    real(real64),             intent(in)      :: pos(:, :)    !< Ion positions

    real(real64),             intent(in)      :: alpha        !< Broadening parameter, controlling range separation
    real(real64),             intent(out)     :: ereal        !< Realspace contribution to Ewald energy
    real(real64),             intent(inout)   :: force(:, :)  !< Forces.
    !                                                            Intent(inout) allows force contributions to be summed
    integer                  :: iatom, jatom, icopy, natoms
    real(real64)             :: rnorm, xi(space%dim)          !< Position vector |Ri - Rj + T|, and magnitude
    real(real64)             :: force_real(space%dim)         !< Real-space component of the force, for a given ion
    real(real64)             :: zi, zj                        !< Ionic charges
    real(real64)             :: erfc                          !< Complementary error function
    real(real64)             :: rcut                          !< Real space cutoff
    type(lattice_iterator_t) :: latt_iter                     !< Lattice iterator
    real(real64)             :: charge, coeff

    PUSH_SUB_WITH_PROFILE(Ewald_short)

    ereal = M_ZERO
    ! Origin of this value is not documented
    rcut = 6.0_real64 / alpha
    latt_iter = lattice_iterator_t(latt, rcut)
    natoms = size(atom)

    charge = M_ZERO
    do iatom = dist%start, dist%end
      if (.not. atom(iatom)%species%represents_real_atom()) cycle
      zi = atom(iatom)%species%get_zval()
      charge = charge + zi**2
    end do

    ! Diagonal terms iatom == jatom for all cells, except T=(0,0,0)
    ! Note: Only half of the copies are needed, by symmetries
    do icopy = 1, latt_iter%n_cells
      rnorm = norm2(latt_iter%get(icopy))
      if (rnorm < R_MIN_ATOM_DIST) cycle
      if (rnorm > rcut) cycle
      erfc = loct_erfc(alpha * rnorm)
      ereal = ereal + M_HALF * charge * erfc /rnorm
    end do

    coeff = M_TWO * alpha / sqrt(M_PI)

    !$omp parallel default(shared) private(iatom, jatom, zi, zj, icopy, xi, rnorm, erfc, force_real, charge) reduction(+:ereal, force)
    do iatom = dist%start, dist%end
      if (.not. atom(iatom)%species%represents_real_atom()) cycle
      zi = atom(iatom)%species%get_zval()

      ! Upper triangle, for all replica cells
      do jatom = iatom + 1, natoms
        zj = atom(jatom)%species%get_zval()

        charge = zi*zj

        !$omp do
        do icopy = 1, latt_iter%n_cells
          xi = pos(:, iatom) + latt_iter%get(icopy)
          rnorm = norm2(xi - pos(:, jatom))
          if (rnorm > rcut) cycle

          erfc = loct_erfc(alpha * rnorm) / rnorm

          ! Factor 1/2 omitted as one is only summing over upper triangle
          ereal = ereal + charge * erfc

          force_real(:) = charge * (xi - pos(:, jatom)) * &
            (erfc + coeff *exp(-(alpha*rnorm)**2)) / rnorm**2

          ! Upper trianglar contribution
          force(1:space%dim, jatom) = force(1:space%dim, jatom) - force_real

          ! Lower triangular contribution
          force(1:space%dim, iatom) = force(1:space%dim, iatom) + force_real
        end do
        !$omp end do

      end do
    end do
    !$omp end parallel

    call comm_allreduce(dist%mpi_grp, ereal)
    call comm_allreduce(dist%mpi_grp, force)

    POP_SUB_WITH_PROFILE(Ewald_short)
  end subroutine Ewald_short

  !>@ brief Ewald self-interaction energy
  !!
  !! The force exerted on an atom by itself is by definition zero.
  !! Also returns the total ion charge.
  subroutine Ewald_self_interaction(dist, atom, alpha, eself, charge)
    type(distributed_t),      intent(in)      :: dist         !< Atomic distribution
    type(atom_t),             intent(in)      :: atom(:)      !< Atom properties
    real(real64),             intent(in)      :: alpha        !< Broadening parameter, controlling range separation
    real(real64),             intent(out)     :: eself        !< Self-interaction energy
    real(real64),             intent(out)     :: charge       !< Total ionic charge

    integer                  :: iatom
    real(real64)             :: zi

    PUSH_SUB(Ewald_self_interaction)

    eself = M_ZERO
    charge = M_ZERO

    do iatom = dist%start, dist%end
      zi = atom(iatom)%species%get_zval()
      charge = charge + zi
      eself = eself - alpha / sqrt(M_PI) * zi**2
    end do

    call comm_allreduce(dist%mpi_grp, eself)
    call comm_allreduce(dist%mpi_grp, charge)

    POP_SUB(Ewald_self_interaction)
  end subroutine Ewald_self_interaction

  !>@brief Computes the long-range part of the 3D Ewald summation
  subroutine Ewald_long_3D(this, space, latt, atom, natoms, pos, efourier, force, charge)
    type(ion_interaction_t),   intent(in)    :: this        !< Ion interaction instance
    class(space_t),            intent(in)    :: space       !< System dimensions and boundary conditions
    type(lattice_vectors_t),   intent(in)    :: latt        !< Crystal lattice
    type(atom_t),              intent(in)    :: atom(:)     !< Atoms
    integer,                   intent(in)    :: natoms      !< Number of atoms in system
    real(real64),              intent(in)    :: pos(:,:)    !< (space%dim, natoms)
    real(real64),              intent(inout) :: efourier    !< Long-range part of total ion-ion energy
    real(real64),              intent(inout) :: force(:, :) !< (space%dim, natoms)
    real(real64),              intent(in)    :: charge      !< Total ionic charge

    real(real64) :: rcut, gmax_squared
    integer :: iatom
    integer :: ix, iy, iz, isph
    real(real64)   :: gvec(3), gred(3), gg2, gx
    real(real64)   :: factor
    complex(real64)   :: sumatoms, tmp(3), aa

    complex(real64), allocatable :: phase(:)

    PUSH_SUB(Ewald_long_3d)

    ASSERT(space%dim == 3)
    ASSERT(space%periodic_dim == 3)

    ! And the long-range part, using an Ewald sum
    SAFE_ALLOCATE(phase(1:natoms))

    ! get a converged value for the cutoff in g
    rcut = sqrt(minval(sum(latt%klattice**2, dim=1)))

    ! 9.5 is a constant that controls the range separation
    isph = ceiling(9.5_real64*this%alpha/rcut)

    ! First the G = 0 term (charge was calculated previously)
    efourier = -M_PI*charge**2/(M_TWO*this%alpha**2*latt%rcell_volume)

    ! Getting the G-shell cutoff
    gmax_squared = isph**2 * minval(sum(latt%klattice**2, dim=1))

    do ix = -isph, isph
      do iy = -isph, isph
        do iz = -isph, isph

          ! Exploit k <-> -k symmetry
          ! Only process one half of reciprocal space.
          ! g=0 must also be removed from the sum
          if (ix < 0) cycle
          if (ix == 0 .and. iy < 0) cycle
          if (ix == 0 .and. iy == 0 .and. iz <= 0) cycle

          gred = [ix, iy, iz]
          call kpoints_to_absolute(latt, gred, gvec)
          gg2 = dot_product(gvec, gvec)

          if (gg2 > gmax_squared*1.001_real64) cycle

          gx = -0.25_real64*gg2/this%alpha**2

          if (gx < -36.0_real64) cycle

          ! We have used the k-> -k symmetry, hence the factor 4
          factor = M_FOUR*M_PI/latt%rcell_volume*exp(gx)/gg2

          if (factor < epsilon(factor)) cycle

          sumatoms = M_z0
          !$omp parallel do private(iatom, gx, aa) reduction(+:sumatoms)
          do iatom = 1, natoms
            gx = sum(gvec*pos(:,iatom))
            aa = atom(iatom)%species%get_zval()*cmplx(cos(gx), sin(gx), real64)
            phase(iatom) = aa
            sumatoms = sumatoms + aa
          end do

          efourier = efourier + factor * real(sumatoms*conjg(sumatoms), real64)

          do iatom = 1, natoms
            tmp = M_zI*gvec*phase(iatom)
            force(1:space%dim, iatom) = force(1:space%dim, iatom) - factor*real(conjg(tmp)*sumatoms + tmp*conjg(sumatoms), real64)

          end do

        end do
      end do
    end do

    SAFE_DEALLOCATE_A(phase)

    POP_SUB(Ewald_long_3d)

  end subroutine Ewald_long_3D

  !>@brief Computes the long-range part of the 2D Ewald summation
  !!
  !! See In-Chul Yeh and Max L. Berkowitz, J. Chem. Phys. 111, 3155 (1999).
  subroutine Ewald_long_2D(this, space, latt, atom, natoms, pos, efourier, force)
    type(ion_interaction_t),   intent(in)    :: this
    class(space_t),            intent(in)    :: space
    type(lattice_vectors_t),   intent(in)    :: latt
    type(atom_t),              intent(in)    :: atom(:)
    integer,                   intent(in)    :: natoms
    real(real64),              intent(in)    :: pos(1:space%dim,1:natoms)
    real(real64),              intent(inout) :: efourier
    real(real64),              intent(inout) :: force(:, :) !< (space%dim, natoms)

    real(real64) :: rcut, gmax_squared
    integer :: iatom, jatom
    integer :: ix, iy, ix_max, iy_max
    real(real64)   :: gvec(space%dim), gg2, gx, gg_abs
    real(real64)   :: factor,factor1,factor2, coeff
    real(real64)   :: dz_max, dz_ij, erfc1, erfc2, tmp_erf
    real(real64), allocatable :: force_tmp(:,:)
    real(real64), parameter :: tol = 1e-10_real64

    PUSH_SUB(Ewald_long_2d)

    ASSERT(space%periodic_dim == 2)
    ASSERT(space%dim == 2 .or. space%dim == 3)

    ! And the long-range part, using an Ewald sum

    ! Searching maximum distance
    if (space%dim == 3) then
      dz_max = M_ZERO
      do iatom = 1, natoms
        do jatom = iatom + 1, natoms
          dz_max = max(dz_max, abs(pos(3, iatom) - pos(3, jatom)))
        end do
      end do
    else
      ! For a 2D system, all atoms are on the plane, so the distance is zero
      dz_max = M_ZERO
    end if

    !get a converged value for the cutoff in g
    rcut = M_TWO*this%alpha*4.6_real64 + M_TWO*this%alpha**2*dz_max
    if (dz_max > tol) then
      do
        if (rcut * dz_max >= M_MAX_EXP_ARG) exit  !Maximum double precision number
        erfc1 = M_ONE - loct_erf(this%alpha*dz_max + M_HALF*rcut/this%alpha)
        if (erfc1 * exp(rcut*dz_max) < 1.e-10_real64) exit
        rcut = rcut * 1.414_real64
      end do
    end if

    ix_max = ceiling(rcut/norm2(latt%klattice(:, 1)))
    iy_max = ceiling(rcut/norm2(latt%klattice(:, 2)))

    SAFE_ALLOCATE(force_tmp(1:space%dim, 1:natoms))
    force_tmp = M_ZERO

    ! First the G = 0 term
    efourier = M_ZERO
    factor = M_PI/latt%rcell_volume
    !$omp parallel do private(jatom, dz_ij, tmp_erf, factor1, factor2) reduction(+:efourier,force_tmp) &
    !$omp& collapse(2)
    do iatom = this%dist%start, this%dist%end
      do jatom = 1, natoms
        ! efourier
        if (space%dim == 3) then
          dz_ij = pos(3, iatom) - pos(3, jatom)
        else
          dz_ij = M_ZERO
        end if

        tmp_erf = loct_erf(this%alpha*dz_ij)
        factor1 = dz_ij*tmp_erf
        factor2 = exp(-(this%alpha*dz_ij)**2)/(this%alpha*sqrt(M_PI))

        efourier = efourier - factor &
          * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() * (factor1 + factor2)

        ! force
        if (iatom == jatom)cycle
        if (abs(tmp_erf) < M_EPSILON) cycle

        if (space%dim == 3) then
          force_tmp(3, iatom) = force_tmp(3, iatom) - (- M_TWO*factor) &
            * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() * tmp_erf
        end if

      end do
    end do

    ! Getting the G-shell cutoff
    gmax_squared = sum(ix_max*latt%klattice(:, 1)**2)
    gmax_squared = min(gmax_squared, sum(iy_max*latt%klattice(:, 2)**2))

    !$omp parallel do private(iy, gvec, gg2, gg_abs, factor, iatom, jatom, gx, dz_ij, erfc1, factor1, erfc2, factor2, coeff) &
    !$omp& collapse(2) reduction(+:efourier, force_tmp)
    do ix = -ix_max, ix_max
      do iy = -iy_max, iy_max

        gvec = ix*latt%klattice(:, 1) + iy*latt%klattice(:, 2)
        gg2 = sum(gvec**2)

        ! g=0 must be removed from the sum
        if (gg2 < M_EPSILON .or. gg2 > gmax_squared*1.001_real64) cycle
        gg_abs = sqrt(gg2)
        factor = M_HALF*M_PI/(latt%rcell_volume*gg_abs)

        do iatom = this%dist%start, this%dist%end
          do jatom = iatom, natoms
            ! efourier
            gx = sum(gvec(1:2) * (pos(1:2, iatom) - pos(1:2, jatom)))
            gx = gvec(1)*(pos(1, iatom) - pos(1, jatom)) + gvec(2)*(pos(2, iatom) - pos(2, jatom))
            if (space%dim == 3) then
              dz_ij = pos(3, iatom) - pos(3, jatom)
            else
              dz_ij = M_ZERO
            end if

            erfc1 = M_ONE - loct_erf(this%alpha*dz_ij + M_HALF*gg_abs/this%alpha)
            if (abs(erfc1) > M_EPSILON) then
              factor1 = exp(gg_abs*dz_ij)*erfc1
            else
              factor1 = M_ZERO
            end if
            erfc2 = M_ONE - loct_erf(-this%alpha*dz_ij + M_HALF*gg_abs/this%alpha)
            if (abs(erfc2) > M_EPSILON) then
              factor2 = exp(-gg_abs*dz_ij)*erfc2
            else
              factor2 = M_ZERO
            end if

            if (iatom == jatom) then
              coeff = M_ONE
            else
              coeff = M_TWO
            end if

            efourier = efourier &
              + factor * coeff &
              * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() &
              * cos(gx)* ( factor1 + factor2)

            ! force
            if (iatom == jatom) cycle

            force_tmp(1:2, iatom) = force_tmp(1:2, iatom) &
              + M_TWO * factor * gvec(1:2) &
              * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() &
              *sin(gx)*(factor1 + factor2)

            force_tmp(1:2, jatom) = force_tmp(1:2, jatom) &
              - M_TWO * factor * gvec(1:2) &
              * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() &
              *sin(gx)*(factor1 + factor2)

            factor1 = gg_abs*erfc1 &
              - M_TWO*this%alpha/sqrt(M_PI)*exp(-(this%alpha*dz_ij + M_HALF*gg_abs/this%alpha)**2)
            if (abs(factor1) > M_EPSILON) then
              factor1 = factor1*exp(gg_abs*dz_ij)
            else
              factor1 = M_ZERO
            end if

            factor2 = gg_abs*erfc2 &
              - M_TWO*this%alpha/sqrt(M_PI)*exp(-(-this%alpha*dz_ij + M_HALF*gg_abs/this%alpha)**2)
            if (abs(factor2) > M_EPSILON) then
              factor2 = factor2*exp(-gg_abs*dz_ij)
            else
              factor2 = M_ZERO
            end if

            if (space%dim == 3) then
              force_tmp(3, iatom) = force_tmp(3, iatom) &
                - M_TWO*factor &
                * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() &
                * cos(gx)* ( factor1 - factor2)
              force_tmp(3, jatom) = force_tmp(3, jatom) &
                + M_TWO*factor &
                * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() &
                * cos(gx)* ( factor1 - factor2)
            end if

          end do
        end do


      end do
    end do

    call comm_allreduce(this%dist%mpi_grp, efourier)
    call comm_allreduce(this%dist%mpi_grp, force_tmp)

    force = force + force_tmp

    SAFE_DEALLOCATE_A(force_tmp)

    POP_SUB(Ewald_long_2d)
  end subroutine Ewald_long_2D

  !TODO(Alex/Nicolas) Issue #950. Refactor: Move G=0 correction from ion-ion energy to pseudopotential energy
  !> @brief G=0 component of Ewald energy arising from the pseudopotentials, for 3D systems.
  !!
  !! See J. Ihm, A. Zunger, M.L. Cohen, J. Phys. C 12, 4409 (1979)
  !! This is the term \alpha_1
  !! or
  !! Eq. 12.22 in the book of R. Martin.
  subroutine pseudopotential_correction_3D(dist, latt, atom, charge, epseudo)
    type(distributed_t),      intent(in)    :: dist        !< Atom MPI distribution
    type(lattice_vectors_t),  intent(in)    :: latt        !< Lattice
    type(atom_t),             intent(in)    :: atom(:)     !< Atoms
    real(real64),             intent(out)   :: epseudo     !< Ewald energy correction

    real(real64)             :: zi           !< Ionic charge
    real(real64)             :: charge       !< Total ionic charge
    integer                  :: iatom

    PUSH_SUB(pseudopotential_correction_3D)

    epseudo = M_ZERO
    do iatom = dist%start, dist%end
      select type(spec => atom(iatom)%species)
      class is(pseudopotential_t)
        zi = spec%get_zval()
        epseudo = epseudo + M_PI *zi * &
          (spec%ps%sigma_erf * sqrt(M_TWO))**2 / latt%rcell_volume * charge
      end select
    end do
    call comm_allreduce(dist%mpi_grp, epseudo)

    POP_SUB(pseudopotential_correction_3D)

  end subroutine pseudopotential_correction_3D

  !> @brief Computes the contribution to the stress tensor the ion-ion energy
  subroutine ion_interaction_stress(this, space, latt, atom, natoms, pos, stress_ii)
    type(ion_interaction_t),   intent(inout) :: this
    class(space_t),            intent(in)    :: space
    type(lattice_vectors_t),   intent(in)    :: latt
    type(atom_t),              intent(in)    :: atom(:)
    integer,                   intent(in)    :: natoms
    real(real64),              intent(in)    :: pos(1:space%dim,1:natoms)
    real(real64),              intent(out)   :: stress_ii(space%dim, space%dim)

    real(real64) :: stress_short(1:space%dim, 1:space%dim), stress_Ewald(1:space%dim, 1:space%dim)

    PUSH_SUB(ion_interaction_stress)

    stress_ii = M_ZERO

    ! Only implemented in the periodic case
    ASSERT(space%is_periodic())

    ! Short range part in real space
    call ion_interaction_stress_short(this, space, latt, atom, natoms, pos, stress_short)

    ! Long range part in Fourier space
    select case(space%periodic_dim)
    case(3)
      call Ewald_3D_stress(this, space, latt, atom, natoms, pos, stress_Ewald)
    case(2)
      call Ewald_2D_stress(this, space, latt, atom, natoms, pos, stress_Ewald)
    case default
      ASSERT(.false.)
    end select

    stress_ii = stress_short + stress_Ewald

    POP_SUB(ion_interaction_stress)
  end subroutine ion_interaction_stress

  ! ---------------------------------------------------------
  !> @brief Computes the short-range contribution to the stress tensor the ion-ion energy
  !!
  !! The formula that is implemented here corresponds to the short-range part of the expression B1 of
  !! Nielsen and Martin, Stresses in semiconductors: Ab initio calculations on Si, Ge, and GaAs
  !! PRB 32, 3792 (1985).
  !!
  !! \f[
  !! \sigma_{\alpha\beta}^{\rm Ewald-SR} = \frac{\sqrt{\epsilon}}{2\Omega} \sum_{\mathbf{\tau\sigma T}} Z_\tau Z_{\sigma} H(\sqrt{\epsilon}D) \frac{D_\alpha D_{\beta}}{D^2}\Bigg|_{\mathbf{D=x_{\sigma}-x_{\tau}+T\neq0}}\,,
  !! \f]
  !!
  !! where the function \f$H(x)\f$ is
  !!
  !! \f[
  !! H(x) = \frac{\partial[{\rm erfc}(x)]}{\partial x} - \frac{{\rm erfc}(x)}{x}
  !! \f]

  subroutine ion_interaction_stress_short(this, space, latt, atom, natoms, pos, stress_short)
    type(ion_interaction_t),   intent(inout) :: this
    class(space_t),            intent(in)    :: space
    type(lattice_vectors_t),   intent(in)    :: latt
    type(atom_t),              intent(in)    :: atom(:)
    integer,                   intent(in)    :: natoms
    real(real64),              intent(in)    :: pos(1:space%dim,1:natoms)
    real(real64),              intent(out)   :: stress_short(1:space%dim, 1:space%dim)

    real(real64) :: xi(space%dim)
    real(real64) :: r_ij, zi, zj, erfc, Hp, factor
    integer :: iatom, jatom, icopy, idir, jdir
    real(real64) :: alpha, rcut
    type(lattice_iterator_t) :: latt_iter

    PUSH_SUB(ion_interaction_stress_short)
    call profiling_in("ION_ION_STRESS_SHORT")

    ! Only implemented in the periodic case
    ASSERT(space%is_periodic())

    alpha = this%alpha

    ! See the code for the energy above to understand this parameter
    rcut = 6.0_real64/alpha

    ! the short-range part is calculated directly
    stress_short = M_ZERO
    latt_iter = lattice_iterator_t(latt, rcut)

    do iatom = this%dist%start, this%dist%end
      select type(spec => atom(iatom)%species)
      class is(jellium_t)
        cycle
      end select
      zi = atom(iatom)%species%get_zval()

      do icopy = 1, latt_iter%n_cells
        xi = pos(:, iatom) + latt_iter%get(icopy)

        do jatom = 1, natoms
          zj = atom(jatom)%species%get_zval()
          r_ij = norm2(xi - pos(:, jatom))

          if (r_ij < R_MIN_ATOM_DIST) cycle

          erfc = loct_erfc(alpha*r_ij)
          Hp = -M_TWO/sqrt(M_PI)*exp(-(alpha*r_ij)**2) - erfc/(alpha*r_ij)
          factor = M_HALF*zj*zi*alpha*Hp
          do idir = 1, space%periodic_dim
            do jdir = 1, space%periodic_dim
              stress_short(idir, jdir) = stress_short(idir, jdir) &
                - factor*(xi(idir) - pos(idir, jatom))*(xi(jdir) - pos(jdir, jatom))/(r_ij**2)
            end do
          end do

        end do
      end do
    end do

    if (this%dist%parallel) then
      call comm_allreduce(this%dist%mpi_grp, stress_short)
    end if

    stress_short = stress_short/latt%rcell_volume

    call profiling_out("ION_ION_STRESS_SHORT")

    POP_SUB(ion_interaction_stress_short)
  end subroutine ion_interaction_stress_short



  ! ---------------------------------------------------------
  !> @brief Computes the contribution to the stress tensor from the 3D Ewald sum
  !!
  !! The formula that is implemented here correspond to the long-range part of the expression B1 of
  !! Nielsen and Martin, Stresses in semiconductors: Ab initio calculations on Si, Ge, and GaAs
  !! PRB 32, 3792 (1985).
  !!
  !! \f[
  !! \sigma_{\alpha\beta}^{\rm Ewald-LR} = \frac{\pi}{2\Omega^2\epsilon} \sum_{\mathbf{G\neq0}} \frac{e^{-G^2/4\epsilon}}{G^2/4\epsilon}\left|\sum_\tau Z_\tau e^{i\mathbf{G}\cdot\mathbf{x}_\tau}\right|^2 \Big(2\frac{G_\alpha G_\beta}{G^2}(G^2/4\epsilon + 1) - \delta_{\alpha\beta}\Big) \nonumber\\
  !! + \frac{\pi}{2\Omega^2\epsilon}\left(\sum_{\tau} Z_\tau\right)^2\delta_{\alpha\beta}\,,
  !! \f]
  !!
  subroutine Ewald_3D_stress(this, space, latt, atom, natoms, pos, stress_Ewald)
    type(ion_interaction_t),   intent(inout) :: this
    class(space_t),            intent(in)    :: space
    type(lattice_vectors_t),   intent(in)    :: latt
    type(atom_t),              intent(in)    :: atom(:)
    integer,                   intent(in)    :: natoms
    real(real64),              intent(in)    :: pos(1:space%dim,1:natoms)
    real(real64),              intent(out)   :: stress_Ewald(3, 3)

    real(real64)   :: zi, rcut, gmax_squared
    integer :: iatom
    integer :: ix, iy, iz, isph, idim, idir, jdir
    real(real64)   :: gred(3), gvec(3), gg2, gx
    real(real64)   :: factor, charge, charge_sq, off_diagonal_weight
    complex(real64)   :: sumatoms, aa

    call profiling_in("STRESS_3D_EWALD")
    PUSH_SUB(Ewald_3D_stress)

    ! Currently this is only implemented for 3D
    ASSERT(space%dim == 3)
    ASSERT(space%periodic_dim == 3) ! Not working for mixed periodicity
    !                                 (klattice along the non-periodic directions is wrong)
    !                                 Anyway gg/gg2 is not correct for mixed periodicity

    stress_Ewald = M_ZERO

    ! And the long-range part, using an Ewald sum
    charge = M_ZERO
    charge_sq = M_ZERO
    do iatom = 1, natoms
      zi = atom(iatom)%species%get_zval()
      charge = charge + zi
      charge_sq = charge_sq + zi**2
    end do

    ! get a converged value for the cutoff in g
    rcut = huge(rcut)
    do idim = 1, space%periodic_dim
      rcut = min(rcut, sum(latt%klattice(1:space%periodic_dim, idim)**2))
    end do

    rcut = sqrt(rcut)

    isph = ceiling(9.5_real64*this%alpha/rcut)

    ! Getting the G-shell cutoff
    gmax_squared = isph**2 * minval(sum(latt%klattice**2, dim=1))

    do ix = -isph, isph
      do iy = -isph, isph
        do iz = -isph, isph

          ! Exploit k <-> -k symmetry
          ! Only process one half of reciprocal space.
          ! g=0 must also be removed from the sum
          if (ix < 0) cycle
          if (ix == 0 .and. iy < 0) cycle
          if (ix == 0 .and. iy == 0 .and. iz <= 0) cycle

          gred = [ix, iy, iz]
          call kpoints_to_absolute(latt, gred, gvec)
          gg2 = sum(gvec**2)

          ! g=0 must be removed from the sum
          if (gg2 > gmax_squared*1.001_real64) cycle

          gx = -0.25_real64*gg2/this%alpha**2

          if (gx < -36.0_real64) cycle

          ! We have used the k-> -k symmetry, hence the factor 4
          factor = M_FOUR*M_PI*exp(gx)/(latt%rcell_volume*gg2)

          if (factor < epsilon(factor)) cycle

          sumatoms = M_Z0

          do iatom = 1, natoms
            gx = sum(gvec*pos(:, iatom))
            aa = atom(iatom)%species%get_zval()*cmplx(cos(gx), sin(gx), real64)
            sumatoms = sumatoms + aa
          end do

          factor = factor*abs(sumatoms)**2
          off_diagonal_weight = - M_TWO*factor/gg2*(0.25_real64*gg2/this%alpha**2+M_ONE)

          do idir = 1, 3
            do jdir = 1, 3
              stress_Ewald(idir, jdir) = stress_Ewald(idir, jdir) &
                + gvec(idir) * gvec(jdir) * off_diagonal_weight
            end do
            stress_Ewald(idir, idir) = stress_Ewald(idir, idir) + factor
          end do

        end do
      end do
    end do


    ! The G = 0 term of the Ewald summation
    factor = M_HALF*M_PI*charge**2/(latt%rcell_volume*this%alpha**2)
    do idir = 1,3
      stress_Ewald(idir,idir) = stress_Ewald(idir,idir) - factor
    end do

    stress_Ewald = stress_Ewald / latt%rcell_volume


    call profiling_out("STRESS_3D_EWALD")
    POP_SUB(Ewald_3D_stress)

  end subroutine Ewald_3D_stress

  ! ---------------------------------------------------------
  !> @brief Computes the contribution to the stress tensor from the 2D Ewald sum
  !!
  !! The formula that is implemented here is derived from the corresponding energy
  !!
  !! \f[
  !!  \frac{\partial \gamma_{\rm Ewald, LR-2D}}{\partial \epsilon_{\alpha\beta}}\Bigg|_{\vec{\epsilon}=\vec{I}} = - \delta_{\alpha\beta} \gamma_{\rm Ewald, LR-2D} + \frac{\pi}{4A\alpha}\sum_{ij}Z_i Z_{j}\sum_{G\neq 0}\cos(\vec{G}\cdot\vec{r}_{ij})\frac{G_\alpha G_\beta}{G^2}\nonumber\\
  !! \Bigg( e^{G z_{ij}}\Bigg[ 2\alpha\Big(\frac{1}{G}-z_{ij}){\rm erfc}(\alpha z_{ij} + \frac{G}{2\alpha}) -{\rm erfc`}(\alpha z_{ij} + \frac{G}{2\alpha})\Bigg]\nonumber\\
  !! + e^{-G z_{ij}}\Bigg[ 2\alpha\Big(\frac{1}{G}+z_{ij}){\rm erfc}(-\alpha z_{ij} + \frac{G}{2\alpha})
  !! +{\rm erfc`}(-\alpha z_{ij} + \frac{G}{2\alpha})\Bigg] \Bigg)\,.
  !! \f]
  !!
  !!
  subroutine Ewald_2D_stress(this, space, latt, atom, natoms, pos, stress_Ewald)
    type(ion_interaction_t),   intent(inout) :: this
    type(space_t),             intent(in)    :: space
    type(lattice_vectors_t),   intent(in)    :: latt
    type(atom_t),              intent(in)    :: atom(:)
    integer,                   intent(in)    :: natoms
    real(real64),              intent(in)    :: pos(1:space%dim,1:natoms)
    real(real64),              intent(out)   :: stress_Ewald(3, 3)

    real(real64) :: rcut, efourier
    integer :: iatom, jatom, idir, jdir
    integer :: ix, iy, ix_max, iy_max
    real(real64)   :: gvec(3), gred(3), gg2, cos_gx, gg_abs, gmax_squared
    real(real64)   :: factor,factor1,factor2, coeff, e_ewald
    real(real64)   :: dz_max, z_ij, erfc1, erfc2, diff(3)
    real(real64), parameter   :: tol = 1e-10_real64

    PUSH_SUB(Ewald_2D_stress)

    ASSERT(space%periodic_dim == 2)
    ASSERT(space%dim == 3)

    stress_Ewald = M_ZERO

    ! Searching maximum distance
    dz_max = M_ZERO
    do iatom = 1, natoms
      do jatom = iatom + 1, natoms
        dz_max = max(dz_max, abs(pos(3, iatom) - pos(3, jatom)))
      end do
    end do

    !get a converged value for the cutoff in g
    ! Note: to understand these numbers, one needs to look into the energy routine for Ewald 2D
    rcut = M_TWO*this%alpha*4.6_real64 + M_TWO*this%alpha**2*dz_max
    if (dz_max > tol) then ! Else the code here does not work properly
      do
        if (rcut * dz_max >= M_MAX_EXP_ARG) exit  !Maximum double precision number
        erfc1 = M_ONE - loct_erf(this%alpha*dz_max + M_HALF*rcut/this%alpha)
        if (erfc1 * exp(rcut*dz_max) < tol) exit
        rcut = rcut * 1.414_real64
      end do
    end if

    ! First the G = 0 term
    efourier = M_ZERO
    factor = M_PI/latt%rcell_volume
    !$omp parallel do private(jatom, z_ij, factor1, factor2) reduction(+:efourier) collapse(2)
    do iatom = 1, natoms
      do jatom = 1, natoms
        z_ij = pos(3, iatom) - pos(3, jatom)

        factor1 = z_ij * loct_erf(this%alpha*z_ij)
        factor2 = exp(-(this%alpha*z_ij)**2)/(this%alpha*sqrt(M_PI))

        efourier = efourier - factor &
          * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval() * (factor1 + factor2)
      end do
    end do

    ! Adding the G=0 term
    do idir = 1, 2
      stress_Ewald(idir, idir) = efourier
    end do

    ! Getting the G-shell cutoff
    ix_max = ceiling(rcut/norm2(latt%klattice(:, 1)))
    iy_max = ceiling(rcut/norm2(latt%klattice(:, 2)))
    gmax_squared = sum(ix_max*latt%klattice(:, 1)**2)
    gmax_squared = min(gmax_squared, sum(iy_max*latt%klattice(:, 2)**2))

    !$omp parallel do private(iy, gvec, gg2, gg_abs, factor, iatom, jatom, diff, cos_gx, z_ij, idir, jdir, erfc1, factor1) &
    !$omp& private(erfc2, factor2, coeff, e_ewald) &
    !$omp& collapse(2) reduction(+:stress_Ewald)
    do ix = -ix_max, ix_max
      do iy = -iy_max, iy_max

        gred = [ix, iy, 0]
        call kpoints_to_absolute(latt, gred, gvec)
        gg2 = dot_product(gvec,gvec)

        ! g=0 must be removed from the sum
        if (gg2 < M_EPSILON .or. gg2 > gmax_squared*1.001_real64) cycle

        gg_abs = sqrt(gg2)
        factor = M_FOURTH*M_PI/(latt%rcell_volume*this%alpha*gg2)

        do iatom = 1, natoms
          do jatom = iatom, natoms
            diff = pos(:, iatom) - pos(:, jatom)
            cos_gx = cos(sum(gvec(1:2) * diff(1:2)))
            z_ij = diff(3)

            factor1 = screening_function_2d(this%alpha, z_ij, gg_abs, erfc1)
            factor2 = screening_function_2d(this%alpha,-z_ij, gg_abs, erfc2)

            if (iatom == jatom) then
              coeff = M_ONE
            else
              coeff = M_TWO
            end if

            do idir = 1, 2
              do jdir = 1, 2
                stress_Ewald(idir, jdir) = stress_Ewald(idir, jdir) &
                  - factor*gvec(idir)*gvec(jdir) * cos_gx * (factor1 + factor2) * coeff&
                  * atom(iatom)%species%get_zval()*atom(jatom)%species%get_zval()
              end do
            end do

            if (abs(erfc1) > M_EPSILON) then
              factor1 = exp(-gg_abs*z_ij)*erfc1
            else
              factor1 = M_ZERO
            end if
            if (abs(erfc2) > M_EPSILON) then
              factor2 = exp(gg_abs*z_ij)*erfc2
            else
              factor2 = M_ZERO
            end if

            e_ewald = M_HALF * M_PI/latt%rcell_volume * coeff &
              * atom(iatom)%species%get_zval() * atom(jatom)%species%get_zval() &
              * cos_gx / gg_abs * (factor1 + factor2)

            do idir = 1, 2
              stress_Ewald(idir, idir) = stress_Ewald(idir, idir) + e_ewald
            end do

          end do !jatom
        end do !iatom
      end do !iy
    end do !ix

    !call comm_allreduce(this%dist%mpi_grp, stress_Ewald)

    stress_Ewald = stress_Ewald / latt%rcell_volume

    POP_SUB(Ewald_2D_stress)
  end subroutine Ewald_2D_stress

  ! ---------------------------------------------------------
  !>@brief Auxiliary function for the Ewald 2D stress
  real(real64) function screening_function_2d(alpha, z_ij, gg_abs, erfc) result(factor)
    real(real64),  intent(in)  :: alpha
    real(real64),  intent(in)  :: z_ij
    real(real64),  intent(in)  :: gg_abs
    real(real64),  intent(out) :: erfc

    real(real64) :: arg

    arg = -alpha*z_ij + M_HALF*gg_abs/alpha
    erfc = M_ONE - loct_erf(arg)
    factor = M_TWO*alpha*(M_ONE/gg_abs + z_ij)*erfc - M_TWO/sqrt(M_PI)*exp(-arg**2)
    factor = factor*exp(-gg_abs*z_ij)

  end function screening_function_2d

  ! ---------------------------------------------------------

  subroutine ion_interaction_test(space, latt, atom, natoms, pos, lsize, &
    namespace, mc)
    class(space_t),           intent(in)    :: space
    type(lattice_vectors_t),  intent(in)    :: latt
    type(atom_t),             intent(in)    :: atom(:)
    integer,                  intent(in)    :: natoms
    real(real64),             intent(in)    :: pos(1:space%dim,1:natoms)
    real(real64),             intent(in)    :: lsize(:)
    type(namespace_t),        intent(in)    :: namespace
    type(multicomm_t),        intent(in)    :: mc

    type(ion_interaction_t) :: ion_interaction
    real(real64) :: energy
    real(real64), allocatable :: force(:, :), force_components(:, :, :)
    real(real64) :: energy_components(1:ION_NUM_COMPONENTS)
    integer :: iatom, idir

    PUSH_SUB(ion_interaction_test)

    call ion_interaction_init(ion_interaction, namespace, space, natoms)
    call ion_interaction_init_parallelization(ion_interaction, natoms, mc)

    SAFE_ALLOCATE(force(1:space%dim, 1:natoms))
    SAFE_ALLOCATE(force_components(1:space%dim, 1:natoms, 1:ION_NUM_COMPONENTS))

    call ion_interaction_calculate(ion_interaction, space, latt, atom, natoms, pos, lsize, energy, force, &
      energy_components = energy_components, force_components = force_components)

    call messages_write('Ionic energy        =')
    call messages_write(energy, fmt = '(f20.10)')
    call messages_info(namespace=namespace)

    call messages_write('Real space energy   =')
    call messages_write(energy_components(ION_COMPONENT_REAL), fmt = '(f20.10)')
    call messages_info(namespace=namespace)

    call messages_write('Self energy         =')
    call messages_write(energy_components(ION_COMPONENT_SELF), fmt = '(f20.10)')
    call messages_info(namespace=namespace)

    call messages_write('Fourier energy      =')
    call messages_write(energy_components(ION_COMPONENT_FOURIER), fmt = '(f20.10)')
    call messages_info(namespace=namespace)

    call messages_info(namespace=namespace)

    do iatom = 1, natoms
      call messages_write('Ionic force         atom')
      call messages_write(iatom)
      call messages_write(' =')
      do idir = 1, space%dim
        call messages_write(force(idir, iatom), fmt = '(f20.10)')
      end do
      call messages_info(namespace=namespace)

      call messages_write('Real space force    atom')
      call messages_write(iatom)
      call messages_write(' =')
      do idir = 1, space%dim
        call messages_write(force_components(idir, iatom, ION_COMPONENT_REAL), fmt = '(f20.10)')
      end do
      call messages_info(namespace=namespace)

      call messages_write('Fourier space force atom')
      call messages_write(iatom)
      call messages_write(' =')
      do idir = 1, space%dim
        call messages_write(force_components(idir, iatom, ION_COMPONENT_FOURIER), fmt = '(f20.10)')
      end do
      call messages_info(namespace=namespace)

      call messages_info(namespace=namespace)
    end do

    SAFE_DEALLOCATE_A(force)
    SAFE_DEALLOCATE_A(force_components)

    call ion_interaction_end(ion_interaction)

    POP_SUB(ion_interaction_test)
  end subroutine ion_interaction_test

end module ion_interaction_oct_m

!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End:
