!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
!!
!! This program is free software; you can redistribute it and/or modify
!! it under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 2, or (at your option)
!! any later version.
!!
!! This program is distributed in the hope that it will be useful,
!! but WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!! GNU General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with this program; if not, write to the Free Software
!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
!! 02110-1301, USA.
!!

! ---------------------------------------------------------
!> Compute the Cholesky decomposition of real symmetric or complex Hermitian positive definite
!! matrix a, dim(a) = n x n. On return a = u^T u with u upper triangular matrix.
subroutine X(cholesky)(n, a, bof, err_code)
  integer,           intent(in)    :: n
  R_TYPE, contiguous,intent(inout) :: a(:, :)   !< (n,n)
  logical, optional, intent(inout) :: bof      !< Bomb on failure.
  integer, optional, intent(out)   :: err_code

  integer :: info

  call profiling_in(TOSTRING(X(CHOLESKY)))
  PUSH_SUB(X(cholesky))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  call lapack_potrf('U', n, a(1, 1), lead_dim(a), info)
  if (info /= 0) then
    if (optional_default(bof, .true.)) then
      write(message(1), '(5a,i5)') 'In ', TOSTRING(X(cholesky)), ', LAPACK ', TOSTRING(X(potrf)), ' returned error message ', info
! http://www.netlib.org/lapack/explore-3.1.1-html/dpotrf.f.html and zpotrf.f.html
!      *  INFO    (output) INTEGER
!      *          = 0:  successful exit
!      *          < 0:  if INFO = -i, the i-th argument had an illegal value
!      *          > 0:  if INFO = i, the leading minor of order i is not
!      *                positive definite, and the factorization could not be
!      *                completed.
      if (info < 0) then
        write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      else
        write(message(2), '(a,i5,a)') 'The leading minor of order ', info, ' is not positive definite.'
      end if
      call messages_fatal(2)
    else
      if (present(bof)) then
        bof = .true.
      end if
    end if
  else
    if (present(bof)) then
      bof = .false.
    end if
  end if
  if (present(err_code)) then
    err_code = info
  end if

  call profiling_out(TOSTRING(X(CHOLESKY)))
  POP_SUB(X(cholesky))
end subroutine X(cholesky)


! ---------------------------------------------------------
!> Computes all the eigenvalues and the eigenvectors of a real symmetric or complex Hermitian
!! generalized definite eigenproblem, of the form \f$ Ax=\lambda Bx \f$. B is also positive definite.
!!
!! For optimal performances, this uses the divide and conquer algoritm
subroutine X(geneigensolve)(n, a, b, e, preserve_mat, bof, err_code)
  integer,            intent(in)    :: n
  R_TYPE, contiguous, intent(inout) :: a(:, :)   !< (n,n)
  R_TYPE, contiguous, intent(inout) :: b(:, :)   !< (n,n)
  real(real64), contiguous,  intent(out)   :: e(:)     !< (n)
  logical,            intent(in)    :: preserve_mat !< If true, the matrix a and b on exit are the same
  !                                                !< as on input
  logical, optional,  intent(inout) :: bof      !< Bomb on failure.
  integer, optional,  intent(out)   :: err_code

  integer :: info, lwork, ii, jj, liwork
  integer, allocatable :: iwork(:)
#ifdef R_TCOMPLEX
  real(real64), allocatable :: rwork(:)
  integer :: lrwork
#endif
  R_TYPE, allocatable :: work(:), diag(:)

  call profiling_in(TOSTRING(X(DENSE_EIGENSOLVER)))
  PUSH_SUB(X(geneigensolve))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  if (preserve_mat) then
    SAFE_ALLOCATE(diag(1:n))
    ! store the diagonal of b
    do ii = 1, n
      diag(ii) = b(ii, ii)
    end do
  end if

#ifdef R_TCOMPLEX
  allocate(work(1), rwork(1), iwork(1))
  call lapack_hegvd(1, 'V', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), e(1), &
    work(1), -1, rwork(1), -1, iwork(1), -1, info)
  lwork = int(work(1))
  lrwork = int(rwork(1))
  liwork = iwork(1)
  deallocate(work, rwork, iwork)
  SAFE_ALLOCATE(work(1:lwork))
  SAFE_ALLOCATE(rwork(1:lrwork))
  SAFE_ALLOCATE(iwork(1:liwork))
  call lapack_hegvd(1, 'V', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), e(1), &
    work(1), lwork, rwork(1), lrwork, iwork(1), liwork, info)

  SAFE_DEALLOCATE_A(rwork)
  SAFE_DEALLOCATE_A(iwork)
  SAFE_DEALLOCATE_A(work)
#else
  allocate(work(1), iwork(1))
  call lapack_sygvd(1, 'V', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), e(1), &
    work(1), -1, iwork(1), -1, info)
  lwork = int(work(1))
  liwork = iwork(1)
  deallocate(work, iwork)
  SAFE_ALLOCATE(work(1:lwork))
  SAFE_ALLOCATE(iwork(1:liwork))
  call lapack_sygvd(1, 'V', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), e(1), &
    work(1), lwork, iwork(1), liwork, info)
  SAFE_DEALLOCATE_A(work)
#endif

  if (preserve_mat) then
    ! b was destroyed, so we rebuild it
    do ii = 1, n
      do jj = 1, ii - 1
        b(jj, ii) = R_CONJ(b(ii, jj))
      end do
      b(ii, ii) = diag(ii)
    end do

    SAFE_DEALLOCATE_A(diag)
  end if

  if (info /= 0) then
    if (optional_default(bof, .true.)) then
      write(message(1),'(3a)') 'In ', TOSTRING(X(geneigensolve)), ', LAPACK '
#ifdef R_TCOMPLEX
      write(message(1),'(3a,i5)') trim(message(1)), TOSTRING(X(hegv)), ' returned error message ', info
#else
      write(message(1),'(3a,i5)') trim(message(1)), TOSTRING(X(sygv)), ' returned error message ', info
#endif
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  ZPOTRF or ZHEEV returned an error code:
!*             <= N:  if INFO = i, ZHEEV failed to converge;
!*                    i off-diagonal elements of an intermediate
!*                    tridiagonal form did not converge to zero;
!*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
!*                    minor of order i of B is not positive definite.
!*                    The factorization of B could not be completed and
!*                    no eigenvalues or eigenvectors were computed.
      if (info < 0) then
        write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      else if (info <= n) then
        write(message(2), '(i5,a)') info, ' off-diagonal elements of an intermediate tridiagonal did not converge to zero.'
      else
        write(message(2), '(a,i5,a)') 'The leading minor of order ', info - n, ' of B is not positive definite.'
      end if
      call messages_fatal(2)
    else
      if (present(bof)) then
        bof = .true.
      end if
    end if
  else
    if (present(bof)) then
      bof = .false.
    end if
  end if
  if (present(err_code)) then
    err_code = info
  end if

  call profiling_out(TOSTRING(X(DENSE_EIGENSOLVER)))
  POP_SUB(X(geneigensolve))
end subroutine X(geneigensolve)


! ---------------------------------------------------------
!> Computes all the eigenvalues and the right (left) eigenvectors of a real or complex
!! (non-Hermitian) eigenproblem, of the form A*x=(lambda)*x
subroutine X(eigensolve_nonh)(n, a, e, err_code, side, sort_eigenvectors)
  integer,                intent(in)    :: n
  R_TYPE, contiguous,     intent(inout) :: a(:, :)   !< (n,n)
  complex(real64),  contiguous,     intent(out)   :: e(:)      !< (n)
  integer,      optional, intent(out)   :: err_code
  character(1), optional, intent(in)    :: side      !< which eigenvectors ('L' or 'R')
  logical,      optional, intent(in)    :: sort_eigenvectors !< only applies to complex version, sorts by real part

  integer              :: info, lwork, ii
  real(real64), allocatable   :: rwork(:), re(:)
  R_TYPE, allocatable  :: work(:), vl(:, :), vr(:, :), a_copy(:, :)
  complex(real64), allocatable   :: e_copy(:)
  character(1)         :: side_
  integer, allocatable :: ind(:)

  PUSH_SUB(X(eigensolve_nonh))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  if (present(side)) then
    side_ = side
  else
    side_ = 'R'
  end if

  lwork = -1
  ! Initializing info, if not it can cause that the geev query mode fails.
  ! Besides, if info is not initialized valgrind complains about it.
  info = 0
  ! A bug in the query mode of zgeev demands that the working array has to be larger than 1
  ! problem here is that a value is written somewhere into the array whether it is
  ! allocated or not. I noticed that it happens (hopefully) always at an index which
  ! is below the matrix dimension.
  SAFE_ALLOCATE(work(1:n))
  SAFE_ALLOCATE(vl(1, 1))
  SAFE_ALLOCATE(vr(1:n, 1:n)) ! even in query mode, the size of vr is checked, so we allocate it
  SAFE_ALLOCATE(rwork(1))
  call lapack_geev('N', 'V', n, a, lead_dim(a), e, vl, lead_dim(vl), vr, lead_dim(vr), &
    work, lwork, rwork, info)
  if (info /= 0) then
    write(message(1),'(5a,i5)') 'In ', TOSTRING(X(eigensolve_nonh)), &
      ', LAPACK ', TOSTRING(X(geev)), ' workspace query returned error message ', info
    call messages_fatal(1)
  end if

  lwork = int(work(1))
  SAFE_DEALLOCATE_A(work)
  SAFE_DEALLOCATE_A(vl)
  SAFE_DEALLOCATE_A(vr)
  SAFE_DEALLOCATE_A(rwork)

  SAFE_ALLOCATE(work(1:lwork))
  SAFE_ALLOCATE(rwork(1:max(1, 2*n)))
  if (side_ == 'L'.or.side_ == 'l') then
    SAFE_ALLOCATE(vl(1:n, 1:n))
    SAFE_ALLOCATE(vr(1, 1))
    call lapack_geev('V', 'N', n, a, lead_dim(a), e, vl, lead_dim(vl), vr, lead_dim(vr), &
      work, lwork, rwork, info)
    a(1:n, 1:n) = vl(1:n, 1:n)
  else
    SAFE_ALLOCATE(vl(1, 1))
    SAFE_ALLOCATE(vr(1:n, 1:n))
    call lapack_geev('N', 'V', n, a, lead_dim(a), e, vl, lead_dim(vl), vr, lead_dim(vr), &
      work, lwork, rwork, info)
    a(1:n, 1:n) = vr(1:n, 1:n)
  end if
  SAFE_DEALLOCATE_A(work)
  SAFE_DEALLOCATE_A(rwork)
  SAFE_DEALLOCATE_A(vr)
  SAFE_DEALLOCATE_A(vl)

  if (info /= 0) then
    write(message(1),'(5a,i5)') 'In ', TOSTRING(X(eigensolve_nonh)), &
      ', LAPACK ', TOSTRING(X(geev)), ' returned error message ', info
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value.
!*          > 0:  if INFO = i, the QR algorithm failed to compute all the
!*                eigenvalues, and no eigenvectors have been computed;
!*                elements i+1:N of WR and WI contain eigenvalues which
!*                have converged.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(a,i5,a,i5,a)') 'Only eigenvalues ', info + 1, ' to ', n, ' could be computed.'
    end if
    call messages_fatal(2)
  end if
  if (present(err_code)) then
    err_code = info
  end if

  if (optional_default(sort_eigenvectors, .false.)) then
    SAFE_ALLOCATE(re(1:n))
    SAFE_ALLOCATE(ind(1:n))
    SAFE_ALLOCATE(e_copy(1:n))
    SAFE_ALLOCATE(a_copy(1:n, 1:n))
    re(:) = real(e(:), real64)
    e_copy(:) = e(:)
    a_copy(:, :) = a(:, :)
    call sort(re, ind)
    do ii = 1, n
      e(ii) = e_copy(ind(ii))
      a(1:n, ii) = a_copy(1:n, ind(ii))
    end do
    SAFE_DEALLOCATE_A(e_copy)
    SAFE_DEALLOCATE_A(a_copy)
    SAFE_DEALLOCATE_A(re)
    SAFE_DEALLOCATE_A(ind)
  end if

  POP_SUB(X(eigensolve_nonh))
end subroutine X(eigensolve_nonh)


! ---------------------------------------------------------
!> Computes the k lowest eigenvalues and the eigenvectors of a real symmetric or complex Hermitian
!! generalized definite eigenproblem, of the form  A*x=(lambda)*B*x. B is also positive definite.
subroutine X(lowest_geneigensolve)(k, n, a, b, e, v, preserve_mat, bof, err_code)
  integer,            intent(in)    :: k, n
  R_TYPE, contiguous, intent(inout) :: a(:, :)   !< (n, n)
  R_TYPE, contiguous, intent(inout) :: b(:, :)   !< (n, n)
  real(real64),  contiguous, intent(out)   :: e(:)     !< (n)
  R_TYPE, contiguous, intent(out)   :: v(:, :)   !< (n, n)
  logical,            intent(in)    :: preserve_mat !< If true, the matrix a and b on exit are the same
  !                                                !< as on input
  logical, optional,  intent(inout) :: bof      !< Bomb on failure.
  integer, optional,  intent(out)   :: err_code

  integer            :: m, iwork(5*n), ifail(n), info, lwork, ii, jj ! allocate me
  real(real64)       :: abstol
  R_TYPE, allocatable :: work(:), diaga(:), diagb(:)
#ifndef R_TREAL
  real(real64)       :: rwork(7*n)
#endif
  PUSH_SUB(X(lowest_geneigensolve))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  abstol = 2*sfmin()

  if (preserve_mat) then
    SAFE_ALLOCATE(diaga(1:n))
    SAFE_ALLOCATE(diagb(1:n))

    ! store the diagonal of a and b
    do ii = 1, n
      diaga(ii) = a(ii, ii)
      diagb(ii) = b(ii, ii)
    end do
  end if


  ! Work size query.
  SAFE_ALLOCATE(work(1))

#ifdef R_TREAL
  call dsygvx(1, 'V', 'I', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), lead_dim(v), work(1), -1, iwork(1), ifail(1), info)
  if (info /= 0) then
    write(message(1),'(3a,i5)') 'In dlowest_geneigensolve, LAPACK ', &
      TOSTRING(dsygvx), ' workspace query returned error message ', info
    call messages_fatal(1)
  end if
  lwork = int(work(1))
  SAFE_DEALLOCATE_A(work)

  SAFE_ALLOCATE(work(1:lwork))

  call dsygvx(1, 'V', 'I', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), lead_dim(v), work(1), lwork, iwork(1), ifail(1), info)

#else
  call zhegvx(1, 'V', 'I', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), lead_dim(v), work(1), -1, rwork(1), iwork(1), ifail(1), info)
  if (info /= 0) then
    write(message(1),'(3a,i5)') 'In zlowest_geneigensolve, LAPACK ', &
      TOSTRING(zhegvx), ' workspace query returned error message ', info
    call messages_fatal(1)
  end if
  lwork = int(real(work(1)))
  SAFE_DEALLOCATE_A(work)

  SAFE_ALLOCATE(work(1:lwork))
  call zhegvx(1, 'V', 'I', 'U', n, a(1, 1), lead_dim(a), b(1, 1), lead_dim(b), M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), lead_dim(v), work(1), lwork, rwork(1), iwork(1), ifail(1), info)
#endif

  if (preserve_mat) then
    ! b was destroyed, so we rebuild it
    do ii = 1, n
      do jj = 1, ii - 1
        a(jj, ii) = R_CONJ(a(ii, jj))
        b(jj, ii) = R_CONJ(b(ii, jj))
      end do
      a(ii, ii) = diaga(ii)
      b(ii, ii) = diagb(ii)
    end do

    SAFE_DEALLOCATE_A(diaga)
    SAFE_DEALLOCATE_A(diagb)
  end if


  SAFE_DEALLOCATE_A(work)

  if (info /= 0) then
    if (optional_default(bof, .true.)) then
#ifdef R_TREAL
      write(message(1),'(3a,i5)') 'In dlowest_geneigensolve, LAPACK ', &
        TOSTRING(dsygvx), ' returned error message ', info
#else
      write(message(1),'(3a,i5)') 'In zlowest_geneigensolve, LAPACK ', &
        TOSTRING(zhegvx), ' returned error message ', info
#endif
!        INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  DPOTRF or DSYEVX returned an error code:
!*             <= N:  if INFO = i, DSYEVX failed to converge;
!*                    i eigenvectors failed to converge.  Their indices
!*                    are stored in array IFAIL.
!*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
!*                    minor of order i of B is not positive definite.
!*                    The factorization of B could not be completed and
!*                    no eigenvalues or eigenvectors were computed.
      if (info < 0) then
        write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      else if (info <= n) then
        write(message(2), *) info, ' eigenvectors failed to converge: ', ifail(1:info)
      else
        write(message(2), '(a,i5,a)') 'The leading minor of order ', info - n, ' of B is not positive definite.'
      end if
      call messages_fatal(2)
    else
      if (present(bof)) then
        bof = .true.
      end if
    end if
  else
    if (present(bof)) then
      bof = .false.
    end if
  end if
  if (present(err_code)) then
    err_code = info
  end if

  POP_SUB(X(lowest_geneigensolve))
end subroutine X(lowest_geneigensolve)

! ---------------------------------------------------------
!> Computes all eigenvalues and eigenvectors of a real symmetric  or hermitian square matrix A.
subroutine X(eigensolve)(n, a, e, bof, err_code)
  integer,            intent(in)    :: n
  R_TYPE, contiguous, intent(inout) :: a(:, :)   !< (n,n)
  real(real64),  contiguous, intent(out)   :: e(:)     !< (n)
  logical, optional,  intent(inout) :: bof      !< Bomb on failure.
  integer, optional,  intent(out)   :: err_code

  integer             :: info, lwork
  R_TYPE, allocatable :: work(:)
#ifndef R_TREAL
  real(real64), allocatable :: rwork(:)
#endif

  PUSH_SUB(X(eigensolve))
  call profiling_in(TOSTRING(X(DENSE_EIGENSOLVER)))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  lwork = 6*n ! query?
  SAFE_ALLOCATE(work(1:lwork))
#ifdef R_TREAL
  call lapack_syev('V', 'U', n, a(1, 1), lead_dim(a), e(1), work(1), lwork, info)
#else
  SAFE_ALLOCATE(rwork(1:max(1, 3*n-2)))
  call lapack_heev('V','U', n, a(1, 1), lead_dim(a), e(1), work(1), lwork, rwork(1), info)
  SAFE_DEALLOCATE_A(rwork)
#endif
  SAFE_DEALLOCATE_A(work)

  if (info /= 0) then
    if (optional_default(bof, .true.)) then
#ifdef R_TREAL
      write(message(1),'(3a,i5)') 'In eigensolve, LAPACK ', TOSTRING(dsyev), ' returned error message ', info
#else
      write(message(1),'(3a,i5)') 'In eigensolve, LAPACK ', TOSTRING(zheev), ' returned error message   ', info
#endif
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  if INFO = i, the algorithm failed to converge; i
!*                off-diagonal elements of an intermediate tridiagonal
!*                form did not converge to zero.
      if (info < 0) then
        write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      else
        write(message(2), '(i5,a)') info, ' off-diagonal elements of an intermediate tridiagonal did not converge to zero.'
      end if
      call messages_fatal(2)
    else
      if (present(bof)) then
        bof = .true.
      end if
    end if
  else
    if (present(bof)) then
      bof = .false.
    end if
  end if
  if (present(err_code)) then
    err_code = info
  end if

  call profiling_out(TOSTRING(X(DENSE_EIGENSOLVER)))
  POP_SUB(X(eigensolve))
end subroutine X(eigensolve)


! ---------------------------------------------------------
!> Computes the k lowest eigenvalues and the eigenvectors of a
!! standard symmetric-definite eigenproblem, of the form  A*x=(lambda)*x.
!! Here A is assumed to be symmetric.
subroutine X(lowest_eigensolve)(k, n, a, e, v, preserve_mat)
  integer,            intent(in)    :: k      !< Number of eigenvalues requested
  integer,            intent(in)    :: n      !< Dimensions of a
  R_TYPE, contiguous, intent(inout) :: a(:, :) !< (n, n)
  real(real64),  contiguous, intent(out)   :: e(:)   !< (n) The first k elements contain the selected eigenvalues in ascending order.
  R_TYPE, contiguous, intent(out)   :: v(:, :) !< (n, k)
  logical,            intent(in)    :: preserve_mat !< If true, the matrix a and b on exit are the same
  !                                                 !< as on input

  integer             :: m, iwork(5*n), ifail(n), info, lwork, ii, jj
  real(real64)        :: abstol
  R_TYPE, allocatable :: work(:), diaga(:)

  PUSH_SUB(X(lowest_eigensolve))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  abstol = 2*sfmin()

  if (preserve_mat) then
    SAFE_ALLOCATE(diaga(1:n))

    ! store the diagonal of a and b
    do ii = 1, n
      diaga(ii) = a(ii, ii)
    end do
  end if


  ! Work size query.
  SAFE_ALLOCATE(work(1))
#ifdef R_TREAL
  call dsyevx('V', 'I', 'U', n, a(1, 1), n, M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), n, work(1), -1, iwork(1), ifail(1), info)
  if (info /= 0) then
    write(message(1),'(3a,i5)') 'In dlowest_eigensolve, LAPACK ', &
      TOSTRING(dsyevx), ' workspace query returned error message ', info
    call messages_fatal(1)
  end if
  lwork = int(work(1))
  SAFE_DEALLOCATE_A(work)

  SAFE_ALLOCATE(work(1:lwork))
  call dsyevx('V', 'I', 'U', n, a(1, 1), n, M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), n, work(1), lwork, iwork(1), ifail(1), info)
#else
  call zheevx('V', 'I', 'U', n, a(1, 1), n, M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), n, work(1), -1, iwork(1), ifail(1), info)
  if (info /= 0) then
    write(message(1),'(3a,i5)') 'In zlowest_eigensolve, LAPACK ', &
      TOSTRING(zheevx), ' workspace query returned error message ', info
    call messages_fatal(1)
  end if
  lwork = int(work(1))
  SAFE_DEALLOCATE_A(work)

  SAFE_ALLOCATE(work(1:lwork))
  call zheevx('V', 'I', 'U', n, a(1, 1), n, M_ZERO, M_ZERO, &
    1, k, abstol, m, e(1), v(1, 1), n, work(1), lwork, iwork(1), ifail(1), info)

#endif

  if (preserve_mat) then
    ! b was destroyed, so we rebuild it
    do ii = 1, n
      do jj = 1, ii - 1
        a(jj, ii) = R_CONJ(a(ii, jj))
      end do
      a(ii, ii) = diaga(ii)
    end do

    SAFE_DEALLOCATE_A(diaga)
  end if


  SAFE_DEALLOCATE_A(work)

  if (info /= 0) then
#ifdef R_TREAL
    write(message(1),'(3a,i5)') &
      'In dlowest_eigensolve, LAPACK ', TOSTRING(dsyevx), ' returned error message ', info
#else
    write(message(1),'(3a,i5)') &
      'In zlowest_eigensolve, LAPACK ', TOSTRING(zheevx), ' returned error message ', info
#endif
!    http://www.netlib.org/lapack/explore-3.1.1-html/dsyevx.f.html
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  if INFO = i, then i eigenvectors failed to converge.
!*                Their indices are stored in array IFAIL.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), *) info, ' eigenvectors failed to converge: ', ifail(1:info)
    end if
    call messages_fatal(2)
  end if

  POP_SUB(X(lowest_eigensolve))
end subroutine X(lowest_eigensolve)

! ---------------------------------------------------------
!> Invert a real symmetric or complex Hermitian square matrix a
R_TYPE function X(determinant)(n, a, preserve_mat) result(d)
  integer, intent(in)           :: n
  R_TYPE, contiguous, target, intent(inout) :: a(:, :) !< (n,n)
  logical, intent(in)           :: preserve_mat

  integer :: info, i
  integer, allocatable :: ipiv(:)
  R_TYPE, pointer, contiguous :: tmp_a(:,:)

  ! No PUSH_SUB, called too often

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  SAFE_ALLOCATE(ipiv(1:n))

  if (preserve_mat) then
    SAFE_ALLOCATE(tmp_a(1:n, 1:n))
    tmp_a(1:n, 1:n) = a(1:n, 1:n)
  else
    tmp_a => a
  end if

  call lapack_getrf(n, n, tmp_a(1, 1), lead_dim(tmp_a), ipiv(1), info)
  if (info < 0) then
    write(message(1), '(5a, i5)') 'In ', TOSTRING(X(determinant)), ', LAPACK ', TOSTRING(X(getrf)), ' returned info = ', info
    call messages_fatal(1)
  end if

  d = M_ONE
  do i = 1, n
    if (ipiv(i) /= i) then
      d = - d*tmp_a(i, i)
    else
      d = d*tmp_a(i, i)
    end if
  end do

  SAFE_DEALLOCATE_A(ipiv)
  if (preserve_mat) then
    SAFE_DEALLOCATE_P(tmp_a)
  end if

end function X(determinant)

! ---------------------------------------------------------
!> Invert a real symmetric or complex Hermitian square matrix a
subroutine X(direct_inverse)(n, a, det)
  integer,           intent(in)     :: n
  R_TYPE, contiguous,intent(inout)  :: a(:, :) !< (n,n)
  R_TYPE,  optional, intent(out)    :: det

  integer :: info, i
  integer, allocatable :: ipiv(:)
  R_TYPE, allocatable :: work(:)

  ! No PUSH_SUB, called too often

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  SAFE_ALLOCATE(work(1:n)) ! query?
  SAFE_ALLOCATE(ipiv(1:n))

  call lapack_getrf(n, n, a(1, 1), lead_dim(a), ipiv(1), info)
  if (info < 0) then
    write(message(1), '(5a, i5)') 'In ', TOSTRING(X(determinant)), ', LAPACK ', TOSTRING(X(getrf)), ' returned info = ', info
    call messages_fatal(1)
  end if

  if (present(det)) then
    det = M_ONE
    do i = 1, n
      if (ipiv(i) /= i) then
        det = - det*a(i, i)
      else
        det = det*a(i, i)
      end if
    end do
  end if


  call lapack_getri(n, a(1, 1), lead_dim(a), ipiv(1), work(1), n, info)
  if (info /= 0) then
    write(message(1), '(5a, i5)') 'In ', TOSTRING(X(determinant)), ', LAPACK ', TOSTRING(X(getri)), ' returned info = ', info
!    http://www.netlib.org/lapack/explore-3.1.1-html/zgetri.f.html
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
!*                singular and its inverse could not be computed.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(a,i5,a)') 'Diagonal element ', info, ' of U is 0; matrix is singular.'
    end if
    call messages_fatal(2)
  end if

  SAFE_DEALLOCATE_A(work)
  SAFE_DEALLOCATE_A(ipiv)

end subroutine X(direct_inverse)


!> @brief Norm of a 2D matrix.
!!
!! The spectral norm of a matrix \f$A\f$ is the largest singular value of \f$A\f$.
!! i.e., the largest eigenvalue of the matrix \f$\sqrt{\dagger{A}A}\f$.
!!
!! @param[in] m, n       Dimensions of A
!! @param[in] a          2D matrix
!! @param[out] norm2     L2 norm of A
!TODO (#721): Add an optional argument preserve_mat to preserve the matrix.
! At the moment, it is destroyed on exit
subroutine X(matrix_norm2)(m, n, a, norm_l2, preserve_mat)
  integer,            intent(in)    :: m, n
  R_TYPE, contiguous, intent(inout) :: a(:, :)
  real(real64),       intent(out)   :: norm_l2
  logical, optional,  intent(in)    :: preserve_mat

  R_TYPE, allocatable :: u(:, :), vt(:, :)    !< Unitary matrices of SVD
  real(real64), allocatable :: sg_values(:)        !< Singular decomposition values
  integer :: min_dim                        !< Smallest dimension of a

  PUSH_SUB(X(matrix_norm2))

  ASSERT(n > 0)
  ASSERT(m > 0)
  ASSERT(not_in_openmp())
  min_dim = min(m, n)

  SAFE_ALLOCATE( u(m, m))
  SAFE_ALLOCATE(vt(n, n))
  SAFE_ALLOCATE(sg_values(min_dim))

  call X(singular_value_decomp)(m, n, a, u, vt, sg_values, preserve_mat)
  norm_l2 = maxval(sg_values)

  SAFE_DEALLOCATE_A(sg_values)
  SAFE_DEALLOCATE_A(vt)
  SAFE_DEALLOCATE_A(u)

  POP_SUB(X(matrix_norm2))
end subroutine X(matrix_norm2)


! ---------------------------------------------------------
!> Invert a real/complex symmetric square matrix a
subroutine X(sym_inverse)(uplo, n, a)
  character(1),       intent(in)     :: uplo
  integer,            intent(in)     :: n
  R_TYPE, contiguous, intent(inout)  :: a(:, :) !< (n,n)

  integer :: info
  integer, allocatable :: ipiv(:)
  R_TYPE, allocatable :: work(:)

  PUSH_SUB(X(sym_inverse))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  SAFE_ALLOCATE(work(1:n)) ! query?
  SAFE_ALLOCATE(ipiv(1:n))

  call lapack_sytrf(uplo, n, a(1, 1), lead_dim(a), ipiv(1), work(1), n, info)
  if (info < 0) then
    write(message(1), '(5a, i5)') 'In ', TOSTRING(X(sym_inverse)), ', LAPACK ', TOSTRING(X(sytrf)), ' returned info = ', info
    call messages_fatal(1)
  end if

  call lapack_sytri(uplo, n, a(1, 1), lead_dim(a), ipiv(1), work(1), info)
  if (info /= 0) then
    write(message(1), '(5a, i5)') 'In ', TOSTRING(X(sym_inverse)), ', LAPACK ', TOSTRING(X(sytri)), ' returned info = ', info
!    http://www.netlib.org/lapack/explore-3.1.1-html/dsytri.f.html
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
!*                singular and its inverse could not be computed.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(a,i5,a)') 'Diagonal element ', info, ' of D is 0; matrix is singular.'
    end if
    call messages_fatal(2)
  end if

  SAFE_DEALLOCATE_A(work)
  SAFE_DEALLOCATE_A(ipiv)
  POP_SUB(X(sym_inverse))
end subroutine X(sym_inverse)

! MJV 9 nov 2016: why is this stuff explicitly set in cpp instead of using the
! macros X()??? For the moment I have replicated this strategy in svd below.
#ifdef R_TREAL
! ---------------------------------------------------------
!> compute the solution to a real system of linear equations A*X = B,
!!  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
subroutine dlinsyssolve(n, nrhs, a, b, x)
  integer,           intent(in)    :: n, nrhs
  real(real64), contiguous, intent(inout) :: a(:, :) !< (n, n)
  real(real64), contiguous, intent(inout) :: b(:, :) !< (n, nrhs)
  real(real64), contiguous, intent(out)   :: x(:, :) !< (n, nrhs)

  integer :: info
  integer, allocatable :: ipiv(:), iwork(:)
  real(real64) :: rcond
  real(real64), allocatable :: ferr(:), berr(:), work(:), r(:), c(:), af(:, :)
  character(1) :: equed

  ! no PUSH_SUB, called too often

  ASSERT(n > 0)
  ASSERT(not_in_openmp())
  ASSERT(ubound(a, dim=1) >= n)
  ASSERT(ubound(a, dim=2) >= n)
  ASSERT(ubound(b, dim=1) >= n)
  ASSERT(ubound(b, dim=2) >= nrhs)
  ASSERT(ubound(x, dim=1) >= n)
  ASSERT(ubound(x, dim=2) >= nrhs)

  SAFE_ALLOCATE(ipiv(1:n))
  SAFE_ALLOCATE(iwork(1:n)) ! query?
  SAFE_ALLOCATE(ferr(1:nrhs))
  SAFE_ALLOCATE(berr(1:nrhs))
  SAFE_ALLOCATE(work(1:4*n))
  SAFE_ALLOCATE(r(1:n))
  SAFE_ALLOCATE(c(1:n))
  SAFE_ALLOCATE(af(1:n, 1:n))

  call X(gesvx) ("N", "N", n, nrhs, a(1, 1), lead_dim(a), af(1, 1), n, ipiv(1), equed, r(1), c(1), &
    b(1, 1), lead_dim(b), x(1, 1), lead_dim(x), rcond, ferr(1), berr(1), work(1), iwork(1), info)

  if (info /= 0) then
    write(message(1), '(3a, i5)') 'In dlinsyssolve, LAPACK ', TOSTRING(X(gesvx)), ' returned info = ', info
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  if INFO = i, and i is
!*                <= N:  U(i,i) is exactly zero.  The factorization has
!*                       been completed, but the factor U is exactly
!*                       singular, so the solution and error bounds
!*                       could not be computed. RCOND = 0 is returned.
!*                = N+1: U is nonsingular, but RCOND is less than machine
!*                       precision, meaning that the matrix is singular
!*                       to working precision.  Nevertheless, the
!*                       solution and error bounds are computed because
!*                       there are a number of situations where the
!*                       computed solution can be more accurate than the
!*                       value of RCOND would suggest.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      call messages_fatal(2)
    else
      if (info == n+1) then
        message(2) = '(reciprocal of the condition number is less than machine precision)'
        call messages_warning(2)
      else
        write(message(2), '(a,i5,a)') 'Diagonal element ', info, ' of U is 0; matrix is singular.'
        call messages_fatal(2)
      end if
    end if
  end if

  SAFE_DEALLOCATE_A(ipiv)
  SAFE_DEALLOCATE_A(iwork)
  SAFE_DEALLOCATE_A(ferr)
  SAFE_DEALLOCATE_A(berr)
  SAFE_DEALLOCATE_A(work)
  SAFE_DEALLOCATE_A(r)
  SAFE_DEALLOCATE_A(c)
  SAFE_DEALLOCATE_A(af)

end subroutine dlinsyssolve

#elif R_TCOMPLEX

! ---------------------------------------------------------
!> compute the solution to a complex system of linear equations A*X = B,
!!  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
subroutine zlinsyssolve(n, nrhs, a, b, x)
  integer,            intent(in)    :: n, nrhs
  complex(real64), contiguous,  intent(inout) :: a(:, :) !< (n, n)
  complex(real64), contiguous,  intent(inout) :: b(:, :) !< (n, nrhs)
  complex(real64), contiguous,  intent(out)   :: x(:, :) !< (n, nrhs)

  integer              :: info
  integer, allocatable :: ipiv(:)
  real(real64),   allocatable :: rwork(:), ferr(:), berr(:), r(:), c(:)
  real(real64)         :: rcond
  complex(real64), allocatable   :: work(:), af(:, :)
  character(1)         :: equed

  PUSH_SUB(zlinsyssolve)

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  SAFE_ALLOCATE(ipiv(1:n)) ! query?
  SAFE_ALLOCATE(rwork(1:2*n))
  SAFE_ALLOCATE(ferr(1:nrhs))
  SAFE_ALLOCATE(berr(1:nrhs))
  SAFE_ALLOCATE(work(1:2*n))
  SAFE_ALLOCATE(r(1:n))
  SAFE_ALLOCATE(c(1:n))
  SAFE_ALLOCATE(af(1:n, 1:n))

  equed = 'N'

  call X(gesvx) ("N", "N", n, nrhs, a(1, 1), lead_dim(a), af(1, 1), lead_dim(af), &
    ipiv(1), equed, r(1), c(1), b(1, 1), lead_dim(b), x(1, 1), lead_dim(x), &
    rcond, ferr(1), berr(1), work(1), rwork(1), info)

  if (info /= 0) then
    write(message(1), '(3a, i5)') 'In zlinsyssolve, LAPACK ', TOSTRING(X(gesvx)), ' returned info = ', info
!    http://www.netlib.org/lapack/explore-3.1.1-html/zgesvx.f.html
!*  INFO    (output) INTEGER
!*          = 0:  successful exit
!*          < 0:  if INFO = -i, the i-th argument had an illegal value
!*          > 0:  if INFO = i, and i is
!*                <= N:  U(i,i) is exactly zero.  The factorization has
!*                       been completed, but the factor U is exactly
!*                       singular, so the solution and error bounds
!*                       could not be computed. RCOND = 0 is returned.
!*                = N+1: U is nonsingular, but RCOND is less than machine
!*                       precision, meaning that the matrix is singular
!*                       to working precision.  Nevertheless, the
!*                       solution and error bounds are computed because
!*                       there are a number of situations where the
!*                       computed solution can be more accurate than the
!*                       value of RCOND would suggest.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      call messages_fatal(2)
    else
      if (info == n+1) then
        message(2) = '(reciprocal of the condition number is less than machine precision)'
        call messages_warning(2)
      else
        write(message(2), '(a,i5,a)') 'Diagonal element ', info, ' of U is 0; matrix is singular.'
        call messages_fatal(2)
      end if
    end if
  end if

  SAFE_DEALLOCATE_A(ipiv)
  SAFE_DEALLOCATE_A(rwork)
  SAFE_DEALLOCATE_A(ferr)
  SAFE_DEALLOCATE_A(berr)
  SAFE_DEALLOCATE_A(work)
  SAFE_DEALLOCATE_A(r)
  SAFE_DEALLOCATE_A(c)
  SAFE_DEALLOCATE_A(af)

  POP_SUB(zlinsyssolve)
end subroutine zlinsyssolve
#endif


#ifdef R_TREAL

!> @brief Computes the singular value decomposition of a real M x N matrix a.
subroutine dsingular_value_decomp(m, n, a, u, vt, sg_values, preserve_mat)
  integer,             intent(in)    :: m, n
  real(real64),   contiguous, intent(inout) :: a(:, :)          !< (m,n)
  real(real64),   contiguous, intent(out)   :: u(:, :), vt(:, :) !< (m,m) (n,n)
  real(real64),   contiguous, intent(out)   :: sg_values(:)    !< (min(m,n))
  logical, optional,   intent(in)    :: preserve_mat

  interface
    subroutine X(gesvd)(jobu, jobvt, m, n, a, lda, s, u, ldu, &
      vt, ldvt, work, lwork, info)
      import real64
      implicit none
      character(1), intent(in)    :: jobu, jobvt
      integer,      intent(in)    :: m, n
      real(real64), intent(inout) :: a, u, vt ! a(lda,n), u(ldu,m), vt(ldvt,n)
      real(real64), intent(out)   :: work     ! work(lwork)
      integer,      intent(in)    :: lda, ldu, ldvt, lwork
      integer,      intent(out)   :: info
      real(real64), intent(out)   :: s        ! s(min(m,n))
    end subroutine X(gesvd)
  end interface

  integer :: info, lwork
  real(real64), allocatable :: work(:)
  logical :: preserve_mat_
  real(real64), allocatable :: tmp_a(:, :)

  PUSH_SUB(dsingular_value_decomp)

  ASSERT(n > 0)
  ASSERT(m > 0)
  ASSERT(not_in_openmp())

  ! double minimum lwork size to increase performance (see manpage)
  lwork = 2 * (2 * min(m, n) + max(m, n))

  SAFE_ALLOCATE(work(1:lwork)) ! query?

  preserve_mat_ = optional_default(preserve_mat, .true.)

  if (preserve_mat_) then
    SAFE_ALLOCATE(tmp_a(1:m, 1:n))
    tmp_a(1:m, 1:n) = a(1:m, 1:n)
  end if

  call X(gesvd)( 'A', 'A', m, n, a(1, 1), lead_dim(a), sg_values(1), u(1, 1), lead_dim(u), vt(1, 1), &
    lead_dim(vt), work(1), lwork, info)

  if (info /= 0) then
    write(message(1), '(3a, i7)') 'In dsingular_value_decomp, LAPACK ', TOSTRING(X(gesvd)), ' returned info = ', info
!    http://www.netlib.org/lapack/explore-3.1.1-html/dgesvd.f.html
!*  INFO    (output) INTEGER
!*          = 0:  successful exit.
!*          < 0:  if INFO = -i, the i-th argument had an illegal value.
!*          > 0:  if ZBDSQR did not converge, INFO specifies how many
!*                superdiagonals of an intermediate bidiagonal form B
!*                did not converge to zero. See the description of WORK
!*                above for details.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(i5,a)') info, ' superdiagonal elements of an intermediate bidiagonal did not converge to zero.'
    end if
    call messages_fatal(2)
  end if

  if (preserve_mat_) then
    a(1:m, 1:n) = tmp_a(1:m, 1:n)
    SAFE_DEALLOCATE_A(tmp_a)
  end if

  SAFE_DEALLOCATE_A(work)
  POP_SUB(dsingular_value_decomp)
end subroutine dsingular_value_decomp

! TODO(Alex) Issue #1132.
! * Compare performance to dlalg_pseudo_inverse, and retain the fastest version.
! * If this is retained, refactor such that one uses the X macro
!> @brief Computes the inverse of a real M x N matrix, a, using the SVD decomposition.
subroutine dsvd_inverse(m, n, a, threshold)
  integer,                   intent(in)           :: m, n
  real(real64), contiguous,  intent(inout)        :: a(:, :)    !< Input  (m,n)
  !                                                               Output (n,m). a is replaced by its inverse transposed
  real(real64),              intent(in), optional :: threshold

  real(real64), allocatable :: u(:, :), vt(:, :)
  real(real64), allocatable :: sg_values(:)
  real(real64)   :: tmp
  real(real64)   :: sg_inverse, threshold_
  integer :: j, k, l, minmn

  ASSERT(n > 0)
  ASSERT(m > 0)
  ASSERT(not_in_openmp())
  minmn = min(m,n)

  SAFE_ALLOCATE( u(1:m, 1:m))
  SAFE_ALLOCATE(vt(1:n, 1:n))
  SAFE_ALLOCATE(sg_values(1:minmn))

  PUSH_SUB(dsvd_inverse)

  call dsingular_value_decomp(m, n, a, u, vt, sg_values)

  threshold_ = pseudoinverse_default_tolerance(m, n, sg_values)
  if (present(threshold)) threshold_ = threshold

  ! build inverse
  do j = 1, m
    do k = 1, n
      tmp = M_ZERO
      do l = 1, minmn
        if (sg_values(l) < threshold_) then
          !write(message(1), '(a)') 'In dsvd_inverse: singular value below threshold.'
          !call messages_warning(1)
          sg_inverse = M_ZERO
        else
          sg_inverse = M_ONE/sg_values(l)
        end if
        tmp = tmp + vt(l, k)*sg_inverse*u(j, l)
      end do
      a(j, k) = tmp
    end do
  end do

  SAFE_DEALLOCATE_A(sg_values)
  SAFE_DEALLOCATE_A(vt)
  SAFE_DEALLOCATE_A(u)
  POP_SUB(dsvd_inverse)
end subroutine dsvd_inverse

#elif R_TCOMPLEX

!> @brief Computes the singular value decomposition of a complex MxN matrix a.
subroutine zsingular_value_decomp(m, n, a, u, vt, sg_values, preserve_mat)
  integer,                      intent(in)    :: m, n
  complex(real64), contiguous,  intent(inout) :: a(:, :)          !< (m,n)
  complex(real64), contiguous,  intent(out)   :: u(:, :), vt(:, :) !< (n,n) and (m,m)
  real(real64),    contiguous,  intent(out)   :: sg_values(:)    !< (n)
  logical, optional,            intent(in)    :: preserve_mat

  interface
    subroutine X(gesvd) ( jobu, jobvt, m, n, a, lda, s, u, ldu, &
      vt, ldvt, work, lwork, rwork, info)
      import real64
      implicit none
      character(1),    intent(in)    :: jobu, jobvt
      integer,         intent(in)    :: m, n
      complex(real64), intent(inout) :: a, u, vt ! a(lda,n), u(ldu,m), vt(ldvt,n)
      complex(real64), intent(out)   :: work     ! work(lwork)
      integer,         intent(in)    :: lda, ldu, ldvt, lwork
      integer,         intent(out)   :: info
      real(real64),    intent(out)   :: s        ! s(min(m,n))
      real(real64),    intent(inout) :: rwork    ! rwork(5*min(m,n))
    end subroutine X(gesvd)
  end interface

  integer :: info, lwork
  complex(real64), allocatable :: work(:)
  real(real64), allocatable :: rwork(:)
  logical :: preserve_mat_
  complex(real64), allocatable :: tmp_a(:, :)

  PUSH_SUB(zsingular_value_decomp)

  ASSERT(n > 0)
  ASSERT(m > 0)
  ASSERT(not_in_openmp())

  ! double minimum lwork size to increase performance (see manpage)
  lwork = 2 * (2 * min(m, n) + max(m, n))

  SAFE_ALLOCATE(work(1:lwork)) ! query?
  SAFE_ALLOCATE(rwork(1:5*min(m, n)))

  preserve_mat_ = optional_default(preserve_mat, .true.)

  if (preserve_mat_) then
    SAFE_ALLOCATE(tmp_a(1:m, 1:n))
    tmp_a(1:m, 1:n) = a(1:m, 1:n)
  end if

  call X(gesvd)( 'A', 'A', m, n, a(1, 1), lead_dim(a), sg_values(1), u(1, 1), lead_dim(u), &
    vt(1, 1), lead_dim(vt), work(1), lwork, rwork(1), info)

  if (info /= 0) then
    write(message(1), '(3a, i5)') 'In zsingular_value_decomp, LAPACK ', TOSTRING(X(gesvd)), ' returned info = ', info
!    http://www.netlib.org/lapack/explore-3.1.1-html/zgesvd.f.html
!*  INFO    (output) INTEGER
!*          = 0:  successful exit.
!*          < 0:  if INFO = -i, the i-th argument had an illegal value.
!*          > 0:  if ZBDSQR did not converge, INFO specifies how many
!*                superdiagonals of an intermediate bidiagonal form B
!*                did not converge to zero. See the description of RWORK
!*                above for details.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(i5,a)') info, ' superdiagonal elements of an intermediate bidiagonal did not converge to zero.'
    end if
    call messages_fatal(2)
  end if

  if (preserve_mat_) then
    a(1:m, 1:n) = tmp_a(1:m, 1:n)
    SAFE_DEALLOCATE_A(tmp_a)
  end if

  SAFE_DEALLOCATE_A(rwork)
  SAFE_DEALLOCATE_A(work)
  POP_SUB(zsingular_value_decomp)
end subroutine zsingular_value_decomp

! TODO(Alex) Issue #1132.
! * Compare performance to zlalg_pseudo_inverse, and retain the fastest version.
! * If this is retained, refactor such that one uses the X macro
!> @brief Computes inverse of a complex MxN matrix, a, using the SVD decomposition
subroutine zsvd_inverse(m, n, a, threshold)
  integer,                      intent(in)    :: m, n
  complex(real64), contiguous,  intent(inout) :: a(:, :)    !< Input (m,n)
  !                                                           Output (n,m). a is replaced by its inverse transposed
  real(real64),    optional,    intent(in)    :: threshold

  complex(real64), allocatable :: u(:, :), vt(:, :)
  real(real64),    allocatable :: sg_values(:)
  complex(real64) :: tmp
  real(real64)    :: sg_inverse, threshold_
  integer         :: j, k, l, minmn

  ASSERT(n > 0)
  ASSERT(m > 0)
  ASSERT(not_in_openmp())
  minmn = min(m,n)

  SAFE_ALLOCATE( u(1:m, 1:m))
  SAFE_ALLOCATE(vt(1:n, 1:n))
  SAFE_ALLOCATE(sg_values(1:minmn))

  PUSH_SUB(zsvd_inverse)

  call zsingular_value_decomp(m, n, a, u, vt, sg_values)

  threshold_ = pseudoinverse_default_tolerance(m, n, sg_values)
  if (present(threshold)) threshold_ = threshold

  ! build inverse
  do j = 1, m
    do k = 1, n
      tmp = M_ZERO
      do l = 1, minmn
        if (sg_values(l) < threshold_) then
          write(message(1), '(a)') 'In zsvd_inverse: singular value below threshold.'
          call messages_warning(1)
          sg_inverse = M_ZERO
        else
          sg_inverse = M_ONE/sg_values(l)
        end if
        tmp = tmp + conjg(vt(l, k))*sg_inverse*conjg(u(j, l))
      end do
      a(j, k) = tmp
    end do
  end do

  SAFE_DEALLOCATE_A(sg_values)
  SAFE_DEALLOCATE_A(vt)
  SAFE_DEALLOCATE_A(u)
  POP_SUB(zsvd_inverse)
end subroutine zsvd_inverse
#endif
! End of REAL/COMPLEX implementations


!> @brief Invert a matrix with the Moore-Penrose pseudo-inverse.
!!
!! SVD is used to find the U, V and Sigma matrices:
!! \f[
!!    A = U \Sigma V^\dagger
!! \f]
!! Diagonal terms in Sigma <= \p threshold are set to zero, and the inverse
!! is constructed as:
!! \f[]
!!  A^{-1} \approx V \Sigma U^\dagger
!! \f]
subroutine X(lalg_pseudo_inverse)(a, threshold)
  R_TYPE,          allocatable, intent(inout) :: a(:, :) !< Input: (m, n)
  !                              Output, a contains its inverse with shape (n, m)
  real(real64),    optional,    intent(in)    :: threshold

  integer                      :: m, n, min_mn, i
  real(real64)                 :: thres
  R_TYPE,          allocatable :: u(:, :), vt(:, :)
  real(real64),    allocatable :: sg_values(:)

  R_TYPE,            parameter :: alpha = M_ONE
  R_TYPE,            parameter :: beta = M_ZERO

  PUSH_SUB(X(lalg_pseudo_inverse))

  m = size(a, 1)
  n = size(a, 2)
  min_mn = min(m, n)

  ASSERT(m > 0)
  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  SAFE_ALLOCATE( u(1:m, 1:m))
  SAFE_ALLOCATE(vt(1:n, 1:n))
  SAFE_ALLOCATE(sg_values(1:min_mn))

  ! A = U Sigma V^dagger
  call X(singular_value_decomp)(m, n, a, u, vt, sg_values)
  SAFE_DEALLOCATE_A(a)

  thres = optional_default(threshold, pseudoinverse_default_tolerance(m, n, sg_values))

  ! inv_A = V Sigma U^dagger == (V^dagger)^dagger Sigma (U)^dagger
  ! 1. Compute Sigma (U)^dagger
  ! 2. Compute V^dagger)^dagger [Sigma (U)^dagger]

  ! The whole diagonal of sg_values should element-multiply each column of U^T, however U is returned
  ! from SVD, so equivalently element-multiply each row of U to achieve this
  do i = 1, min_mn
    if (sg_values(i) > thres) then
      call lalg_scal(m, M_ONE / sg_values(i), u(:, i))
    else
      u(:, i) = M_ZERO
    endif
  enddo

  ! Inverse returned with correct shape
  SAFE_ALLOCATE(a(1:n, 1:m))
  ! TODO(Alex) It might be more efficient to do (u @ vt)
  ! then transpose it, rather than (vt).T @ u.T
  call lalg_gemm_cc(n, m, min_mn, alpha, vt, u, beta, a)

  SAFE_DEALLOCATE_A(sg_values)
  SAFE_DEALLOCATE_A(vt)
  SAFE_DEALLOCATE_A(u)

  POP_SUB(X(lalg_pseudo_inverse))

end subroutine X(lalg_pseudo_inverse)


!> Calculate the inverse of a real/complex upper triangular matrix (in
!! unpacked storage). (lower triangular would be a trivial variant of this)
subroutine X(upper_triangular_inverse)(n, a)
  integer,             intent(in)    :: n
  R_TYPE,  contiguous, intent(inout) :: a(:, :) !< (n,n)

  integer :: info

  interface
    subroutine X(trtri)(uplo, diag, n, a, lda, info)
      import real64
      implicit none
      character(1), intent(in)    :: uplo
      character(1), intent(in)    :: diag
      integer,      intent(in)    :: n
      R_TYPE,       intent(inout) :: a
      integer,      intent(in)    :: lda
      integer,      intent(out)   :: info
    end subroutine X(trtri)
  end interface

  PUSH_SUB(X(upper_triangular_inverse))

  ASSERT(n > 0)
  ASSERT(not_in_openmp())

  call X(trtri)('U', 'N', n, a(1, 1), lead_dim(a), info)

  if (info /= 0) then
    write(message(1), '(5a,i5)') &
      'In ', TOSTRING(Xinvert_upper_triangular), ', LAPACK ', TOSTRING(X(trtri)), ' returned error message ', info
!http://www.netlib.org/lapack/explore-3.1.1-html/dtrtri.f.html
!*  INFO    (output) INTEGER
!*          = 0: successful exit
!*          < 0: if INFO = -i, the i-th argument had an illegal value
!*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
!*               matrix is singular and its inverse can not be computed.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(a,i5,a)') 'Diagonal element ', info, ' is 0; matrix is singular.'
    end if
    call messages_fatal(2)
  end if

  POP_SUB(X(upper_triangular_inverse))
end subroutine X(upper_triangular_inverse)



subroutine X(least_squares_vec)(nn, aa, bb, xx, preserve_mat)
  integer,             intent(in)            :: nn
  R_TYPE,  contiguous, intent(inout), target :: aa(:, :)
  R_TYPE,  contiguous, intent(in)            :: bb(:)
  R_TYPE,  contiguous, intent(out)           :: xx(:)
  logical,             intent(in)            :: preserve_mat

  R_TYPE :: dlwork
  R_TYPE, allocatable :: work(:)
  integer :: rank, info
  real(real64), allocatable :: ss(:)
#ifndef R_TREAL
  real(real64), allocatable :: rwork(:)
#endif
  R_TYPE, pointer, contiguous :: tmp_aa(:, :)

  PUSH_SUB(X(least_squares_vec))

  ASSERT(not_in_openmp())

  if (preserve_mat) then
    SAFE_ALLOCATE(tmp_aa(1:nn, 1:nn))
    tmp_aa(1:nn, 1:nn) = aa(1:nn, 1:nn)
  else
    tmp_aa => aa
  end if

  xx(1:nn) = bb(1:nn)

  SAFE_ALLOCATE(ss(1:nn))

#ifdef R_TREAL
  call lapack_gelss(nn, nn, 1, tmp_aa(1, 1), lead_dim(tmp_aa), xx(1), nn, ss(1), -1.0_real64, rank, dlwork, -1, info)

  SAFE_ALLOCATE(work(1:int(dlwork)))

  call lapack_gelss(nn, nn, 1, tmp_aa(1, 1), lead_dim(tmp_aa), xx(1), nn, ss(1), -1.0_real64, rank, work(1), int(dlwork), info)
#else
  SAFE_ALLOCATE(rwork(1:5*nn))
  call lapack_gelss(nn, nn, 1, tmp_aa(1, 1), lead_dim(tmp_aa), xx(1), nn, ss(1), -1.0_real64, rank, dlwork, -1, rwork(1), info)

  SAFE_ALLOCATE(work(1:int(dlwork)))

  call lapack_gelss(nn, nn, 1, tmp_aa(1, 1), lead_dim(tmp_aa), xx(1), nn, ss(1), -1.0_real64, rank, work(1), &
    int(dlwork), rwork(1), info)
  SAFE_DEALLOCATE_A(rwork)
#endif

  SAFE_DEALLOCATE_A(ss)
  if (preserve_mat) then
    SAFE_DEALLOCATE_P(tmp_aa)
  end if

  if (info /= 0) then
    write(message(1), '(5a,i5)') &
      'In ', TOSTRING(X(lalg_least_squares_vec)), ', LAPACK ', TOSTRING(X(gelss)), ' returned error mess  age ', info
    !https://www.netlib.org/lapack/lapack-3.1.1/html/zgelss.f.html
    !*  INFO    (output) INTEGER
    !*          = 0:  successful exit
    !*          < 0:  if INFO = -i, the i-th argument had an illegal value.
    !*          > 0:  the algorithm for computing the SVD failed to converge;
    !*                if INFO = i, i off-diagonal elements of an intermediate
    !*                bidiagonal form did not converge to zero.
    if (info < 0) then
      write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
    else
      write(message(2), '(a,i5,a)') 'Off-diagonal element ', info, ' of an intermediate bidiagonal form did not converge to zero.'
    end if
    call messages_fatal(2)
  end if

  POP_SUB(X(least_squares_vec))

end subroutine X(least_squares_vec)

! ---------------------------------------------------------
!> Computes all the eigenvalues and the eigenvectors of a real symmetric or
!! complex Hermitian eigenproblem in parallel using ScaLAPACK or ELPA on all
!! processors
!! n: dimension of matrix
!! a: input matrix, on exit: contains eigenvectors
!! e: eigenvalues
subroutine X(eigensolve_parallel)(n, a, e, bof, err_code)
  implicit none
  integer,                  intent(in)    :: n
  R_TYPE,       contiguous, intent(inout) :: a(:, :)   !< (n,n)
  real(real64), contiguous, intent(out)   :: e(:)     !< (n)
  logical,      optional,   intent(inout) :: bof      !< Bomb on failure.
  integer,      optional,   intent(out)   :: err_code

#ifdef HAVE_SCALAPACK
  integer :: info, ii
  integer :: np, np_rows, np_cols, block_size
  type(blacs_proc_grid_t) :: proc_grid
  integer :: desc(BLACS_DLEN)
  integer :: nb_rows, nb_cols
  R_TYPE, allocatable :: b(:, :), eigenvectors(:, :)
#ifdef HAVE_ELPA
  class(elpa_t), pointer :: elpa
#else
  R_TYPE, allocatable :: work(:)
  R_TYPE :: worksize
#ifdef R_TCOMPLEX
  R_TYPE, allocatable :: rwork(:)
  R_TYPE :: rworksize
#endif
#endif
#endif


#ifndef HAVE_SCALAPACK
  write(message(1), '(a)') 'Called eigensolve_parallel but compiled without ScaLAPACK support.'
  write(message(2), '(a)') 'Using non-parallel solver as fallback.'
  call messages_info(2)
  call X(eigensolve)(n, a, e, bof, err_code)
#else

  PUSH_SUB(X(eigensolve_parallel))
  call profiling_in("DENSE_EIGENSOLVER_PARALLEL")

  ASSERT(n > 0)

  ! structure of function:
  ! 1. choose processor distribution, BLACS grid layout
  ! 2. initialize and allocate distributed matrices, compute scalapack descriptors
  ! 3. copy entries to distributed matrix
  ! 4. eigensolver settings (allocate workspace)
  ! 5. call eigensolver
  ! 6. copy eigenvector entries back from distributed matrix
  ! 7. deallocate and finalize unused matrices/structures


  ! 1. choose processor distribution, BLACS grid layout
  ! processor layout follows recommendations in the ScaLAPACK user guide,
  ! Section 5.3.1: linear for sizes <9; otherwise as square as possible
  np = mpi_world%size
  if (np < 9) then
    np_rows = 1
    np_cols = np
  else
    do ii = floor(sqrt(real(np))), 2, -1
      if (mod(np, ii) == 0) then
        np_rows = ii
        exit
      end if
    end do
    np_cols = np / np_rows
  end if

  ! recommended block size: 64, take smaller value for smaller matrices
  block_size = min(64, n/np_cols)

  call blacs_proc_grid_init(proc_grid, mpi_world, procdim = (/np_rows, np_cols/))

  ! 2. initialize and allocate distributed matrices, compute scalapack descriptors
  ! get size of local matrix b
  nb_rows = max(1, numroc(n, block_size, proc_grid%myrow, 0, proc_grid%nprow))
  nb_cols = max(1, numroc(n, block_size, proc_grid%mycol, 0, proc_grid%npcol))

  ! get ScaLAPACK descriptor
  call descinit(desc(1), n, n, block_size, block_size, 0, 0, proc_grid%context, &
    nb_rows, info)

  SAFE_ALLOCATE(b(nb_rows, nb_cols))
  SAFE_ALLOCATE(eigenvectors(nb_rows, nb_cols))

  ! 3. copy entries to distributed matrix
  ! assumes (0,0) as start of processor grid (also needed for ELPA)
  ! use pXlacp3 helper function from ScaLAPACK to distribute the matrix a
  call pX(lacp3)(m=n, i=1, a=b(1, 1), desca=desc(1), b=a(1, 1), ldb=n, ii=0, jj=0, rev=1)

#ifdef HAVE_ELPA
  ! 4. eigensolver settings (allocate workspace)
  if (elpa_init(20170403) /= elpa_ok) then
    write(message(1),'(a)') "ELPA API version not supported"
    call messages_fatal(1)
  end if
  elpa => elpa_allocate()

  ! set parameters describing the matrix
  call elpa%set("na", n, info)
  call elpa%set("nev", n, info)
  call elpa%set("local_nrows", nb_rows, info)
  call elpa%set("local_ncols", nb_cols, info)
  call elpa%set("nblk", block_size, info)
  call elpa%set("mpi_comm_parent", mpi_world%comm%MPI_VAL, info)
  call elpa%set("process_row", proc_grid%myrow, info)
  call elpa%set("process_col", proc_grid%mycol, info)

  info = elpa%setup()

  call elpa%set("solver", elpa_solver_2stage, info)

  ! 5. call eigensolver
  call elpa%eigenvectors(b, e, eigenvectors, info)

  ! error handling
  if (info /= elpa_ok) then
    write(message(1),'(a,i6,a,a)') "Error in ELPA, code: ", info, ", message: ", &
      elpa_strerr(info)
    call messages_fatal(1)
  end if

  call elpa_deallocate(elpa)
  call elpa_uninit()

#else
  ! use ScaLAPACK solver if ELPA not available
  ! 4. eigensolver settings (allocate workspace)
  ! workspace query
#ifdef R_TREAL
  call pdsyev(jobz='V', uplo='U', n=n, &
    a=b(1, 1), ia=1, ja=1, desca=desc(1), w=e(1), &
    z=eigenvectors(1, 1), iz=1, jz=1, descz=desc(1), &
    work=worksize, lwork=-1, info=info)
#else
  call pzheev(jobz='V', uplo='U', n=n, &
    a=b(1, 1), ia=1, ja=1, desca=desc(1), w=e(1), &
    z=eigenvectors(1, 1), iz=1, jz=1, descz=desc(1), &
    work=worksize, lwork=-1, rwork=rworksize, lrwork=-1, info=info)
#endif

  if (info /= 0) then
    write(message(1),'(a,i6)') "ScaLAPACK workspace query failure, error code=", info
    call messages_fatal(1)
  end if

  SAFE_ALLOCATE(work(1:int(worksize)))
#ifdef R_TCOMPLEX
  SAFE_ALLOCATE(rwork(1:int(rworksize)))
#endif

  ! 5. call eigensolver
#ifdef R_TREAL
  call pdsyev(jobz='V', uplo='U', n=n, &
    a=b(1, 1) , ia=1, ja=1, desca=desc(1), w=e(1), &
    z=eigenvectors(1, 1), iz=1, jz=1, descz=desc(1), &
    work=work(1), lwork=int(worksize), info=info)
#else
  if (n == 1) then
    ! pzheev from scalapack seems to return wrong eigenvectors for one state,
    ! so we do not call it in this case.
    e(1) = real(b(1, 1), real64)
    eigenvectors(1, 1) = R_TOTYPE(M_ONE)
  else
    call pzheev(jobz='V', uplo='U', n=n, &
      a=b(1, 1), ia=1, ja=1, desca=desc(1), w=e(1), &
      z=eigenvectors(1, 1), iz=1, jz=1, descz=desc(1), &
      work=work(1), lwork=int(worksize), &
      rwork=rwork(1), lrwork=int(rworksize), info=info)
  end if
#endif

  SAFE_DEALLOCATE_A(work)
#ifdef R_TCOMPLEX
  SAFE_DEALLOCATE_A(rwork)
#endif

  ! error handling
  if (info /= 0) then
    if (optional_default(bof, .true.)) then
#ifdef R_TCOMPLEX
      write(message(1),'(3a,i5)') trim(message(1)), TOSTRING(pX(heev)), &
        ' returned error message ', info
#else
      write(message(1),'(3a,i5)') trim(message(1)), TOSTRING(pX(syev)), &
        ' returned error message ', info
#endif
!*  INFO    (global output) INTEGER
!*          = 0:  successful exit
!*          < 0:  If the i-th argument is an array and the j-entry had
!*                an illegal value, then INFO = -(i*100+j), if the i-th
!*                argument is a scalar and had an illegal value, then
!*                INFO = -i.
!*          > 0:  If INFO = 1 through N, the i(th) eigenvalue did not
!*                converge in DSTEQR2 after a total of 30*N iterations.
!*                If INFO = N+1, then PDSYEV has detected heterogeneity
!*                by finding that eigenvalues were not identical across
!*                the process grid.  In this case, the accuracy of
!*                the results from PDSYEV cannot be guaranteed.
      if (info < 0) then
        write(message(2), '(a,i5,a)') 'Argument number ', -info, ' had an illegal value.'
      else if (info == n+1) then
        write(message(2), '(a)') 'Eigenvalues were not identical over the process grid.'
      else
        write(message(2), '(i5,a)') info, 'th eigenvalue did not converge.'
      end if
      call messages_fatal(2)
    else
      if (present(bof)) then
        bof = .true.
      end if
    end if
  else
    if (present(bof)) then
      bof = .false.
    end if
  end if
  if (present(err_code)) then
    err_code = info
  end if
#endif
!(HAVE_ELPA)

  ! 6. copy eigenvector entries back from distributed matrix
  ! use pXlacp3 helper function from ScaLAPACK to collect the eigenvectors on
  ! all cores
  call pX(lacp3)(m=n, i=1, a=eigenvectors(1, 1), desca=desc(1), b=a(1,1), ldb=n, ii=-1, jj=-1, rev=0)

  ! 7. deallocate and finalize unused matrices/structures
  SAFE_DEALLOCATE_A(b)
  SAFE_DEALLOCATE_A(eigenvectors)

  call blacs_proc_grid_end(proc_grid)

  call profiling_out("DENSE_EIGENSOLVER_PARALLEL")
  POP_SUB(X(eigensolve_parallel))
#endif
!(HAVE_SCALAPACK)
end subroutine X(eigensolve_parallel)

! ---------------------------------------------------------
!> An interface to different method to invert a matrix
!!
!! The possible methods are: svd, dir, sym, upp
!! For the SVD, an optional argument threshold an be specified
!! For the direct inverse, an optional output determinant can be obtained
!! For the symmetric matrix case, the optional argument uplo must be specified
subroutine X(inverse)(n, a, method, det, threshold, uplo)
  integer,                    intent(in)     :: n
  R_TYPE, contiguous,         intent(inout)  :: a(:, :) !< (n,n)
  character(len=3),           intent(in)     :: method
  R_TYPE,           optional, intent(out)    :: det       !< Determinant of the matrix. Direct inversion only
  real(real64),     optional, intent(in)     :: threshold !< Threshold for the SVD pseudoinverse
  character(len=1), optional, intent(in) :: uplo !< Is the symmetric matrix stored in the upper or lower part?

  PUSH_SUB(X(inverse))

  select case(method)
  case('svd','SVD') ! Moore-Penrose pseudoinverse
    ASSERT(.not. present(det))
    call X(svd_inverse)(n, n, a, threshold)
  case('dir','DIR')
    call X(direct_inverse)(n, a, det)
  case('sym', 'SYM')
    ASSERT(present(uplo))
    ASSERT(.not. present(det))
    call X(sym_inverse)(uplo, n, a)
  case('upp', 'UPP')
    ASSERT(.not. present(det))
    call X(upper_triangular_inverse)(n, a)
  case default
    write(message(1), *) 'Internal error: unrecognized option for X(inverse) : ', method
    call messages_fatal(1)
  end select

  POP_SUB(X(inverse))
end subroutine X(inverse)


!>@brief This routine calculates a function of a matrix by using an eigenvalue decomposition.
!!
!! For the hermitian case:
!! \f[
!!   A = V D V^T \implies fun(A) = V fun(D) V^T
!! \f]
!! and in general
!! \f[
!!   A = V D V^-1 \implies fun(A) = V fun(D) V^-1
!! \f]
!! where \f$V\f$ are the eigenvectors, and \f$D\f$ is a diagonal matrix containing the eigenvalues.
!!
!! In addition, this function can compute \f$fun(factor*A)\f$ for a complex factor.
!!
!! This is slow but it is simple to implement, and for the moment it
!! does not affect performance.
subroutine X(lalg_matrix_function)(n, factor, a, fun_a, fun, hermitian)
  integer,  intent(in)      :: n            !< dimension of the matrix A
  R_TYPE,   intent(in)      :: factor       !< complex factor
  R_TYPE,   intent(in)      :: a(:, :)      !< matrix A
  R_TYPE,   intent(inout)   :: fun_a(:, :)  !< fun(A)
  interface
    R_TYPE function fun(z)
      import real64
      R_TYPE, intent(in) :: z
    end
  end interface
  logical,           intent(in)      :: hermitian !< is the matrix hermitian?

  R_TYPE, allocatable :: evectors(:, :), zevalues(:)
  complex(real64), allocatable :: complex_evalues(:)
  real(real64), allocatable :: evalues(:)

  integer :: i

  PUSH_SUB(X(lalg_matrix_function))

  ASSERT(size(a, 1) >= n)

  SAFE_ALLOCATE(evectors(1:n, 1:n))

  if (hermitian) then
    SAFE_ALLOCATE(evalues(1:n))
    SAFE_ALLOCATE(zevalues(1:n))

    evectors(1:n, 1:n) = a(1:n, 1:n)

    call lalg_eigensolve(n, evectors, evalues)

    do i = 1, n
      zevalues(i) = fun(factor*evalues(i))
    end do

    do i = 1, n
#ifdef R_TREAL
      fun_a(1:n, i) = zevalues(1:n)*evectors(i, 1:n)
#else
      fun_a(1:n, i) = zevalues(1:n)*conjg(evectors(i, 1:n))
#endif
    end do

    fun_a(1:n, 1:n) = matmul(evectors(1:n, 1:n), fun_a(1:n, 1:n))

    SAFE_DEALLOCATE_A(evalues)
    SAFE_DEALLOCATE_A(zevalues)
  else
    SAFE_ALLOCATE(complex_evalues(1:n))

    evectors(1:n, 1:n) = a(1:n, 1:n)

    call X(eigensolve_nonh)(n, evectors, complex_evalues)

#ifdef R_TREAL
    message(1) = "Matrix function not implemented for real, non-symmetric matrices."
    call messages_fatal(1)
#else
    do i = 1, n
      complex_evalues(i) = fun(factor*complex_evalues(i))
    end do

    fun_a(1:n, 1:n) = evectors(1:n, 1:n)

    call lalg_inverse(n, evectors, 'dir')

    do i = 1, n
      evectors(1:n, i) = complex_evalues(1:n)*evectors(1:n, i)
    end do

    fun_a(1:n, 1:n) = matmul(fun_a(1:n, 1:n), evectors(1:n, 1:n))
#endif

    SAFE_DEALLOCATE_A(complex_evalues)
  end if

  SAFE_DEALLOCATE_A(evectors)

  POP_SUB(X(lalg_matrix_function))
end subroutine X(lalg_matrix_function)


!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End:
