      subroutine init_omp
c     Start-up hook for the threaded energy code.  In order it
c       * stops the run when the OpenMP runtime reports a maximum
c         thread count above max_fg_threads (OpenMP builds only),
c       * calls the five clear_* routines with max_fg_threads,
c       * stores the absolute values of itype and itel for residues
c         1..nres in abs_itype and abs_itel,
c       * zeroes maxiter, nextiter, fctmin, hguess and stpmax
c         (presumably owned by the minima and linmin modules),
c       * in message-passing builds adds nothing to the result but
c         records the elapsed wall time in time_omp_init.
#ifdef _OPENMP
      use omp_lib
#endif
      use minima
      use linmin
      implicit none
#ifdef MPI
      include 'mpif.h'
#endif
      include "DIMENSIONS"
      include "COMMON.CHAIN"
      include "COMMON.IOUNITS"
      include "COMMON.INTERACT"
      include "COMMON.TIME1"
      include "COMMON.OMP"
      double precision time00

#ifdef _OPENMP
c     The per-thread work arrays are sized with max_fg_threads at
c     compile time, so a larger team cannot be supported.
      write(*,*)'omp_get_max_threads =',omp_get_max_threads()
      if (omp_get_max_threads().gt.max_fg_threads) then
        write (iout,*)"ERROR: Max number of threads greater than ",
     &                "compile-time defined maximum:"
        write (iout,*)"  ",omp_get_max_threads(), ">", max_fg_threads
        write (iout,*)"  Increase max_fg_threads constant in DIMENSIONS"
        stop
      endif
#endif

#ifdef MPI
      time00=MPI_Wtime()
#endif

c     Reset the per-thread accumulators.
      call clear_eelec_values(max_fg_threads)
      call clear_eelec_arrays(max_fg_threads)
      call clear_escp_values(max_fg_threads)
      call clear_escp_arrays(max_fg_threads)
      call clear_egb_arrays(max_fg_threads)

c     Sign-free copies of the residue type tables.
      abs_itype(1:nres)=iabs(itype(1:nres))
      abs_itel(1:nres)=iabs(itel(1:nres))

c     Minimizer state (minima, then linmin).
      maxiter=0
      nextiter=0
      fctmin=0.0d0
      hguess=0.0d0
      stpmax=0.0d0

#ifdef MPI
      time_omp_init=MPI_Wtime()-time00
#endif
      return
      end

c-----------------------------------------------------------------------
      subroutine split_list_for_threads(from_ik,to_ik,
     &                                  first_blk,last_blk,
     &                                  first_j,last_j,
     &                                  max_threads,threads_used,
     &                                  from,to,
     &                                  i_list,i_list_size,
     &                                  block_list,block_list_size,
     &                                  interaction_limit,limited)
c     Cut the interactions described by i_list and block_list into
c     threads_used consecutive slices of (almost) equal size.
c
c     Input
c       max_threads       dimension of the six per-slice arrays
c       from,to           range of i_list entries (ikont) to split
c       i_list            for entry ikont the owned block_list
c                         entries are i_list(2,ikont-1)+1 up to
c                         i_list(2,ikont); i_list(1,:) is not read
c       block_list        first and last j of every block; a block
c                         counts last-first+1 interactions
c       interaction_limit when positive and the total number of
c                         interactions is below it, all work goes
c                         to one slice
c     In/out
c       threads_used      requested number of slices; forced to 1
c                         when the limit applies and clamped to
c                         1..max_threads otherwise
c     Output (entries 1..max_threads)
c       from_ik,to_ik     first and last ikont of the slice
c       first_blk,first_j block_list index and j where it begins
c       last_blk,last_j   block_list index and j where it ends
c                         (inclusive)
c       limited           .true. when the limit forced one slice
c
c     The first num_tails slices receive inters_per_thread+1
c     interactions, the others inters_per_thread.  Slices that end
c     up without work are returned as empty ranges (first > last).
      implicit none
#ifdef MPI
      include 'mpif.h'
#endif
      include 'COMMON.TIME1'
      integer max_threads,threads_used,from,to
      integer i_list_size,block_list_size
      integer from_ik(max_threads),to_ik(max_threads)
      integer first_blk(max_threads),last_blk(max_threads)
      integer first_j(max_threads),last_j(max_threads)
      integer i_list(2,0:i_list_size)
      integer block_list(2,block_list_size)
      integer*8 num_interactions,num_tails,inters_per_thread,work_size
      integer ikont,jblock,block_size,block_start,block_end
      integer thread,jstart,jend
      integer interaction_limit
      integer l_from_ik(0:max_threads+1),l_to_ik(0:max_threads+1)
      integer l_first_blk(0:max_threads+1),l_last_blk(0:max_threads+1)
      integer l_first_j(0:max_threads+1),l_last_j(0:max_threads+1)
      logical limited,started
      double precision time00
#ifdef MPI
      time00=MPI_Wtime()
#endif

      ! Every slice starts out empty.  Only slices that actually
      ! receive work are overwritten below, so without this the
      ! final copy would hand undefined values to the caller.
      l_from_ik=1
      l_to_ik=0
      l_first_blk=1
      l_last_blk=0
      l_first_j=1
      l_last_j=0

      ! Pass 1: count all interactions in the requested range.
      num_interactions=0
!$OMP PARALLEL DO DEFAULT(SHARED) REDUCTION(+:num_interactions)
!$OMP& PRIVATE(jstart,jend,jblock,block_size)
      do ikont=from,to
        jstart=i_list(2,ikont-1)+1
        jend=i_list(2,ikont)
        do jblock=jstart,jend
          ! NOTE(review): blocks whose first j is 0 are skipped here
          ! but not in pass 2; this presumably relies on such blocks
          ! having size 0 - confirm against the list builder.
          if(block_list(1,jblock).eq.0) cycle
          block_size=block_list(2,jblock)-block_list(1,jblock)+1
          num_interactions=num_interactions+block_size
        enddo
      enddo

      limited=.false.

      if((interaction_limit.gt.0).and.
     &   (num_interactions .lt. interaction_limit)) then
        threads_used=1
        limited=.true.
      endif

      ! Keep the slice count inside the scratch arrays and away from
      ! a division by zero.  Assigned only when out of range.
      if (threads_used.gt.max_threads) threads_used=max_threads
      if (threads_used.lt.1) threads_used=1

      inters_per_thread=num_interactions/threads_used
      num_tails=num_interactions - inters_per_thread*threads_used

      ! Pass 2: walk the blocks and close a slice whenever its quota
      ! (work_size) is used up.  Slice 0 is a dummy with quota 0, so
      ! the first non-empty block immediately opens slice 1; that is
      ! why the scratch arrays start at index 0.
      thread=0
      work_size=0
      started=.true.

      do ikont=from,to
        jstart=i_list(2,ikont-1)+1
        jend=i_list(2,ikont)
        do jblock=jstart,jend
          block_start=block_list(1,jblock)
          block_end=block_list(2,jblock)
          block_size=block_end-block_start+1
          if(.not.started) then
            ! The previous slice ended exactly on a block boundary;
            ! the current slice therefore begins with this block.
            l_from_ik(thread)=ikont
            l_first_blk(thread)=jblock
            l_first_j(thread)=block_start
            started=.true.
          endif
          do while (block_size.gt.0)
            if (block_size.lt.work_size) then
              ! Whole remainder of the block fits into the slice.
              work_size=work_size-block_size
              block_size=0

              l_to_ik(thread)=ikont
              l_last_blk(thread)=jblock
              l_last_j(thread)=block_end
            else
              ! The slice is completed inside (or exactly at the end
              ! of) this block.
              l_to_ik(thread)=ikont
              l_last_blk(thread)=jblock
              l_last_j(thread)=block_start+work_size-1

              block_size=block_size-work_size
              block_start=block_start+work_size

              thread=thread+1
              if(block_start.le.block_end) then
                ! Next slice begins in the rest of the same block.
                l_from_ik(thread)=ikont
                l_first_blk(thread)=jblock
                l_first_j(thread)=block_start
                started=.true.
              else
                started=.false.
              endif
              work_size=inters_per_thread
              if (thread.le.num_tails) work_size=work_size+1
            endif
          enddo
        enddo
      enddo

      ! Drop the dummy slice 0 and the overflow slot max_threads+1.
      from_ik(1:max_threads)=l_from_ik(1:max_threads)
      to_ik(1:max_threads)=l_to_ik(1:max_threads)
      first_blk(1:max_threads)=l_first_blk(1:max_threads)
      last_blk(1:max_threads)=l_last_blk(1:max_threads)
      first_j(1:max_threads)=l_first_j(1:max_threads)
      last_j(1:max_threads)=l_last_j(1:max_threads)
#ifdef MPI
      time_split_lists=time_split_lists+MPI_Wtime()-time00
#endif
      end
c-----------------------------------------------------------------------
      subroutine split_work_for_threads(from,to,low,high,
     &                                  my_thread,threads_used)
c     Return in from..to the part of the index range low..high that
c     belongs to worker my_thread, where workers are numbered
c     1..threads_used.  Every worker gets (high-low+1)/threads_used
c     indices and the remainder is handed out one index each to the
c     lowest-numbered workers.  A worker without work receives an
c     empty range (to = from-1).
      implicit none
      integer from,to,low,high,my_thread,threads_used
      integer chunk,extra,first,last

      chunk=(high-low+1)/threads_used
      extra=(high-low+1)-chunk*threads_used

      ! Workers before this one shift the start by one index for
      ! each remainder element they absorbed.
      first=low+chunk*(my_thread-1)+min(my_thread-1,extra)
      last=low+chunk*my_thread-1+min(my_thread,extra)

      from=first
      to=last
      end
c-----------------------------------------------------------------------
c computes the suffix sum
c   do i=range_hi,range_lo,-1
c     output(1:3,i)=output(1:3,i+1)+input(1:3,i+1)
c   enddo
c output(1:3,range_hi+1) is read as the starting value, so input and
c output must have one element more than range_hi points to.
c
c The range is cut into chunks by split_work_for_threads.  Pass 1
c builds a local suffix sum inside every chunk, a short serial loop
c accumulates the chunk totals from the right, and pass 2 adds to
c every chunk the total of all chunks to its right.
c
c The chunks are distributed with a worksharing loop instead of
c being keyed on the thread number, so the result stays correct
c when the runtime grants fewer threads than requested (or when the
c file is compiled without OpenMP) and omp_lib is not needed.
      subroutine suffix_sum3(output,input,range_lo,range_hi,
     &                       threads_used)
      implicit none
      include"DIMENSIONS"
      integer i,n
      integer range_lo,range_hi,from,to
      double precision output(3,range_lo:range_hi+1)
      double precision input(3,range_lo:range_hi+1)
      double precision acc(3)
      integer my_thread,threads_used
      ! Only rows 1:3 of th_sum are used; the leading dimension of 8
      ! is presumably padding between per-chunk entries.
      double precision th_sum(8,0:max_fg_threads+1)
      integer th_from(max_fg_threads+1),th_to(max_fg_threads+1)

      ! n is the number of chunks, kept inside the bounds of th_sum
      ! and away from a division by zero in the splitter.
      n=max(1,min(threads_used,max_fg_threads))

      th_sum(:,0)=0.0d0
      th_sum(:,n+1)=0.0d0

      ! Pass 1: local suffix sums; my_thread is the chunk index.
!$OMP PARALLEL DO DEFAULT(SHARED) NUM_THREADS(n) SCHEDULE(STATIC,1)
!$OMP& PRIVATE(from,to,acc,i)
      do my_thread=1,n
        call split_work_for_threads(from,to,range_lo,range_hi,
     &                              my_thread,n)
        th_from(my_thread)=from
        th_to(my_thread)=to

        ! Only the rightmost chunk sees the caller supplied start.
        if(my_thread.eq.n) then
          acc=output(:,range_hi+1)
        else
          acc=0.0d0
        endif

        do i=to,from,-1
          acc=acc+input(:,i+1)
          output(:,i)=acc
        enddo
        th_sum(1:3,my_thread)=acc
      enddo
!$OMP END PARALLEL DO

      ! th_sum(:,k) becomes the total of chunks k..n.
      do i=n,1,-1
        th_sum(1:3,i)=th_sum(1:3,i)+th_sum(1:3,i+1)
      enddo

      ! Pass 2: add the total of everything to the right of a chunk.
!$OMP PARALLEL DO DEFAULT(SHARED) NUM_THREADS(n) SCHEDULE(STATIC,1)
!$OMP& PRIVATE(from,to,acc,i)
      do my_thread=1,n
        from=th_from(my_thread)
        to=th_to(my_thread)

        acc=th_sum(1:3,my_thread+1)
        do i=from,to
          output(:,i)=output(:,i)+acc
        enddo
      enddo
!$OMP END PARALLEL DO
      end
c-----------------------------------------------------------------------
c computes the inclusive prefix sum in place: on return
c   io(i) = sum of the original io(range_lo:i)
c for every i in range_lo..range_hi.
c
c The range is cut into chunks by split_work_for_threads.  Pass 1
c builds a local prefix sum inside every chunk, a short serial loop
c accumulates the chunk totals from the left, and pass 2 adds to
c every chunk the total of all chunks to its left.
c
c The chunks are distributed with a worksharing loop instead of
c being keyed on the thread number, so the result stays correct
c when the runtime grants fewer threads than requested (or when the
c file is compiled without OpenMP) and omp_lib is not needed.
      subroutine iprefix_sum(io,range_lo,range_hi,threads_used)
      implicit none
      include 'DIMENSIONS'
      integer i,n
      integer range_lo,range_hi,from,to
      integer io(range_lo:range_hi)
      integer acc
      integer my_thread,threads_used
      ! Only row 1 of th_sum is used; the leading dimension of 16 is
      ! presumably padding between per-chunk entries.
      integer th_sum(16,0:max_fg_threads)
      integer th_from(max_fg_threads+1),th_to(max_fg_threads+1)

      ! n is the number of chunks, kept inside the bounds of th_sum
      ! and away from a division by zero in the splitter.
      n=max(1,min(threads_used,max_fg_threads))

      th_sum(1,0)=0

      ! Pass 1: local prefix sums; my_thread is the chunk index.
!$OMP PARALLEL DO DEFAULT(SHARED) NUM_THREADS(n) SCHEDULE(STATIC,1)
!$OMP& PRIVATE(from,to,acc,i)
      do my_thread=1,n
        call split_work_for_threads(from,to,range_lo,range_hi,
     &                              my_thread,n)
        th_from(my_thread)=from
        th_to(my_thread)=to

        acc=0
        do i=from,to
          acc=acc+io(i)
          io(i)=acc
        enddo
        th_sum(1,my_thread)=acc
      enddo
!$OMP END PARALLEL DO

      ! th_sum(1,k) becomes the total of chunks 1..k.
      do i=1,n
        th_sum(1,i)=th_sum(1,i)+th_sum(1,i-1)
      enddo

      ! Pass 2: add the total of everything to the left of a chunk.
!$OMP PARALLEL DO DEFAULT(SHARED) NUM_THREADS(n) SCHEDULE(STATIC,1)
!$OMP& PRIVATE(from,to,acc,i)
      do my_thread=1,n
        from=th_from(my_thread)
        to=th_to(my_thread)

        acc=th_sum(1,my_thread-1)
        do i=from,to
          io(i)=io(i)+acc
        enddo
      enddo
!$OMP END PARALLEL DO
      end

c-----------------------------------------------------------------------
      subroutine calculate_derived_params
c     Build lookup tables with swapped indices for the type pairs
c     1..ntyp x 1..ntyp:
c       aa_aq_j, bb_aq_j, sigma_j, chi_j  transposed copies of
c                                         aa_aq, bb_aq, sigma, chi
c       aa_lipaq, bb_lipaq                transposed differences
c                                         aa_lip-aa_aq, bb_lip-bb_aq
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.INTERACT'
      include 'COMMON.EGB'

      aa_aq_j(1:ntyp,1:ntyp)=transpose(aa_aq(1:ntyp,1:ntyp))
      bb_aq_j(1:ntyp,1:ntyp)=transpose(bb_aq(1:ntyp,1:ntyp))
      sigma_j(1:ntyp,1:ntyp)=transpose(sigma(1:ntyp,1:ntyp))
      chi_j(1:ntyp,1:ntyp)=transpose(chi(1:ntyp,1:ntyp))

      aa_lipaq(1:ntyp,1:ntyp)=
     &  transpose(aa_lip(1:ntyp,1:ntyp)-aa_aq(1:ntyp,1:ntyp))
      bb_lipaq(1:ntyp,1:ntyp)=
     &  transpose(bb_lip(1:ntyp,1:ntyp)-bb_aq(1:ntyp,1:ntyp))
      end

c-----------------------------------------------------------------------
      double precision function debug_div(a,b)
c     Guarded division: returns a/b, or zero when b is exactly zero.
      implicit none
      double precision a,b
      debug_div=0.0d0
      if (b.ne.0.0d0) debug_div=a/b
      return
      end
c-------------------------------------------------------------------------
      subroutine dist_sq2(a,b,d)
c     Return in d the squared length of the vector that wrapvec
c     produces from the difference a-b (presumably the periodic
c     image of the difference - see wrapvec).
      implicit none
      double precision a(3),b(3),d
      double precision diff(3),wrapped(3)
      diff=a-b
      call wrapvec(diff,wrapped)
      d=dot_product(wrapped,wrapped)
      end

c-----------------------------------------------------------------------
      subroutine check_list_needed(itime_mat,imatupdate,update_lists)
c     Decide whether the interaction lists have to be rebuilt.
c
c     update_lists is returned .true. when itime_mat is 0, or when
c     the largest squared distance (as given by dist_sq2) between a
c     stored reference position and the current position exceeds
c     0.25*r_buff_list**2.  The positions compared are
c       shift_base_c(:,1:2*nres)  against c_tobox(:,1:2*nres)
c       shift_base_cp(:,1:nres-1) against cp(:,1:nres-1)
c     Whenever update_lists is .true. the current positions become
c     the new reference.  imatupdate is not used.
      implicit none
#ifdef MPI
      include 'mpif.h'
#endif
      include 'DIMENSIONS'
      include 'COMMON.CHAIN'
      include 'COMMON.IOUNITS'
      include 'COMMON.TIME1'
      include 'COMMON.OMP'
      integer itime_mat,imatupdate,i
      logical update_lists
      double precision maxd1,maxd3,d
      double precision time00

#ifdef MPI
      time00=MPI_Wtime()
#endif

      ! The very first step always builds the lists.
      update_lists=(itime_mat.eq.0)

      if(.not.update_lists) then
        maxd1=0.0d0
        maxd3=0.0d0
        ! Columns 1..nres and nres+1..2*nres of the c arrays are
        ! scanned in one sweep.
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(d) REDUCTION(max:maxd1)
        do i=1,2*nres
          call dist_sq2(shift_base_c(:,i),c_tobox(:,i),d)
          maxd1=max(d,maxd1)
        enddo
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(d) REDUCTION(max:maxd3)
        do i=1,nres-1
          call dist_sq2(shift_base_cp(:,i),cp(:,i),d)
          maxd3=max(d,maxd3)
        enddo

        ! Squared form of: largest shift > r_buff_list/2.
        update_lists=max(maxd1,maxd3).gt.
     &               (0.25d0*r_buff_list*r_buff_list)
      endif

      if(update_lists) then
        ! Remember the positions the new lists are built from.
!$OMP PARALLEL DO DEFAULT(SHARED)
        do i=1,2*nres
          shift_base_c(:,i)=c_tobox(:,i)
        enddo
!$OMP PARALLEL DO DEFAULT(SHARED)
        do i=1,nres-1
          shift_base_cp(:,i)=cp(:,i)
        enddo
      endif
#ifdef MPI
      time_check_rebuild_lists=time_check_rebuild_lists
     &                        +MPI_Wtime()-time00
#endif

      end
