#include"simdstat"

      subroutine eelec(ees,evdw1,eel_loc,eello_turn3,eello_turn4)
C
C This subroutine calculates the average interaction energy and its gradient
C in the virtual-bond vectors between non-adjacent peptide groups, based on
C the potential described in Liwo et al., Protein Sci., 1993, 2, 1715.
C The potential depends both on the distance of peptide-group centers and on
C the orientation of the CA-CA virtual bonds.
C
C Arguments (all double precision; every one of them is reset to zero
C here before any pair work is started):
C   ees, evdw1, eel_loc       passed to sum_eelec_values at the end
C   eello_turn3, eello_turn4  passed to calculate_turns
C
C Order of work in this driver:
C   1. if icheckgrad.eq.1, rebuild dc_norm from dc for i=1..nres-1
C   2. call set_matrices when at least one of the weights wel_loc,
C      wcorr4, wcorr5, wcorr6, wturn3, wturn4, wturn6 is positive
C   3. process the work list eelecij_work_turn3 (third argument 2)
C   4. process the work list eelecij_work_turn4 (third argument 3)
C   5. call calculate_turns
C   6. call eelecij2 (or eelecij2_nowl) for the remaining pairs
C   7. call sum_eelec_values and sum_eelec_arrays
C In steps 3, 4 and 6 the *_nowl variant is selected when wel_loc is
C exactly zero.
C
C NOTE(review): threads_used is never assigned in this routine; it is
C presumably an output of eelecij/eelecij2 and their _nowl variants.
C Confirm against those routines.
C NOTE(review): the work lists and their sizes (eelecij_work_turn3,
C eelecij_work_size_turn3, ...) are not filled here any more (the loops
C that did so are commented out below); presumably they are prepared
C elsewhere and arrive through an include file. Confirm.
C
      implicit none
#ifdef MPI
      include 'mpif.h'
#else
      double precision tcpu
#endif
      include 'DIMENSIONS'
      include 'COMMON.CONTROL'
      include 'COMMON.SETUP'
      include 'COMMON.IOUNITS'
      include 'COMMON.GEO'
      include 'COMMON.VAR'
      include 'COMMON.LOCAL'
      include 'COMMON.CHAIN'
      include 'COMMON.DERIV'
      include 'COMMON.INTERACT'
      include 'COMMON.CORRMAT'
      include 'COMMON.TORSION'
      include 'COMMON.VECTORS'
      include 'COMMON.FFIELD'
      include 'COMMON.TIME1'
      include 'COMMON.SPLITELE'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      double precision ees,evdw1,eel_loc,eello_turn3,eello_turn4
      double precision fac,time01
      double precision scalar
C j is declared but only referenced by the commented-out loops below.
      integer i,j,k,threads_used,max_threads_used
      !integer doturn(maxres)
      !common /eelectmpcommon/ doturn
C scal_el is the scale factor handed to the i,i+2 pass only:
C 1.0 when compiled with MOMENT, 0.5 otherwise.
#ifdef MOMENT
      double precision scal_el /1.0d0/
#else
      double precision scal_el /0.5d0/

#endif
C 12/13/98
C On the 13th of December of the memorable year...
cd      write(iout,*) 'In EELEC'
cd      do i=1,nloctyp
cd        write(iout,*) 'Type',i
cd        write(iout,*) 'B1',B1(:,i)
cd        write(iout,*) 'B2',B2(:,i)
cd        write(iout,*) 'CC',CC(:,:,i)
cd        write(iout,*) 'DD',DD(:,:,i)
cd        write(iout,*) 'EE',EE(:,:,i)
cd      enddo
cd      call check_vecgrad
cd      stop
C When gradient checking is switched on, recompute the normalised
C vectors dc_norm(:,i) = dc(:,i)/|dc(:,i)| for i=1..nres-1.
      if (icheckgrad.eq.1) then
        do i=1,nres-1
          fac=1.0d0/dsqrt(scalar(dc(1,i),dc(1,i)))
          do k=1,3
            dc_norm(k,i)=dc(k,i)*fac
          enddo
c          write (iout,*) 'i',i,' fac',fac
        enddo
      endif
C Running maximum of the thread counts reported by the passes below;
C it is what the final summation routines receive.
      max_threads_used=1
C set_matrices is needed only if at least one of these weights is
C positive; with TIMING defined the time spent in it is accumulated
C in time_mat.
      if (wel_loc.gt.0.0d0 .or. wcorr4.gt.0.0d0 .or. wcorr5.gt.0.0d0
     &    .or. wcorr6.gt.0.0d0 .or. wturn3.gt.0.0d0 .or.
     &    wturn4.gt.0.0d0 .or. wturn6.gt.0.0d0) then
c        call vec_and_deriv
#ifdef TIMING
#ifdef MPI
        time01=MPI_Wtime()
#else
        time01=tcpu()
#endif
#endif
        call set_matrices
c       call calculate_mugrads
c       call calculate_muuzs
#ifdef TIMING
#ifdef MPI
        time_mat=time_mat+MPI_Wtime()-time01
#else
        time_mat=time_mat+tcpu()-time01
#endif
#endif
      endif
cd      do i=1,nres-1
cd        write (iout,*) 'i=',i
cd        do k=1,3
cd        write (iout,'(i5,2f10.5)') k,uy(k,i),uz(k,i)
cd        enddo
cd        do k=1,3
cd          write (iout,'(f10.5,2x,3f10.5,2x,3f10.5)')
cd     &     uz(k,i),(uzgrad(k,l,1,i),l=1,3),(uzgrad(k,l,2,i),l=1,3)
cd        enddo
cd      enddo
C Reset the timer/accumulator t_eelecij and all energy outputs.
      t_eelecij=0.0d0
      ees=0.0D0
      evdw1=0.0D0
      eel_loc=0.0d0
      eello_turn3=0.0d0
      eello_turn4=0.0d0
cd      print '(a)','Enter EELEC'
cd      write (iout,*) 'iatel_s=',iatel_s,' iatel_e=',iatel_e
      !doturn(1:nres)=0
      !eelecij_doturn(1:nres)=0
c
c
c 9/27/08 AL Split the interaction loop to ensure load balancing of turn terms
C
C Loop over i,i+2 and i,i+3 pairs of the peptide groups
C
C 14/01/2014 TURN3,TURN4 does not go under periodic boundary condition
C The commented-out loop below is the former in-place construction of
C the i,i+2 work list; it is kept for reference only.
!      k=0
!      do i=iturn3_start,iturn3_end
!c        if (i.le.1) cycle
!C        write(iout,*) "tu jest i",i
!        if (itype(i).eq.ntyp1 .or. itype(i+1).eq.ntyp1
!     &  .or. itype(i+2).eq.ntyp1
!     &  .or. itype(i+3).eq.ntyp1) cycle
!        k=k+1
!        !eelecij_work(k).i=i
!        !eelecij_work(k).j=i+2
!        eelecij_work_turn3(k).i=i
!        eelecij_work_turn3(k).j=i+2
!        if (wturn3.gt.0.0d0) doturn(i)=or(doturn(i),1)
!      enddo

      !eelecij_work_size_turn3=k
C i,i+2 pass: third argument 2, scaled by scal_el.
      if (wel_loc.ne.0.0d0) then
        call eelecij(eelecij_work_turn3,eelecij_work_size_turn3,2,
     &               threads_used,scal_el)
      else
        call eelecij_nowl(eelecij_work_turn3,eelecij_work_size_turn3,2,
     &                    threads_used,scal_el)
      endif
      max_threads_used=max0(threads_used,max_threads_used)

C The commented-out loop below is the former in-place construction of
C the i,i+3 work list; it is kept for reference only.
!      k=0
!      do i=iturn4_start,iturn4_end
!        if (i.lt.1) cycle
!        if (itype(i).eq.ntyp1 .or. itype(i+1).eq.ntyp1
!     &    .or. itype(i+3).eq.ntyp1
!     &    .or. itype(i+4).eq.ntyp1) cycle
!c        write(iout,*) "JESTEM W PETLI"
!        k=k+1
!        !eelecij_work(k).i=i
!        !eelecij_work(k).j=i+3
!        eelecij_work_turn4(k).i=i
!        eelecij_work_turn4(k).j=i+3
!        if (wturn4.gt.0.0d0 .and. itype(i+2).ne.ntyp1)
!    &    doturn(i)=or(doturn(i),2)
!      enddo   ! i

      !eelecij_work_size_turn4=k
C i,i+3 pass: third argument 3, unit scale factor.
      if (wel_loc.ne.0.0d0) then
        call eelecij(eelecij_work_turn4,eelecij_work_size_turn4,3,
     &               threads_used,1.0d0)
      else
        call eelecij_nowl(eelecij_work_turn4,eelecij_work_size_turn4,3,
     &                    threads_used,1.0d0)
      endif
      max_threads_used=max0(threads_used,max_threads_used)

C Turn energies are evaluated after both turn passes and before the
C general pair pass.
      call calculate_turns(eelecij_doturn,eello_turn3,eello_turn4)

C Remaining peptide-group pairs.
      if (wel_loc.ne.0.0d0) then
        call eelecij2(threads_used)
      else
        call eelecij2_nowl(threads_used)
      endif
      max_threads_used=max0(threads_used,max_threads_used)

C Final summation; both routines receive the largest thread count
C seen in the passes above.
      call sum_eelec_values(ees,evdw1,eel_loc,max_threads_used)
      call sum_eelec_arrays(max_threads_used)
c      write (iout,*) "Number of loop steps in EELEC:",ind
cd      do i=1,nres
cd        write (iout,'(i3,3f10.5,5x,3f10.5)') 
cd     &     i,(gel_loc(k,i),k=1,3),gel_loc_loc(i)
cd      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine scalevec(vec,sc,output)
C Multiply a 3-component vector by a scalar.
C   vec    (in)   vector to be scaled
C   sc     (in)   scale factor
C   output (out)  sc*vec, evaluated component by component
      implicit none
      double precision vec(3),sc,output(3)
      integer k
      do k=1,3
        output(k)=sc*vec(k)
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine offsetvecs(a,b,delta)
C Shift two 3-component vectors in opposite directions by the same
C displacement, in place.
C   a     (in/out)  receives a+delta
C   b     (in/out)  receives b-delta
C   delta (in)      displacement applied to both vectors
      implicit none
      double precision a(3),b(3),delta(3)
      integer i
      do i=1,3
        a(i)=a(i)+delta(i)
        b(i)=b(i)-delta(i)
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine matvec3(mat,vec,output)
C Product of a 3x3 matrix with a 3-component vector.
C   mat    (in)   3x3 matrix
C   vec    (in)   vector multiplied from the right
C   output (out)  mat*vec; row irow is the sum over columns 1,2,3 of
C                 mat(irow,column)*vec(column), added in that order
      implicit none
      double precision mat(3,3),vec(3),output(3)
      integer irow
      do irow=1,3
        output(irow)=mat(irow,1)*vec(1)+mat(irow,2)*vec(2)
     &              +mat(irow,3)*vec(3)
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine unormderiv_identity(erij,rmij,erder)
C Fill the symmetric 3x3 matrix
C     erder(m,n) = (delta(m,n) - erij(m)*erij(n)) * rmij
C where delta is the Kronecker delta.
C   erij  (in)   3-component vector (presumably a unit vector; the
C                routine itself does not require it)
C   rmij  (in)   common scale factor (presumably an inverse distance)
C   erder (out)  resulting matrix
C Only the diagonal and the lower triangle are evaluated; the upper
C triangle is copied from the lower one.
      implicit none
      double precision erij(3),rmij,erder(3,3)
      integer m,n
      do n=1,3
        erder(n,n)=(1.0d0-erij(n)*erij(n))*rmij
        do m=n+1,3
          erder(m,n)=-erij(m)*erij(n)*rmij
          erder(n,m)=erder(m,n)
        enddo
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine calculate_aggij(i,j,agg,aggi,aggi1,aggj,aggj1,axx)
C
C For the pair of sites i and j, build the 2x2 coupling matrix axx and
C its Cartesian derivative arrays.
C
C With xyzj = wrapvec(cp(:,j)-cp(:,i)), r = |xyzj|, e = xyzj/r and
C fac = sqrt_ael6(iteli,itelj)/r**3, the four matrix elements are
C     a22 =  fac*(uy(:,2,i).uy(:,1,j) - 3*(uy(:,2,i).e)*(uy(:,1,j).e))
C     a23 = -fac*(uy(:,2,i).uz(:,j-1) - 3*(uy(:,2,i).e)*(uz(:,j-1).e))
C     a32 = -fac*(uz(:,i).uy(:,1,j)   - 3*(uz(:,i).e)*(uy(:,1,j).e))
C     a33 =  fac*(uz(:,i).uz(:,j-1)   - 3*(uz(:,i).e)*(uz(:,j-1).e))
C where "." is the dot product computed by the external function
C scalar.
C
C Arguments:
C   i, j   (in)   site indices
C   agg    (out)  agg(:,n), n=1..4: gradient of a22, a23, a32, a33
C                 (in that order) with respect to xyzj
C   aggi   (out)  derivatives in DC(i)   plus 0.5*agg
C   aggi1  (out)  derivatives in DC(i+1) plus agg
C   aggj   (out)  derivatives in DC(j)   plus 0.5*agg
C   aggj1  (out)  derivatives in DC(j-1), plus agg when i.lt.j-2
C   axx    (out)  axx(1,1)=a22, axx(1,2)=a23, axx(2,1)=a32,
C                 axx(2,2)=a33
C All five derivative arrays use the same column order as agg.
C
C NOTE(review): wrapvec is defined outside this file section;
C presumably it applies the periodic-box wrapping to the difference
C vector. Confirm.
C NOTE(review): the DEBUG branch at the end writes to unit iout, but
C COMMON.IOUNITS is not among the includes of this routine. Confirm
C that one of the included files provides iout before building with
C DEBUG defined.
C
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.CHAIN'
      include 'COMMON.INTERACT'
      include 'COMMON.VECTORS'

      double precision agg(3,4),aggi(3,4),aggi1(3,4),
     &    aggj(3,4),aggj1(3,4)
      double precision a22,a23,a32,a33
      double precision axx(2,2)
      double precision ury,urz,vry,vrz,fac,rij,rmij,rrmij
      double precision uryvry,uryvrz,urzvry,urzvrz,fac3
      double precision xyzj(3),erij(3)
      double precision uyjvry(3),uzjvrz(3),uyiury(3),uziurz(3)
      double precision ggg(3)
C l is referenced only by the DEBUG output at the end.
      integer i,j,k,l
      integer iteli,itelj
      double precision scalar

C Inter-site vector; rij first holds the squared length, then the
C length. rrmij = 1/r**2 and rmij = 1/r.
      call wrapvec(cp(:,j)-cp(:,i),xyzj)
      rij=scalar(xyzj,xyzj)
      rrmij=1.0D0/rij
      rij=dsqrt(rij)
      rmij=1.0D0/rij

C Type indices selecting the prefactor; for an i,i+2 pair whose j-side
C type is 2 the i-side type is forced to 2 as well.
      iteli=itel(i)
      itelj=itel(j)
      if (j.eq.i+2 .and. itelj.eq.2) iteli=2
      fac=sqrt_ael6(iteli,itelj)*rmij*rrmij
      erij=xyzj*rmij

C Projections of the local axes of both sites on the unit vector erij.
      ury=scalar(uy(:,2,i),erij)
      vry=scalar(uy(:,1,j),erij)
      urz=scalar(uz(:,i),erij)
      vrz=scalar(uz(:,j-1),erij)
      uryvry=ury*vry
      uryvrz=ury*vrz
      urzvry=urz*vry
      urzvrz=urz*vrz
      a22=fac*(scalar(uy(:,2,i),uy(:,1,j))-3.0d0*uryvry)
      a23=-fac*(scalar(uy(:,2,i),uz(:,j-1))-3.0d0*uryvrz)
      a32=-fac*(scalar(uz(:,i),uy(:,1,j))-3.0d0*urzvry)
      a33=fac*(scalar(uz(:,i),uz(:,j-1))-3.0d0*urzvrz)

C ggg carries the derivative of the 1/r**3 prefactor (relative to
C fac); fac3 multiplies the derivative of the projection products.
      ggg=(-3.0d0*rrmij)*xyzj
      fac3=-3.0d0*fac*rmij
c AL Formulas for the derivatives in erij folded to simpler forms.
c    erder no longer needed.

      agg(:,1)=(a22*ggg)+fac3*(uy(:,2,i)*vry+uy(:,1,j)*ury-
     &   2.0d0*uryvry*erij(:))
      agg(:,2)=(a23*ggg)-fac3*(uy(:,2,i)*vrz+uz(:,j-1)*ury-
     &   2.0d0*uryvrz*erij(:))
      agg(:,3)=(a32*ggg)-fac3*(uz(:,i)*vry+uy(:,1,j)*urz-
     &   2.0d0*urzvry*erij(:))
      agg(:,4)=(a33*ggg)+fac3*(uz(:,i)*vrz+uz(:,j-1)*urz-
     &   2.0d0*urzvrz*erij(:))

C-KMO aggi(k,1)=fac*(scalar(uygrad(1,k,1,i),uy(1,j))-3.0d0*uryg(k,2)*vry)
C-KMO where uryg(k,2)=scalar(uygrad(1,k,1,i),erij(1))
C-KMO so we have aggi(k,1)=fac*P
C-KMO where P=scalar(uygrad(1,k,1,i),uy(1,j))+X*scalar(uygrad(1,k,1,i),erij)
C-KMO where X=-3.0d0*vry
C-KMO since scalar (i.e. dot product) is bilinear we have
C-KMO P=scalar(uygrad(1,k,1,i),(uy(1,j)+X*erij))
C-KMO P=scalar(uygrad(1,k,1,i),(uy(1,j)-3.0d0*erij*vry))
C-KMO the same follows for other expressions
C-KMO note that now the second argument to scalar does not depend on k
      uyjvry=fac*(uy(:,1,j)-(3.0d0*vry)*erij(:))
      uzjvrz=fac*(uz(:,j-1)-(3.0d0*vrz)*erij(:))
      uyiury=fac*(uy(:,2,i)-(3.0d0*ury)*erij(:))
      uziurz=fac*(uz(:,i)-(3.0d0*urz)*erij(:))
      do k=1,3
C Derivatives in DC(i)
        aggi(k,1)=scalar(uygrad(:,k,1,2,i),uyjvry)
        aggi(k,2)=-scalar(uygrad(:,k,1,2,i),uzjvrz)
        aggi(k,3)=-scalar(uzgrad(:,k,1,i),uyjvry)
        aggi(k,4)=scalar(uzgrad(:,k,1,i),uzjvrz)
C Derivatives in DC(i+1)
        aggi1(k,1)=scalar(uygrad(:,k,2,2,i),uyjvry)
        aggi1(k,2)=-scalar(uygrad(:,k,2,2,i),uzjvrz)
        aggi1(k,3)=-scalar(uzgrad(:,k,2,i),uyjvry)
        aggi1(k,4)=scalar(uzgrad(:,k,2,i),uzjvrz)
C Derivatives in DC(j)
        aggj(k,1)=scalar(uygrad(:,k,2,1,j),uyiury)
        aggj(k,2)=-scalar(uzgrad(:,k,2,j-1),uyiury)
        aggj(k,3)=-scalar(uygrad(:,k,2,1,j),uziurz)
        aggj(k,4)=scalar(uzgrad(:,k,2,j-1),uziurz)
C Derivatives in DC(j-1)
        aggj1(k,1)=scalar(uygrad(:,k,1,1,j),uyiury)
        aggj1(k,2)=-scalar(uzgrad(:,k,1,j-1),uyiury)
        aggj1(k,3)=-scalar(uygrad(:,k,1,1,j),uziurz)
        aggj1(k,4)=scalar(uzgrad(:,k,1,j-1),uziurz)
      enddo

C Distribute the inter-site gradient agg over the four derivative
C arrays: half to aggi and aggj, all of it to aggi1, and to aggj1
C only when j-1 is beyond i+1.
      aggi=aggi+0.5d0*agg
      aggj=aggj+0.5d0*agg
      aggi1=aggi1+agg
      !if (j.eq.nres-1 .and. i.lt.j-2) aggj1=aggj1+agg
      if (i.lt.j-2) aggj1=aggj1+agg

      axx(1,1)=a22
      axx(1,2)=a23
      axx(2,1)=a32
      axx(2,2)=a33
#ifdef DEBUG
      write (iout,*) "aggi aggi1 aggj aggj1"
      do k=1,4
        write (iout,'(i5,4(3f10.5,5x))') k,(aggi(l,k),l=1,3),
     &     (aggi1(l,k),l=1,3),(aggj(l,k),l=1,3),(aggj1(l,k),l=1,3)
      enddo
#endif
      return
      end
C-------------------------------------------------------------------------------
!      subroutine eelecij(eelecij_work,num_items,offset,
!     &                   threads_used,scal_el)
!#ifdef _OPENMP
!      use omp_lib
!#endif
!      implicit none
!      include 'DIMENSIONS'
!      include 'COMMON.CHAIN'
!      include 'COMMON.SPLITELE'
!#ifdef SHIELD
!      include 'COMMON.EELEC.SHIELD'
!#else
!      include 'COMMON.EELEC'
!#endif
!      include 'COMMON.LIPID'
!      integer num_items,threads_used,offset
!      double precision xyzj(3),rij,scal_el
!      integer my_thread,range_lo,range_hi,from,to
!      integer lasti,lastj,i,j,item,seq_idx,nseq_idx
!      record /eelecij_work_chunk/ seq_work
!      record /eelecij_work_chunk/ nseq_work
!      !record /eelecij_workitem/ eelecij_work(num_items)
!      integer eelecij_work(num_items)
!      record /eelec_thread_data/ th_data
!      logical full
!      double precision scalar
!
!      !range_lo=1
!      !range_hi=num_items
!
!#ifdef _OPENMP
!      threads_used=omp_get_max_threads()
!      ! ensure at least 4 elements per thread to avoid overlap
!      !if(range_hi-range_lo+1 .lt. 4*threads_used) then
!      !  threads_used=1
!      !  my_thread=1
!      !endif
!#else
!      threads_used=1
!#endif
!!$OMP PARALLEL
!!$OMP& NUM_THREADS(threads_used) DEFAULT(SHARED)
!!$OMP& PRIVATE(
!!$OMP&   i,j,xyzj,rij,lasti,lastj,
!!$OMP&   seq_work,seq_idx,nseq_work,nseq_idx,full,
!!$OMP&   my_thread,item,from,to,th_data
!!$OMP& )
!#ifdef _OPENMP
!      my_thread=omp_get_thread_num()+1
!#else
!      my_thread=1
!#endif
!      call eelec_fill_thread_data(th_data)
!
!      !from=range_lo+((range_hi-range_lo)/threads_used)*(my_thread-1)
!      !to=range_lo+((range_hi-range_lo)/threads_used)*my_thread-1
!      !if(my_thread.eq.threads_used) to=range_hi
!      call split_work_for_threads(from,to,1,num_items,
!     &                            my_thread,threads_used)
!
!
!      seq_idx=0
!      nseq_idx=0
!      lasti=0
!      lastj=0
!
!      do item=from,to
!        !i=eelecij_work(item).i
!        !j=eelecij_work(item).j
!        i=eelecij_work(item)
!        j=i+offset
!        LOG_STAT(300,1)
!        call wrapvec(cp(:,j)-cp(:,i),xyzj)
!        rij=scalar(xyzj,xyzj)
!        if(rij.ge.r_cut_int_sq) cycle
!        LOG_STAT(301,1)
!
!        if(i.ne.lasti) then
!          if(seq_idx.gt.0) then
!!DIR$ NOINLINE
!            call eelecij_do_work_seq(seq_work,lasti,
!     &                               seq_work.j(1),
!     &                               seq_work.j(seq_idx),
!     &                               eelecij_value(my_thread),
!     &                               eelecij_array(my_thread),
!     &                               th_data,scal_el)
!            LOG_STAT(302,seq_idx)
!            seq_idx=0
!          endif
!          if(nseq_idx.gt.0) then
!!DIR$ NOINLINE
!            call eelecij_do_work_nonseq(nseq_work,
!     &                                  lasti,1,nseq_idx,
!     &                                  eelecij_value(my_thread),
!     &                                  eelecij_array(my_thread),
!     &                                  th_data,scal_el)
!            LOG_STAT(303,nseq_idx)
!            nseq_idx=0
!          endif
!          lasti=i
!          lastj=-1
!        endif
!
!        if((j.eq.lastj+1).or.(seq_idx.eq.0)) then
!          call eelec_queue_work(seq_work,seq_idx,i,j,
!     &                          xyzj,rij,full)
!          lastj=j
!          if(full) then
!!DIR$ NOINLINE
!            call eelecij_do_work_seq(seq_work,lasti,
!     &                               seq_work.j(1),
!     &                               seq_work.j(seq_idx),
!     &                               eelecij_value(my_thread),
!     &                               eelecij_array(my_thread),
!     &                               th_data,scal_el)
!            LOG_STAT(302,seq_idx)
!            seq_idx=0
!            lastj=-1
!          endif
!        else
!          if((seq_idx.lt.simd_seq_cutoff) .and.
!     &       (nseq_idx+seq_idx.lt.eelec_buffer_size)) then
!            LOG_STAT(304,seq_idx)
!            call eelecij_move_work(seq_work,seq_idx,
!     &                             nseq_work,nseq_idx)
!          else
!!DIR$ NOINLINE
!            call eelecij_do_work_seq(seq_work,lasti,
!     &                               seq_work.j(1),
!     &                               seq_work.j(seq_idx),
!     &                               eelecij_value(my_thread),
!     &                               eelecij_array(my_thread),
!     &                               th_data,scal_el)
!            LOG_STAT(302,seq_idx)
!            seq_idx=0
!          endif
!          call eelec_queue_work(nseq_work,nseq_idx,i,j,
!     &                          xyzj,rij,full)
!          lastj=j
!          if(full) then
!!DIR$ NOINLINE
!            call eelecij_do_work_nonseq(nseq_work,lasti,
!     &                                  1,nseq_idx,
!     &                                  eelecij_value(my_thread),
!     &                                  eelecij_array(my_thread),
!     &                                  th_data,scal_el)
!            LOG_STAT(303,nseq_idx)
!            nseq_idx=0
!            lastj=-1
!          endif
!        endif
!      enddo
!
!      if(seq_idx.gt.0) then
!!DIR$ NOINLINE
!        call eelecij_do_work_seq(seq_work,lasti,
!     &                           seq_work.j(1),
!     &                           seq_work.j(seq_idx),
!     &                           eelecij_value(my_thread),
!     &                           eelecij_array(my_thread),
!     &                           th_data,scal_el)
!        LOG_STAT(302,seq_idx)
!      endif
!      if(nseq_idx.gt.0) then
!!DIR$ NOINLINE
!        call eelecij_do_work_nonseq(nseq_work,lasti,
!     &                              1,nseq_idx,
!     &                              eelecij_value(my_thread),
!     &                              eelecij_array(my_thread),
!     &                              th_data,scal_el)
!        LOG_STAT(303,nseq_idx)
!      endif
!!$OMP END PARALLEL
!      return
!      end
!C-------------------------------------------------------------------------------
!      subroutine eelecij2(threads_used)
!#ifdef _OPENMP
!      use omp_lib
!#endif
!      implicit none
!      include 'DIMENSIONS'
!      include 'COMMON.CHAIN'
!      include 'COMMON.INTERACT'
!      include 'COMMON.SPLITELE'
!#ifdef SHIELD
!      include 'COMMON.EELEC.SHIELD'
!#else
!      include 'COMMON.EELEC'
!#endif
!      include 'COMMON.LIPID'
!      integer threads_used
!      integer from_blk,to_blk,from_j,to_j
!      double precision xyzj(3),rij
!      integer my_thread,from,to
!      integer lastj,i,j,ikont,jblock,item,seq_idx,nseq_idx
!      integer from_ik,to_ik,first_blk,last_blk,first_j,last_j
!      record /eelecij_work_chunk/ seq_work
!      record /eelecij_work_chunk/ nseq_work
!      record /eelec_thread_data/ th_data
!      logical full,limited
!      double precision scalar
!
!      threads_used = pp_threads
!
!!$OMP PARALLEL IF(threads_used.gt.1)
!!$OMP& NUM_THREADS(threads_used) DEFAULT(SHARED)
!!$OMP& PRIVATE(
!!$OMP&   i,j,ikont,jblock,xyzj,rij,lastj,from_j,to_j,
!!$OMP&   seq_work,seq_idx,nseq_work,nseq_idx,full,
!!$OMP&   from_ik,to_ik,first_blk,last_blk,first_j,last_j,
!!$OMP&   my_thread,item,from,to,from_blk,to_blk,th_data
!!$OMP& )
!#ifdef _OPENMP
!      my_thread=omp_get_thread_num()+1
!#else
!      my_thread=1
!#endif
!      call eelec_fill_thread_data(th_data)
!
!      seq_idx=0
!      nseq_idx=0
!      lastj=0
!
!      from_ik=pp_from_ik(my_thread)
!      to_ik=pp_to_ik(my_thread)
!      first_blk=pp_first_blk(my_thread)
!      last_blk=pp_last_blk(my_thread)
!      first_j=pp_first_j(my_thread)
!      last_j=pp_last_j(my_thread)
!
!      do ikont=from_ik,to_ik
!        i=newcontlistppi(1,ikont)
!        from_blk=newcontlistppi(2,ikont-1)+1
!        to_blk=newcontlistppi(2,ikont)
!
!        if(ikont.eq.from_ik) from_blk=first_blk
!        if(ikont.eq.to_ik) to_blk=last_blk
!
!        lastj=-1
!
!        do jblock=from_blk,to_blk
!          from_j=newcontlistppj(1,jblock)
!          to_j=newcontlistppj(2,jblock)
!          if((ikont.eq.from_ik).and.(jblock.eq.from_blk)) from_j=first_j
!          if((ikont.eq.to_ik).and.(jblock.eq.to_blk)) to_j=last_j
!
!          do j=from_j,to_j
!            LOG_STAT(300,1)
!            call wrapvec(cp(:,j)-cp(:,i),xyzj)
!            rij=scalar(xyzj,xyzj)
!            if(rij.ge.r_cut_int_sq) cycle
!            LOG_STAT(301,1)
!
!            if((j.eq.lastj+1).or.(seq_idx.eq.0)) then
!              call eelec_queue_work(seq_work,seq_idx,i,j,
!     &                              xyzj,rij,full)
!              lastj=j
!              if(full) then
!!DIR$ NOINLINE
!                call eelecij_do_work_seq(seq_work,i,
!     &                                   seq_work.j(1),
!     &                                   seq_work.j(seq_idx),
!     &                                   eelecij_value(my_thread),
!     &                                   eelecij_array(my_thread),
!     &                                   th_data,1.0d0)
!                LOG_STAT(302,seq_idx)
!                seq_idx=0
!                lastj=-1
!              endif
!            else
!              if((seq_idx.lt.simd_seq_cutoff) .and.
!     &           (nseq_idx+seq_idx.lt.eelec_buffer_size)) then
!                LOG_STAT(304,seq_idx)
!                call eelecij_move_work(seq_work,seq_idx,
!     &                                 nseq_work,nseq_idx)
!              else
!!DIR$ NOINLINE
!                call eelecij_do_work_seq(seq_work,i,
!     &                                   seq_work.j(1),
!     &                                   seq_work.j(seq_idx),
!     &                                   eelecij_value(my_thread),
!     &                                   eelecij_array(my_thread),
!     &                                   th_data,1.0d0)
!                LOG_STAT(302,seq_idx)
!                seq_idx=0
!              endif
!              call eelec_queue_work(nseq_work,nseq_idx,
!     &                              i,j,xyzj,rij,
!     &                              full)
!              lastj=j
!              if(full) then
!!DIR$ NOINLINE
!                call eelecij_do_work_nonseq(nseq_work,i,
!     &                                      1,nseq_idx,
!     &                                      eelecij_value(my_thread),
!     &                                      eelecij_array(my_thread),
!     &                                      th_data,1.0d0)
!                LOG_STAT(303,nseq_idx)
!                nseq_idx=0
!                lastj=-1
!              endif
!            endif
!          enddo  ! j
!        enddo  ! jblock
!
!        if(seq_idx.gt.0) then
!!DIR$ NOINLINE
!          call eelecij_do_work_seq(seq_work,i,
!     &                             seq_work.j(1),
!     &                             seq_work.j(seq_idx),
!     &                             eelecij_value(my_thread),
!     &                             eelecij_array(my_thread),
!     &                             th_data,1.0d0)
!          LOG_STAT(302,seq_idx)
!          seq_idx=0
!        endif
!        if(nseq_idx.gt.0) then
!!DIR$ NOINLINE
!          call eelecij_do_work_nonseq(nseq_work,i,
!     &                                1,nseq_idx,
!     &                                eelecij_value(my_thread),
!     &                                eelecij_array(my_thread),
!     &                                th_data,1.0d0)
!          LOG_STAT(303,nseq_idx)
!          nseq_idx=0
!        endif
!      enddo  ! ikont
!!$OMP END PARALLEL
!
!      return
!      end

C-----------------------------------------------------------------------------
      subroutine eturn3(i,eello_turn3,my_thread)
C Third- and fourth-order contributions from turns
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.IOUNITS'
      include 'COMMON.GEO'
      include 'COMMON.VAR'
      include 'COMMON.LOCAL'
      include 'COMMON.CHAIN'
      include 'COMMON.DERIV'
      include 'COMMON.INTERACT'
      include 'COMMON.CORRMAT'
      include 'COMMON.TORSION'
      include 'COMMON.VECTORS'
      include 'COMMON.FFIELD'
      include 'COMMON.CONTROL'
#ifdef SHIELD
      include 'COMMON.SHIELD'
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      include 'COMMON.LIPID'
      integer i,j,k,l,ilist,iresshield,my_thread

      double precision eello_turn3
      double precision eello_t3,gloc_theta1,gloc_theta2,
     &  gel_loc_turn3_cos,gel_loc_turn3_sin,rlocshield
      double precision agg(3,4),aggi(3,4),aggi1(3,4),
     &                 aggj(3,4),aggj1(3,4)
      double precision faclipij

      double precision auxmat(2,2),auxmat1(2,2),auxmat2(2,2),
     &  auxgmat1(2,2),auxgmat2(2,2)
      double precision auxmat3(2,2),a_temp(2,2)
      double precision auxgmatt1(2,2),auxgmatt2(2,2)
      double precision gtcosEUg(2,2),gtsinEUg(2,2)
      double precision trace
      double precision trace_ae
      j=i+2
      faclipij=(lipid_cache(i).ss+lipid_cache(j).ss)*0.5d0*lipscale
     &        +1.0d0
      call calculate_aggij(i,j,agg,aggi,aggi1,aggj,aggj1,a_temp)
c      write (iout,*) "eturn3",i,j,j+1,j-1
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C               Third-order contributions
C
C                 (i+2)o----(i+3)
C                      | |
C                      | |
C                 (i+1)o----i
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C Compute the eturn3 contrib
      call matmat2(EUg(1,1,i+1),EE(1,1,i+2),auxmat(1,1))
      call transpose2(auxmat(1,1),auxmat1(1,1))
      eello_t3=trace_ae(a_temp,auxmat1)
!     call mat_matt_2_trace(a_temp(1,1),auxmat(1,1),trace)
!     eello_t3=0.5d0*trace
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
      eello_turn3=eello_turn3+eello_t3
     &  *faclipij
#ifdef DEBUG
      if (energy_dec) write (iout,'(6heturn3,2i5,0pf7.3)') i,i+2,
     &    eello_t3
#endif
C Compute eturn3 derivative in theta(i+1)
      call matmat2(gtEUg(1,1,i+1),EE(1,1,i+2),auxgmat1(1,1))
      !call mat_matt_2_trace(a_temp(1,1),auxmat1(1,1),trace)
      !gloc_theta1=0.5d0*trace
      call transpose2(auxgmat1(1,1),auxgmatt1(1,1))

      gloc_theta1=trace_ae(a_temp,auxgmatt1)
     &   *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
c Compute eturn3 derivative in theta(i+2)
      call matmat2(EUg(1,1,i+1),gtEE(1,1,i+2),auxgmat2(1,1))
      !call mat_matt_2_trace(a_temp(1,1),auxgmat2(1,1),trace)
      !gloc_theta2=0.5d0*trace
      call transpose2(auxgmat2(1,1),auxgmatt2(1,1))
      gloc_theta2=trace_ae(a_temp,auxgmatt2)
     &  *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
#ifdef SHIELD
      if (shield_mode.eq.0) then
      fac_shield(i)=1.0
      fac_shield(j)=1.0
C     else
C       fac_shield(i)=0.4
C       fac_shield(j)=0.6
      endif
#endif
#ifdef SHIELD
C Derivatives in shield mode
      if ((fac_shield(i).gt.0).and.(fac_shield(j).gt.0).and.
     &  (shield_mode.gt.0)) then
C          print *,i,j

        do ilist=1,ishield_list(i)
          iresshield=shield_list(ilist,i)
          do k=1,3
          rlocshield=grad_shield_side(k,ilist,i)*eello_t3/fac_shield(i)
C     &        *2.0
            gshieldx_t3(k,iresshield)=gshieldx_t3(k,iresshield)+
     &                rlocshield
     &     +grad_shield_loc(k,ilist,i)*eello_t3/fac_shield(i)
            gshieldc_t3(k,iresshield-1)=gshieldc_t3(k,iresshield-1)
     &        +rlocshield
          enddo
        enddo
        do ilist=1,ishield_list(j)
          iresshield=shield_list(ilist,j)
          do k=1,3
          rlocshield=grad_shield_side(k,ilist,j)*eello_t3/fac_shield(j)
C     &       *2.0
            gshieldx_t3(k,iresshield)=gshieldx_t3(k,iresshield)+
     &                rlocshield
     &     +grad_shield_loc(k,ilist,j)*eello_t3/fac_shield(j)
            gshieldc_t3(k,iresshield-1)=gshieldc_t3(k,iresshield-1)
     &               +rlocshield

          enddo
        enddo

        do k=1,3
          gshieldc_t3(k,i)=gshieldc_t3(k,i)+
     &              grad_shield(k,i)*eello_t3/fac_shield(i)
          gshieldc_t3(k,j)=gshieldc_t3(k,j)+
     &              grad_shield(k,j)*eello_t3/fac_shield(j)
          gshieldc_t3(k,i-1)=gshieldc_t3(k,i-1)+
     &              grad_shield(k,i)*eello_t3/fac_shield(i)
          gshieldc_t3(k,j-1)=gshieldc_t3(k,j-1)+
     &              grad_shield(k,j)*eello_t3/fac_shield(j)
        enddo

      endif
#endif
C Derivatives in cos(gamma)*sin(theta1)*sin(theta2) and
C sin(gamma)*sin(theta1)*sin(theta2)
      gtcosEUg(1,1)=-EE(1,1,i+1)
      gtcosEUg(1,2)= EE(1,2,i+1)
      gtcosEUg(2,1)=-EE(2,1,i+1)
      gtcosEUg(2,2)= EE(2,2,i+1)
      gtsinEUg(1,1)=-EE(1,2,i+1)
      gtsinEUg(1,2)=-EE(1,1,i+1)
      gtsinEUg(2,1)=-EE(2,2,i+1)
      gtsinEUg(2,2)=-EE(2,1,i+1)
#ifdef DEBUG
      write (iout,*) "i+1 gtcosEUg gtsinEUg EE atemp"
      do k=1,2
        write (iout,'(4(2f10.5,5x))') gtcosEUg(k,1),gtcosEUg(k,2),
     &     gtsinEUg(k,1),gtsinEUg(k,2),EE(k,1,i+2),EE(k,2,i+2),
     &     a_temp(k,1),a_temp(k,2)
      enddo
#endif
      call matmat2(gtcosEUg(1,1),EE(1,1,i+2),auxmat2(1,1))
      !call mat_matt_2_trace(a_temp(1,1),auxmat2(1,1),trace)
      !gel_loc_turn3_cos=0.5d0*trace
      call transpose2(auxmat2(1,1),auxmat3(1,1))
      gel_loc_turn3_cos=trace_ae(a_temp,auxmat3)
     & *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
      call matmat2(gtsinEUg(1,1),EE(1,1,i+2),auxmat2(1,1))
      !call mat_matt_2_trace(a_temp(1,1),auxmat2(1,1),trace)
      !gel_loc_turn3_sin=0.5d0*trace
      call transpose2(auxmat2(1,1),auxmat3(1,1))
      gel_loc_turn3_sin=trace_ae(a_temp,auxmat3)
     & *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
#ifdef DEBUG
      write (iout,'(i5,2(a,f10.5,5x))') i,"  gel_loc_turn3_cos",
     &  gel_loc_turn3_cos," gel_loc_turn3_cos", gel_loc_turn3_sin
#endif
c Derivatives in theta1 and theta2
C Cartesian derivatives
      do l=1,3
c       ghalf1=0.5d0*agg(l,1)
c       ghalf2=0.5d0*agg(l,2)
c       ghalf3=0.5d0*agg(l,3)
c       ghalf4=0.5d0*agg(l,4)
        a_temp(1,1)=aggi(l,1)!+ghalf1
        a_temp(1,2)=aggi(l,2)!+ghalf2
        a_temp(2,1)=aggi(l,3)!+ghalf3
        a_temp(2,2)=aggi(l,4)!+ghalf4
!       call vecmat_mat_trace(aggi(1,1),l,auxmat1,trace)
        !gcorr3_turn(l,i)=gcorr3_turn(l,i)+trace_ae(a_temp,auxmat1)
        eelecij_turn(i,my_thread).gcorr3_turn(l)=
     &    eelecij_turn(i,my_thread).gcorr3_turn(l)
     &    +trace_ae(a_temp,auxmat1)
!    &    eelecij_turn(i,my_thread).gcorr3_turn(l)+0.5d0*trace
     &    *faclipij
#ifdef SHIELD
     &    *fac_shield(i)*fac_shield(j)
#endif

        a_temp(1,1)=aggi1(l,1)!+agg(l,1)
        a_temp(1,2)=aggi1(l,2)!+agg(l,2)
        a_temp(2,1)=aggi1(l,3)!+agg(l,3)
        a_temp(2,2)=aggi1(l,4)!+agg(l,4)
!       call vecmat_mat_trace(aggi1(1,1),l,auxmat1,trace)
!       gcorr3_turn(l,i+1)=gcorr3_turn(l,i+1)+trace_ae(a_temp,auxmat1)
        eelecij_turn(i+1,my_thread).gcorr3_turn(l)=
     &    eelecij_turn(i+1,my_thread).gcorr3_turn(l)
     &    +trace_ae(a_temp,auxmat1)
!    &    eelecij_turn(i+1,my_thread).gcorr3_turn(l)+0.5d0*trace
     &      *faclipij
#ifdef SHIELD
     &      *fac_shield(i)*fac_shield(j)
#endif
        a_temp(1,1)=aggj(l,1)!+ghalf1
        a_temp(1,2)=aggj(l,2)!+ghalf2
        a_temp(2,1)=aggj(l,3)!+ghalf3
        a_temp(2,2)=aggj(l,4)!+ghalf4
!       gcorr3_turn(l,j)=gcorr3_turn(l,j)+trace_ae(a_temp,auxmat1)
!       call vecmat_mat_trace(aggj(1,1),l,auxmat1,trace)
        eelecij_turn(j,my_thread).gcorr3_turn(l)=
     &    eelecij_turn(j,my_thread).gcorr3_turn(l)
     &    +trace_ae(a_temp,auxmat1)
!    &    eelecij_turn(j,my_thread).gcorr3_turn(l)+0.5d0*trace
     &    *faclipij
#ifdef SHIELD
     &    *fac_shield(i)*fac_shield(j)
#endif
        a_temp(1,1)=aggj1(l,1)
        a_temp(1,2)=aggj1(l,2)
        a_temp(2,1)=aggj1(l,3)
        a_temp(2,2)=aggj1(l,4)
!       gcorr3_turn(l,j-1)=gcorr3_turn(l,j-1)+trace_ae(a_temp,auxmat1)
!       call vecmat_mat_trace(aggj1(1,1),l,auxmat1,trace)
        eelecij_turn(j-1,my_thread).gcorr3_turn(l)=
     &    eelecij_turn(j-1,my_thread).gcorr3_turn(l)
     &    +trace_ae(a_temp,auxmat1)
!    &    eelecij_turn(j-1,my_thread).gcorr3_turn(l)+0.50d0*trace
     &    *faclipij
#ifdef SHIELD
     &    *fac_shield(i)*fac_shield(j)
#endif
      enddo
      eelecij_turn(i,my_thread).gcorr3_turn(3)=
     &  eelecij_turn(i,my_thread).gcorr3_turn(3)+
     &  lipid_cache(i).ssgrad*eello_t3/4.0d0*lipscale
      eelecij_turn(j,my_thread).gcorr3_turn(3)=
     &  eelecij_turn(j,my_thread).gcorr3_turn(3)+
     &  lipid_cache(j).ssgrad*eello_t3/4.0d0*lipscale
      eelecij_turn(i-1,my_thread).gcorr3_turn(3)=
     &  eelecij_turn(i-1,my_thread).gcorr3_turn(3)+
     &  lipid_cache(i).ssgrad*eello_t3/4.0d0*lipscale
      eelecij_turn(j-1,my_thread).gcorr3_turn(3)=
     &  eelecij_turn(j-1,my_thread).gcorr3_turn(3)+
     &  lipid_cache(j).ssgrad*eello_t3/4.0d0*lipscale
#ifdef DEBUG
      write (iout,*) "gcorr3_turn before angles"
      write (iout,'(i5,3f10.5)') i-1,(gcorr3_turn(k,i-1),k=1,3)
      write (iout,'(i5,3f10.5)') i,(gcorr3_turn(k,i),k=1,3)
      write (iout,'(i5,3f10.5)') i+1,(gcorr3_turn(k,i+1),k=1,3)
      write (iout,'(i5,3f10.5)') j-1,(gcorr3_turn(k,j-1),k=1,3)
      write (iout,'(i5,3f10.5)') j,(gcorr3_turn(k,j),k=1,3)
#endif
c Add contributions from "angles".
#ifdef DEBUG
      write (iout,*) "dcgs1s2tab dsgs1s2tab"
      do k=1,3
        write (iout,'(i5,2(3f10.5,5x))') k,dcgs1s2tab(1,1,i+3),
     &    dcgs1s2tab(1,2,i+3),dcgs1s2tab(1,3,i+3),
     &    dsgs1s2tab(k,1,i+3),dsgs1s2tab(k,2,i+3),dsgs1s2tab(k,3,i+3)
      enddo
      write (iout,*) "gel_loc_turn3_cos",gel_loc_turn3_cos,
     & "gel_loc_turn3_sin",gel_loc_turn3_sin
      write (iout,*) "dcosttab i+2 dcosttab i+3"
      do k=1,3
        write (iout,'(i5,2(2f10.5,5x))') k,dcosttab(k,1,i+2),
     &    dcosttab(k,2,i+2),dcosttab(k,1,i+3),dcosttab(k,2,i+3)
      enddo
      write (iout,*)"gloc_theta1",gloc_theta1," gloc_theta2",gloc_theta2
#endif
!     gcorr3_turn(:,i)=gcorr3_turn(:,i)
      eelecij_turn(i,my_thread).gcorr3_turn(:)=
     &  eelecij_turn(i,my_thread).gcorr3_turn(:)
     &  +gel_loc_turn3_cos*dcgs1s2tab(:,1,i+3)
     &  +gel_loc_turn3_sin*dsgs1s2tab(:,1,i+3)
     &  +gloc_theta1*dcosttab(:,1,i+2)
!     gcorr3_turn(:,i+1)=gcorr3_turn(:,i+1)
      eelecij_turn(i+1,my_thread).gcorr3_turn(:)=
     &  eelecij_turn(i+1,my_thread).gcorr3_turn(:)
     & +gel_loc_turn3_cos*dcgs1s2tab(:,2,i+3)
     & +gel_loc_turn3_sin*dsgs1s2tab(:,2,i+3)
     & +gloc_theta1*dcosttab(:,2,i+2)
     & +gloc_theta2*dcosttab(:,1,i+3)
!     gcorr3_turn(:,i+2)=gcorr3_turn(:,i+2)
      eelecij_turn(i+2,my_thread).gcorr3_turn(:)=
     &  eelecij_turn(i+2,my_thread).gcorr3_turn(:)
     & +gel_loc_turn3_cos*dcgs1s2tab(:,3,i+3)
     & +gel_loc_turn3_sin*dsgs1s2tab(:,3,i+3)
     & +gloc_theta2*dcosttab(:,2,i+3)
#ifdef DEBUG
      write (iout,*) "gcorr3_turn after angles"
      write (iout,'(i5,3f10.5)') i-1,(gcorr3_turn(k,i-1),k=1,3)
      write (iout,'(i5,3f10.5)') i,(gcorr3_turn(k,i),k=1,3)
      write (iout,'(i5,3f10.5)') i+1,(gcorr3_turn(k,i+1),k=1,3)
      write (iout,'(i5,3f10.5)') j-1,(gcorr3_turn(k,j-1),k=1,3)
      write (iout,'(i5,3f10.5)') j,(gcorr3_turn(k,j),k=1,3)
#endif
      return
      end
C-------------------------------------------------------------------------------
      subroutine eturn4(i,eello_turn4,my_thread)
C Fourth-order turn contribution for the pair of peptide groups
C (i, j=i+3).
C
C Arguments:
C   i           - index of the first peptide group of the turn
C   eello_turn4 - energy accumulator; eello_t4*faclipij is ADDED to it
C   my_thread   - 1-based slot of the calling thread; selects the
C                 column of eelecij_turn that receives the gradient
C
C All gradient contributions (Cartesian, lipid, "angles" and, under
C SHIELD, the shielding terms) are accumulated in the per-thread
C records eelecij_turn(:,my_thread); nothing is added directly to the
C shared arrays gcorr4_turn/gshieldx_t4/gshieldc_t4.  This routine is
C called from inside an OpenMP parallel region (calculate_turns) and
C neighbouring values of i touch the same residues, so direct updates
C of the shared arrays would race.  sum_eturn_arrays later folds the
C per-thread records into the shared arrays.
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.IOUNITS'
      include 'COMMON.GEO'
      include 'COMMON.VAR'
      include 'COMMON.LOCAL'
      include 'COMMON.CHAIN'
      include 'COMMON.DERIV'
      include 'COMMON.INTERACT'
      include 'COMMON.CORRMAT'
      include 'COMMON.TORSION'
      include 'COMMON.VECTORS'
      include 'COMMON.FFIELD'
      include 'COMMON.CONTROL'
#ifdef SHIELD
      include 'COMMON.SHIELD'
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      include 'COMMON.LIPID'

      double precision eello_turn4
      integer i,j,k,l,ilist,iresshield,my_thread

      double precision agg(3,4),aggi(3,4),aggi1(3,4),
     &                 aggj(3,4),aggj1(3,4)
      double precision rlocshield,faclipij
      double precision s3,gsEE1,gsEE2,gsEE3,eello_t4,gloc_theta1,
     &  gloc_theta2,gloc_theta3,gel_loc_turn4_phicos1,
     &  gel_loc_turn4_phisin1,gel_loc_turn4_phicos2,
     &  gel_loc_turn4_phisin2,eello_t4_theta,sint3inv
      double precision auxmat(2,2),auxmat1(2,2),a_temp(2,2),
     &  e1t(2,2),e2t(2,2),e3t(2,2),gte1t(2,2),gte2t(2,2),gte3t(2,2),
     &  E3tuE2tuE1t(2,2),gtE3tuE2tuE1t(2,2),E3tugtE2tuE1t(2,2),
     &  E3tuE2tugtE1t(2,2),E3tuE2t(2,2),E2tuE1t(2,2)
      double precision gtcosEUg(2,2),gtsinEUg(2,2)
      double precision trace_ae
      j=i+3
C Lipid scaling factor: 1 + lipscale * mean of the ss values of i and j
      faclipij=(lipid_cache(i).ss+lipid_cache(j).ss)*0.5d0*lipscale
     &        +1.0d0
      call calculate_aggij(i,j,agg,aggi,aggi1,aggj,aggj1,a_temp)
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C               Fourth-order contributions
C
C                 (i+3)o----(i+4)
C                     /  |
C               (i+2)o   |
C                     \  |
C                 (i+1)o----i
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
c        write (iout,*) "eturn4 i",i," j",j," j1",j+1," j2",j-1
      sint3inv=1.0d0/sinttab(i+3)
      call transpose2(EUg(1,1,i+1),e1t(1,1))
      call transpose2(Eug(1,1,i+2),e2t(1,1))
      call transpose2(EE(1,1,i+3),e3t(1,1))
C Ematrix derivative in theta
      call transpose2(gtEUg(1,1,i+1),gte1t(1,1))
      call transpose2(gtEug(1,1,i+2),gte2t(1,1))
      call transpose2(gtEE(1,1,i+3),gte3t(1,1))
c Compute the ETURN4 energy contribution
      call matmat2(e3t,e2t,E3tuE2t)
      call matmat2(E3tuE2t,e1t,e3tue2tue1t)
c trace_ae returns 0.5*trace(a_temp*E3tuE2tuE1t)
      s3=trace_ae(a_temp,E3tuE2tue1t)
c Derivative of ETURN4 through gtEUg(i+1) (feeds gloc_theta1)
      call matmat2(E3tuE2t,gtE1t,E3tuE2tugtE1t)
      gsEE1=trace_ae(a_temp,E3tuE2tugtE1t)
c Derivative of ETURN4 through gtEUg(i+2) (feeds gloc_theta2)
      call matmat2(E3t,gtE2t,auxmat)
      call matmat2(auxmat,E1t,E3tugtE2tuE1t)
      gsEE2=trace_ae(a_temp,E3tugtE2tuE1t)
c Derivative of ETURN4 through gtEE(i+3) (feeds gloc_theta3)
      call matmat2(E2t,E1t,E2tuE1t)
      call matmat2(gtE3t,E2tuE1t,gtE3tuE2tuE1t)
      gsEE3=trace_ae(a_temp,gtE3tuE2tuE1t)
#ifdef SHIELD
      if (shield_mode.eq.0) then
        fac_shield(i)=1.0
        fac_shield(j)=1.0
C        else
C        fac_shield(i)=0.6
C        fac_shield(j)=0.4
      endif
#endif
      eello_t4=-s3*sint3inv
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
      eello_turn4=eello_turn4+eello_t4
     &  *faclipij
#ifdef DEBUG
      if (energy_dec) write (iout,'(a6,2i5,0pf7.3,3f7.3)')
     &      'eturn4',i,j,eello_t4
#endif
#ifdef SHIELD
C Now derivative over shield (accumulated per thread):
      if ((fac_shield(i).gt.0).and.(fac_shield(j).gt.0).and.
     &    (shield_mode.gt.0)) then
C          print *,i,j

        do ilist=1,ishield_list(i)
          iresshield=shield_list(ilist,i)
          do k=1,3
            rlocshield=grad_shield_side(k,ilist,i)*eello_t4
     &        /fac_shield(i)
C     &      *2.0
            eelecij_turn(iresshield,my_thread).gshieldx_t4(k)=
     &        eelecij_turn(iresshield,my_thread).gshieldx_t4(k)
     &        +rlocshield
     &        +grad_shield_loc(k,ilist,i)*eello_t4/fac_shield(i)
            eelecij_turn(iresshield-1,my_thread).gshieldc_t4(k)=
     &        eelecij_turn(iresshield-1,my_thread).gshieldc_t4(k)
     &        +rlocshield
          enddo
        enddo
        do ilist=1,ishield_list(j)
          iresshield=shield_list(ilist,j)
          do k=1,3
            rlocshield=grad_shield_side(k,ilist,j)*eello_t4
     &        /fac_shield(j)
C     &     *2.0
            eelecij_turn(iresshield,my_thread).gshieldx_t4(k)=
     &        eelecij_turn(iresshield,my_thread).gshieldx_t4(k)
     &        +rlocshield
     &        +grad_shield_loc(k,ilist,j)*eello_t4/fac_shield(j)
            eelecij_turn(iresshield-1,my_thread).gshieldc_t4(k)=
     &        eelecij_turn(iresshield-1,my_thread).gshieldc_t4(k)
     &        +rlocshield
          enddo
        enddo

        do k=1,3
          eelecij_turn(i,my_thread).gshieldc_t4(k)=
     &      eelecij_turn(i,my_thread).gshieldc_t4(k)+
     &      grad_shield(k,i)*eello_t4/fac_shield(i)
          eelecij_turn(j,my_thread).gshieldc_t4(k)=
     &      eelecij_turn(j,my_thread).gshieldc_t4(k)+
     &      grad_shield(k,j)*eello_t4/fac_shield(j)
          eelecij_turn(i-1,my_thread).gshieldc_t4(k)=
     &      eelecij_turn(i-1,my_thread).gshieldc_t4(k)+
     &      grad_shield(k,i)*eello_t4/fac_shield(i)
          eelecij_turn(j-1,my_thread).gshieldc_t4(k)=
     &      eelecij_turn(j-1,my_thread).gshieldc_t4(k)+
     &      grad_shield(k,j)*eello_t4/fac_shield(j)
        enddo
      endif
#endif
      gloc_theta1=-gsEE1*sint3inv*faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
      gloc_theta2=-gsEE2*sint3inv*faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
      gloc_theta3=-gsEE3*sint3inv*faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
C Derivatives in cos(gamma(i))*sint1*sint2 and
C sin(gamma(i))*sint1*sint2
C Derivatives of EUg(i+1)(T) in cos
      gtcosEUg(1,1)=-EE(1,1,i+1)
      gtcosEUg(2,1)= EE(1,2,i+1)
      gtcosEUg(1,2)=-EE(2,1,i+1)
      gtcosEUg(2,2)= EE(2,2,i+1)
C Derivatives of EUg(i+1)(T) in sin
      gtsinEUg(1,1)=-EE(1,2,i+1)
      gtsinEUg(2,1)=-EE(1,1,i+1)
      gtsinEUg(1,2)=-EE(2,2,i+1)
      gtsinEUg(2,2)=-EE(2,1,i+1)
C Derivative of eturn4 contrib in cos
      call matmat2(E3tuE2t,gtcosEUg,auxmat)
      gel_loc_turn4_phicos1=-trace_ae(a_temp,auxmat)*sint3inv
     &*faclipij
#ifdef SHIELD
     &*fac_shield(i)*fac_shield(j)
#endif
C Derivative of eturn4 contrib in sin
      call matmat2(E3tuE2t,gtsinEUg,auxmat)
      gel_loc_turn4_phisin1=-trace_ae(a_temp,auxmat)*sint3inv
     &*faclipij
#ifdef SHIELD
     &*fac_shield(i)*fac_shield(j)
#endif
C Derivatives in cos(gamma(i+1))*sint1*sint2 and
C sin(gamma(i+1))*sint1*sint2
C Derivatives of EUg(i+2)(T) in cos
      gtcosEUg(1,1)=-EE(1,1,i+2)
      gtcosEUg(2,1)= EE(1,2,i+2)
      gtcosEUg(1,2)=-EE(2,1,i+2)
      gtcosEUg(2,2)= EE(2,2,i+2)
C Derivatives of EUg(i+2)(T) in sin
      gtsinEUg(1,1)=-EE(1,2,i+2)
      gtsinEUg(2,1)=-EE(1,1,i+2)
      gtsinEUg(1,2)=-EE(2,2,i+2)
      gtsinEUg(2,2)=-EE(2,1,i+2)
C Derivative of eturn4 contrib in cos
      call matmat2(E3t,gtcosEUg,auxmat)
      call matmat2(auxmat,E1t,auxmat1)
      gel_loc_turn4_phicos2=-trace_ae(a_temp,auxmat1)*sint3inv
     &  *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
C Derivative of eturn4 contrib in sin
      call matmat2(E3t,gtsinEUg,auxmat)
      call matmat2(auxmat,E1t,auxmat1)
      gel_loc_turn4_phisin2=-trace_ae(a_temp,auxmat1)*sint3inv
     &  *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
C Cartesian derivatives of eturn4 contrib
C (a_temp is reused as scratch for each row l of the agg* tables)
C Derivatives in DC(i)
      do l=1,3
        a_temp(1,1)=aggi(l,1)
        a_temp(1,2)=aggi(l,2)
        a_temp(2,1)=aggi(l,3)
        a_temp(2,2)=aggi(l,4)
        eelecij_turn(i,my_thread).gcorr4_turn(l)=
     &     eelecij_turn(i,my_thread).gcorr4_turn(l)
     &     -trace_ae(a_temp,e3tue2tue1t)*sint3inv
     &     *faclipij
#ifdef SHIELD
     &     *fac_shield(i)*fac_shield(j)
#endif
C Derivatives in DC(i+1)
        a_temp(1,1)=aggi1(l,1)
        a_temp(1,2)=aggi1(l,2)
        a_temp(2,1)=aggi1(l,3)
        a_temp(2,2)=aggi1(l,4)
        eelecij_turn(i+1,my_thread).gcorr4_turn(l)=
     &     eelecij_turn(i+1,my_thread).gcorr4_turn(l)
     &    -trace_ae(a_temp,e3tue2tue1t)*sint3inv
     &    *faclipij
#ifdef SHIELD
     &    *fac_shield(i)*fac_shield(j)
#endif
C Derivatives in DC(j)
        a_temp(1,1)=aggj(l,1)
        a_temp(1,2)=aggj(l,2)
        a_temp(2,1)=aggj(l,3)
        a_temp(2,2)=aggj(l,4)
        eelecij_turn(j,my_thread).gcorr4_turn(l)=
     &     eelecij_turn(j,my_thread).gcorr4_turn(l)
     &    -trace_ae(a_temp,e3tue2tue1t)*sint3inv
     &    *faclipij
#ifdef SHIELD
     &    *fac_shield(i)*fac_shield(j)
#endif
C Derivatives in DC(j-1)
        a_temp(1,1)=aggj1(l,1)
        a_temp(1,2)=aggj1(l,2)
        a_temp(2,1)=aggj1(l,3)
        a_temp(2,2)=aggj1(l,4)
        eelecij_turn(j-1,my_thread).gcorr4_turn(l)=
     &     eelecij_turn(j-1,my_thread).gcorr4_turn(l)
     &    -trace_ae(a_temp,e3tue2tue1t)*sint3inv
     &    *faclipij
#ifdef SHIELD
     &  *fac_shield(i)*fac_shield(j)
#endif
      enddo
c Contributions from lipid bilayer (third component only), accumulated
c in the per-thread records in the same way as eturn3 does
      eelecij_turn(i,my_thread).gcorr4_turn(3)=
     &  eelecij_turn(i,my_thread).gcorr4_turn(3)+
     &  lipid_cache(i).ssgrad*eello_t4/4.0d0*lipscale
      eelecij_turn(j,my_thread).gcorr4_turn(3)=
     &  eelecij_turn(j,my_thread).gcorr4_turn(3)+
     &  lipid_cache(j).ssgrad*eello_t4/4.0d0*lipscale
      eelecij_turn(i-1,my_thread).gcorr4_turn(3)=
     &  eelecij_turn(i-1,my_thread).gcorr4_turn(3)+
     &  lipid_cache(i).ssgrad*eello_t4/4.0d0*lipscale
      eelecij_turn(j-1,my_thread).gcorr4_turn(3)=
     &  eelecij_turn(j-1,my_thread).gcorr4_turn(3)+
     &  lipid_cache(j).ssgrad*eello_t4/4.0d0*lipscale
#ifdef DEBUG
c The debug dumps show this thread's accumulators, which is where the
c routine stores its results.
      write (iout,*) "gcorr4_turn before angles"
      write (iout,'(i5,3f10.5)') i-1,
     &  (eelecij_turn(i-1,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') i,
     &  (eelecij_turn(i,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') i+1,
     &  (eelecij_turn(i+1,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') j-1,
     &  (eelecij_turn(j-1,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') j,
     &  (eelecij_turn(j,my_thread).gcorr4_turn(k),k=1,3)
#endif
c Contributions from "angles" to the gradient
#ifdef DEBUG
      write (iout,*) "gloc_theta1",gloc_theta1,
     & " gloc_theta2",gloc_theta2," gloc_theta3",gloc_theta3
      write (iout,*) "gel_loc_turn4_phicos1",gel_loc_turn4_phicos1,
     & " gel_loc_turn4_phisin1",gel_loc_turn4_phisin1,
     & " gel_loc_turn4_phicos2",gel_loc_turn4_phicos2,
     & " gel_loc_turn4_phisin2",gel_loc_turn4_phisin2
#endif
c Extra theta term coming from the 1/sin(theta(i+3)) prefactor of
c eello_t4
      eello_t4_theta=eello_t4*costtab(i+3)*sint3inv*sint3inv
     &  *faclipij
      eelecij_turn(i,my_thread).gcorr4_turn(:)=
     &  eelecij_turn(i,my_thread).gcorr4_turn(:)
     & +gel_loc_turn4_phicos1*dcgs1s2tab(:,1,i+3)
     & +gel_loc_turn4_phisin1*dsgs1s2tab(:,1,i+3)
     & +gloc_theta1*dcosttab(:,1,i+2)
      eelecij_turn(i+1,my_thread).gcorr4_turn(:)=
     &  eelecij_turn(i+1,my_thread).gcorr4_turn(:)
     & +gel_loc_turn4_phicos1*dcgs1s2tab(:,2,i+3)
     & +gel_loc_turn4_phisin1*dsgs1s2tab(:,2,i+3)
     & +gel_loc_turn4_phicos2*dcgs1s2tab(:,1,i+4)
     & +gel_loc_turn4_phisin2*dsgs1s2tab(:,1,i+4)
     & +gloc_theta1*dcosttab(:,2,i+2)
     & +(gloc_theta2+eello_t4_theta)*dcosttab(:,1,i+3)
      eelecij_turn(i+2,my_thread).gcorr4_turn(:)=
     &  eelecij_turn(i+2,my_thread).gcorr4_turn(:)
     & +gel_loc_turn4_phicos1*dcgs1s2tab(:,3,i+3)
     & +gel_loc_turn4_phisin1*dsgs1s2tab(:,3,i+3)
     & +gel_loc_turn4_phicos2*dcgs1s2tab(:,2,i+4)
     & +gel_loc_turn4_phisin2*dsgs1s2tab(:,2,i+4)
     & +(gloc_theta2+eello_t4_theta)*dcosttab(:,2,i+3)
     & +gloc_theta3*dcosttab(:,1,i+4)
      eelecij_turn(i+3,my_thread).gcorr4_turn(:)=
     &  eelecij_turn(i+3,my_thread).gcorr4_turn(:)
     & +gel_loc_turn4_phicos2*dcgs1s2tab(:,3,i+4)
     & +gel_loc_turn4_phisin2*dsgs1s2tab(:,3,i+4)
     & +gloc_theta3*dcosttab(:,2,i+4)
#ifdef DEBUG
      write (iout,*) "gcorr4_turn after angles"
      write (iout,'(i5,3f10.5)') i-1,
     &  (eelecij_turn(i-1,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') i,
     &  (eelecij_turn(i,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') i+1,
     &  (eelecij_turn(i+1,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') j-1,
     &  (eelecij_turn(j-1,my_thread).gcorr4_turn(k),k=1,3)
      write (iout,'(i5,3f10.5)') j,
     &  (eelecij_turn(j,my_thread).gcorr4_turn(k),k=1,3)
#endif
      return
      end
C---------------------------------------------------------------------
      subroutine clear_eelec_values(num_threads)
C Reset the per-thread energy accumulators eelecij_value(1:num_threads)
C (fields ees, evdw1 and eel_loc) to zero.
      implicit none
      include 'DIMENSIONS'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      integer num_threads
      integer ithr
C Same three stores that eelecij_value_clear performs, done in place.
      do ithr=1,num_threads
        eelecij_value(ithr).ees=0.0d0
        eelecij_value(ithr).evdw1=0.0d0
        eelecij_value(ithr).eel_loc=0.0d0
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine eelecij_value_clear(val)
C Zero the three energy fields (ees, evdw1, eel_loc) of one
C eelecij_value_delta record.
      implicit none
      include 'DIMENSIONS'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      record /eelecij_value_delta/ val
      double precision zero
      parameter (zero=0.0d0)

      val.eel_loc=zero
      val.evdw1=zero
      val.ees=zero
      return
      end
C-------------------------------------------------------------------------------
      subroutine eelecij_value_add(my_thread,val)
C Add the energy increments stored in val (ees, evdw1, eel_loc) to
C the accumulator of thread slot my_thread, eelecij_value(my_thread).
      implicit none
      include 'DIMENSIONS'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      integer my_thread
      record /eelecij_value_delta/ val
      integer ithr

      ithr=my_thread
      eelecij_value(ithr).eel_loc=eelecij_value(ithr).eel_loc
     &                           +val.eel_loc
      eelecij_value(ithr).evdw1=eelecij_value(ithr).evdw1+val.evdw1
      eelecij_value(ithr).ees=eelecij_value(ithr).ees+val.ees
      return
      end
C-------------------------------------------------------------------------------
      subroutine sum_eelec_values(ees,evdw1,eel_loc,num_threads)
C Fold the per-thread energy accumulators into the caller's totals.
C Each of ees, evdw1 and eel_loc is INCREMENTED by the matching field
C of eelecij_value(1:num_threads); every field is reset to zero once
C it has been consumed.
      implicit none
      include 'DIMENSIONS'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      double precision ees,evdw1,eel_loc
      integer num_threads
      integer ithr

      do ithr=1,num_threads
C       take-and-clear, one field at a time
        ees=ees+eelecij_value(ithr).ees
        eelecij_value(ithr).ees=0.0d0

        evdw1=evdw1+eelecij_value(ithr).evdw1
        eelecij_value(ithr).evdw1=0.0d0

        eel_loc=eel_loc+eelecij_value(ithr).eel_loc
        eelecij_value(ithr).eel_loc=0.0d0
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine clear_eelec_arrays(num_threads)
C Zero the per-thread gradient accumulators for threads
C 1..num_threads:
C   - every component array of eelecij_array(t) over -1..nres
C   - the gel_loc{x,y,z} arrays additionally at -2, nres+1 and nres+2
C   - every field of eelecij_turn(-1:nres,t)
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.VAR'
      include 'COMMON.CHAIN'
      include 'COMMON.DERIV'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      integer num_threads
      integer ithr,ires

      do ithr=1,num_threads
C       pairwise-term accumulators, cleared with array sections
        eelecij_array(ithr).gvdwppx(-1:nres)=0.0d0
        eelecij_array(ithr).gvdwppy(-1:nres)=0.0d0
        eelecij_array(ithr).gvdwppz(-1:nres)=0.0d0

        eelecij_array(ithr).gelc_longx(-1:nres)=0.0d0
        eelecij_array(ithr).gelc_longy(-1:nres)=0.0d0
        eelecij_array(ithr).gelc_longz(-1:nres)=0.0d0

        eelecij_array(ithr).gelcx(-1:nres)=0.0d0
        eelecij_array(ithr).gelcy(-1:nres)=0.0d0
        eelecij_array(ithr).gelcz(-1:nres)=0.0d0

        eelecij_array(ithr).gel_loc_longx(-1:nres)=0.0d0
        eelecij_array(ithr).gel_loc_longy(-1:nres)=0.0d0
        eelecij_array(ithr).gel_loc_longz(-1:nres)=0.0d0

C       gel_loc* is cleared over the wider range -2..nres+2
        eelecij_array(ithr).gel_locx(-2:nres+2)=0.0d0
        eelecij_array(ithr).gel_locy(-2:nres+2)=0.0d0
        eelecij_array(ithr).gel_locz(-2:nres+2)=0.0d0

C       turn accumulators
        do ires=-1,nres
          eelecij_turn(ires,ithr).gcorr3_turn=0.0d0
          eelecij_turn(ires,ithr).gcorr4_turn=0.0d0
#ifdef SHIELD
          eelecij_turn(ires,ithr).gshieldc_t3=0.0d0
          eelecij_turn(ires,ithr).gshieldx_t3=0.0d0
          eelecij_turn(ires,ithr).gshieldc_t4=0.0d0
          eelecij_turn(ires,ithr).gshieldx_t4=0.0d0
#endif
        enddo
      enddo
      return
      end
C-------------------------------------------------------------------------------
      subroutine sum_eelec_arrays(num_threads)
C Reduce the per-thread gradient accumulators eelecij_array(1:
C num_threads) into the shared arrays gvdwpp, gelc_long, gelc,
C gel_loc_long and gel_loc, then zero the accumulators.
C
C The residue range 0..nres is divided among the OpenMP threads by
C split_work_for_threads; each thread reduces and afterwards clears
C its own slice [from,to].
C
C Argument:
C   num_threads - number of per-thread accumulator slots to sum over
#ifdef _OPENMP
      use omp_lib
#endif
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.VAR'
      include 'COMMON.LOCAL'
      include 'COMMON.CHAIN'
      include 'COMMON.DERIV'
      include 'COMMON.FFIELD'
      include 'COMMON.CORRMAT'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      double precision ggg(3),ggg1(3),ggg2(3),ggg3(3)
      integer i,j,num_threads,from,to
      integer threads_used,my_thread
      double precision scalar

#ifdef _OPENMP
      threads_used=omp_get_max_threads()
#else
      threads_used=1
#endif

C my_thread is assigned inside the region, so it is PRIVATE (there is
C no defined value outside the region that FIRSTPRIVATE could copy).
!$OMP PARALLEL NUM_THREADS(threads_used) DEFAULT(SHARED)
!$OMP& PRIVATE(my_thread,from,to,i,j,ggg,ggg1,ggg2,ggg3)
#ifdef _OPENMP
      my_thread=omp_get_thread_num()+1
#else
      my_thread=1
#endif
      call split_work_for_threads(from,to,0,nres,my_thread,threads_used)

      gel_loc(:,from:to)=0.0d0

C --- gvdwpp -----------------------------------------------------------
      do j=from,to
        ggg=0.0d0
        do i=1,num_threads
          ggg(1)=ggg(1)+eelecij_array(i).gvdwppx(j)
          ggg(2)=ggg(2)+eelecij_array(i).gvdwppy(j)
          ggg(3)=ggg(3)+eelecij_array(i).gvdwppz(j)
        enddo
        gvdwpp(:,j)=ggg
      enddo

C --- gelc_long --------------------------------------------------------
      do j=from,to
        ggg=0.0d0
        do i=1,num_threads
          ggg(1)=ggg(1)+eelecij_array(i).gelc_longx(j)
          ggg(2)=ggg(2)+eelecij_array(i).gelc_longy(j)
          ggg(3)=ggg(3)+eelecij_array(i).gelc_longz(j)
        enddo
        gelc_long(:,j)=ggg
      enddo

C --- gelc, gel_loc_long and gel_loc -----------------------------------
      do j=from,to
        ggg=0.0d0
        do i=1,num_threads
          ggg(1)=ggg(1)+eelecij_array(i).gelcx(j)
          ggg(2)=ggg(2)+eelecij_array(i).gelcy(j)
          ggg(3)=ggg(3)+eelecij_array(i).gelcz(j)
        enddo
C       the summed gelc is scaled by vbld_inv(j+1); entries with
C       j<=0 are set to zero instead
        if(j.le.0) then
          gelc(:,j)=0.0d0
        else
          gelc(:,j)=ggg*vbld_inv(j+1)
        endif

        ggg=0.0d0
        do i=1,num_threads
          if (isnan(eelecij_array(i).gel_loc_longx(j)))
     &      print *,"i",i," j",j," NaN in eelecij_array",i,j
          ggg(1)=ggg(1)+eelecij_array(i).gel_loc_longx(j)
          ggg(2)=ggg(2)+eelecij_array(i).gel_loc_longy(j)
          ggg(3)=ggg(3)+eelecij_array(i).gel_loc_longz(j)
        enddo
        if (isnan(ggg(1))) print *,"i",i," j",j," 2 NaN in ggg"
        gel_loc_long(:,j)=ggg

C       gel_loc(:,j) combines the summed accumulators of residues
C       j+1, j and j-1 with the matching muder blocks.
        ggg1=0.0d0
        ggg2=0.0d0
        ggg3=0.0d0
        !TODO: 3 times more summing than necessary
        do i=1,num_threads
          ggg1(1)=ggg1(1)+eelecij_array(i).gel_locx(j+1)
          ggg1(2)=ggg1(2)+eelecij_array(i).gel_locy(j+1)
          ggg1(3)=ggg1(3)+eelecij_array(i).gel_locz(j+1)

          ggg2(1)=ggg2(1)+eelecij_array(i).gel_locx(j)
          ggg2(2)=ggg2(2)+eelecij_array(i).gel_locy(j)
          ggg2(3)=ggg2(3)+eelecij_array(i).gel_locz(j)

          ggg3(1)=ggg3(1)+eelecij_array(i).gel_locx(j-1)
          ggg3(2)=ggg3(2)+eelecij_array(i).gel_locy(j-1)
          ggg3(3)=ggg3(3)+eelecij_array(i).gel_locz(j-1)
        enddo

        gel_loc(:,j)=0.0d0
        gel_loc(1,j)=gel_loc(1,j)+scalar(ggg1,muder(:,1,1,j+1))
        gel_loc(2,j)=gel_loc(2,j)+scalar(ggg1,muder(:,2,1,j+1))
        gel_loc(3,j)=gel_loc(3,j)+scalar(ggg1,muder(:,3,1,j+1))

        gel_loc(1,j)=gel_loc(1,j)+scalar(ggg2,muder(:,1,2,j))
        gel_loc(2,j)=gel_loc(2,j)+scalar(ggg2,muder(:,2,2,j))
        gel_loc(3,j)=gel_loc(3,j)+scalar(ggg2,muder(:,3,2,j))

        gel_loc(1,j)=gel_loc(1,j)+scalar(ggg3,muder(:,1,3,j-1))
        gel_loc(2,j)=gel_loc(2,j)+scalar(ggg3,muder(:,2,3,j-1))
        gel_loc(3,j)=gel_loc(3,j)+scalar(ggg3,muder(:,3,3,j-1))
      enddo

C The gel_loc* sums above read elements j-1 and j+1, which can lie in
C a neighbouring thread's slice; wait for all threads before any slice
C is cleared.
!$OMP BARRIER
      do i=1,num_threads
        eelecij_array(i).gvdwppx(from:to)=0.0d0
        eelecij_array(i).gvdwppy(from:to)=0.0d0
        eelecij_array(i).gvdwppz(from:to)=0.0d0

        eelecij_array(i).gelc_longx(from:to)=0.0d0
        eelecij_array(i).gelc_longy(from:to)=0.0d0
        eelecij_array(i).gelc_longz(from:to)=0.0d0

        eelecij_array(i).gelcx(from:to)=0.0d0
        eelecij_array(i).gelcy(from:to)=0.0d0
        eelecij_array(i).gelcz(from:to)=0.0d0

        eelecij_array(i).gel_loc_longx(from:to)=0.0d0
        eelecij_array(i).gel_loc_longy(from:to)=0.0d0
        eelecij_array(i).gel_loc_longz(from:to)=0.0d0

        eelecij_array(i).gel_locx(from:to)=0.0d0
        eelecij_array(i).gel_locy(from:to)=0.0d0
        eelecij_array(i).gel_locz(from:to)=0.0d0

C       thread 1 also clears the gel_loc* elements 0 and nres+1
        if (my_thread.eq.1) then
          eelecij_array(i).gel_locx(0)=0.0d0
          eelecij_array(i).gel_locy(0)=0.0d0
          eelecij_array(i).gel_locz(0)=0.0d0

          eelecij_array(i).gel_locx(nres+1)=0.0d0
          eelecij_array(i).gel_locy(nres+1)=0.0d0
          eelecij_array(i).gel_locz(nres+1)=0.0d0
        endif
      enddo
!$OMP END PARALLEL
      return
      end
C-----------------------------------------------------------------------------
      subroutine eelec_queue_work(chunk,idx,i,j,xyzj,rij,full)
C Append one (i,j) interaction to a work chunk.
C
C Arguments:
C   chunk - work buffer that receives the new entry
C   idx   - number of entries already in chunk; incremented by one
C   i     - index of the first group (not stored by this routine)
C   j     - index of the second group; stored together with its
C           lipid_cache values (and fac_shield(j) under SHIELD)
C   xyzj  - three coordinates stored in chunk.x/.y/.z
C   rij   - value stored in chunk.r
C   full  - set to .true. when idx has reached eelec_buffer_size
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.CHAIN'
      include 'COMMON.INTERACT'
#ifdef SHIELD
      include 'COMMON.SHIELD'
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      include 'COMMON.LIPID'
      integer idx,i,j
      double precision xyzj(3),rij
      logical full
      record /eelecij_work_chunk/ chunk
      integer islot

C Slot that the new entry occupies
      islot=idx+1

      chunk.r(islot)=rij
      chunk.x(islot)=xyzj(1)
      chunk.y(islot)=xyzj(2)
      chunk.z(islot)=xyzj(3)
      chunk.j(islot)=j
#ifdef SHIELD
      chunk.fac_shield_j(islot)=fac_shield(j)
#endif
      chunk.lipid_j_ssgrad(islot)=lipid_cache(j).ssgrad
      chunk.lipid_j_ss(islot)=lipid_cache(j).ss

      idx=islot
      full=islot.eq.eelec_buffer_size
      return
      end
C-----------------------------------------------------------------------------
      subroutine eelecij_move_work(from,from_idx,to,to_idx)
C Move all queued entries of one work chunk to the end of another.
C
C Arguments:
C   from     - source chunk
C   from_idx - number of entries in the source; set to 0 on return
C   to       - destination chunk
C   to_idx   - number of entries in the destination; increased by the
C              number of entries copied
      implicit none
      include 'DIMENSIONS'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      integer from_idx,to_idx
      record /eelecij_work_chunk/ from,to
      integer isrc,idst,ncopy

      ncopy=from_idx
      do isrc=1,ncopy
        idst=to_idx+isrc
        to.r(idst)=from.r(isrc)
        to.x(idst)=from.x(isrc)
        to.y(idst)=from.y(isrc)
        to.z(idst)=from.z(isrc)
        to.j(idst)=from.j(isrc)
        to.lipid_j_ss(idst)=from.lipid_j_ss(isrc)
        to.lipid_j_ssgrad(idst)=from.lipid_j_ssgrad(isrc)
#ifdef SHIELD
        to.fac_shield_j(idst)=from.fac_shield_j(isrc)
#endif
      enddo
C Publish the new fill level only if something was copied
      if (ncopy.gt.0) to_idx=to_idx+ncopy
      from_idx=0
      return
      end
C-----------------------------------------------------------------------------
      subroutine calculate_turns(doturn,eello_turn3,eello_turn4)
C Evaluate the third- and fourth-order turn contributions for all
C residues flagged in doturn, in parallel over residues.
C
C Arguments:
C   doturn      - per-residue bit mask: bit value 1 selects eturn3,
C                 bit value 2 selects eturn4
C   eello_turn3 - on return, sum of the eturn3 energies (overwritten)
C   eello_turn4 - on return, sum of the eturn4 energies (overwritten)
C
C Each thread accumulates its energies in its own column of
C th_eello_turn (rows 1 and 2 are used; the leading dimension is 8,
C presumably to keep the columns of different threads apart in memory
C - confirm).  The per-thread gradients are reduced afterwards by
C sum_eturn_arrays.
#ifdef _OPENMP
      use omp_lib
#endif
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.CHAIN'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      double precision eello_turn3,eello_turn4
      double precision th_eello_turn(8,max_fg_threads)
      integer*1 doturn(maxres)
      integer i,threads_used,my_thread,from,to

      th_eello_turn=0.0d0

#ifdef _OPENMP
C Never use more threads than th_eello_turn has columns.
      threads_used=min(omp_get_max_threads(),max_fg_threads)
#else
      threads_used=1
#endif
!$OMP PARALLEL IF(threads_used.gt.1) NUM_THREADS(threads_used)
!$OMP& DEFAULT(SHARED) PRIVATE(i,my_thread,from,to)
#ifdef _OPENMP
      my_thread=omp_get_thread_num()+1
#else
      my_thread=1
#endif
      call split_work_for_threads(from,to,1,nres,my_thread,threads_used)

      do i=from,to
        if(and(doturn(i),1).ne.0) then
          call eturn3(i,th_eello_turn(1,my_thread),my_thread)
        endif
        if(and(doturn(i),2).ne.0) then
          call eturn4(i,th_eello_turn(2,my_thread),my_thread)
        endif
      enddo
!$OMP END PARALLEL

      eello_turn3=sum(th_eello_turn(1,1:threads_used))
      eello_turn4=sum(th_eello_turn(2,1:threads_used))
      call sum_eturn_arrays(threads_used)
      return
      end
C-------------------------------------------------------------------------------
      subroutine sum_eturn_arrays(num_threads)
C Add the per-thread turn gradients eelecij_turn(:,1:num_threads) to
C the shared arrays gcorr3_turn and gcorr4_turn (and, under SHIELD,
C gshieldc_t3, gshieldx_t3, gshieldc_t4, gshieldx_t4), then zero the
C per-thread records.
C
C Residues -1 and 0 are handled serially; residues 1..nres are
C handled by an OpenMP parallel loop in which each iteration touches
C only its own residue.
C
C Argument:
C   num_threads - number of per-thread slots to sum over
#ifdef _OPENMP
      use omp_lib
#endif
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.VAR'
      include 'COMMON.CHAIN'
      include 'COMMON.DERIV'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      integer i,j,num_threads
C ggg(:,1)/ggg(:,2) collect gcorr3/gcorr4; g collects one shield
C array at a time (the record fields are rank-1 arrays of length 3).
      double precision ggg(3,2),g(3)

      do i=-1,0
        do j=1,num_threads
          gcorr3_turn(:,i)=
     &               gcorr3_turn(:,i)+eelecij_turn(i,j).gcorr3_turn
          gcorr4_turn(:,i)=
     &               gcorr4_turn(:,i)+eelecij_turn(i,j).gcorr4_turn
#ifdef SHIELD
          gshieldc_t3(:,i)=
     &               gshieldc_t3(:,i)+eelecij_turn(i,j).gshieldc_t3
          gshieldx_t3(:,i)=
     &               gshieldx_t3(:,i)+eelecij_turn(i,j).gshieldx_t3
          gshieldc_t4(:,i)=
     &               gshieldc_t4(:,i)+eelecij_turn(i,j).gshieldc_t4
          gshieldx_t4(:,i)=
     &               gshieldx_t4(:,i)+eelecij_turn(i,j).gshieldx_t4
#endif

          eelecij_turn(i,j).gcorr3_turn=0.0d0
          eelecij_turn(i,j).gcorr4_turn=0.0d0
#ifdef SHIELD
          eelecij_turn(i,j).gshieldc_t3=0.0d0
          eelecij_turn(i,j).gshieldx_t3=0.0d0
          eelecij_turn(i,j).gshieldc_t4=0.0d0
          eelecij_turn(i,j).gshieldx_t4=0.0d0
#endif
        enddo
      enddo

!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,ggg,g)
      do i=1,nres
        ggg(:,1)=0.0d0
        ggg(:,2)=0.0d0
        do j=1,num_threads
          ggg(:,1)=ggg(:,1)+eelecij_turn(i,j).gcorr3_turn
          ggg(:,2)=ggg(:,2)+eelecij_turn(i,j).gcorr4_turn
          eelecij_turn(i,j).gcorr3_turn=0.0d0
          eelecij_turn(i,j).gcorr4_turn=0.0d0
        enddo
        gcorr3_turn(:,i)=gcorr3_turn(:,i)+ggg(:,1)
        gcorr4_turn(:,i)=gcorr4_turn(:,i)+ggg(:,2)

#ifdef SHIELD
        g=0.0d0
        do j=1,num_threads
          g=g+eelecij_turn(i,j).gshieldc_t3
          eelecij_turn(i,j).gshieldc_t3=0.0d0
        enddo
        gshieldc_t3(:,i)=gshieldc_t3(:,i)+g

        g=0.0d0
        do j=1,num_threads
          g=g+eelecij_turn(i,j).gshieldx_t3
          eelecij_turn(i,j).gshieldx_t3=0.0d0
        enddo
        gshieldx_t3(:,i)=gshieldx_t3(:,i)+g

        g=0.0d0
        do j=1,num_threads
          g=g+eelecij_turn(i,j).gshieldc_t4
          eelecij_turn(i,j).gshieldc_t4=0.0d0
        enddo
        gshieldc_t4(:,i)=gshieldc_t4(:,i)+g

        g=0.0d0
        do j=1,num_threads
          g=g+eelecij_turn(i,j).gshieldx_t4
          eelecij_turn(i,j).gshieldx_t4=0.0d0
        enddo
        gshieldx_t4(:,i)=gshieldx_t4(:,i)+g
#endif

      enddo
!$OMP END PARALLEL DO

      return
      end
C-----------------------------------------------------------------------
      subroutine mat_matt_2_trace(a1,a2,trace)
C Trace of a1*transpose(a2) for 2x2 matrices, i.e. the sum of the
C element-wise products a1(r,c)*a2(r,c).
C   a1, a2 - input 2x2 matrices
C   trace  - result
      implicit none
      double precision a1(2,2),a2(2,2)
      double precision trace

C Row 1 products are added first, then row 2, then the two partial
C sums.
      trace=(a1(1,1)*a2(1,1)+a1(1,2)*a2(1,2))
     &     +(a1(2,1)*a2(2,1)+a1(2,2)*a2(2,2))
      end
C----------------------------------------------------------------------
      subroutine vecmat_mat_trace(vecmat,l,mat,trace)
C Trace of A*mat, where A is the 2x2 matrix stored row-wise in row l
C of vecmat:  A(1,1)=vecmat(l,1), A(1,2)=vecmat(l,2),
C             A(2,1)=vecmat(l,3), A(2,2)=vecmat(l,4).
C mat is used as given (not transposed).
C   vecmat - 3x4 table, one packed 2x2 matrix per row
C   l      - row of vecmat to use
C   mat    - 2x2 matrix
C   trace  - result
      implicit none
      integer l
      double precision vecmat(3,4),mat(2,2),trace

      trace=vecmat(l,1)*mat(1,1)
      trace=trace+vecmat(l,2)*mat(2,1)
      trace=trace+vecmat(l,3)*mat(1,2)
      trace=trace+vecmat(l,4)*mat(2,2)
      end subroutine
C---------------------------------------------------------------------
      double precision function trace_ae(a,e)
C Half of the trace of the 2x2 matrix product a*e:
C   trace_ae = 0.5*( a(1,1)*e(1,1)+a(1,2)*e(2,1)
C                   +a(2,1)*e(1,2)+a(2,2)*e(2,2) )
C   a, e - input 2x2 matrices (not modified)
      implicit none
      double precision a(2,2),e(2,2)
      trace_ae=0.5d0*(a(1,1)*e(1,1)+a(1,2)*e(2,1)+a(2,1)*e(1,2)
     &  +a(2,2)*e(2,2))
      return
      end
C-----------------------------------------------------------------------
      subroutine eelec_fill_thread_data(th_data)
C Copy the 2x2 parameter tables app, bpp, ael6, ael3 and sqrt_ael6
C into the thread-data record with the two indices swapped:
C   th_data.aaa(j,i)=app(i,j), and likewise for the other tables.
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.INTERACT'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      record /eelec_thread_data/ th_data
      integer irow,icol

C irow/icol index the source tables; the destination is transposed.
      do icol=1,2
        do irow=1,2
          th_data.aaa(icol,irow)=app(irow,icol)
          th_data.bbb(icol,irow)=bpp(irow,icol)
          th_data.ael3(icol,irow)=ael3(irow,icol)
          th_data.ael6(icol,irow)=ael6(irow,icol)
          th_data.sqrt_ael6(icol,irow)=sqrt_ael6(irow,icol)
        enddo
      enddo
      end
C-----------------------------------------------------------------------
      subroutine init_eelec
C
C Builds the turn3 and turn4 work lists and the per-index turn flags.
C
C On exit:
C   eelecij_doturn(1:nres)   - cleared, then per index i: bit value 1
C                              is set when i is on the turn3 list and
C                              wturn3 > 0; bit value 2 is set when i
C                              is on the turn4 list, wturn4 > 0 and
C                              itype(i+2) differs from ntyp1
C   eelecij_work_turn3(1:n3) - accepted indices i taken from
C                              iturn3_start..iturn3_end
C   eelecij_work_size_turn3  - n3
C   eelecij_work_turn4(1:n4) - accepted indices i taken from
C                              iturn4_start..iturn4_end
C   eelecij_work_size_turn4  - n4
C
C An index is rejected when any of the inspected itype entries equals
C ntyp1 (presumably the dummy residue type - confirm).
C
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.CHAIN'
      include 'COMMON.LOCAL'
      include 'COMMON.INTERACT'
      include 'COMMON.FFIELD'
#ifdef SHIELD
      include 'COMMON.EELEC.SHIELD'
#else
      include 'COMMON.EELEC'
#endif
      integer nkept,i
      logical has_ntyp1

      eelecij_doturn(1:nres)=0

C --- turn3 list: itype(i) ... itype(i+3) are inspected.
C     No lower-bound test on i is applied in this loop.
      nkept=0
      do i=iturn3_start,iturn3_end
        has_ntyp1=any(itype(i:i+3).eq.ntyp1)
        if (.not.has_ntyp1) then
          nkept=nkept+1
          eelecij_work_turn3(nkept)=i
          if (wturn3.gt.0.0d0) then
            eelecij_doturn(i)=or(eelecij_doturn(i),1)
          endif
        endif
      enddo
      eelecij_work_size_turn3=nkept

C --- turn4 list: indices below 1 are skipped; itype(i), itype(i+1),
C     itype(i+3) and itype(i+4) decide list membership, itype(i+2)
C     only decides whether the flag bit is set.
      nkept=0
      do i=iturn4_start,iturn4_end
        if (i.ge.1) then
          has_ntyp1=itype(i).eq.ntyp1
     &         .or. itype(i+1).eq.ntyp1
     &         .or. itype(i+3).eq.ntyp1
     &         .or. itype(i+4).eq.ntyp1
          if (.not.has_ntyp1) then
            nkept=nkept+1
            eelecij_work_turn4(nkept)=i
            if (wturn4.gt.0.0d0 .and. itype(i+2).ne.ntyp1) then
              eelecij_doturn(i)=or(eelecij_doturn(i),2)
            endif
          endif
        endif
      enddo
      eelecij_work_size_turn4=nkept

      end

C=======================================================================
C Template instantiation by textual inclusion.  eelec_t.F is included
C once and eelec_work.F twice per pass, each time under a different
C set of macros.  The included files are not visible here; presumably
C they use the name macros below as routine names - confirm.
C
C Pass 1: the WEL_LOC switch macro is defined and the name macros
C expand to eelecij, eelecij2, eelecij_do_work_seq and
C eelecij_do_work_nonseq.
#define EELEC_WITH_WEL_LOC
#define EELECIJ            eelecij
#define EELECIJ2           eelecij2
#define EELEC_SEQ          eelecij_do_work_seq
#define EELEC_NSEQ         eelecij_do_work_nonseq
#include"eelec_t.F"
C First inclusion of the work template: the work-function macro is
C the _seq name, the J mapping returns its argument unchanged and the
C index mapping is seq-from+1.
#define EELEC_WORK_FN      EELEC_SEQ
#define EELEC_MAP_J(seq)   seq
#define EELEC_MAP_IDX(seq) seq-from+1
#include"eelec_work.F"
#undef EELEC_MAP_IDX
#undef EELEC_MAP_J
#undef EELEC_WORK_FN
C Second inclusion of the work template: the work-function macro is
C the _nonseq name, the J mapping looks the value up in chunk.j and
C the index mapping returns its argument unchanged.
#define EELEC_WORK_FN      EELEC_NSEQ
#define EELEC_MAP_J(seq)   chunk.j(seq)
#define EELEC_MAP_IDX(seq) seq
#include"eelec_work.F"
#undef EELEC_MAP_IDX
#undef EELEC_MAP_J
#undef EELEC_WORK_FN
#undef EELEC_NSEQ
#undef EELEC_SEQ
#undef EELECIJ
#undef EELECIJ2

#undef EELEC_WITH_WEL_LOC
C Pass 2: same sequence as pass 1, but with the WEL_LOC switch macro
C undefined and every name macro carrying the _nowl suffix.
#define EELECIJ            eelecij_nowl
#define EELECIJ2           eelecij2_nowl
#define EELEC_SEQ          eelecij_do_work_seq_nowl
#define EELEC_NSEQ         eelecij_do_work_nonseq_nowl
#include"eelec_t.F"
C First inclusion of the work template for the _nowl variant.
#define EELEC_WORK_FN      EELEC_SEQ
#define EELEC_MAP_J(seq)   seq
#define EELEC_MAP_IDX(seq) seq-from+1
#include"eelec_work.F"
#undef EELEC_MAP_IDX
#undef EELEC_MAP_J
#undef EELEC_WORK_FN
C Second inclusion of the work template for the _nowl variant.
#define EELEC_WORK_FN      EELEC_NSEQ
#define EELEC_MAP_J(seq)   chunk.j(seq)
#define EELEC_MAP_IDX(seq) seq
#include"eelec_work.F"
#undef EELEC_MAP_IDX
#undef EELEC_MAP_J
#undef EELEC_WORK_FN
#undef EELEC_NSEQ
#undef EELEC_SEQ
#undef EELECIJ
#undef EELECIJ2
