      subroutine etor_kcc(etors)
C
C Torsional energy term of the "kcc" form and its gradient.
C
C Argument:
C   etors  (output) sum of the per-term energies etori over
C          i=iphi_start..iphi_end.
C
C A term i is skipped when any of itype(i-3), itype(i-2), itype(i-1),
C itype(i) equals ntyp1.  Otherwise the term is the sum over
C j=1..nterm_kcc(itori,itori1) of
C   sumvalc*cnphis1s2(j) + sumvals*snphis1s2(j-1)*sphsth1sth2
C where sumvalc and sumvals are polynomials in costhet1 and costhet2
C with coefficients v1_kcc and v2_kcc, and cnphis1s2, snphis1s2 are
C returned by mytschebyshev1andgrad and mytschebyshev2andgrad.
C
C Side effects: gradient contributions are added to gloctordc; the
C arrays in common block /etorkccgloctor/ are used as scratch space.
C
C Threading: the i range is divided between the threads of the team
C by split_work_for_threads.  Term i contributes to gradient columns
C i-3, i-2 and i-1.  Only the i-1 contribution is added to gloctordc
C inside the parallel region; the other two are staged in
C gloctordcm3 and gloctordcm2 (presumably so that two threads never
C update the same column) and merged into gloctordc afterwards.
C
#ifdef _OPENMP
      use omp_lib
#endif
      implicit none
      include 'DIMENSIONS'
      include 'COMMON.VAR'
      include 'COMMON.GEO'
      include 'COMMON.LOCAL'
      include 'COMMON.TORSION'
      include 'COMMON.INTERACT'
      include 'COMMON.DERIV'
      include 'COMMON.CHAIN'
      include 'COMMON.NAMES'
      include 'COMMON.IOUNITS'
      include 'COMMON.FFIELD'
      include 'COMMON.TORCNSTR'
      include 'COMMON.VECTORS'
      include 'COMMON.MD'
      include 'COMMON.CONTROL'
      integer i,j,k,l,itori,itori1,nval
      double precision etors
      double precision c1(0:maxval_kcc),c2(0:maxval_kcc)
      double precision cnphis1s2(0:maxtor_kcc),snphis1s2(0:maxtor_kcc),
     & cnphis1s2cphi(0:maxtor_kcc),cnphis1s2t1t2(0:maxtor_kcc),
     & snphis1s2cphi(0:maxtor_kcc),snphis1s2t1t2(0:maxtor_kcc),
     & dcost1(3,2),dcost2(3,2),dcgs1s2(3,3),dsgs1s2(3,3)
      double precision aux,sumvalc,sumvals,costhet1,sinthet1,costhet2,
     & sinthet2,sint1t2,sint1t2n,sint1t2n1,sphsth1sth2,
     & cphsth1sth2,cosphi,sinphi,gradvalct1,gradvalct2,gradvalst1,
     & gradvalst2,glocicg,glocisg,glocit1,glocit2,etori
      double precision phii
      double precision sumaux
      double precision th_etors(8,max_fg_threads)  ! 8 to fill cache line
      double precision gloctordcm3(3,-1:maxres)
      double precision gloctordcm2(3,-1:maxres)
      common /etorkccgloctor/ gloctordcm3,gloctordcm2
C threads_used: number of threads requested (upper bound of the team)
C team_size:    number of threads actually running the parallel region
      integer threads_used,my_thread,from,to,team_size
#if defined(DEBUG) || defined (ENERGY_DEC)
      logical lprn
C Set lprn=.true. for debugging
      lprn=.false.
C      print *,"entering kcc"
#endif
      !etors=0.0D0
C Per-thread energy accumulators; only row 1 of each slot is used.
      th_etors=0.0d0
C Clear the staging columns near both ends of the range; the merge
C loop after the parallel region reads a few columns that the
C per-thread clearing below does not touch.
      gloctordcm3(:,iphi_start-3:iphi_start-1)=0.0d0
      gloctordcm2(:,iphi_start-3:iphi_start-1)=0.0d0
      gloctordcm3(:,iphi_end-3:iphi_end)=0.0d0
      gloctordcm2(:,iphi_end-2:iphi_end)=0.0d0
c      write (iout,*) "iphi_start,iphi_end",iphi_start,iphi_end
#ifdef _OPENMP
C Never request more threads than th_etors has slots for.
      threads_used=min(omp_get_max_threads(),size(th_etors,2))
#else
      threads_used=1
#endif
!$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(threads_used)
!$OMP& PRIVATE(i,j,k,l,nval,itori,itori1,glocit1,glocit2,
!$OMP&         sinthet1,sinthet2,costhet1,costhet2,c1,c2,etori,
!$OMP&         gradvalct1,gradvalct2,gradvalst1,gradvalst2,
!$OMP&         aux,sumvals,sumvalc,sint1t2,sint1t2n,sint1t2n1,
!$OMP&         cphsth1sth2,sphsth1sth2,dcgs1s2,dsgs1s2,
!$OMP&         cnphis1s2,snphis1s2,cnphis1s2cphi,snphis1s2cphi,
!$OMP&         cnphis1s2t1t2,snphis1s2t1t2,glocicg,glocisg,
!$OMP&         sumaux,team_size,
!$OMP&         sinphi,cosphi,dcost1,dcost2,my_thread,from,to)
#ifdef _OPENMP
      my_thread=omp_get_thread_num()+1
C The runtime may deliver fewer threads than requested, so the work
C is split over the actual team size; otherwise the share of the
C missing threads would never be computed.
      team_size=omp_get_num_threads()
#else
      my_thread=1
      team_size=1
#endif
      !from=iphi_start+((iphi_end-iphi_start)/threads_used)*(my_thread-1)
      !to=iphi_start+((iphi_end-iphi_start)/threads_used)*my_thread-1
      !if(my_thread.eq.threads_used) to=iphi_end
      call split_work_for_threads(from,to,iphi_start,iphi_end,
     &                            my_thread,team_size)
C Each thread clears exactly the staging columns it accumulates into.
      gloctordcm3(:,from-3:to-3)=0.0d0
      gloctordcm2(:,from-2:to-2)=0.0d0
      !do i=iphi_start,iphi_end
      do i=from,to
C ANY TWO ARE DUMMY ATOMS in row CYCLE
c        if (((itype(i-3).eq.ntyp1).and.(itype(i-2).eq.ntyp1)).or.
c     &      ((itype(i-2).eq.ntyp1).and.(itype(i-1).eq.ntyp1))  .or.
c     &      ((itype(i-1).eq.ntyp1).and.(itype(i).eq.ntyp1))) cycle
C Active test: skip the term when any of the four residues is ntyp1.
        if (itype(i-2).eq.ntyp1.or. itype(i-1).eq.ntyp1
     &      .or. itype(i).eq.ntyp1 .or. itype(i-3).eq.ntyp1) cycle
        itori=itortyp(itype(i-2))
        itori1=itortyp(itype(i-1))
c        phii=phi(i)
c        glocig=0.0D0
        glocicg=0.0d0
        glocisg=0.0d0
        glocit1=0.0d0
        glocit2=0.0d0
C to avoid multiple division by 2
c        theti22=0.5d0*theta(i)
C theta 12 is the theta_1 /2
C theta 22 is theta_2 /2
c        theti12=0.5d0*theta(i-1)
C and appropriate sinus function
c        sinthet1=dsin(theta(i-1))
c        sinthet2=dsin(theta(i))
c        costhet1=dcos(theta(i-1))
c        costhet2=dcos(theta(i))
C Sines, cosines and cosine derivatives of the two planar angles are
C read from precomputed tables instead of being evaluated here.
        costhet1=costtab(i-1)
        costhet2=costtab(i)
        sinthet1=sinttab(i-1)
        sinthet2=sinttab(i)
        do j=1,2
          dcost1(:,j)=dcosttab(:,j,i-1)
          dcost2(:,j)=dcosttab(:,j,i)
        enddo
c        dcost1(:,1)=-(dc_norm(:,i-2)+costhet1*dc_norm(:,i-3))
c     &    *vbld_inv(i-2)
c        write (iout,*) "i=",i
c        write (iout,*) costhet1,vbld_inv(i-2)
c        write (iout,*) dc_norm(:,i-2)
c        write (iout,*) dc_norm(:,i-3)
c        write (iout,*) "dcost1",dcost1(:,1)
c        write (iout,*) "dcost1tab",dcosttab(:,1,i-1)
c        dcost1(:,2)=-(dc_norm(:,i-3)+costhet1*dc_norm(:,i-2))
c     &    *vbld_inv(i-1)
c        dcost2(:,1)=-(dc_norm(:,i-1)+costhet2*dc_norm(:,i-2))
c     &    *vbld_inv(i-1)
c        dcost2(:,2)=-(dc_norm(:,i-2)+costhet2*dc_norm(:,i-1))
c     &    *vbld_inv(i)
c        write (iout,*) i," costhe1",costhet1,dcos(theta(i-1)),
c     &   " sinthe1",sinthet1,dsin(theta(i-1)),
c     &   " costhe2",costhet2,dcos(theta(i)),
c     &   " sinthe2",sinthet2,dsin(theta(i))
C to speed up lets store its multiplication
        sint1t2=sinthet2*sinthet1
        sint1t2n=1.0d0
C \sum_{i=1}^n (sin(theta_1) * sin(theta_2))^n * (c_n* cos(n*gamma)
C +d_n*sin(n*gamma)) *
C \sum_{i=1}^m (1+a_m*Tb_m(cos(theta_1 /2))+b_m*Tb_m(cos(theta_2 /2)))
C we have two sum 1) Non-Chebyshev which is with n and gamma
        nval=nterm_kcc_Tb(itori,itori1)
c        nval=1
C c1(k), c2(k) hold costhet1**(k-1), costhet2**(k-1) for k>=1.
C Element 0 is zero so that the k-1 (l-1) lookups in the gradient
C sums below are defined for k=1 (l=1).
        c1(0)=0.0d0
        c2(0)=0.0d0
        c1(1)=1.0d0
        c2(1)=1.0d0
        do j=2,nval
          c1(j)=c1(j-1)*costhet1
          c2(j)=c2(j-1)*costhet2
        enddo
        etori=0.0d0
c        write (iout,*) "i=",i
        cphsth1sth2=cphsth1sth2tab(i)
        sphsth1sth2=sphsth1sth2tab(i)
c        write(iout,*) "cph sph",cphsth1sth2,sphsth1sth2
        do k=1,3
          dcgs1s2(:,k)=dcgs1s2tab(:,k,i)
c          write (iout,*) dcgs1s2(:,k)
        enddo
        do k=1,3
          dsgs1s2(:,k)=dsgs1s2tab(:,k,i)
c          write (iout,*) dsgs1s2(:,k)
        enddo
c cosines of the multiples of dihedral angle multiplied by the
c respective powers of the adjacent planar angles and their derivatives
c in these products and in the product of the sines of planar angles.
        call mytschebyshev1andgrad(nterm_kcc(itori,itori1),
     &    cphsth1sth2,sint1t2,cnphis1s2,cnphis1s2cphi,cnphis1s2t1t2)
c sines of the multiples of dihedral angle multiplied by the
c respective powers of the adjacent planar angles without final
c multiplication by sin(gamma)sin(theta1)sin(theta2) and derivatives
        call mytschebyshev2andgrad(nterm_kcc(itori,itori1),
     &    cphsth1sth2,sint1t2,snphis1s2,snphis1s2cphi,snphis1s2t1t2)
c        write (iout,*) "cnphis1s2cphi",
c     &    cnphis1s2cphi(1:nterm_kcc(itori,itori1))
c        write (iout,*) "snphis1s2cphi",
c     &    snphis1s2cphi(1:nterm_kcc(itori,itori1))
        sumaux=0.0d0
        do j=1,nterm_kcc(itori,itori1)
c          cosphi=dcos(j*phii)
c          sinphi=dsin(j*phii)
          sint1t2n1=sint1t2n
          sint1t2n=sint1t2n*sint1t2
          cosphi=cnphis1s2(j)
          sinphi=snphis1s2(j-1)*sphsth1sth2
c          write (iout,*) i,j,"cosphi",cosphi/sint1t2n,dcos(j*phii),
c     &     " sinphi",sinphi/sint1t2n,dsin(j*phii)
C Coefficient of the cosine part and its derivatives with respect to
C costhet1 and costhet2.
          sumvalc=0.0d0
          gradvalct1=0.0d0
          gradvalct2=0.0d0
          do k=1,nval
            do l=1,nval
              sumvalc=sumvalc+v1_kcc(l,k,j,itori1,itori)*c1(k)*c2(l)
              gradvalct1=gradvalct1+
     &           (k-1)*v1_kcc(l,k,j,itori1,itori)*c1(k-1)*c2(l)
              gradvalct2=gradvalct2+
     &           (l-1)*v1_kcc(l,k,j,itori1,itori)*c1(k)*c2(l-1)
            enddo
          enddo
c          sumvalc=0.0d0
c          gradvalct1=0.0d0
c          gradvalct2=0.0d0
C Same for the sine part.
          sumvals=0.0d0
          gradvalst1=0.0d0
          gradvalst2=0.0d0
          do k=1,nval
            do l=1,nval
              sumvals=sumvals+v2_kcc(l,k,j,itori1,itori)*c1(k)*c2(l)
              gradvalst1=gradvalst1+
     &           (k-1)*v2_kcc(l,k,j,itori1,itori)*c1(k-1)*c2(l)
              gradvalst2=gradvalst2+
     &           (l-1)*v2_kcc(l,k,j,itori1,itori)*c1(k)*c2(l-1)
            enddo
          enddo
c          sumvals=0.0d0
c          gradvalst1=0.0d0
c          gradvalst2=0.0d0
#ifdef DEBUG
          if (lprn) write (iout,*)j,"sumvalc",sumvalc," sumvals",sumvals
#endif
c          etori=etori+sint1t2n*(sumvalc*cosphi+sumvals*sinphi)
          etori=etori+sumvalc*cosphi+sumvals*sinphi
c          etori=etori+cosphi!+sinphi
c          etori=etori+sint1t2n
C glocicg is the derivative of the torsional energy in
C cos(phi)*cos(theta1)*cos(theta2)
          glocicg=glocicg+(sumvalc*cnphis1s2cphi(j)+
     &      sumvals*sphsth1sth2*snphis1s2cphi(j-1))
c          glocicg=glocicg+cnphis1s2cphi(j)
c          glocicg=0.0d0
C glocisg is the derivative of the torsional energy in
C sin(phi)*cos(theta1)*cos(theta2)
          glocisg=glocisg+sumvals*snphis1s2(j-1)
c           glocisg=glocisg+cnphis1s2(j-1)
c           glocisg=0.0d0
C now gradient over theta_1
c          sumvalc=0.0d0
c          sumvals=0.0d0
          aux=sumvalc*cnphis1s2t1t2(j)+sumvals*snphis1s2t1t2(j-1)
     &     *sphsth1sth2
C aux is summed over j so that the division by sinthet1 and sinthet2
C is done once per term, after the j loop.
          sumaux=sumaux+aux
c          aux=j*sint1t2n1
          glocit1=glocit1+(gradvalct1*cosphi+gradvalst1*sinphi)
!    &      -aux*sinthet2*costhet1/sinthet1
          glocit2=glocit2+(gradvalct2*cosphi+gradvalst2*sinphi)
!    &      -aux*sinthet1*costhet2/sinthet2
c          glocit1=0.0d0
c          glocit2=0.0d0
        enddo ! j
        glocit1=glocit1-sumaux*sinthet2*costhet1/sinthet1
        glocit2=glocit2-sumaux*sinthet1*costhet2/sinthet2
        !etors=etors+etori
        th_etors(1,my_thread)=th_etors(1,my_thread)+etori
c Calculate the derivatives in dC
!       gloctordc(:,i-3)=gloctordc(:,i-3)+glocicg*dcgs1s2(:,1)+
!    &   glocisg*dsgs1s2(:,1)+glocit1*dcost1(:,1)
!       gloctordc(:,i-2)=gloctordc(:,i-2)+glocicg*dcgs1s2(:,2)+
!    &   glocisg*dsgs1s2(:,2)+glocit1*dcost1(:,2)+glocit2*dcost2(:,1)
C Contributions to columns i-3 and i-2 go to the staging arrays;
C only column i-1 is updated in gloctordc directly.
        gloctordcm3(:,i-3)=gloctordcm3(:,i-3)+glocicg*dcgs1s2(:,1)+
     &    glocisg*dsgs1s2(:,1)+glocit1*dcost1(:,1)
        gloctordcm2(:,i-2)=gloctordcm2(:,i-2)+glocicg*dcgs1s2(:,2)+
     &    glocisg*dsgs1s2(:,2)+glocit1*dcost1(:,2)+glocit2*dcost2(:,1)
        gloctordc(:,i-1)=gloctordc(:,i-1)+glocicg*dcgs1s2(:,3)+
     &    glocisg*dsgs1s2(:,3)+glocit2*dcost2(:,2)
#ifdef ENERGY_DEC
        if (lprn) 
     &    write (iout,*) "c1",(c1(k),k=0,nval)," c2",(c2(k),k=0,nval)
        if (energy_dec) then
          write (iout,'(2(1x,a,i6),2i3,3(a,f9.3),a,f10.5)') 
     &      restyp(itype(i-2)),i-2,restyp(itype(i-1)),i-1,itori,itori1,
     &      " the1",theta(i-1)*rad2deg," the2",theta(i)*rad2deg,
     &      " gam",phi(i)*rad2deg," etor",etori
        endif
#endif
      enddo
!$OMP END PARALLEL

C Merge the staged contributions into gloctordc; every iteration
C touches a different column.
!$OMP PARALLEL DO DEFAULT(SHARED)
      do i=iphi_start-2,iphi_end
        gloctordc(:,i-1)=gloctordc(:,i-1)
     &                  +gloctordcm2(:,i-1)
     &                  +gloctordcm3(:,i-1)
      enddo
!$OMP END PARALLEL DO
#ifdef DEBUG
      if (lprn) then
        write (iout,*) "gloctordc"
        do i=1,nres
          write (iout,*) i,gloctordc(:,i)
        enddo
      endif
#endif
C Sum the per-thread energies.  Slots of threads that did not run
C are still zero from the initialisation above.
      etors=0.0d0
      do i=1,threads_used
        etors=etors+th_etors(1,i)
      enddo
#ifdef ENERGY_DEC
      if (energy_dec) write (iout,'(80(1h-))')
#endif
      return
      end
