C***********************  Contents  ****************************************
C* Sample driver program, VAX-11 Fortran; **********************************
C* HC: O(n^2) time, O(n^2) space hierarchical clustering, Fortran 77 *******
C* HCASS: determine cluster-memberships, Fortran 77. *********************** 
C* HCDEN: draw upper part of dendrogram, VAX-11 Fortran. *******************
C* Sample data set: last 36 lines. *****************************************
C***************************************************************************
C      REAL DATA(18,16),CRIT(18),MEMBR(18)
C      REAL CRITVAL(9)
C      INTEGER IA(18),IB(18)
C      INTEGER ICLASS(18,9),HVALS(9)
C      INTEGER IORDER(9),HEIGHT(9)
C      DIMENSION NN(18),DISNN(18)
C      REAL D(153)
C      LOGICAL FLAG(18)
C IN ABOVE, 18=N, 16=M, 9=LEV, 153=N(N-1)/2.
C
C
C	OPEN(UNIT=21,STATUS='OLD',FILE='SPECTR.DAT')
C
C
C      N = 18
C      M = 16
C      DO I=1,N
C        READ(21,100)(DATA(I,J),J=1,M)        
C      ENDDO
C 100  FORMAT(8F7.1)
C
C
C      LEN = (N*(N-1))/2
C      IOPT=1
C      CALL HC(N,M,LEN,IOPT,DATA,IA,IB,CRIT,MEMBR,NN,DISNN,FLAG,D)
C
C
C      LEV = 9
C      CALL HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,CRITVAL,HEIGHT)
C
C
C      CALL HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
C
C
C      END
C++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
C                                                            C
C  HIERARCHICAL CLUSTERING using (user-specified) criterion. C
C                                                            C
C  Parameters:                                               C
C                                                            C
Cremoved  DATA(N,M)         input data matrix,               C
C  DISS(LEN)         dissimilarities in lower half diagonal  C
C                    storage; LEN = N.N-1/2,                 C
C  IOPT              clustering criterion to be used,        C
C  IA, IB, CRIT      history of agglomerations; dimensions   C
C                    N, first N-1 locations only used,       C
C  MEMBR, NN, DISNN  vectors of length N, used to store      C 
C                    cluster cardinalities, current nearest  C
C                    neighbour, and the dissimilarity assoc. C
C                    with the latter.                        C
C  FLAG              boolean indicator of agglomerable obj./ C
C                    clusters.                               C
C                                                            C
C  F. Murtagh, ESA/ESO/STECF, Garching, February 1986.       C
C                                                            C
C------------------------------------------------------------C
      SUBROUTINE HC(N,M,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN,
     X                FLAG,DISS)
      REAL MEMBR(N)
      REAL DISS(LEN)
      INTEGER IA(N),IB(N)
      REAL CRIT(N)
      DIMENSION NN(N),DISNN(N)
      LOGICAL FLAG(N)
      REAL INF
      DATA INF/1.E+20/
C
C  Initializations
C
      DO I=1,N
         MEMBR(I)=1.
         FLAG(I)=.TRUE.
      ENDDO
      NCL=N
C
C  Construct dissimilarity matrix
C
      DO I=1,N-1
         DO J=I+1,N
            IND=IOFFSET(N,I,J)
cinput            DISS(IND)=0.
cinput            DO K=1,M
cinput               DISS(IND)=DISS(IND)+(DATA(I,K)-DATA(J,K))**2
cinput            ENDDO
            IF (IOPT.EQ.1) DISS(IND)=DISS(IND)/2.
C           (Above is done for the case of the min. var. method
C            where merging criteria are defined in terms of variances
C            rather than distances.)
          ENDDO
       ENDDO
C
C  Carry out an agglomeration - first create list of NNs
C
      DO I=1,N-1
         DMIN=INF
         DO J=I+1,N
            IND=IOFFSET(N,I,J)
            IF (DISS(IND).GE.DMIN) GOTO 500
               DMIN=DISS(IND)
               JM=J
  500    CONTINUE
         ENDDO
         NN(I)=JM
         DISNN(I)=DMIN
      ENDDO
C
  400 CONTINUE
C     Next, determine least diss. using list of NNs
      DMIN=INF
      DO I=1,N-1
         IF (.NOT.FLAG(I)) GOTO 600
         IF (DISNN(I).GE.DMIN) GOTO 600
            DMIN=DISNN(I)
            IM=I
            JM=NN(I)
  600    CONTINUE
      ENDDO
      NCL=NCL-1
C
C  This allows an agglomeration to be carried out.
C
      I2=MIN0(IM,JM)
      J2=MAX0(IM,JM)
      IA(N-NCL)=I2
      IB(N-NCL)=J2
      CRIT(N-NCL)=DMIN
C
C  Update dissimilarities from new cluster.
C
      FLAG(J2)=.FALSE.
      DMIN=INF
      DO K=1,N
         IF (.NOT.FLAG(K)) GOTO 800
         IF (K.EQ.I2) GOTO 800
         X=MEMBR(I2)+MEMBR(J2)+MEMBR(K)
         IF (I2.LT.K) THEN
                           IND1=IOFFSET(N,I2,K)
                      ELSE
                           IND1=IOFFSET(N,K,I2)
         ENDIF
         IF (J2.LT.K) THEN
                           IND2=IOFFSET(N,J2,K)
                      ELSE
                           IND2=IOFFSET(N,K,J2)
         ENDIF
         IND3=IOFFSET(N,I2,J2)
         XX=DISS(IND3)
C
C  WARD'S MINIMUM VARIANCE METHOD - IOPT=1.
C
         IF (IOPT.EQ.1) THEN
            DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+
     X                 (MEMBR(J2)+MEMBR(K))*DISS(IND2)-
     X                 MEMBR(K)*XX
            DISS(IND1)=DISS(IND1)/X
         ENDIF
C
C  SINGLE LINK METHOD - IOPT=2.
C
         IF (IOPT.EQ.2) THEN
            DISS(IND1)=MIN(DISS(IND1),DISS(IND2))
         ENDIF
C
C  COMPLETE LINK METHOD - IOPT=3.
C
         IF (IOPT.EQ.3) THEN
            DISS(IND1)=MAX(DISS(IND1),DISS(IND2))
         ENDIF
C
C  AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4.
C
         IF (IOPT.EQ.4) THEN
            DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))/
     X                 (MEMBR(I2)+MEMBR(J2))
         ENDIF
C
C  MCQUITTY'S METHOD - IOPT=5.
C
         IF (IOPT.EQ.5) THEN
            DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)
         ENDIF
C
C  MEDIAN (GOWER'S) METHOD - IOPT=6.
C
         IF (IOPT.EQ.6) THEN
            DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)-0.25*XX
         ENDIF
C
C  CENTROID METHOD - IOPT=7.
C
         IF (IOPT.EQ.7) THEN
            DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)-
     X          MEMBR(I2)*MEMBR(J2)*XX/(MEMBR(I2)+MEMBR(J2)))/
     X          (MEMBR(I2)+MEMBR(J2))
            ENDIF
C
         IF (I2.GT.K) GOTO 800
         IF (DISS(IND1).GE.DMIN) GOTO 800
            DMIN=DISS(IND1)
            JJ=K
  800    CONTINUE
      ENDDO
      MEMBR(I2)=MEMBR(I2)+MEMBR(J2)
      DISNN(I2)=DMIN
      NN(I2)=JJ
C
C  Update list of NNs insofar as this is required.
C
      DO I=1,N-1
         IF (.NOT.FLAG(I)) GOTO 900
         IF (NN(I).EQ.I2) GOTO 850
         IF (NN(I).EQ.J2) GOTO 850
         GOTO 900
  850    CONTINUE
C        (Redetermine NN of I:)
         DMIN=INF
         DO J=I+1,N
            IND=IOFFSET(N,I,J)
            IF (.NOT.FLAG(J)) GOTO 870
            IF (I.EQ.J) GOTO 870
            IF (DISS(IND).GE.DMIN) GOTO 870
               DMIN=DISS(IND)
               JJ=J
  870       CONTINUE
         ENDDO
         NN(I)=JJ
         DISNN(I)=DMIN
  900    CONTINUE
      ENDDO
C
C  Repeat previous steps until N-1 agglomerations carried out.
C
      IF (NCL.GT.1) GOTO 400
C
C
      RETURN
      END
C
C
      FUNCTION IOFFSET(N,I,J)
C  Map row I and column J of upper half diagonal symmetric matrix 
C  onto vector.
      IOFFSET=J+(I-1)*N-(I*(I+1))/2
      RETURN
      END
C+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
C                                                               C
C  Given a HIERARCHIC CLUSTERING, described as a sequence of    C
C  agglomerations, derive the assignments into clusters for the C
C  top LEV-1 levels of the hierarchy.                           C
C  Prepare also the required data for representing the          C
C  dendrogram of this top part of the hierarchy.                C
C                                                               C
C  Parameters:                                                  C
C                                                               C
C  IA, IB, CRIT: vectors of dimension N defining the agglomer-  C
C                 ations.                                       C
C  LEV:          number of clusters in largest partition.       C
C  HVALS:        vector of dim. LEV, used internally only.      C
C  ICLASS:       array of cluster assignments; dim. N by LEV.   C
C  IORDER, CRITVAL, HEIGHT: vectors describing the dendrogram,  C
C                all of dim. LEV.                               C
C                                                               C
C  F. Murtagh, ESA/ESO/STECF, Garching, February 1986.          C
C                                                               C
C HISTORY                                                       C
C                                                               C
C Bounds bug fix, Oct. 1990, F. Murtagh.                        C
C Inserted line "IF (LOC.GT.LEV) GOTO 58" on line 48.  This was C
C occassioned by incorrect termination of this loop when I      C
C reached its (lower) extremity, i.e. N-LEV.  Without the       C
C /CHECK=(BOUNDS) option on VAX/VMS compilation, this inserted  C
C statement was not necessary.                                  C
C---------------------------------------------------------------C
      SUBROUTINE HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,
     X        CRITVAL,HEIGHT)
      include 'sizesclu.dat'
      include 'COMMON.IOUNITS'
      integer ICLASS(maxconf,maxconf-1)
      INTEGER IA(N),IB(N),HVALS(LEV),IORDER(LEV),
     X        HEIGHT(LEV)
      REAL CRIT(N),CRITVAL(LEV)
C
C  Pick out the clusters which the N objects belong to,
C  at levels N-2, N-3, ... N-LEV+1 of the hierarchy.
C  The clusters are identified by the lowest seq. no. of
C  their members.
C  There are 2, 3, ... LEV clusters, respectively, for the
C  above levels of the hierarchy.
C
      HVALS(1)=1
      HVALS(2)=IB(N-1)
      LOC=3
      DO 59 I=N-2,N-LEV,-1
         DO 52 J=1,LOC-1
            IF (IA(I).EQ.HVALS(J)) GOTO 54
  52     CONTINUE
         HVALS(LOC)=IA(I)
         LOC=LOC+1
  54     CONTINUE
         DO 56 J=1,LOC-1
            IF (IB(I).EQ.HVALS(J)) GOTO 58
  56     CONTINUE
         IF (LOC.GT.LEV) GOTO 58
         HVALS(LOC)=IB(I)
         LOC=LOC+1
  58     CONTINUE
  59  CONTINUE
C
      DO 400 LEVEL=N-LEV,N-2
         DO 200 I=1,N
            ICL=I
            DO 100 ILEV=1,LEVEL
  100       IF (IB(ILEV).EQ.ICL) ICL=IA(ILEV)
            NCL=N-LEVEL
            ICLASS(I,NCL-1)=ICL
  200    CONTINUE
  400  CONTINUE
C
      DO 120 I=1,N
      DO 120 J=1,LEV-1
      DO 110 K=2,LEV
      IF (ICLASS(I,J).NE.HVALS(K)) GOTO 110
         ICLASS(I,J)=K
         GOTO 120
  110 CONTINUE
  120 CONTINUE
C
c      WRITE (iout,450) (j,j=2,LEV)
  450 FORMAT(4X,' SEQ NOS',8(i2,'CL'),10000(i3,'CL'))
c      WRITE (iout,470) (' ---',j=2,LEV)
  470 FORMAT(4X,' -------',10000a4)
      DO 500 I=1,N
c      WRITE (iout,600) I,(ICLASS(I,J),J=1,LEV-1) 
  600 FORMAT(I11,8I4,10000i5)                    
  500 CONTINUE
C
C  Determine an ordering of the LEV clusters (at level LEV-1)
C  for later representation of the dendrogram.
C  These are stored in IORDER.
C  Determine the associated ordering of the criterion values
C  for the vertical lines in the dendrogram.
C  The ordinal values of these criterion values may be used in
C  preference, and these are stored in HEIGHT.
C  Finally, note that the LEV clusters are renamed so that they
C  have seq. nos. 1 to LEV.
C
      IORDER(1)=IA(N-1)
      IORDER(2)=IB(N-1)
      CRITVAL(1)=0.0
      CRITVAL(2)=CRIT(N-1)
      HEIGHT(1)=LEV
      HEIGHT(2)=LEV-1
      LOC=2
      DO 700 I=N-2,N-LEV+1,-1
         DO 650 J=1,LOC
            IF (IA(I).EQ.IORDER(J)) THEN
C              Shift rightwards and insert IB(I) beside IORDER(J):
               DO 630 K=LOC+1,J+1,-1
                  IORDER(K)=IORDER(K-1)
                  CRITVAL(K)=CRITVAL(K-1)
                  HEIGHT(K)=HEIGHT(K-1)
  630          CONTINUE
               IORDER(J+1)=IB(I)
                CRITVAL(J+1)=CRIT(I)
                HEIGHT(J+1)=I-(N-LEV)
               LOC=LOC+1
            ENDIF
  650   CONTINUE
  700 CONTINUE
      DO 705 I=1,LEV
         DO 703 J=1,LEV
            IF (HVALS(I).EQ.IORDER(J)) THEN
               IORDER(J)=I
               GOTO 705
            ENDIF
  703    CONTINUE
  705 CONTINUE
C
      RETURN
      END
C+++++++++++++++++++++++++++++++++++++++++++++++++C
C                                                 C
C  Construct a DENDROGRAM of the top 8 levels of  C
C  a HIERARCHIC CLUSTERING.                       C
C                                                 C
C  Parameters:                                    C
C                                                 C
C  IORDER, HEIGHT, CRITVAL: vectors of length LEV C
C          defining the dendrogram.               C
C          These are: the ordering of objects     C
C          along the bottom of the dendrogram     C
C          (IORDER); the height of the vertical   C
C          above each object, in ordinal values   C
C          (HEIGHT); and in real values (CRITVAL).C
C                                                 C
C  NOTE: these vectors MUST have been set up with C
C        LEV = 9 in the prior call to routine     C
C        HCASS.
C                                                 C
C  F. Murtagh, ESA/ESO/STECF, Garching, Feb. 1986.C
C                                                 C 
C-------------------------------------------------C
      SUBROUTINE HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
      include 'COMMON.IOUNITS'
      CHARACTER*80 LINE
      INTEGER IORDER(LEV),HEIGHT(LEV)
      REAL CRITVAL(LEV)
c      INTEGER OUT(3*LEV,3*LEV)
c      INTEGER UP,ACROSS,BLANK
      CHARACTER*1 OUT(3*LEV,3*LEV)
      CHARACTER*1 UP,ACROSS,BLANK
      DATA UP,ACROSS,BLANK/'|','-',' '/
C
C
      DO I=1,3*LEV
        DO J=1,3*LEV
          OUT(I,J)=BLANK
        ENDDO
      ENDDO
C
C
      DO I=3,3*LEV,3
         I2=I/3
C
         J2=3*LEV+1-3*HEIGHT(I2)
         DO J=3*LEV,J2,-1
            OUT(J,I)=UP
         ENDDO
C
         DO K=I,3,-1
            I3=INT((K+2)/3)
            IF ( (3*LEV+1-HEIGHT(I3)*3).LT.J2) GOTO 100
            OUT(J2,K)=ACROSS
         ENDDO
  100    CONTINUE
C
      ENDDO
C
C
      IC=3
      DO I=1,3*LEV
      IF (I.EQ.IC+1) THEN
                   IDUM=IC/3
                   IDUM=LEV-IDUM
                   DO L=1,LEV
                      IF (HEIGHT(L).EQ.IDUM) GOTO 190
                   ENDDO
  190              IDUM=L
c                   WRITE(iout,200) CRITVAL(IDUM),(OUT(I,J),J=1,3*LEV)
                   IC=IC+3
                   ELSE
                   LINE = ' '
c                   WRITE(iout,210) (OUT(I,J),J=1,3*LEV)
      ENDIF
  200 FORMAT(1H ,8X,F12.2,4X,27000A1)
  210 FORMAT(1H ,24X,27000A1)
      ENDDO
      WRITE(iout,250)
c      WRITE(iout,220)(IORDER(J),J=1,LEV)
c      WRITE(iout,250)
  220 FORMAT(1H ,24X,9000I3)
c      WRITE(iout,230) LEV
  230 FORMAT(1H ,13X,'CRITERION        CLUSTERS 1 TO ',i3)
c      WRITE(iout,240) LEV-1
  240 FORMAT(1H ,13X,'VALUES.      (TOP ',i3,' LEVELS OF HIERARCHY).')
  250 FORMAT(/)
C
C
      RETURN
      END
