PropClust/0000755000176200001440000000000013347130122012201 5ustar liggesusersPropClust/src/0000755000176200001440000000000013346062000012766 5ustar liggesusersPropClust/src/Makevars0000644000176200001440000000011113346232602014462 0ustar liggesusers#PKG_CFLAGS=-fopenmp #PKG_LIBS= -fopenmp -lgomp #PKG_FCFLAGS= -Wimplicit PropClust/src/PropClustParallelTrials.f900000644000176200001440000057045413345760677020144 0ustar liggesusers !Begin modules for word pairs ! MODULE CONSTANTS ! ! Determine double precision and set numerical constants. ! IMPLICIT NONE INTEGER, PARAMETER :: DBLE = KIND(0.0D0) REAL(KIND=DBLE), PARAMETER :: ZERO = 0.0_DBLE REAL(KIND=DBLE), PARAMETER :: ONE = 1.0_DBLE REAL(KIND=DBLE), PARAMETER :: TWO = 2.0_DBLE REAL(KIND=DBLE), PARAMETER :: THREE = 3.0_DBLE REAL(KIND=DBLE), PARAMETER :: FOUR = 4.0_DBLE REAL(KIND=DBLE), PARAMETER :: FIVE = 5.0_DBLE REAL(KIND=DBLE), PARAMETER :: SIX = 6.0_DBLE REAL(KIND=DBLE), PARAMETER :: SEVEN = 7.0_DBLE REAL(KIND=DBLE), PARAMETER :: EIGHT = 8.0_DBLE REAL(KIND=DBLE), PARAMETER :: NINE = 9.0_DBLE REAL(KIND=DBLE), PARAMETER :: TEN = 10.0_DBLE REAL(KIND=DBLE), PARAMETER :: HALF = ONE/TWO ! END MODULE CONSTANTS MODULE TOOLS ! ! This module contains various utilities. ! USE CONSTANTS IMPLICIT NONE CONTAINS ! SUBROUTINE KEY_SORT(LIST,PERMUTATION) ! ! This subroutine performs a key sort on the real LIST by the ! heap sort method. The returned permutation has the property that ! LIST(PERMUTATION(I))<=LIST(PERMUTATION(I+1)) for all relevant I. ! See: Nijenhuis A and Wilf HS (1978) "Combinatorial Algorithms for ! Computers and Calculators, 2nd Ed.", Academic Press. ! IMPLICIT NONE INTEGER :: I,J,K,L,N,PSTAR INTEGER, DIMENSION(:) :: PERMUTATION REAL(KIND=DBLE), DIMENSION(:) :: LIST ! ! Initialize the permutation. ! N = SIZE(LIST) DO I = 1,N PERMUTATION(I) = I END DO IF (N<=1) RETURN ! ! Carry out the heap sort on the permutation key. ! L = 1+N/2 K = N DO IF (L>1) THEN L = L-1 PSTAR = PERMUTATION(L) ELSE PSTAR = PERMUTATION(K) PERMUTATION(K) = PERMUTATION(1) K = K-1 IF (K==1) THEN PERMUTATION(1) = PSTAR RETURN END IF END IF I = L J = L+L DO WHILE (J<=K) IF (J=ONE.AND.X-MEAN>SIX*SQRT(MEAN)) THEN A = X+ONE B = X+TWO C = -MEAN+X*LOG(MEAN)-GAMLOG(A)+LOG(ONE+MEAN*B/(A*(B-MEAN))) LOG_POISSON_TAIL = C/LOG(TEN) ELSE LOG_POISSON_TAIL = LOG10(POISSON_TAIL(MEAN,X)) END IF END FUNCTION LOG_POISSON_TAIL FUNCTION STANDARD_GAMMA(A,X) ! ! This routine returns the gamma distribution function with shape ! parameter A and scale parameter 1 at the point X. ! IMPLICIT NONE INTEGER :: N REAL(KIND=DBLE) :: A,AN,BN,CN,DN,STANDARD_GAMMA,PN,SUM,TERM,X ! IF (X<=ZERO.OR.A<=ZERO) THEN STANDARD_GAMMA = ZERO RETURN ELSE IF (X<=A+ONE) THEN ! ! Use the power series expansion. ! TERM = EXP(-X+A*LOG(X)-GAMLOG(A+ONE)) STANDARD_GAMMA = TERM DO N = 1,100 TERM = TERM*X/(A+N) SUM = STANDARD_GAMMA+TERM IF (TERM/SUM0) THEN LOGLIK=LOGLIK+ADJ(J,I)*LOG(AHATPP)-AHATPP END IF END DO END DO CALC_FAKE_LOGLIK = LOGLIK END FUNCTION CALC_FAKE_LOGLIK FUNCTION CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) IMPLICIT NONE INTEGER :: I,J,NODES,CLUSTERS REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:,:) :: AHAT REAL(KIND=8), DIMENSION(:) :: PHAT REAL(KIND=8) :: CALC_L2NORM,AHATPP,L2NORM INTEGER, DIMENSION(:):: TESTMODULE L2NORM=0. DO I=1,NODES-1 DO J=I+1,NODES AHATPP=AHAT(TESTMODULE(J),TESTMODULE(I))*PHAT(I)*PHAT(J) L2NORM=L2NORM+(ADJ(J,I)-AHATPP)**2 END DO END DO CALC_L2NORM=L2NORM END FUNCTION CALC_L2NORM FUNCTION MODIFY_L2(ADJ,TESTMODULE,PHAT,AHAT,CURRENT_NODE, & NEW_CLUSTER, CURRENT_L2, NODES,CLUSTERS) !THIS FUNCTION MODIFIES THE L2 TO REFLECT A CHANGE IN CLUSTER ASSIGNMENT OF CURRENT_NODE !TO NEW_CLUSTER IMPLICIT NONE INTEGER :: I,J,NODES,CLUSTERS,CURRENT_NODE,NEW_CLUSTER REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:,:) :: AHAT REAL(KIND=8), DIMENSION(:) :: PHAT REAL(KIND=8) :: LOGLIK,AHATPP,CALC_FAKE_LOGLIK INTEGER, DIMENSION(:):: TESTMODULE REAL(KIND=8) :: MODIFY_L2,CURRENT_L2,TEMP,TEMP_SUM,PP TEMP=0. TEMP_SUM=0. DO I=1,NODES IF(I.NE.CURRENT_NODE) THEN PP=PHAT(I)*PHAT(CURRENT_NODE) TEMP=2*ADJ(I,CURRENT_NODE)*AHAT(TESTMODULE(I),TESTMODULE(CURRENT_NODE))*PP & -(AHAT(TESTMODULE(I),TESTMODULE(CURRENT_NODE))*PP)**2 & -2*ADJ(I,CURRENT_NODE)*AHAT(TESTMODULE(I),NEW_CLUSTER)*PP & +(AHAT(TESTMODULE(I),NEW_CLUSTER)*PP)**2 END IF TEMP_SUM=TEMP_SUM+TEMP END DO MODIFY_L2=CURRENT_L2+TEMP_SUM END FUNCTION MODIFY_L2 FUNCTION MODIFY_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,CURRENT_NODE, & NEW_CLUSTER, CURRENT_LOGLIK, NODES,CLUSTERS) !THIS FUNCTION MODIFIES THE L2 TO REFLECT A CHANGE IN CLUSTER ASSIGNMENT OF CURRENT_NODE !TO NEW_CLUSTER USE TOOLS IMPLICIT NONE INTEGER :: I,J,NODES,CLUSTERS,CURRENT_NODE,NEW_CLUSTER REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:,:) :: AHAT REAL(KIND=8), DIMENSION(:) :: PHAT REAL(KIND=8) :: LOGLIK,AHATPP,CALC_FAKE_LOGLIK INTEGER, DIMENSION(:):: TESTMODULE REAL(KIND=8) :: MODIFY_LOGLIK,CURRENT_LOGLIK,TEMP,TEMP_SUM,PP,AHAT1,AHAT2 TEMP=0. TEMP_SUM=0. DO I=1,NODES IF(I.NE.CURRENT_NODE) THEN PP=PHAT(I)*PHAT(CURRENT_NODE) AHAT1=AHAT(TESTMODULE(I),TESTMODULE(CURRENT_NODE)) AHAT2=AHAT(TESTMODULE(I),NEW_CLUSTER) IF(PP>0) THEN TEMP=-POISSON_LOGLIKELIHOOD(AHAT1*PP,FLOOR(ADJ(I,CURRENT_NODE))) & +POISSON_LOGLIKELIHOOD(AHAT2*PP,FLOOR(ADJ(I,CURRENT_NODE))) END IF END IF TEMP_SUM=TEMP_SUM+TEMP END DO MODIFY_LOGLIK=CURRENT_LOGLIK+TEMP_SUM END FUNCTION MODIFY_LOGLIK FUNCTION CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) IMPLICIT NONE INTEGER :: I,J,NODES,CLUSTERS REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:,:) :: AHAT REAL(KIND=8), DIMENSION(:) :: PHAT REAL(KIND=8) :: CALC_FACTORIZABILITY,L2NORM,SUM_SQUARES INTEGER, DIMENSION(:):: TESTMODULE !ADDITIONAL RESULTS FOR COMPARISON SUM_SQUARES=0. DO I=1,NODES-1 DO J=I+1,NODES SUM_SQUARES=SUM_SQUARES+(ADJ(J,I))**2 END DO END DO L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALC_FACTORIZABILITY=1-L2NORM/SUM_SQUARES END FUNCTION CALC_FACTORIZABILITY SUBROUTINE INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,ROW_SUMS) IMPLICIT NONE INTEGER :: CLUSTERS,TEMP2,NODES,I,J INTEGER, DIMENSION(NODES) :: TESTMODULE,TEMP REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: ROW_SUMS, ROW_SUMS_TEMP INTEGER, DIMENSION(CLUSTERS) :: HUB_NODES !PRINT*, "STARTING CLUSTER INITIALIZATION" !THIS SHOULD GIVE A GOOD WAY FOR INITIAL CLUSTER ASSIGNMENTS, PHAT, AND AHAT. !PROBABLY GET THE (CLUSTER) NODES WITH THE HIGHEST ROW SUMS ON ADJ AND THOSE WOULD BE HUB NODES !THEN CLUSTER ALL THE OTHER NODES TO THE HUBNODE IT HAS THE HIGHEST ADJ WITH ROW_SUMS_TEMP=ROW_SUMS DO I=1,CLUSTERS TEMP2=MAXLOC(ROW_SUMS_TEMP,1) HUB_NODES(I)=TEMP2 !IF(I.EQ.1) THEN !PRINT*, "ROW_SUMS ", (ROW_SUMS(J), J=1,SIZE(ROW_SUMS)), " MAX LOC:", HUB_NODES(1) !END IF ROW_SUMS_TEMP(TEMP2)=0. END DO !PRINT*, "HUB NODES FOUND!" DO I=1,NODES TEMP2=1 DO J=2,CLUSTERS IF(ADJ(HUB_NODES(J),I).GE.ADJ(HUB_NODES(TEMP2),I)) THEN TEMP2=J END IF END DO TESTMODULE(I)=TEMP2 END DO DO I=1,CLUSTERS TESTMODULE(HUB_NODES(I))=I END DO END SUBROUTINE INITIALIZE_CLUSTERS SUBROUTINE CHECK_MAXAVGCONV(ADJ,TESTMODULE,CLUSTERS,NODES,ROW_SUMS) IMPLICIT NONE INTEGER :: CLUSTERS,TEMP2,NODES,I,J,ITERATION INTEGER, DIMENSION(NODES) :: TESTMODULE,TEMP REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: ROW_SUMS, ROW_SUMS_TEMP REAL(KIND=8), DIMENSION(CLUSTERS) :: CLUSTER_SUMS,TEMP_SUMS REAL(KIND=8), DIMENSION(CLUSTERS) :: TEMP_VEC INTEGER, DIMENSION(CLUSTERS) :: CLUSTER_SIZE LOGICAL :: NOT_CONVERGED !THIS SUBROUTINE CHECKS TO SEE IF THE CURRENT CLUSTER ASSIGNMENT MAXIMIZES THE AVERAGE INTRA-CLUSTER !ADJACENCY. (LOCAL MAX ONLY) !FINDS THE CLUSTER SIZE CLUSTER_SIZE=0 DO I=1,NODES CLUSTER_SIZE(TESTMODULE(I))=CLUSTER_SIZE(TESTMODULE(I))+1 END DO !FINDS THE INTRA-CLUSTER SUMS CLUSTER_SUMS=0 DO I=1,NODES-1 DO J=I+1,NODES IF(TESTMODULE(I).EQ.TESTMODULE(J)) THEN CLUSTER_SUMS(TESTMODULE(I))=CLUSTER_SUMS(TESTMODULE(I))+ADJ(J,I) END IF END DO END DO !MAIN ITERATION DO I=1,NODES TEMP2=TESTMODULE(I) DO J=1,CLUSTERS IF(J.NE.TEMP2) THEN !does nothing... END IF END DO END DO END SUBROUTINE CHECK_MAXAVGCONV SUBROUTINE QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,ROW_SUMS) IMPLICIT NONE INTEGER :: CLUSTERS,TEMP2,NODES,I,J,ITERATION INTEGER, DIMENSION(NODES) :: TESTMODULE,TEMP REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: ROW_SUMS, ROW_SUMS_TEMP REAL(KIND=8), DIMENSION(NODES,CLUSTERS) :: CLUSTER_SUMS REAL(KIND=8), DIMENSION(CLUSTERS) :: TEMP_VEC REAL(KIND=8), DIMENSION(CLUSTERS) :: CLUSTER_SIZE LOGICAL :: NOT_CONVERGED !THIS CLUSTERING METHOD ASSIGNS A NODE TO THE CLUSTER WITH THE HIGHEST ADJ SUM. IT CALCULATES !THE SUM OF THE ADJ NODE TO EACH NODES IN EACH CLUSTER AND ASSIGNS THE NODE TO THE CLUSTER WITH !THE MAX VALUE. ! !INITIALIZE THE CLUSTER_SUMS ! TEMP_VEC=0. ! CLUSTER_SIZE=0 ! DO J=1,NODES ! TEMP2=TESTMODULE(J) ! TEMP_VEC(TEMP2)=TEMP_VEC(TEMP2)+ADJ(J,1) ! CLUSTER_SIZE(TEMP2)=CLUSTER_SIZE(TEMP2)+1 ! END DO ! !CALL DBLEPR("CLUSTER SIZE INIT", -1,CLUSTER_SIZE,CLUSTERS) ! TEMP2=MINVAL(CLUSTER_SIZE,1) ! IF(TEMP2.LE.0) THEN ! TESTMODULE(MAXLOC(ADJ(:,1),1))=MINLOC(CLUSTER_SIZE,1) ! END IF ! !CALL DBLEPR("CLUSTER SIZE INIT", -1,CLUSTER_SIZE,CLUSTERS) ! !CALL DBLEPR("TEMP_VEC", -1,TEMP_VEC,CLUSTERS) ! !CALL DBLEPR("TEMP_VEC/CLUSTER_SIZE", -1,TEMP_VEC/CLUSTER_SIZE,CLUSTERS) !BEGIN RECLUSTERING NOT_CONVERGED=.TRUE. ITERATION=0 DO WHILE(NOT_CONVERGED) NOT_CONVERGED=.FALSE. ITERATION=ITERATION+1 DO I=1,NODES TEMP_VEC=0. CLUSTER_SIZE=0 DO J=1,NODES TEMP2=TESTMODULE(J) TEMP_VEC(TEMP2)=TEMP_VEC(TEMP2)+ADJ(J,I) CLUSTER_SIZE(TEMP2)=CLUSTER_SIZE(TEMP2)+1 END DO TEMP2=TESTMODULE(I) CLUSTER_SIZE(TEMP2)=CLUSTER_SIZE(TEMP2)-1 IF (MINVAL(CLUSTER_SIZE).GE.1) THEN TESTMODULE(I)=MAXLOC(TEMP_VEC/CLUSTER_SIZE,1) IF(TESTMODULE(I).NE.TEMP2) THEN NOT_CONVERGED=.TRUE. END IF ELSE IF (MINVAL(CLUSTER_SIZE,1).LE.0) THEN TESTMODULE(I)=MINLOC(CLUSTER_SIZE,1) IF(TESTMODULE(I).NE.TEMP2) THEN NOT_CONVERGED=.TRUE. END IF END IF ! !CALL DBLEPR("CLUSTER SIZE", -1,CLUSTER_SIZE,CLUSTERS) ! !CALL DBLEPR("TEMP_VEC", -1,TEMP_VEC,CLUSTERS) ! !CALL DBLEPR("TEMP_VEC/CLUSTER_SIZE", -1,TEMP_VEC/CLUSTER_SIZE,CLUSTERS) ! TEMP_VEC=CLUSTER_SUMS(I,:)/CLUSTER_SIZE ! TEMP_VEC(TESTMODULE(I))=CLUSTER_SUMS(I,TESTMODULE(I))/(CLUSTER_SIZE(TESTMODULE(I))-1.0) ! TEMP2=MAXLOC(TEMP_VEC,1) ! !!CALL INTPR("TEMP2",-1,TEMP2,1) ! IF(TESTMODULE(I).NE.TEMP2) THEN ! NOT_CONVERGED=.TRUE. ! DO J=1,NODES ! CLUSTER_SUMS(J,TESTMODULE(I))=CLUSTER_SUMS(J,TESTMODULE(I))-ADJ(J,I) ! CLUSTER_SUMS(J,TEMP2)=CLUSTER_SUMS(J,TEMP2)+ADJ(J,I) ! END DO ! CLUSTER_SIZE(TESTMODULE(I))=CLUSTER_SIZE(TESTMODULE(I))-1 ! CLUSTER_SIZE(TEMP2)=CLUSTER_SIZE(TEMP2)+1 ! TESTMODULE(I)=TEMP2 ! END IF END DO IF(ITERATION>5*NODES) THEN NOT_CONVERGED=.FALSE. ! !CALL INTPR("DID NOT QUICK CONVERGE",-1,1,0) END IF END DO !IF(ITERATION.LE.50) THEN ! !CALL INTPR("QUICK CONVERGED...",-1,1,0) !END IF END SUBROUTINE QUICK_CLUSTER SUBROUTINE QUICK_CLUSTER2(ADJ,TESTMODULE,CLUSTERS,NODES,ROW_SUMS) IMPLICIT NONE INTEGER :: CLUSTERS,TEMP2,NODES,I,J,K,L,M,ITERATION,OLD INTEGER, DIMENSION(NODES) :: TESTMODULE,TEMP REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: ROW_SUMS, ROW_SUMS_TEMP !REAL(KIND=8), DIMENSION(NODES,CLUSTERS) :: CLUSTER_SUMS REAL(KIND=8), DIMENSION(CLUSTERS) :: TEMP_SUMS,CLUSTER_SUMS,NEW_SUMS REAL(KIND=8), DIMENSION(CLUSTERS) :: CLUSTER_SIZES,NEW_SIZES,TEMP_SIZES,CRITS REAL(KIND=8) :: TEMP_CRIT,CRIT,NEW_CRIT LOGICAL :: NOT_CONVERGED !SETUP STEP CALCULATES THE INITIAL CLUSTER SUMS AND CLUSTER SIZES CRITS=0 CLUSTER_SUMS=0 CLUSTER_SIZES=0 DO J=1,NODES TEMP2=TESTMODULE(J) DO K=1,NODES IF(TEMP2.EQ.TESTMODULE(K)) THEN CLUSTER_SUMS(TEMP2)=CLUSTER_SUMS(TEMP2)+ADJ(K,J) END IF END DO CLUSTER_SIZES(TEMP2)=CLUSTER_SIZES(TEMP2)+1 END DO !CALCULATES THE CURRENT CRITERIA CRIT=0 DO I=1,CLUSTERS CRIT=CRIT+2*CLUSTER_SUMS(I)/(CLUSTER_SIZES(I)-1) END DO NEW_CRIT=CRIT NEW_SIZES=CLUSTER_SIZES NEW_SUMS=CLUSTER_SUMS TEMP_SUMS=CLUSTER_SUMS TEMP_SIZES=CLUSTER_SIZES NOT_CONVERGED=.TRUE. ITERATION=0 DO WHILE(NOT_CONVERGED) NOT_CONVERGED=.FALSE. ITERATION=ITERATION+1 DO I=1,NODES OLD=TESTMODULE(I) CRITS=0. DO J=1,CLUSTERS CLUSTER_SUMS=0 CLUSTER_SIZES=0 TESTMODULE(I)=J DO K=1,NODES TEMP2=TESTMODULE(K) DO L=1,NODES IF(TEMP2.EQ.TESTMODULE(L)) THEN CLUSTER_SUMS(TEMP2)=CLUSTER_SUMS(TEMP2)+ADJ(L,K) END IF END DO CLUSTER_SIZES(TEMP2)=CLUSTER_SIZES(TEMP2)+1 END DO IF(MINVAL(CLUSTER_SIZES).GE.2) THEN CRITS(J)=SUM(2.*CLUSTER_SUMS/(CLUSTER_SIZES-1.)) ! !CALL DBLEPR("?",-1,2.*CLUSTER_SUMS,CLUSTERS) ! !CALL DBLEPR("?",-1,(CLUSTER_SIZES-1.),CLUSTERS) ! !CALL DBLEPR("?",-1,2.*CLUSTER_SUMS/(CLUSTER_SIZES-1.),CLUSTERS) ! !CALL DBLEPR("CRITS",-1,CRITS,CLUSTERS) ! !CALL DBLEPR("CLUSTER SUMS",-1,CLUSTER_SUMS,CLUSTERS) ! !CALL DBLEPR("CLUSTER SIZES",-1,CLUSTER_SIZES,CLUSTERS) ELSE CRITS(J)=0 END IF END DO TEMP2=MAXLOC(CRITS,1) IF(TEMP2.NE.TESTMODULE(I)) THEN TESTMODULE(I)=TEMP2 NOT_CONVERGED=.TRUE. ! IF(ITERATION.EQ.1) THEN ! !CALL INTPR("QUICK2 CHANGED SOMETHING",-1,1,0) ! END IF END IF END DO IF(ITERATION>5*NODES) THEN NOT_CONVERGED=.FALSE. !CALL INTPR("DID NOT QUICK2 CONVERGE",-1,1,0) END IF END DO END SUBROUTINE QUICK_CLUSTER2 SUBROUTINE K_MEDIOIDS(ADJ,TESTMODULE,CLUSTERS,NODES,ROW_SUMS) !performs k-medioids clustering on the data. Note that clusters (Testmodule) should !be initialized. Furthermore, since an adjacency matrix is used we modify the criteria !to seek out the maximum adjacency rather than the minimum distance IMPLICIT NONE INTEGER :: CLUSTERS,TEMP2,NODES,I,J,K,L,M,ITERATION,OLD,ITERATIONS INTEGER, DIMENSION(NODES) :: TESTMODULE,TEMP REAL, DIMENSION(NODES,NODES) :: ADJ INTEGER, DIMENSION(CLUSTERS) :: MEDIOIDS,TEMP_MEDIOIDS REAL(KIND=8), DIMENSION(NODES) :: ROW_SUMS, ROW_SUMS_TEMP REAL(KIND=8), DIMENSION(CLUSTERS) :: TEMP_SUMS,CLUSTER_SUMS,NEW_SUMS REAL(KIND=8), DIMENSION(CLUSTERS) :: CLUSTER_SIZES,NEW_SIZES,TEMP_SIZES,CRITS REAL(KIND=8) :: TEMP_CRIT,CRIT,NEW_CRIT LOGICAL :: NOT_CONVERGED ROW_SUMS_TEMP=ROW_SUMS DO I=1,CLUSTERS TEMP2=MAXLOC(ROW_SUMS_TEMP,1) MEDIOIDS(I)=TEMP2 ROW_SUMS_TEMP(TEMP2)=0. END DO CRIT=0. DO I=1,NODES IF(ALL(MEDIOIDS.NE.I)) THEN CRIT=CRIT+ADJ(I,MEDIOIDS(TESTMODULE(I))) END IF END DO ITERATIONS=0 NOT_CONVERGED=.TRUE. DO WHILE(NOT_CONVERGED) NOT_CONVERGED=.FALSE. ITERATIONS=ITERATIONS+1 DO I=1,CLUSTERS DO J=1,NODES IF(ALL(MEDIOIDS.NE.I)) THEN TEMP_MEDIOIDS=MEDIOIDS TEMP_MEDIOIDS(I)=J TEMP_CRIT=0. DO K=1,NODES IF(ALL(TEMP_MEDIOIDS.NE.K)) THEN TEMP_CRIT=TEMP_CRIT+ADJ(K,TEMP_MEDIOIDS(TESTMODULE(K))) END IF END DO IF(TEMP_CRIT>CRIT) THEN MEDIOIDS=TEMP_MEDIOIDS CRIT=TEMP_CRIT NOT_CONVERGED=.TRUE. END IF IF(ITERATIONS>FLOOR(NODES/10.)) THEN !PRINT*, "K-MEDIOIDS DID NOT CONVERGE" !PRINT*, "TOTAL ITERATIONS: ", ITERATIONS CALL INTPR("K-MEDIOIDS DID NOT CONVERGE",-1,1,0) CALL INTPR("TOTAL ITERATIONS: ", -1, ITERATIONS,1) END IF END IF END DO END DO END DO END SUBROUTINE K_MEDIOIDS SUBROUTINE QUICK_CLUSTER_TRIAL(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM,L2) USE TOOLS IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L,CLUSTER_ITERATIONS=0,L2I,INITBOOL,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE !INTEGER, DIMENSION(NODES) :: TEMP REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM !REAL, DIMENSION(2) :: TIMEARRAYD,TIMEARRAYE REAL(KIND=8) ::L2NORM,FACTORIZABILITY,LOGLIK,SUM_SQUARES,CRITERIA !REAL :: TOTALTIME LOGICAL :: NOT_CONVERGED,L2,QNEWT=.FALSE.,UPHILL=.TRUE. !!CALL INTPR("INITIALIZING PARAMETERS",-1,1,0) DO I=1,NODES ADJ(I,I)=0 END DO !!CALL INTPR("INITIALIZED PARAMETERS",-1,1,0) !INITIALIZING CLUSTERS CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL INTPR("CLUSTERS QUICK1 INITIALIZED",-1,1,0) CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) END IF !ENDING PARAMETER INITIALIZATION !BEGINNING POISSON/L2 PARAMETER UPDATES QNEWT=.TRUE. QSEC=5 MAP_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2)+NODES CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) !!CALL INTPR("PARAMETERS INITIALIZED",-1,1,0) !BEGINNING MAIN CLUSTER UPDATE LOOP NOT_CONVERGED = .TRUE. CLUSTER_ITERATIONS=1 DO WHILE (NOT_CONVERGED) !!CALL INTPR("CLUSTER ITERATION",-1,CLUSTER_ITERATIONS,1) !PRINT*, "CLUSTER ITERATION NUMBER: ", CLUSTER_ITERATIONS CLUSTER_ITERATIONS=CLUSTER_ITERATIONS+1 NOT_CONVERGED = .FALSE. CALL UPDATE_CLUSTER_ASSIGNMENTS2(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS, & PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) !CALL UPDATE_PARAMETERS_QNEWTN(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& ! L2NORM,LOGLIK,QSEC,UPHILL,MAP_LENGTH) IF(CLUSTER_ITERATIONS.GE.500) THEN NOT_CONVERGED = .FALSE. CALL INTPR("MAX ITERATION REACHED...RERUN USING CURRENT CLUSTERING FOR BETTER RESULTS",& -1,1,0) END IF IF(PHAT(1)+1.EQ.PHAT(1)) THEN NOT_CONVERGED=.FALSE. CALL INTPR("SOMETHING WENT WRONG...NON-REAL RESULTS...",-1,1,0) END IF END DO END SUBROUTINE QUICK_CLUSTER_TRIAL SUBROUTINE UPDATE_ASUM(ADJ,TRIALMODULE,NODE_POS,CURRENT_CLUSTER,NEW_CLUSTER,ASUM,NODES,CLUSTERS) !THIS SUBROUTINE UPDATES ASUM (THE NUMERATOR OF THE AHAT UPDATES) FOR NODE AT NODE_POS GOING FROM !ORIGINAL_CLUSTER TO NEW_CLUSTER IMPLICIT NONE INTEGER :: I,J,CURRENT_CLUSTER,NEW_CLUSTER,NODE_POS,ICLUST,OUTPUT_UNIT2,K,KK,NODES,CLUSTERS INTEGER, DIMENSION(:) :: TRIALMODULE REAL, DIMENSION(:,:) :: ADJ !REAL(KIND=8), DIMENSION(:) :: PSUM REAL(KIND=8), DIMENSION(:,:) :: ASUM REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM2 LOGICAL :: SUMS_LOGICALB4, SUMS_LOGICAL SUMS_LOGICALB4=.FALSE. SUMS_LOGICAL=.FALSE. !CHECK THIS!!!!!!!!!!!!checked DO I=1,NODES IF(I.NE.NODE_POS) THEN ICLUST=TRIALMODULE(I) IF((ICLUST.NE.CURRENT_CLUSTER).AND.(ICLUST.NE.NEW_CLUSTER)) THEN ASUM(CURRENT_CLUSTER,ICLUST)=ASUM(CURRENT_CLUSTER,ICLUST)-ADJ(I,NODE_POS) ASUM(NEW_CLUSTER,ICLUST)=ASUM(NEW_CLUSTER,ICLUST)+ADJ(I,NODE_POS) ASUM(ICLUST,NEW_CLUSTER)=ASUM(NEW_CLUSTER,ICLUST) ASUM(ICLUST,CURRENT_CLUSTER)=ASUM(CURRENT_CLUSTER,ICLUST) ELSE IF(ICLUST.EQ.CURRENT_CLUSTER) THEN ASUM(NEW_CLUSTER,ICLUST)=ASUM(NEW_CLUSTER,ICLUST)+ADJ(I,NODE_POS) ASUM(ICLUST,NEW_CLUSTER)=ASUM(NEW_CLUSTER,ICLUST) ELSE IF(ICLUST.EQ.NEW_CLUSTER) THEN ASUM(CURRENT_CLUSTER,ICLUST)=ASUM(CURRENT_CLUSTER,ICLUST)-ADJ(I,NODE_POS) ASUM(ICLUST,CURRENT_CLUSTER)=ASUM(CURRENT_CLUSTER,ICLUST) END IF END IF END DO END SUBROUTINE UPDATE_ASUM SUBROUTINE UPDATE_AHAT_MM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,ASUM,PCLUSTERSUM) IMPLICIT NONE INTEGER :: I,J,K,NODES,CLUSTERS INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8) :: TEMP REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS) :: ADEN,PCLUSTERSUM REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT,AN,ASUM LOGICAL :: L2 !AHAT UPDATES DO I=1,CLUSTERS-1 DO J=I+1,CLUSTERS IF(((PCLUSTERSUM(I)*PCLUSTERSUM(J)).NE.0).AND.(AN(J,I).NE.0)) THEN IF(L2) THEN AHAT(J,I)=(AN(J,I)**5*ASUM(J,I)/(PCLUSTERSUM(I)*PCLUSTERSUM(J)))**(1./6.) ELSE AHAT(J,I)=(AN(J,I)**2*ASUM(J,I)/(PCLUSTERSUM(I)*PCLUSTERSUM(J)))**(1./3.) END IF ELSE IF (AN(J,I).EQ.0) THEN !INCREASE_FLAG=.FALSE. !PRINT*, "AN(I,J)=0" IF(L2) THEN AHAT(J,I)=(ASUM(J,I)/(PCLUSTERSUM(I)*PCLUSTERSUM(J)))**(1./6.) ELSE AHAT(J,I)=(ASUM(J,I)/(PCLUSTERSUM(I)*PCLUSTERSUM(J)))**(1./3.) END IF ELSE !IF(ADEN(I,J).EQ.0) !INCREASE_FLAG=.FALSE. !PRINT*, "ADEN(I,J)=0" AHAT(J,I)=0. END IF AHAT(I,J)=AHAT(J,I) END DO END DO DO I=1,CLUSTERS AHAT(I,I)=1. END DO END SUBROUTINE UPDATE_AHAT_MM !CHECKED!!!!!!!! SUBROUTINE UPDATE_AHAT(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2I) IMPLICIT NONE INTEGER :: I,J,K,NODES,CLUSTERS,TESTI,TESTJ INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8) :: TEMP REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS) :: ADEN REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT,ANUM LOGICAL :: L2I !UPDATING AHAT ANUM=0. ADEN=0. TEMP=0. IF (L2I) THEN DO I=1,NODES ADEN(TESTMODULE(I))=ADEN(TESTMODULE(I))+PHAT(I)**2 END DO ELSE DO I=1,NODES ADEN(TESTMODULE(I))=ADEN(TESTMODULE(I))+PHAT(I) END DO END IF DO I=1,NODES-1 DO J=I+1,NODES IF(L2I) THEN !FROBENIUS UPDATES TEMP=ADJ(J,I)*PHAT(I)*PHAT(J) ELSE !POISSON UPDATES TEMP=ADJ(J,I) END IF TESTI=TESTMODULE(I) TESTJ=TESTMODULE(J) ANUM(TESTI,TESTJ)=ANUM(TESTI,TESTJ)+TEMP ANUM(TESTJ,TESTI)=ANUM(TESTI,TESTJ) END DO END DO DO I=1,CLUSTERS-1 DO J=I+1,CLUSTERS AHAT(J,I)=ANUM(J,I)/(ADEN(I)*ADEN(J)) AHAT(I,J)=AHAT(J,I) END DO END DO DO I=1,CLUSTERS AHAT(I,I)=1. END DO END SUBROUTINE UPDATE_AHAT SUBROUTINE UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& NEW_L2,NEW_LOGLIK) USE TOOLS IMPLICIT NONE INTEGER, DIMENSION(:) :: TESTMODULE REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM INTEGER :: I,J,K,NODES,CLUSTERS REAL(KIND=8), DIMENSION(NODES) :: PN REAL(KIND=8), DIMENSION(CLUSTERS) :: PCLUSTERSUM,PDEN REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN LOGICAL :: L2 REAL(KIND=8) :: NEW_LOGLIK,NEW_L2,TEMPNUM !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES PN=PHAT AN=AHAT TEMPNUM=0. PDEN=0. PCLUSTERSUM=0. IF(L2) THEN !USE L2 UPDATES !PDEN UPDATES DO I=1,NODES PCLUSTERSUM(TESTMODULE(I))=PCLUSTERSUM(TESTMODULE(I))+PN(I)**2 END DO ! DO I=1,CLUSTERS ! DO J=1,CLUSTERS ! PDEN(I)=PDEN(I)+AN(I,J)**2*PCLUSTERSUM(J) ! END DO ! END DO PDEN = MATMUL(AN**2,PCLUSTERSUM) !PHAT UPDATES DO I=1,NODES TEMPNUM=0. TEMPNUM=SUM(AN(TESTMODULE(:),TESTMODULE(I))*ADJ(:,I)*PN)!CHECKed ! DO J=1,NODES ! TEMPNUM=TEMPNUM+AN(TESTMODULE(I),TESTMODULE(J))*ADJ(J,I)*PN(J) ! END DO ! !CALL DBLEPR("TEMNUM",-1,TEMPNUM,1) ! !CALL DBLEPR("TEMPNUM2",-1,TEMPNUM2,1) IF(PDEN(TESTMODULE(I))-PN(I).EQ.0) THEN PHAT(I)=0.0 !NEED TO FIX THIS ELSE PHAT(I)=(PN(I)**5*TEMPNUM/(PDEN(TESTMODULE(I))-PN(I)**2))**(1./6.) END IF END DO ELSE !USING POISSON UPDATES DO I=1,NODES PCLUSTERSUM(TESTMODULE(I))=PCLUSTERSUM(TESTMODULE(I))+PN(I) END DO ! DO I=1,CLUSTERS ! DO J=1,CLUSTERS ! PDEN(I)=PDEN(I)+AN(J,I)*PCLUSTERSUM(J) ! END DO ! END DO PDEN = MATMUL(AN,PCLUSTERSUM) PHAT=((PN**2*PSUM)/(PDEN(TESTMODULE(:))-PN))**(1./3.) !CHECKed DO I=1,NODES ! IF((PDEN(TESTMODULE(I))-PN(I).NE.0).AND.(PN(I).NE.0)) THEN ! PHAT(I)=(PN(I)**2*PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./3.) ! ELSE IF((PDEN(TESTMODULE(I))-PN(I)).EQ.0) THEN IF((PDEN(TESTMODULE(I))-PN(I)).EQ.0) THEN !PRINT*, "(PDEN(TESTMODULE(I))-PN(I))=0" PHAT(I)=0. ELSE IF(PN(I).EQ.0) THEN !PRINT*, "PN(I)=0" PHAT(I)=(PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./3.) END IF END DO END IF !UPDATING AHAT CALL UPDATE_AHAT(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) IF(L2) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) NEW_LOGLIK=1. ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) NEW_L2=1. END IF END SUBROUTINE UPDATE_PARAMETERS_ONCE SUBROUTINE UPDATE_PARAMETERS_ONCE_PARALLEL(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2, & PSUM,ASUM,NEW_L2,NEW_LOGLIK) USE TOOLS !USE OMP_LIB IMPLICIT NONE INTEGER, DIMENSION(:) :: TESTMODULE REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM INTEGER :: I,J,K,NODES,CLUSTERS REAL(KIND=8), DIMENSION(NODES) :: PN REAL(KIND=8), DIMENSION(CLUSTERS) :: PCLUSTERSUM,PDEN REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN LOGICAL :: L2 REAL(KIND=8) :: NEW_LOGLIK,NEW_L2,TEMPNUM !$OMP THREADPRIVATE(TEMPNUM) !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES PN=PHAT AN=AHAT TEMPNUM=0. PDEN=0. PCLUSTERSUM=0. IF(L2) THEN !USE L2 UPDATES !PDEN UPDATES DO I=1,NODES PCLUSTERSUM(TESTMODULE(I))=PCLUSTERSUM(TESTMODULE(I))+PN(I)**2 END DO ! DO I=1,CLUSTERS ! DO J=1,CLUSTERS ! PDEN(I)=PDEN(I)+AN(I,J)**2*PCLUSTERSUM(J) ! END DO ! END DO PDEN = MATMUL(AN**2,PCLUSTERSUM) !PHAT UPDATES !$OMP PARALLEL DO DO I=1,NODES TEMPNUM=0. TEMPNUM=SUM(AN(TESTMODULE(:),TESTMODULE(I))*ADJ(:,I)*PN)!CHECKed ! DO J=1,NODES ! TEMPNUM=TEMPNUM+AN(TESTMODULE(I),TESTMODULE(J))*ADJ(J,I)*PN(J) ! END DO ! !CALL DBLEPR("TEMNUM",-1,TEMPNUM,1) ! !CALL DBLEPR("TEMPNUM2",-1,TEMPNUM2,1) IF(PDEN(TESTMODULE(I))-PN(I).EQ.0) THEN PHAT(I)=0.0 !NEED TO FIX THIS ELSE PHAT(I)=(PN(I)**5*TEMPNUM/(PDEN(TESTMODULE(I))-PN(I)**2))**(1./6.) END IF END DO !MISTAKE IS HERE------------------------------------- !$OMP END PARALLEL DO ELSE !USING POISSON UPDATES DO I=1,NODES PCLUSTERSUM(TESTMODULE(I))=PCLUSTERSUM(TESTMODULE(I))+PN(I) END DO ! DO I=1,CLUSTERS ! DO J=1,CLUSTERS ! PDEN(I)=PDEN(I)+AN(J,I)*PCLUSTERSUM(J) ! END DO ! END DO PDEN = MATMUL(AN,PCLUSTERSUM) PHAT=((PN**2*PSUM)/(PDEN(TESTMODULE(:))-PN))**(1./3.) !CHECKed DO I=1,NODES ! IF((PDEN(TESTMODULE(I))-PN(I).NE.0).AND.(PN(I).NE.0)) THEN ! PHAT(I)=(PN(I)**2*PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./3.) ! ELSE IF((PDEN(TESTMODULE(I))-PN(I)).EQ.0) THEN IF((PDEN(TESTMODULE(I))-PN(I)).EQ.0) THEN !PRINT*, "(PDEN(TESTMODULE(I))-PN(I))=0" PHAT(I)=0. ELSE IF(PN(I).EQ.0) THEN !PRINT*, "PN(I)=0" PHAT(I)=(PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./3.) END IF END DO END IF !UPDATING AHAT CALL UPDATE_AHAT(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) IF(L2) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) NEW_LOGLIK=1. ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) NEW_L2=1. END IF END SUBROUTINE UPDATE_PARAMETERS_ONCE_PARALLEL ! SUBROUTINE UPDATE_PARAMETERS_ONCE_OLD(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& ! NEW_L2,NEW_LOGLIK) ! IMPLICIT NONE ! INTEGER, DIMENSION(:) :: TESTMODULE ! REAL, DIMENSION(:,:) :: ADJ ! REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM ! REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM ! INTEGER :: I,J,K,NODES,CLUSTERS ! REAL(KIND=8), DIMENSION(NODES) :: PN ! REAL(KIND=8), DIMENSION(CLUSTERS) :: PCLUSTERSUM,PDEN ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN ! LOGICAL :: L2 ! REAL(KIND=8) :: NEW_LOGLIK,NEW_L2 ! !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES ! ! AN=AHAT ! PN=PHAT ! ! PCLUSTERSUM=0. ! DO I=1,NODES ! PCLUSTERSUM(TESTMODULE(I))=PCLUSTERSUM(TESTMODULE(I))+PN(I) ! END DO ! ! !PHAT UPDATES ! ! PDEN=0. ! DO I=1,CLUSTERS ! DO J=1,CLUSTERS ! PDEN(I)=PDEN(I)+AN(I,J)*PCLUSTERSUM(J) ! END DO ! END DO ! ! DO I=1,NODES ! IF((PDEN(TESTMODULE(I))-PN(I).NE.0).AND.(PN(I).NE.0)) THEN ! IF(L2) THEN ! PHAT(I)=(PN(I)**5*PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./6.) ! ELSE ! PHAT(I)=(PN(I)**2*PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./3.) ! END IF ! ELSE IF((PDEN(TESTMODULE(I))-PN(I)).EQ.0) THEN ! !PRINT*, "(PDEN(TESTMODULE(I))-PN(I))=0" ! PHAT(I)=0. ! ELSE !IF PN(I)=0 ! !PRINT*, "PN(I)=0" ! IF(L2) THEN ! PHAT(I)=(PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./6.) ! ELSE ! PHAT(I)=(PSUM(I)/(PDEN(TESTMODULE(I))-PN(I)))**(1./3.) ! END IF ! END IF ! END DO ! ! !UPDATING AHAT ! CALL UPDATE_AHAT(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) ! ! IF(L2) THEN ! NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ! ELSE ! NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ! END IF ! ! END SUBROUTINE UPDATE_PARAMETERS_ONCE_OLD ! ! SUBROUTINE UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,NEW_L2,& NEW_LOGLIK) USE TOOLS IMPLICIT NONE INTEGER, DIMENSION(:) :: TESTMODULE REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM INTEGER :: I,J,K,ITERATION,NODES,CLUSTERS,INCREASE_WARNINGS REAL(KIND=8), DIMENSION(NODES) :: PN REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN LOGICAL :: NOT_CONVERGED2,L2 REAL(KIND=8) :: OLD_LOGLIK,NEW_LOGLIK,OLD_L2,NEW_L2,WRONG_WAY_MAX !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES !INITIALIZING THE COUNTERS ITERATION=0 WRONG_WAY_MAX=0. INCREASE_WARNINGS=0 NOT_CONVERGED2=.TRUE. DO WHILE(NOT_CONVERGED2) AN=AHAT PN=PHAT OLD_L2=NEW_L2 OLD_LOGLIK=NEW_LOGLIK ITERATION=ITERATION+1 !AHAT UPDATES CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,NEW_L2, & NEW_LOGLIK) IF((ITERATION.GE.5)) THEN IF(L2) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(OLD_L2,NEW_L2,ITERATION,NOT_CONVERGED2) ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(OLD_LOGLIK,NEW_LOGLIK,ITERATION,NOT_CONVERGED2) END IF END IF IF(L2) THEN IF(OLD_L2-NEW_L2<0) THEN !PRINT*, "WARNING, INCREASE L2" INCREASE_WARNINGS=INCREASE_WARNINGS+1 !!CALL DBLEPR("WRONG WAY: ", -1, OLD_L2-NEW_L2,1) IF(ABS(OLD_L2-NEW_L2)>ABS(WRONG_WAY_MAX)) THEN WRONG_WAY_MAX=OLD_L2-NEW_L2 END IF !PRINT*, OLD_L2-NEW_L2 END IF ELSE IF(NEW_LOGLIK-OLD_LOGLIK<0) THEN !PRINT*, "WARNING, DECREASE LOGLIK" INCREASE_WARNINGS=INCREASE_WARNINGS+1 !PRINT*, NEW_LOGLIK-OLD_LOGLIK END IF END IF !CALL DBLEPR("L2: ", -1, NEW_L2,1) !CALL DBLEPR("PN(1): ",-1,PN(1),1) END DO !CALL INTPR("MM ITERATIONS: ",-1,ITERATION,1) !PRINT*, "PARAMETER UPDATE ITERATIONS: ", ITERATION !PRINT*, "WRONG WAY WARNINGS: ", INCREASE_WARNINGS !!CALL INTPR("PARAMETER UPDATE ITERATIONS: ",-1,ITERATION,1) IF(ABS(WRONG_WAY_MAX)>1E-10) THEN !CALL INTPR("WRONG WAY WARNINGS: ",-1,INCREASE_WARNINGS,1) !CALL DBLEPR("WRONG WAY MAX VALUE: ",-1,WRONG_WAY_MAX,1) END IF !CALL RWARN("RWARN") END SUBROUTINE UPDATE_PARAMETERS ! SUBROUTINE UPDATE_NEWTON(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) ! ! USE TOOLS ! ! IMPLICIT NONE ! ! INTEGER, DIMENSION(:) :: TESTMODULE ! REAL, DIMENSION(:,:) :: ADJ ! REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM ! REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM ! INTEGER :: I,J,K,ITERATION,NODES,CLUSTERS,INCREASE_WARNINGS ! REAL(KIND=8), DIMENSION(NODES) :: PN ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN ! REAL(KIND=8), :: NEGINV_HESSIAN ! LOGICAL :: NOT_CONVERGED,L2 ! REAL(KIND=8) :: OLD_LOGLIK,NEW_LOGLIK,OLD_L2,NEW_L2,WRONG_WAY_MAX ! ! NOT_CONVERGED=.TRUE. ! ITERATION=0 ! DO WHILE(NOT_CONVERGED) ! NOT_CONVERGED=.FALSE. ! ITERATION = ITERATION+1 ! ! ! PN=PHAT ! IF((ITERATION.GE.5)) THEN ! IF(L2) THEN ! NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ! CALL CHECK_CONVERGENCE(OLD_L2,NEW_L2,ITERATION,NOT_CONVERGED2) ! ELSE ! NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ! CALL CHECK_CONVERGENCE(OLD_LOGLIK,NEW_LOGLIK,ITERATION,NOT_CONVERGED2) ! END IF ! END IF ! END DO ! ! END SUBROUTINE UPDATE_NEWTON SUBROUTINE FINDINV(MATRIX, INVERSE, N, ERRORFLAG) !SUBROUTINE TO FIND THE INVERSE OF A SQUARE MATRIX !AUTHOR : LOUISDA16TH A.K.A ASHWITH J. REGO !REFERENCE : ALGORITHM HAS BEEN WELL EXPLAINED IN: !HTTP://MATH.UWW.EDU/~MCFARLAT/INVERSE.HTM !HTTP://WWW.TUTOR.MS.UNIMELB.EDU.AU/MATRIX/MATRIX_INVERSE.HTML IMPLICIT NONE !DECLARATIONS INTEGER, INTENT(IN) :: N INTEGER, INTENT(OUT) :: ERRORFLAG !RETURN ERROR STATUS. -1 FOR ERROR, 0 FOR NORMAL REAL(KIND=8), INTENT(IN), DIMENSION(N,N) :: MATRIX !INPUT MATRIX REAL(KIND=8), INTENT(OUT), DIMENSION(N,N) :: INVERSE !INVERTED MATRIX LOGICAL :: FLAG = .TRUE. INTEGER :: I, J, K, L REAL(KIND=8) :: M REAL(KIND=8), DIMENSION(N,2*N) :: AUGMATRIX !AUGMENTED MATRIX !AUGMENT INPUT MATRIX WITH AN IDENTITY MATRIX DO I = 1, N DO J = 1, 2*N IF (J <= N ) THEN AUGMATRIX(I,J) = MATRIX(I,J) ELSE IF ((I+N) == J) THEN AUGMATRIX(I,J) = 1 ELSE AUGMATRIX(I,J) = 0 ENDIF END DO END DO !REDUCE AUGMENTED MATRIX TO UPPER TRAINGULAR FORM DO K =1, N-1 IF (AUGMATRIX(K,K) == 0) THEN FLAG = .FALSE. DO I = K+1, N IF (AUGMATRIX(I,K) /= 0) THEN DO J = 1,2*N AUGMATRIX(K,J) = AUGMATRIX(K,J)+AUGMATRIX(I,J) END DO FLAG = .TRUE. EXIT ENDIF IF (FLAG .EQV. .FALSE.) THEN !!CALL INTPR("NON-INVERTIBLE MATRIX",-1,1,0) !PRINT*, "MATRIX IS NON - INVERTIBLE" INVERSE = 0 ERRORFLAG = -1 RETURN ENDIF END DO ENDIF DO J = K+1, N M = AUGMATRIX(J,K)/AUGMATRIX(K,K) DO I = K, 2*N AUGMATRIX(J,I) = AUGMATRIX(J,I) - M*AUGMATRIX(K,I) END DO END DO END DO !TEST FOR INVERTIBILITY DO I = 1, N IF (AUGMATRIX(I,I) == 0) THEN !PRINT*, "MATRIX IS NON - INVERTIBLE" !!CALL INTPR("NON-INVERTIBLE MATRIX",-1,1,0) INVERSE = 0 ERRORFLAG = -1 RETURN ENDIF END DO !MAKE DIAGONAL ELEMENTS AS 1 DO I = 1 , N M = AUGMATRIX(I,I) DO J = I , (2 * N) AUGMATRIX(I,J) = (AUGMATRIX(I,J) / M) END DO END DO !REDUCED RIGHT SIDE HALF OF AUGMENTED MATRIX TO IDENTITY MATRIX DO K = N-1, 1, -1 DO I =1, K M = AUGMATRIX(I,K+1) DO J = K, (2*N) AUGMATRIX(I,J) = AUGMATRIX(I,J) -AUGMATRIX(K+1,J) * M END DO END DO END DO !STORE ANSWER DO I =1, N DO J = 1, N INVERSE(I,J) = AUGMATRIX(I,J+N) END DO END DO ERRORFLAG = 0 END SUBROUTINE FINDINV SUBROUTINE TRIANGULAR_MAT_TO_VEC(MATRIX1,MATRIX1_LENGTH,VEC1,VEC1_LENGTH) IMPLICIT NONE !CHECKED INTEGER :: MATRIX1_LENGTH,J,K,COUNTER,VEC1_LENGTH REAL(KIND=8), DIMENSION(MATRIX1_LENGTH,MATRIX1_LENGTH) :: MATRIX1 REAL(KIND=8), DIMENSION(VEC1_LENGTH) :: VEC1 COUNTER=0 DO J=1,MATRIX1_LENGTH-1 DO K=J+1,MATRIX1_LENGTH COUNTER=COUNTER+1 VEC1(COUNTER)=MATRIX1(K,J) END DO END DO END SUBROUTINE TRIANGULAR_MAT_TO_VEC SUBROUTINE VEC_TO_TRIANGULAR_MAT(MATRIX1,MATRIX1_LENGTH,VEC1,VEC1_LENGTH) IMPLICIT NONE !CHECKED INTEGER :: VEC1_LENGTH,J,K,MATRIX1_LENGTH,COUNTER REAL(KIND=8), DIMENSION(VEC1_LENGTH) :: VEC1 REAL(KIND=8), DIMENSION(MATRIX1_LENGTH,MATRIX1_LENGTH) :: MATRIX1 COUNTER=0 DO J=1,MATRIX1_LENGTH-1 DO K=J+1,MATRIX1_LENGTH COUNTER=COUNTER+1 MATRIX1(J,K)=VEC1(COUNTER) MATRIX1(K,J)=VEC1(COUNTER) END DO END DO DO J=1,MATRIX1_LENGTH MATRIX1(J,J)=1. END DO END SUBROUTINE VEC_TO_TRIANGULAR_MAT SUBROUTINE COMPOSE_MAP(PHAT,AHAT,NODES,CLUSTERS,MAP,MAP_LENGTH) !THIS FUNCTION APPENDS THE VALUES OF AHAT AT THE END OF PHAT !CHECKED IMPLICIT NONE INTEGER :: I,J,NODES,CLUSTERS,VEC_LENGTH,MAP_LENGTH REAL(KIND=8), DIMENSION(MAP_LENGTH) :: MAP REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), ALLOCATABLE, DIMENSION(:) :: A_VEC !VEC_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2) VEC_LENGTH=MAP_LENGTH-NODES ALLOCATE(A_VEC(VEC_LENGTH)) CALL TRIANGULAR_MAT_TO_VEC(AHAT,CLUSTERS,A_VEC,VEC_LENGTH) DO I=1,NODES MAP(I)=PHAT(I) END DO DO J=1,VEC_LENGTH MAP(NODES+J)=A_VEC(J) END DO DEALLOCATE(A_VEC) END SUBROUTINE COMPOSE_MAP SUBROUTINE DECOMPOSE_MAP(PHAT,AHAT,NODES,CLUSTERS,MAP,MAP_LENGTH) !THIS FUNCTION APPENDS THE VALUES OF AHAT AT THE END OF PHAT !CHECKED IMPLICIT NONE INTEGER :: I,NODES,CLUSTERS,VEC_LENGTH,MAP_LENGTH REAL(KIND=8), DIMENSION(MAP_LENGTH) :: MAP REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), ALLOCATABLE, DIMENSION(:) :: A_VEC !VEC_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2) VEC_LENGTH=MAP_LENGTH-NODES ALLOCATE(A_VEC(VEC_LENGTH)) DO I=1,VEC_LENGTH A_VEC(I)=MAP(NODES+I) END DO CALL VEC_TO_TRIANGULAR_MAT(AHAT,CLUSTERS,A_VEC,VEC_LENGTH) DO I=1,NODES PHAT(I)=MAP(I) END DO DEALLOCATE(A_VEC) END SUBROUTINE DECOMPOSE_MAP SUBROUTINE SHIFT_MAT_LEFT(MATRIX,DIM,DIM_SHIFT) !SHIFTS THE CONTENTS OF A MATRIX ALONG DIM_SHIFT TO ADD A NEW ENTRY AT THE END !SHIFT_MAT_LEFT(MAP_U,TEMP2,QSEC) !CHECKED IMPLICIT NONE INTEGER :: DIM,DIM_SHIFT,I REAL(KIND=8), DIMENSION(DIM,DIM_SHIFT) :: MATRIX DO I=1,DIM_SHIFT-1 MATRIX(:,I)=MATRIX(:,I+1) END DO END SUBROUTINE SHIFT_MAT_LEFT !NEEDS TO BE TESTED SUBROUTINE CHECK_CONVERGENCE_NEW(OLD_VAL,NEW_VAL,ITERATION,NOTCONVERGED_BOOL) !CHECKS THE CONVERGENCE CRITERIA IMPLICIT NONE REAL(KIND=8) :: OLD_VAL,NEW_VAL,EPSILON,NUM_SIG INTEGER :: ITERATION LOGICAL :: NOTCONVERGED_BOOL !NUMBER OF SIGNIFICANT DIGITS DESIRED NUM_SIG=6. EPSILON=10**(-NUM_SIG) IF((ABS(OLD_VAL-NEW_VAL).LE.EPSILON*MAX(ABS(NEW_VAL),1.D0)).OR.(ITERATION.GE.2000)) THEN NOTCONVERGED_BOOL=.FALSE. !!CALL INTPR("CLUSTER ITERATION",-1,ITERATION,1) END IF ! IF((ABS((OLD_VAL-NEW_VAL)).LE.1E-2).OR.(ITERATION.GE.50000)) THEN ! NOTCONVERGED_BOOL=.FALSE. ! END IF END SUBROUTINE CHECK_CONVERGENCE_NEW !NEEDS TO BE TESTED SUBROUTINE CHECK_CONVERGENCE(OLD_VAL,NEW_VAL,ITERATION,NOTCONVERGED_BOOL) !CHECKS THE CONVERGENCE CRITERIA IMPLICIT NONE REAL(KIND=8) :: OLD_VAL,NEW_VAL INTEGER :: ITERATION LOGICAL :: NOTCONVERGED_BOOL IF((ABS((OLD_VAL-NEW_VAL)/(ABS(OLD_VAL)+1)).LE.1E-9).OR.(ITERATION.GE.2000)) THEN NOTCONVERGED_BOOL=.FALSE. !!CALL INTPR("CLUSTER ITERATION",-1,ITERATION,1) END IF ! IF((ABS((OLD_VAL-NEW_VAL)).LE.1E-2).OR.(ITERATION.GE.50000)) THEN ! NOTCONVERGED_BOOL=.FALSE. ! END IF END SUBROUTINE CHECK_CONVERGENCE SUBROUTINE CHECK_CONVERGENCE2(AHAT,AOLD,PHAT,POLD,NODES,CLUSTERS,ITERATION,NOTCONVERGED_BOOL) !CHECKS THE CONVERGENCE CRITERIA IMPLICIT NONE REAL(KIND=8), DIMENSION(:,:) :: AHAT, AOLD REAL(KIND=8), DIMENSION(:) :: PHAT,POLD REAL(KIND=8) :: SQUARES_SUM INTEGER :: ITERATION,NODES,CLUSTERS,I,J,K LOGICAL :: NOTCONVERGED_BOOL SQUARES_SUM=SUM((PHAT-POLD)**2) ! SQUARES_SUM=0. ! DO I=1,NODES ! SQUARES_SUM=SQUARES_SUM+(PHAT(I)-POLD(I))**2 ! END DO DO I=1,CLUSTERS-1 DO J=I+1,CLUSTERS SQUARES_SUM=SQUARES_SUM+(AHAT(J,I)-AOLD(J,I))**2 END DO END DO IF((SQUARES_SUM.LE.1E-2).OR.(ITERATION.GE.3000)) THEN NOTCONVERGED_BOOL=.FALSE. ELSE NOTCONVERGED_BOOL=.TRUE. END IF !IF((.NOT.NOTCONVERGED_BOOL).OR.(MOD(ITERATION,500).EQ.0)) THEN ! PRINT*, "SQUARES SUM: ", SQUARES_SUM !END IF ! IF((ABS((OLD_VAL-NEW_VAL)).LE.1E-2).OR.(ITERATION.GE.50000)) THEN ! NOTCONVERGED_BOOL=.FALSE. ! END IF END SUBROUTINE CHECK_CONVERGENCE2 FUNCTION FIRST_MATMULT(MATU,MATV,DIM1,DIM2) ! ! This routine returns: matmul(t(matu),matu)-matmul(t(matu),matv) ! ! IMPLICIT NONE INTEGER :: I,J,K,DIM1,DIM2 REAL(KIND=8), DIMENSION(DIM1,DIM2) :: MATU,MATV REAL(KIND=8), DIMENSION(DIM2,DIM2) :: MATA,MATB,FIRST_MATMULT ! MATA=0. MATB=0. FIRST_MATMULT=0. DO I=1,DIM2 DO J=1,DIM2 DO K=1,DIM1 MATA(J,I)=MATA(J,I)+MATU(K,J)*MATU(K,I) MATB(J,I)=MATB(J,I)+MATU(K,J)*MATV(K,I) END DO END DO END DO FIRST_MATMULT=MATA-MATB END FUNCTION FIRST_MATMULT !CHECKED SUBROUTINE UPDATE_PARAMETERS_QNEWTN(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2BOOL,PSUM,ASUM,& NEW_L2,NEW_LOGLIK,QSEC,UPHILL,VEC_LENGTH) USE TOOLS IMPLICIT NONE INTEGER :: NODES,CLUSTERS,QSEC,VEC_LENGTH REAL, DIMENSION(NODES,NODES) :: ADJ INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8), DIMENSION(NODES) :: PHAT,PSUM REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT,ASUM INTEGER :: I,J,ITERATION,ERRORFLAG,ITS_BEFORE_CHECK REAL(KIND=8), DIMENSION(NODES) :: PN REAL(KIND=8), DIMENSION(VEC_LENGTH) :: TEMP_VEC,TEMP_F,TEMP_F2,TEMP_NEW REAL(KIND=8), DIMENSION(VEC_LENGTH,QSEC) :: MAP_U,MAP_V !REAL(KIND=8), DIMENSION(QSEC,VEC_LENGTH) :: MAP_UT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN REAL(KIND=8), DIMENSION(QSEC,QSEC) :: TEMP_MAT,TEMPINV_MAT LOGICAL :: NOT_CONVERGED2,L2BOOL,INCREASE_FLAG,TESTFLAG,UPHILL REAL(KIND=8) :: NEW_LOGLIK,NEW_L2,OLD_L2,OLD_LOGLIK REAL(KIND=8) :: F_LOGLIK,F2_LOGLIK,F_L2,F2_L2 INTEGER, DIMENSION(2) :: SHAPE_TEST !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES !NOTE THAT THIS USES A QUASI-NEWTON UPDATING SCHEME DESCRIBED IN !"A QUASI-NEWTON ACCELERATION FOR HIGH-DIMENSIONAL OPTIMIZATION" !BY HUA ZHOU, DAVID ALEXANDER, AND KENNETH LANGE !!CALL INTPR("WORKING...",-1,1,0) !STARTING UPDATES PARAMETER UPDATES UNTIL CONVERGENCE OR MAX COUNT REACHED ERRORFLAG=0 INCREASE_FLAG=.TRUE. NOT_CONVERGED2=.TRUE. ITERATION=0 AN=AHAT PN=PHAT TEMPINV_MAT=1. MAP_U=1. !ITS_BEFORE_CHECK=12 CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC,VEC_LENGTH) !!CALL INTPR("WORKING...1",-1,1,0) !QUASI-NEWTON UPDATE SETUP DO I=1,QSEC+1 CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,PSUM,ASUM,NEW_L2,NEW_LOGLIK) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F,VEC_LENGTH) IF(I.LE.QSEC) THEN MAP_U(:,I)=TEMP_F-TEMP_VEC END IF IF(I.GE.2) THEN MAP_V(:,I-1)=TEMP_F-TEMP_VEC END IF TEMP_VEC=TEMP_F END DO TEMP_NEW=TEMP_F !!CALL INTPR("WORKING...3",-1,1,0) OLD_L2=NEW_L2 OLD_LOGLIK=NEW_LOGLIK !!CALL INTPR("WORKING...3.1",-1,1,0) CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,PSUM,ASUM,F_L2,F_LOGLIK) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F,VEC_LENGTH) !!CALL INTPR("WORKING...3.2",-1,1,0) DO WHILE(NOT_CONVERGED2) ITERATION=ITERATION+1 CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,PSUM,ASUM,F2_L2,F2_LOGLIK) !!CALL INTPR("WORKING...3.21",-1,1,0) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F2,VEC_LENGTH) !!CALL INTPR("WORKING...3.22",-1,1,0) CALL SHIFT_MAT_LEFT(MAP_U,VEC_LENGTH,QSEC) !!CALL INTPR("WORKING...3.23",-1,1,0) !!CALL INTPR("SHAPE U",-1,SHAPE_TEST,2) MAP_U(:,QSEC)=TEMP_F-TEMP_VEC !!CALL INTPR("WORKING...3.3",-1,1,0) CALL SHIFT_MAT_LEFT(MAP_V,VEC_LENGTH,QSEC) MAP_V(:,QSEC)=TEMP_F2-TEMP_F !!CALL INTPR("WORKING...3.4",-1,1,0) !MAP_UT=TRANSPOSE(MAP_U) !SHAPE_TEST=SHAPE(MATMUL(MAP_UT,MAP_U)-MATMUL(MAP_UT,MAP_V)) !!CALL INTPR("SHAPE",-1,SHAPE_TEST,2) !TEMP_MAT=MATMUL(MAP_UT,MAP_U)-MATMUL(MAP_UT,MAP_V) TEMP_MAT=FIRST_MATMULT(MAP_U,MAP_V,VEC_LENGTH,QSEC) !!CALL INTPR("WORKING...3.5",-1,1,0) CALL FINDINV(TEMP_MAT,TEMPINV_MAT,QSEC,ERRORFLAG) !!CALL INTPR("WORKING...4",-1,1,0) IF (ERRORFLAG.LE.-1) THEN !!CALL INTPR("UPDATED PARAMETERS NORMALLY DUE TO SINGULAR MATRIX",-1,1,0) TEMP_NEW=TEMP_F2 NEW_LOGLIK=F2_LOGLIK NEW_L2=F2_L2 ELSE !!CALL INTPR("WORKING...4.1",-1,1,0) TEMP_NEW=TEMP_F-MATMUL(MAP_V,MATMUL(TEMPINV_MAT,MATMUL(TRANSPOSE(MAP_U),TEMP_VEC-TEMP_F))) !!CALL INTPR("WORKING...4.11",-1,1,0) DO I=1,VEC_LENGTH IF(TEMP_NEW(I).LE.0) THEN TEMP_NEW(I)=TEMP_VEC(I)/10 END IF END DO !!CALL INTPR("WORKING...4.12",-1,1,0) CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW,VEC_LENGTH) !!CALL INTPR("WORKING...4.2",-1,1,0) IF(L2BOOL) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) END IF !!CALL INTPR("WORKING...4.5",-1,1,0) END IF !!CALL INTPR("WORKING...5",-1,1,0) IF((ITERATION.GE.3)) THEN CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW,VEC_LENGTH) CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,PSUM,ASUM,F_L2,F_LOGLIK) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F,VEC_LENGTH) CALL DECOMPOSE_MAP(PHAT,AHAT,NODES,CLUSTERS,TEMP_NEW,VEC_LENGTH) IF(L2BOOL) THEN !CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) !NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(NEW_L2,F_L2,ITERATION,NOT_CONVERGED2) ELSE !CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) !NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(NEW_LOGLIK,F_LOGLIK,ITERATION,NOT_CONVERGED2) END IF END IF !!CALL INTPR("WORKING...6",-1,1,0) !CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) !!CALL INTPR("WORKING...7",-1,1,0) TEMP_VEC=TEMP_NEW OLD_L2=NEW_L2 OLD_LOGLIK=NEW_LOGLIK !CALL DBLEPR("L2: ",-1,OLD_L2,1) !CALL DBLEPR("PN(1): ",-1,PN(1),1) ! PHAT=PN ! AHAT=AN END DO !CALL INTPR("MM ITERATIONS: ",-1,ITERATION,1) CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC,VEC_LENGTH) IF(L2BOOL) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) END IF PHAT=PN AHAT=AN !!CALL INTPR("QNEWT UPDATE ITERATIONS: ",-1,ITERATION,1) END SUBROUTINE UPDATE_PARAMETERS_QNEWTN !NEEDS TO BE CHECKED SUBROUTINE UPDATE_PARAMETERS_QNEWTN_PARALLEL(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,& L2BOOL,PSUM,ASUM,NEW_L2,NEW_LOGLIK,QSEC,UPHILL,VEC_LENGTH) USE TOOLS IMPLICIT NONE INTEGER :: NODES,CLUSTERS,QSEC,VEC_LENGTH REAL, DIMENSION(NODES,NODES) :: ADJ INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8), DIMENSION(NODES) :: PHAT,PSUM REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT,ASUM INTEGER :: I,J,ITERATION,ERRORFLAG REAL(KIND=8), DIMENSION(NODES) :: PN REAL(KIND=8), DIMENSION(VEC_LENGTH) :: TEMP_VEC,TEMP_F,TEMP_F2,TEMP_NEW REAL(KIND=8), DIMENSION(VEC_LENGTH,QSEC) :: MAP_U,MAP_V !REAL(KIND=8), DIMENSION(QSEC,VEC_LENGTH) :: MAP_UT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN REAL(KIND=8), DIMENSION(QSEC,QSEC) :: TEMP_MAT,TEMPINV_MAT LOGICAL :: NOT_CONVERGED2,L2BOOL,INCREASE_FLAG,TESTFLAG,UPHILL REAL(KIND=8) :: NEW_LOGLIK,NEW_L2,OLD_L2,OLD_LOGLIK REAL(KIND=8) :: F_LOGLIK,F2_LOGLIK,F_L2,F2_L2 INTEGER, DIMENSION(2) :: SHAPE_TEST !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES !NOTE THAT THIS USES A QUASI-NEWTON UPDATING SCHEME DESCRIBED IN !"A QUASI-NEWTON ACCELERATION FOR HIGH-DIMENSIONAL OPTIMIZATION" !BY HUA ZHOU, DAVID ALEXANDER, AND KENNETH LANGE !!CALL INTPR("WORKING...",-1,1,0) !STARTING UPDATES PARAMETER UPDATES UNTIL CONVERGENCE OR MAX COUNT REACHED ERRORFLAG=0 INCREASE_FLAG=.TRUE. NOT_CONVERGED2=.TRUE. ITERATION=0 AN=AHAT PN=PHAT TEMPINV_MAT=1. MAP_U=1. CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC,VEC_LENGTH) !!CALL INTPR("WORKING...1",-1,1,0) !QUASI-NEWTON UPDATE SETUP DO I=1,QSEC+1 CALL UPDATE_PARAMETERS_ONCE_PARALLEL(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,& PSUM,ASUM,NEW_L2,NEW_LOGLIK) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F,VEC_LENGTH) IF(I.LE.QSEC) THEN MAP_U(:,I)=TEMP_F-TEMP_VEC END IF IF(I.GE.2) THEN MAP_V(:,I-1)=TEMP_F-TEMP_VEC END IF TEMP_VEC=TEMP_F END DO TEMP_NEW=TEMP_F !!CALL INTPR("WORKING...3",-1,1,0) OLD_L2=NEW_L2 OLD_LOGLIK=NEW_LOGLIK !!CALL INTPR("WORKING...3.1",-1,1,0) CALL UPDATE_PARAMETERS_ONCE_PARALLEL(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,& PSUM,ASUM,F_L2,F_LOGLIK) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F,VEC_LENGTH) !!CALL INTPR("WORKING...3.2",-1,1,0) DO WHILE(NOT_CONVERGED2) ITERATION=ITERATION+1 CALL UPDATE_PARAMETERS_ONCE_PARALLEL(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2BOOL,& PSUM,ASUM,F2_L2,F2_LOGLIK) !!CALL INTPR("WORKING...3.21",-1,1,0) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F2,VEC_LENGTH) !!CALL INTPR("WORKING...3.22",-1,1,0) CALL SHIFT_MAT_LEFT(MAP_U,VEC_LENGTH,QSEC) !!CALL INTPR("WORKING...3.23",-1,1,0) !!CALL INTPR("SHAPE U",-1,SHAPE_TEST,2) MAP_U(:,QSEC)=TEMP_F-TEMP_VEC !!CALL INTPR("WORKING...3.3",-1,1,0) CALL SHIFT_MAT_LEFT(MAP_V,VEC_LENGTH,QSEC) MAP_V(:,QSEC)=TEMP_F2-TEMP_F !!CALL INTPR("WORKING...3.4",-1,1,0) !MAP_UT=TRANSPOSE(MAP_U) !SHAPE_TEST=SHAPE(MATMUL(MAP_UT,MAP_U)-MATMUL(MAP_UT,MAP_V)) !!CALL INTPR("SHAPE",-1,SHAPE_TEST,2) !TEMP_MAT=MATMUL(MAP_UT,MAP_U)-MATMUL(MAP_UT,MAP_V) TEMP_MAT=FIRST_MATMULT(MAP_U,MAP_V,VEC_LENGTH,QSEC) !!CALL INTPR("WORKING...3.5",-1,1,0) CALL FINDINV(TEMP_MAT,TEMPINV_MAT,QSEC,ERRORFLAG) !!CALL INTPR("WORKING...4",-1,1,0) IF (ERRORFLAG.LE.-1) THEN !!CALL INTPR("UPDATED PARAMETERS NORMALLY DUE TO SINGULAR MATRIX",-1,1,0) TEMP_NEW=TEMP_F2 NEW_LOGLIK=F2_LOGLIK NEW_L2=F2_L2 ELSE !!CALL INTPR("WORKING...4.1",-1,1,0) TEMP_NEW=TEMP_F-MATMUL(MAP_V,MATMUL(TEMPINV_MAT,MATMUL(TRANSPOSE(MAP_U),TEMP_VEC-TEMP_F))) !!CALL INTPR("WORKING...4.11",-1,1,0) DO I=1,VEC_LENGTH IF(TEMP_NEW(I).LE.0) THEN TEMP_NEW(I)=TEMP_VEC(I)/10 END IF END DO !!CALL INTPR("WORKING...4.12",-1,1,0) CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW,VEC_LENGTH) !!CALL INTPR("WORKING...4.2",-1,1,0) IF(L2BOOL) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) END IF !!CALL INTPR("WORKING...4.5",-1,1,0) END IF !!CALL INTPR("WORKING...5",-1,1,0) IF((ITERATION.GE.3)) THEN CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW,VEC_LENGTH) CALL UPDATE_PARAMETERS_ONCE_PARALLEL(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,& L2BOOL,PSUM,ASUM,F_L2,F_LOGLIK) CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F,VEC_LENGTH) CALL DECOMPOSE_MAP(PHAT,AHAT,NODES,CLUSTERS,TEMP_NEW,VEC_LENGTH) IF(L2BOOL) THEN !CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) !NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(NEW_L2,F_L2,ITERATION,NOT_CONVERGED2) ELSE !CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) !NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(NEW_LOGLIK,F_LOGLIK,ITERATION,NOT_CONVERGED2) END IF END IF !!CALL INTPR("WORKING...6",-1,1,0) !CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) !!CALL INTPR("WORKING...7",-1,1,0) TEMP_VEC=TEMP_NEW OLD_L2=NEW_L2 OLD_LOGLIK=NEW_LOGLIK ! PHAT=PN ! AHAT=AN CALL DBLEPR("L2: ",-1,OLD_L2,1) CALL DBLEPR("PN(1): ",-1,PN(1),1) END DO CALL INTPR("MM ITERATIONS: ",-1,ITERATION,1) CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC,VEC_LENGTH) IF(L2BOOL) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS) END IF PHAT=PN AHAT=AN !!CALL INTPR("QNEWT UPDATE ITERATIONS: ",-1,ITERATION,1) END SUBROUTINE UPDATE_PARAMETERS_QNEWTN_PARALLEL !NEEDS TO BE TESTED ! SUBROUTINE UPDATE_PARAMETERS_QNEWTN_WKS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& ! NEW_L2,NEW_LOGLIK,QSEC,UPHILL) ! ! IMPLICIT NONE ! ! INTEGER, DIMENSION(:) :: TESTMODULE ! REAL, DIMENSION(:,:) :: ADJ ! REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM ! REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM ! INTEGER :: I,J,K,ITERATION,NODES,CLUSTERS,OUTPUT_UNIT3,TEMP,QSEC,ERRORFLAG,TEMP2!,MM_ITS,HALF ! REAL(KIND=8), DIMENSION(NODES) :: PN ! REAL(KIND=8), DIMENSION(CLUSTERS) :: PCLUSTERSUM,PDEN ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:) :: TEMP_VEC,TEMP_F,TEMP_F2,TEMP_NEW!,TEMP_OLD ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:) :: MAP_U,MAP_V ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN ! REAL(KIND=8), DIMENSION(QSEC,QSEC) :: TEMP_MAT,TEMPINV_MAT ! LOGICAL :: NOT_CONVERGED2,L2,INCREASE_FLAG,TESTFLAG,UPHILL ! REAL(KIND=8) :: NEW_LOGLIK,NEW_L2,TEMP_LOGLIK,TEMP_L2,OLD_L2,OLD_LOGLIK ! REAL(KIND=8) :: F_LOGLIK,F2_LOGLIK,F_L2,F2_L2 ! !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES ! !NOTE THAT THIS USES A QUASI-NEWTON UPDATING SCHEME DESCRIBED IN ! !"A QUASI-NEWTON ACCELERATION FOR HIGH-DIMENSIONAL OPTIMIZATION" ! !BY HUA ZHOU, DAVID ALEXANDER, AND KENNETH LANGE ! ! !CALL INTPR("WORKING...",-1,1,0) ! TEMP=INT(CLUSTERS*(CLUSTERS-1)/2) ! TEMP2=TEMP+NODES ! !ALLOCATE(MAP(TEMP2,QSEC+2)) ! ALLOCATE(MAP_U(TEMP2,QSEC)) ! ALLOCATE(MAP_V(TEMP2,QSEC)) ! ALLOCATE(TEMP_VEC(TEMP2)) ! ALLOCATE(TEMP_F(TEMP2)) ! ALLOCATE(TEMP_F2(TEMP2)) ! ALLOCATE(TEMP_NEW(TEMP2)) ! ! ! !STARTING UPDATES PARAMETER UPDATES UNTIL CONVERGENCE OR MAX COUNT REACHED ! INCREASE_FLAG=.TRUE. ! NOT_CONVERGED2=.TRUE. ! AN=AHAT ! PN=PHAT ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC) ! ! !QUASI-NEWTON UPDATE SETUP ! DO I=1,QSEC+1 ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,NEW_L2,NEW_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F) ! IF(I.LE.QSEC) THEN ! MAP_U(:,I)=TEMP_F-TEMP_VEC ! END IF ! IF(I.GE.2) THEN ! MAP_V(:,I-1)=TEMP_F-TEMP_VEC ! END IF ! TEMP_VEC=TEMP_F ! END DO ! TEMP_NEW=TEMP_F ! ! !CALL INTPR("WORKING...3",-1,1,0) ! ITERATION=0 ! TEMP_NEW=TEMP_VEC ! OLD_L2=NEW_L2 ! OLD_LOGLIK=NEW_LOGLIK ! ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,F_L2,F_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F) ! ! DO WHILE(NOT_CONVERGED2) ! ITERATION=ITERATION+1 ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,F2_L2,F2_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F2) ! CALL SHIFT_MAT_LEFT(MAP_U,TEMP2,QSEC) ! MAP_U(:,QSEC)=TEMP_F-TEMP_VEC ! CALL SHIFT_MAT_LEFT(MAP_V,TEMP2,QSEC) ! MAP_V(:,QSEC)=TEMP_F2-TEMP_F ! TEMP_MAT=MATMUL(TRANSPOSE(MAP_U),MAP_U)-MATMUL(TRANSPOSE(MAP_U),MAP_V) ! CALL FINDINV(TEMP_MAT,TEMPINV_MAT,QSEC,ERRORFLAG) ! ! !CALL INTPR("WORKING...4",-1,1,0) ! IF (ERRORFLAG.LE.-1) THEN ! !PRINT*, "UPDATED PARAMETERS NORMALLY DUE TO SINGULAR MATRIX" ! !CALL INTPR("UPDATED PARAMETERS NORMALLY DUE TO SINGULAR MATRIX",-1,1,0) ! TEMP_NEW=TEMP_F2 ! NEW_LOGLIK=F2_LOGLIK ! NEW_L2=F2_L2 ! ELSE ! !CALL INTPR("WORKING...4.1",-1,1,0) ! TEMP_NEW=TEMP_F-MATMUL(MAP_V,MATMUL(TEMPINV_MAT,MATMUL(TRANSPOSE(MAP_U),TEMP_VEC-TEMP_F))) ! !CALL INTPR("WORKING...4.11",-1,1,0) ! DO I=1,TEMP2 ! IF(TEMP_NEW(I).LE.0) THEN ! TEMP_NEW(I)=TEMP_VEC(I)/10 ! END IF ! END DO ! !CALL INTPR("WORKING...4.12",-1,1,0) ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! !CALL INTPR("WORKING...4.2",-1,1,0) ! IF(L2) THEN ! TEMP_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN) ! ELSE ! TEMP_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN) ! END IF ! !CALL INTPR("WORKING...4.5",-1,1,0) ! END IF ! !CALL INTPR("WORKING...5",-1,1,0) ! IF((ITERATION.GE.1)) THEN ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,F_L2,F_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F) ! CALL DECOMPOSE_MAP(PHAT,AHAT,NODES,CLUSTERS,TEMP_NEW) ! IF(L2) THEN ! CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) ! !NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ! !CALL CHECK_CONVERGENCE(OLD_L2,NEW_L2,ITERATION,NOT_CONVERGED2) ! ELSE ! CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) ! !NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ! !CALL CHECK_CONVERGENCE(OLD_LOGLIK,NEW_LOGLIK,ITERATION,NOT_CONVERGED2) ! END IF ! END IF ! !CALL INTPR("WORKING...6",-1,1,0) ! !CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! ! !CALL INTPR("WORKING...7",-1,1,0) ! TEMP_VEC=TEMP_NEW ! OLD_L2=NEW_L2 ! OLD_LOGLIK=NEW_LOGLIK ! PHAT=PN ! AHAT=AN ! END DO ! ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC) ! IF(L2) THEN ! NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN) ! ELSE ! NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN) ! END IF ! PHAT=PN ! AHAT=AN ! ! DEALLOCATE(MAP_U) ! DEALLOCATE(MAP_V) ! DEALLOCATE(TEMP_VEC) ! DEALLOCATE(TEMP_F) ! DEALLOCATE(TEMP_F2) ! DEALLOCATE(TEMP_NEW) ! ! !CALL INTPR("QNEWT UPDATE ITERATIONS: ",-1,ITERATION,1) ! ! END SUBROUTINE UPDATE_PARAMETERS_QNEWTN_WKS ! ! ! !!NEEDS TO BE TESTED ! SUBROUTINE UPDATE_PARAMETERS_QNEWTN_OLD(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& ! NEW_L2,NEW_LOGLIK,QSEC,UPHILL) ! ! IMPLICIT NONE ! ! INTEGER, DIMENSION(:) :: TESTMODULE ! REAL, DIMENSION(:,:) :: ADJ ! REAL(KIND=8), DIMENSION(:) :: PHAT,PSUM ! REAL(KIND=8), DIMENSION(:,:) :: AHAT,ASUM ! INTEGER :: I,J,K,ITERATION,NODES,CLUSTERS,OUTPUT_UNIT3,TEMP,QSEC,ERRORFLAG,TEMP2,MM_ITS,HALF ! REAL(KIND=8), DIMENSION(NODES) :: PN ! REAL(KIND=8), DIMENSION(CLUSTERS) :: PCLUSTERSUM,PDEN ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:) :: TEMP_VEC,TEMP_F,TEMP_F2,TEMP_NEW,TEMP_OLD ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:) :: MAP_U,MAP_V ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AN ! REAL(KIND=8), DIMENSION(QSEC,QSEC) :: TEMP_MAT,TEMPINV_MAT ! LOGICAL :: NOT_CONVERGED2,L2,INCREASE_FLAG,TESTFLAG,UPHILL ! REAL(KIND=8) :: NEW_LOGLIK,NEW_L2,TEMP_LOGLIK,TEMP_L2,OLD_L2,OLD_LOGLIK ! REAL(KIND=8) :: F_LOGLIK,F2_LOGLIK,F_L2,F2_L2 ! !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES ! !NOTE THAT THIS USES A QUASI-NEWTON UPDATING SCHEME DESCRIBED IN ! !"A QUASI-NEWTON ACCELERATION FOR HIGH-DIMENSIONAL OPTIMIZATION" ! !BY HUA ZHOU, DAVID ALEXANDER, AND KENNETH LANGE ! ! !CALL INTPR("WORKING...",-1,1,0) ! TEMP=INT(CLUSTERS*(CLUSTERS-1)/2) ! TEMP2=TEMP+NODES ! !ALLOCATE(MAP(TEMP2,QSEC+2)) ! ALLOCATE(TEMP_VEC(TEMP2)) ! ALLOCATE(TEMP_F(TEMP2)) ! ALLOCATE(TEMP_F2(TEMP2)) ! ALLOCATE(TEMP_NEW(TEMP2)) ! ALLOCATE(TEMP_OLD(TEMP2)) ! ALLOCATE(MAP_U(TEMP2,QSEC)) ! ALLOCATE(MAP_V(TEMP2,QSEC)) !! TEMP_VEC=1. !! TEMP_F=2. !! TEMP_F2=3. !! TEMP_NEW=4. !! TEMP_OLD=5. !! MAP_U=6. !! MAP_V=7. !! TEMP_MULT=8. !! PDEN=1. !! !CALL DBLEPR("TEMP_VEC",-1,TEMP_VEC,11) !! !CALL DBLEPR("TEMP_F",-1,TEMP_F,11) !! !CALL DBLEPR("TEMP_F2",-1,TEMP_F2,11) !! !CALL DBLEPR("TEMP_NEW",-1,TEMP_NEW,11) !! !CALL DBLEPR("TEMP_OLD",-1,TEMP_OLD,11) !! !CALL DBLEPR("MAP_U",-1,MAP_U,55) !! !CALL DBLEPR("MAP_V",-1,MAP_V,55) !! !CALL DBLEPR("TEMP_MULT",-1,TEMP_MULT,121) !! !CALL DBLEPR("PDEN",-1,PDEN,10) !! !CALL INTPR("WORKING...2",-1,1,0) ! ! !STARTING UPDATES PARAMETER UPDATES UNTIL CONVERGENCE OR MAX COUNT REACHED ! INCREASE_FLAG=.TRUE. ! NOT_CONVERGED2=.TRUE. ! AN=AHAT ! PN=PHAT ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC) ! ! !QUASI-NEWTON UPDATE SETUP ! DO I=1,QSEC+1 ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,NEW_L2,NEW_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F) ! IF(I.LE.QSEC) THEN ! MAP_U(:,I)=TEMP_F-TEMP_VEC ! END IF ! IF(I.GE.2) THEN ! MAP_V(:,I-1)=TEMP_F-TEMP_VEC ! END IF ! TEMP_VEC=TEMP_F ! END DO ! TEMP_NEW=TEMP_F ! ! !CALL INTPR("WORKING...3",-1,1,0) ! HALF=0 ! MM_ITS=0 ! ITERATION=0 ! TEMP_NEW=TEMP_VEC ! TEMP_OLD=TEMP_VEC ! OLD_L2=NEW_L2 ! OLD_LOGLIK=NEW_LOGLIK ! DO WHILE(NOT_CONVERGED2) ! ITERATION=ITERATION+1 ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,F_L2,F_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F) ! CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PN,AN,NODES,CLUSTERS,L2,PSUM,ASUM,F2_L2,F2_LOGLIK) ! CALL COMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F2) ! CALL SHIFT_MAT_LEFT(MAP_U,TEMP2,QSEC) ! MAP_U(:,QSEC)=TEMP_F-TEMP_OLD ! CALL SHIFT_MAT_LEFT(MAP_V,TEMP2,QSEC) ! MAP_V(:,QSEC)=TEMP_F2-TEMP_F ! TEMP_MAT=MATMUL(TRANSPOSE(MAP_U),MAP_U)-MATMUL(TRANSPOSE(MAP_U),MAP_V) ! CALL FINDINV(TEMP_MAT,TEMPINV_MAT,QSEC,ERRORFLAG) ! ! !CALL INTPR("WORKING...4",-1,1,0) ! IF (ERRORFLAG.LE.-1) THEN ! !PRINT*, "UPDATED PARAMETERS NORMALLY DUE TO SINGULAR MATRIX" ! !CALL INTPR("UPDATED PARAMETERS NORMALLY DUE TO SINGULAR MATRIX",-1,1,0) ! TEMP_NEW=TEMP_F2 ! NEW_LOGLIK=F2_LOGLIK ! NEW_L2=F2_L2 ! MM_ITS=MM_ITS+1 ! ELSE ! !CALL INTPR("WORKING...4.1",-1,1,0) ! !!CALL DBLEPR("TEMP_MAT",-1,TEMP_MAT,QSEC*QSEC+1) ! !!CALL DBLEPR("TEMPINV_MAT",-1,TEMPINV_MAT,QSEC*QSEC+1) ! !!CALL DBLEPR("I",-1,MATMUL(TEMP_MAT,TEMPINV_MAT),QSEC*QSEC) ! !!CALL DBLEPR("MAP_U",-1,MAP_U,QSEC*TEMP2) ! !!CALL DBLEPR("MAP_U",-1,MAP_U,QSEC*TEMP2) ! !CALL INTPR("WORKING...4.1",-1,1,0) ! !CALL INTPR("WORKING...4.111",-1,1,0) ! TEMP_NEW=TEMP_F-MATMUL(MAP_V,MATMUL(TEMPINV_MAT,MATMUL(TRANSPOSE(MAP_U),TEMP_VEC-TEMP_F))) ! !CALL INTPR("WORKING...4.11",-1,1,0) ! DO I=1,TEMP2 ! IF(TEMP_NEW(I).LE.0) THEN ! !SINCE ! TEMP_NEW(I)=TEMP_OLD(I)/10 ! END IF ! END DO ! !CALL INTPR("WORKING...4.12",-1,1,0) ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! !CALL INTPR("WORKING...4.2",-1,1,0) ! IF(L2) THEN !! DO I=1,TEMP2 !! IF(TEMP_NEW(I)>1) THEN !! TEMP_NEW(I)=1. !! END IF !! END DO ! TEMP_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN) ! ELSE ! TEMP_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN) ! END IF ! !CALL INTPR("WORKING...4.5",-1,1,0) ! !THIS PART ENSURES THAT ONLY UPHILL STEPS ARE TAKEN ! IF(UPHILL) THEN ! IF(L2) THEN ! IF(TEMP_L2>OLD_L2) THEN ! TEMP_NEW=(TEMP_NEW+TEMP_OLD)/2 ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! TEMP_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN) ! IF(TEMP_L2>OLD_L2) THEN ! TEMP_NEW=TEMP_F2 ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! !PRINT*, "UPDATED PARAMETERS NORMALLY DUE TO INCREASE L2" ! NEW_L2=F2_L2 ! MM_ITS=MM_ITS+1 ! ELSE ! NEW_L2=TEMP_L2 ! HALF=HALF+1 ! END IF ! ELSE ! NEW_L2=TEMP_L2 ! END IF ! ELSE ! !CALL INTPR("WORKING...4.7",-1,1,0) ! IF(TEMP_LOGLIK.LE.OLD_LOGLIK) THEN ! TEMP_NEW=(TEMP_NEW+TEMP_OLD)/2 ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! TEMP_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN) ! IF(TEMP_LOGLIK.LE.OLD_LOGLIK) THEN ! TEMP_NEW=TEMP_F2 ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! !PRINT*, "UPDATED PARAMETERS NORMALLY DUE TO DECREASE LOGLIK" ! NEW_LOGLIK=F2_LOGLIK ! MM_ITS=MM_ITS+1 ! ELSE ! HALF=HALF+1 ! NEW_LOGLIK=TEMP_LOGLIK ! END IF ! ELSE ! NEW_LOGLIK=TEMP_LOGLIK ! END IF ! END IF ! END IF ! END IF ! !CALL INTPR("WORKING...5",-1,1,0) ! !CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! IF((ITERATION.GE.1)) THEN ! CALL DECOMPOSE_MAP(PHAT,AHAT,NODES,CLUSTERS,TEMP_F2) ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_F) ! IF(L2) THEN ! !NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN)!MAY NOT NEED THIS ! !CALL CHECK_CONVERGENCE(F_L2,F2_L2,ITERATION,NOT_CONVERGED2) ! CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) ! ELSE ! !NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN)!MAY NOT NEED THIS ! !CALL CHECK_CONVERGENCE(F_LOGLIK,F2_LOGLIK,ITERATION,NOT_CONVERGED2) ! CALL CHECK_CONVERGENCE2(AN,AHAT,PN,PHAT,NODES,CLUSTERS,ITERATION,NOT_CONVERGED2) ! END IF ! END IF ! !CALL INTPR("WORKING...6",-1,1,0) ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_NEW) ! ! !CALL INTPR("WORKING...7",-1,1,0) ! TEMP_VEC=TEMP_NEW!MAY WANT TO TAKE THIS VALUE ONLY IF NOT_CONVERGED IS TRUE ! TEMP_OLD=TEMP_NEW ! OLD_L2=NEW_L2 ! OLD_LOGLIK=NEW_LOGLIK ! PHAT=PN ! AHAT=AN ! END DO ! ! CALL DECOMPOSE_MAP(PN,AN,NODES,CLUSTERS,TEMP_VEC) ! IF(L2) THEN ! NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PN,AN) ! ELSE ! NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PN,AN) ! END IF ! PHAT=PN ! AHAT=AN ! ! !CALL INTPR("QNEWT UPDATE ITERATIONS: ",-1,ITERATION,1) ! !CALL INTPR("HALF STEPS: ",-1,HALF,1) ! !CALL INTPR("MM UPDATE ITERATIONS: ",-1,MM_ITS,1) ! !!CALL INTPR("AHAT QNEWT: ",-1,AHAT,1) ! ! END SUBROUTINE UPDATE_PARAMETERS_QNEWTN_OLD ! SUBROUTINE UPDATE_CLUSTER_ASSIGNMENTS(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS,& PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) USE TOOLS IMPLICIT NONE INTEGER :: I,J,K,ORIGINAL_ASSIGNMENT,NODES,CLUSTERS,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE,TRIALMODULE REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT,PTRIAL,PSUM REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT,ATRIAL,ASUM,ASUMTRIAL INTEGER, DIMENSION(CLUSTERS) :: CLUSTER_SIZES LOGICAL :: NOT_CONVERGED,L2,QNEWT,UPHILL REAL(KIND=8) :: LOGLIK,TRIAL_LOGLIK,L2NORM,TRIAL_L2NORM !THIS SHOULD CLUSTER USING THE POISSON CLUSTERING ALGORITHM AND !CHANGE NOT_CONVERGED TO TRUE IF A CLUSTER ASSIGNMENT CHANGES ! !CLUSTERING IS DONE BY SWAPPING ONE NODE FROM ITS ORIGINAL CLUSTER TO ALL POSSIBLE ALTERNATIVES !AND TAKING THE ONE WITH THE HIGHEST LOGLIKELIHOOD NOT_CONVERGED=.FALSE. TRIALMODULE=TESTMODULE ATRIAL=AHAT PTRIAL=PHAT ASUMTRIAL=ASUM TRIAL_LOGLIK=0. TRIAL_L2NORM=0. !COUNTS THE NUMBER OF NODES IN EACH CLUSTER !NEEDED TO ENSURE THAT NO CLUSTER IS REDUCED TO 0 NODES CLUSTER_SIZES=0 DO I=1,NODES CLUSTER_SIZES(TESTMODULE(I))=CLUSTER_SIZES(TESTMODULE(I))+1 END DO DO I=1,NODES ! IF(MOD(I,25).EQ.0) THEN ! PRINT*, "NODE NUMBER: ", I ! END IF IF(CLUSTER_SIZES(TESTMODULE(I))>1) THEN ORIGINAL_ASSIGNMENT=TESTMODULE(I) DO J=1,CLUSTERS IF(J.NE.ORIGINAL_ASSIGNMENT) THEN TRIALMODULE(I)=J CALL UPDATE_ASUM(ADJ,TRIALMODULE,I,TESTMODULE(I),J,ASUMTRIAL,NODES,CLUSTERS) CALL INITIALIZE_PARAMETERS(ADJ,TRIALMODULE,PTRIAL,ATRIAL,NODES,CLUSTERS,L2) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN TRIAL_L2NORM=CALC_L2NORM(ADJ,TRIALMODULE,PTRIAL,ATRIAL,NODES,CLUSTERS) ELSE TRIAL_LOGLIK=CALC_LOGLIK(ADJ,TRIALMODULE,PTRIAL,ATRIAL,NODES,CLUSTERS) END IF IF(QNEWT) THEN CALL UPDATE_PARAMETERS_QNEWTN(ADJ,TRIALMODULE,PTRIAL,ATRIAL,NODES,CLUSTERS,L2,& PSUM,ASUMTRIAL,TRIAL_L2NORM,TRIAL_LOGLIK,QSEC,UPHILL,MAP_LENGTH) ELSE CALL UPDATE_PARAMETERS(ADJ,TRIALMODULE,PTRIAL,ATRIAL,NODES,CLUSTERS,L2,PSUM, & ASUMTRIAL,TRIAL_L2NORM,TRIAL_LOGLIK) END IF !IF(L2) THEN !IF(ABS(MOD(I,2)).EQ.1) THEN ! PRINT*, "AFTER...TRIAL: ", TRIAL_L2NORM, "L2NORM: ", L2NORM, "NODE: ", I, "CLUSTER: ", J ! END IF !ELSE ! IF(ABS(MOD(I,2)).EQ.0) THEN ! PRINT*, "TRIAL: ", TRIAL_LOGLIK, "LOGLIK: ", LOGLIK, "NODE: ", I, "CLUSTER: ", J ! END IF !END IF !PAUSE IF(L2) THEN IF(TRIAL_L2NORMLOGLIK) THEN ASUM=ASUMTRIAL AHAT=ATRIAL PHAT=PTRIAL CLUSTER_SIZES(TESTMODULE(I))=CLUSTER_SIZES(TESTMODULE(I))-1 CLUSTER_SIZES(TRIALMODULE(I))=CLUSTER_SIZES(TRIALMODULE(I))+1 TESTMODULE(I)=TRIALMODULE(I) LOGLIK=TRIAL_LOGLIK NOT_CONVERGED=.TRUE. ELSE TRIALMODULE(I)=TESTMODULE(I) ATRIAL=AHAT PTRIAL=PHAT ASUMTRIAL=ASUM END IF END IF END IF END DO END IF END DO END SUBROUTINE UPDATE_CLUSTER_ASSIGNMENTS SUBROUTINE UPDATE_CLUSTER_ASSIGNMENTS2(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS,& PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) USE TOOLS IMPLICIT NONE INTEGER :: I,J,K,ORIGINAL_ASSIGNMENT,NODES,CLUSTERS,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE,TRIALMODULE REAL, DIMENSION(:,:) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT,PTRIAL,PSUM REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT,ATRIAL,ASUM,ASUMTRIAL INTEGER, DIMENSION(CLUSTERS) :: CLUSTER_SIZES LOGICAL :: NOT_CONVERGED,L2,QNEWT,UPHILL,INNER_NOT_CONVERGED REAL(KIND=8) :: LOGLIK,TRIAL_LOGLIK,L2NORM,TRIAL_L2NORM,TEST_CRIT !THIS SHOULD CLUSTER USING THE POISSON CLUSTERING ALGORITHM AND !CHANGE NOT_CONVERGED TO TRUE IF A CLUSTER ASSIGNMENT CHANGES ! !CLUSTERING IS DONE BY SWAPPING ONE NODE FROM ITS ORIGINAL CLUSTER TO ALL POSSIBLE ALTERNATIVES !AND TAKING THE ONE WITH THE HIGHEST LOGLIKELIHOOD NOT_CONVERGED=.FALSE. TRIALMODULE=TESTMODULE ATRIAL=AHAT PTRIAL=PHAT ASUMTRIAL=ASUM TRIAL_LOGLIK=0. TRIAL_L2NORM=0. !COUNTS THE NUMBER OF NODES IN EACH CLUSTER !NEEDED TO ENSURE THAT NO CLUSTER IS REDUCED TO 0 NODES CLUSTER_SIZES=0 DO I=1,NODES CLUSTER_SIZES(TESTMODULE(I))=CLUSTER_SIZES(TESTMODULE(I))+1 END DO INNER_NOT_CONVERGED=.TRUE. DO WHILE(INNER_NOT_CONVERGED) INNER_NOT_CONVERGED=.FALSE. DO I=1,NODES ! IF(MOD(I,25).EQ.0) THEN ! PRINT*, "NODE NUMBER: ", I ! END IF IF(CLUSTER_SIZES(TESTMODULE(I))>1) THEN ORIGINAL_ASSIGNMENT=TESTMODULE(I) DO J=1,CLUSTERS IF(J.NE.ORIGINAL_ASSIGNMENT) THEN TRIALMODULE(I)=J !CALL UPDATE_ASUM(ADJ,TRIALMODULE,I,TESTMODULE(I),J,ASUMTRIAL,NODES,CLUSTERS) !CALCULATING THE NORM OR LOGLIKELIHOOD IF(L2) THEN !TEST_CRIT=CALC_L2NORM(ADJ,TRIALMODULE,PHAT, AHAT,NODES,CLUSTERS) TRIAL_L2NORM=MODIFY_L2(ADJ,TESTMODULE,PHAT,AHAT,I, & J, L2NORM, NODES,CLUSTERS) !CALL DBLEPR("TEST MODIFY",-1,(TEST_CRIT-TRIAL_L2NORM),1) !CALL DBLEPR("TRIAL L2NORM",-1,TRIAL_L2NORM,1) ELSE !TEST_CRIT=CALC_LOGLIK(ADJ,TRIALMODULE,PHAT,AHAT,NODES,CLUSTERS) TRIAL_LOGLIK=MODIFY_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,I, & J, LOGLIK, NODES,CLUSTERS) !CALL DBLEPR("TEST MODIFY",-1,(TEST_CRIT-TRIAL_LOGLIK)/TEST_CRIT,1) !CALL DBLEPR("TRIAL LOGLIK",-1,TRIAL_LOGLIK,1) END IF IF(L2) THEN IF(TRIAL_L2NORMLOGLIK) THEN CLUSTER_SIZES(TESTMODULE(I))=CLUSTER_SIZES(TESTMODULE(I))-1 CLUSTER_SIZES(TRIALMODULE(I))=CLUSTER_SIZES(TRIALMODULE(I))+1 TESTMODULE(I)=TRIALMODULE(I) ASUM=ASUMTRIAL LOGLIK=TRIAL_LOGLIK NOT_CONVERGED=.TRUE. INNER_NOT_CONVERGED=.TRUE. ELSE TRIALMODULE(I)=TESTMODULE(I) ASUMTRIAL=ASUM END IF END IF END IF END DO END IF END DO END DO END SUBROUTINE UPDATE_CLUSTER_ASSIGNMENTS2 FUNCTION ACTUAL_ADJ(ADJ,I,J) IMPLICIT NONE INTEGER :: I,J,K,ACTUAL_ADJ,TEMP_ADJ INTEGER(KIND=2), DIMENSION(:,:) :: ADJ IF(I>J) THEN TEMP_ADJ=ADJ(I,J)+ADJ(J,I)*10000 ELSE IF(I0) THEN ! WORD_SUMS(TEMP)=WORD_SUMS(I) ! WORDPAIR_COUNT(TEMP,:)=WORDPAIR_COUNT(I,:) ! WORDPAIR_COUNT(:,TEMP)=WORDPAIR_COUNT(:,I) ! LIST(TEMP)=LIST(I) ! WORD_SUMS(I)=0 ! WORDPAIR_COUNT(I,:)=0 ! WORDPAIR_COUNT(:,I)=0 ! LIST(I)=" " ! END IF ! END DO ! MIN_LIST(TEMP)=WORDS ! IF(TEMP.EQ.WORDS) THEN ! NOT_DONE=.FALSE. ! END IF ! END DO END SUBROUTINE FILTER_DATA SUBROUTINE REORDER_ADJ(ADJ,NODES,ORDERING,NODES2) !THIS SUBROUTINE REORDERS THE ADJ MATRIX SO THAT THE HIGHEST ROW SUM IS FIRST. IT ALSO GIVES !THE VALUE NODES2 WHICH IS EQUAL TO THE NUMBER OF NODES THAT ARE CONNECTED TO AT LEAST ONE OTHER !NODE SO THAT THE NODES WITH NO CONNECTIONS MAY BE EXCLUDED. !CHECKED! IMPLICIT NONE INTEGER :: NODES,I,J,NODES2,TEMP,TEMP2 REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES) :: TEMP_VEC INTEGER, DIMENSION(NODES) :: ORDERING CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) DO I=1,NODES ORDERING(I)=I END DO NODES2=0 IF(MINVAL(PSUM(:)).EQ.0) THEN DO I=1,NODES TEMP=MAXLOC(PSUM,1) !SWAP COLUMNS TEMP_VEC=ADJ(:,I) ADJ(:,I)=ADJ(:,TEMP) ADJ(:,TEMP)=TEMP_VEC !SWAP ROWS TEMP_VEC=ADJ(I,:) ADJ(I,:)=ADJ(TEMP,:) ADJ(TEMP,:)=TEMP_VEC !UPDATE PSUM AND ORDERING VECTOR IF(PSUM(TEMP)>1E-10) THEN NODES2=NODES2+1 END IF PSUM(TEMP)=PSUM(I) PSUM(I)=-1 TEMP2=ORDERING(TEMP) ORDERING(TEMP)=ORDERING(I) ORDERING(I)=TEMP2 END DO END IF END SUBROUTINE REORDER_ADJ END MODULE MULTIGRAPH MODULE STRING_MANIPULATION ! ! This module contains routines for manipulating strings. ! CONTAINS ! FUNCTION POSITION_IN_ALPHABET(LETTER) ! ! This subroutine computes the position in the English alphabet of ! the given LETTER. ! IMPLICIT NONE CHARACTER(LEN=1) :: LETTER,APOSTROPHE = "'" INTEGER :: I,POSITION_IN_ALPHABET ! I = ICHAR(LETTER) IF (LETTER==APOSTROPHE) THEN POSITION_IN_ALPHABET = I ELSE IF (I>=ICHAR('a').AND.I<=ICHAR('z')) THEN POSITION_IN_ALPHABET = I ELSE IF (I>=ICHAR('A').AND.I<=ICHAR('Z')) THEN POSITION_IN_ALPHABET = I-ICHAR('A')+ICHAR('a') ELSE POSITION_IN_ALPHABET = 0 END IF END FUNCTION POSITION_IN_ALPHABET SUBROUTINE REMOVE_FORBIDDEN_CHARACTERS(PERMITTED,STRING) ! ! This subroutine blanks out all non-permitted characters. For ! example, one might want to remove all punctuation from STRING. ! IMPLICIT NONE CHARACTER(LEN=*) :: PERMITTED,STRING INTEGER :: I ! DO I = 1,LEN(STRING) IF (INDEX(PERMITTED,STRING(I:I))==0) THEN STRING(I:I) = " " END IF END DO END SUBROUTINE REMOVE_FORBIDDEN_CHARACTERS SUBROUTINE REMOVE_TRAILING_NUMBERS(STRING) !THIS SUBROUTINE REMOVES TRAILING NUMBERS FROM STRINGS SO THAT DISEASE 1 IS THE SAME AS DISEASE 2 !CHECKED! IMPLICIT NONE CHARACTER(LEN=*) :: STRING CHARACTER(LEN=100) :: PERMITTED INTEGER :: I,J,COUNTER LOGICAL :: FLAG PERMITTED = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" FLAG=.TRUE. COUNTER=LEN(STRING) DO WHILE (FLAG) FLAG=.FALSE. !!CALL INTPR("INDEX... ",-1,INDEX(TRIM(PERMITTED),STRING(COUNTER:COUNTER)),1) IF (INDEX(TRIM(PERMITTED),STRING(COUNTER:COUNTER)) == 0) THEN !!CALL INTPR("WORKED...",-1,1,0) STRING(COUNTER:COUNTER) = " " COUNTER=COUNTER-1 FLAG=.TRUE. END IF IF(COUNTER.EQ.0) THEN FLAG=.FALSE. END IF END DO IF((STRING(1:1).EQ."{").OR.(STRING(1:1).EQ."?")) THEN STRING(1:1)= " " END IF STRING=ADJUSTL(STRING) !Adding another ? here because some have 2 ?'s in the front... IF((STRING(1:1).EQ."[").OR.(STRING(1:1).EQ."?")) THEN STRING(1:1)= " " END IF STRING=ADJUSTL(STRING) END SUBROUTINE REMOVE_TRAILING_NUMBERS ! function upcase(string) result(upper) ! character(len=*), intent(in) :: string ! character(len=len(string)) :: upper ! integer :: j ! do j = 1,len(string) ! if(string(j:j) >= "a" .and. string(j:j) <= "z") then ! upper(j:j) = achar(iachar(string(j:j)) - 32) ! else ! upper(j:j) = string(j:j) ! end if ! end do ! end function upcase SUBROUTINE FIX_NAME(STRING) !THIS SUBROUTINE REMOVES TRAILING NUMBERS FROM STRINGS SO THAT DISEASE 1 IS THE SAME AS DISEASE 2 !CHECKED! IMPLICIT NONE CHARACTER(LEN=*) :: STRING CHARACTER(LEN=50) :: TEMPSTRING INTEGER :: I,J,COUNTER,SPACEVAL LOGICAL :: FLAG DO I=1,LEN(STRING) IF((STRING(I:I).EQ.'"').OR.(STRING(I:I).EQ."'")) THEN STRING(I:I) = " " END IF END DO FLAG=.TRUE. COUNTER=0 SPACEVAL=ICHAR(' ') DO WHILE(FLAG) FLAG=.FALSE. COUNTER=COUNTER+1 !THIS CHECKS IF THE CURRENT CHARACTER IS A SPACE OR TAB. TABS HAVE AN ICHAR OF 9 IF((ICHAR(STRING(COUNTER:COUNTER)).EQ.SPACEVAL) & .OR.(ICHAR(STRING(COUNTER:COUNTER)).EQ.9)) THEN TEMPSTRING=STRING(COUNTER+1:LEN(STRING)) STRING=TRIM(TEMPSTRING) FLAG=.TRUE. END IF IF(COUNTER.GE.LEN(STRING)) THEN FLAG=.FALSE. END IF END DO STRING=ADJUSTL(STRING) END SUBROUTINE FIX_NAME SUBROUTINE REMOVE_APOSTROPHES(STRING) ! ! This subroutine deletes apostrophe signs preceded by a blank ! or followed by a blank or a lower case d. ! IMPLICIT NONE CHARACTER(LEN=*) :: STRING INTEGER :: I,J ! DO I = 1,LEN(STRING) SELECT CASE(ICHAR(STRING(I:I))) CASE(ICHAR("'")) IF (I==1) THEN STRING(I:I) = " " END IF IF (I==LEN(STRING)) THEN STRING(I:I) = " " END IF IF (I>1) THEN J = POSITION_IN_ALPHABET(STRING(I-1:I-1)) IF (JICHAR('z')) THEN STRING(I:I) = " " END IF END IF IF (IICHAR('z')) THEN STRING(I:I) = " " END IF END IF END SELECT END DO END SUBROUTINE REMOVE_APOSTROPHES SUBROUTINE REPLACE_STRING(STRING,SUBSTITUTE,TARG) ! ! This subroutine replaces TARG with SUBSTITUTE in STRING. ! IMPLICIT NONE CHARACTER(LEN=*) :: STRING,SUBSTITUTE,TARG CHARACTER(LEN=LEN(STRING)) :: RIGHT INTEGER :: I,J ! J = 1 DO I = INDEX(STRING(J:),TARG) IF (I==0) RETURN I = I+J-1 J = I+LEN(SUBSTITUTE) RIGHT = STRING(I+LEN(TARG):) STRING(I:I+LEN(SUBSTITUTE)-1) = SUBSTITUTE STRING(I+LEN(SUBSTITUTE):) = RIGHT END DO END SUBROUTINE REPLACE_STRING SUBROUTINE PROCESS_LINE(LINE) ! ! This subroutine processes a line, deleting extraneous characters ! and replacing abbreviations whenever possible. ! IMPLICIT NONE CHARACTER(LEN=*) :: LINE CHARACTER(LEN=60) :: PERMITTED INTEGER :: I,J ! ! Remove all forbidden characters. ! PERMITTED = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.!?:;-' " CALL REMOVE_FORBIDDEN_CHARACTERS(PERMITTED,LINE) ! ! There are many unusual abbreviations in Shakespeare. ! CALL REPLACE_STRING(LINE,"est","'st") CALL REPLACE_STRING(LINE,"to it","to't") CALL REPLACE_STRING(LINE,"taken","ta'en") CALL REPLACE_STRING(LINE,"the ","th' ") CALL REPLACE_STRING(LINE," it","'t") CALL REPLACE_STRING(LINE," on "," o' ") CALL REPLACE_STRING(LINE," in "," i' ") CALL REPLACE_STRING(LINE,"tis","'tis") CALL REPLACE_STRING(LINE," in "," 'n") CALL REPLACE_STRING(LINE,"eve","e'e") CALL REPLACE_STRING(LINE,"er","'r") CALL REPLACE_STRING(LINE,"en","'n") CALL REPLACE_STRING(LINE,"over","o'er") ! CALL REPLACE_STRING(LINE,"ed","'d") !added this ! ! Remove the remaining extraneous apostrophes. ! CALL REMOVE_APOSTROPHES(LINE) !removed this AND REPLACED WITH TWO LINES BELOW ! PERMITTED = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.!?:;- " ! CALL REMOVE_FORBIDDEN_CHARACTERS(PERMITTED,LINE) END SUBROUTINE PROCESS_LINE SUBROUTINE SORT_STRINGS(LIST) ! ! This subroutine performs a heap sort on a list of strings. See: ! Nijenhuis A and Wilf HS (1978) "Combinatorial Algorithms for ! Computers and Calculators, 2nd ed", Chapter 15, Academic Press. ! IMPLICIT NONE CHARACTER(LEN=*), DIMENSION(:), INTENT(INOUT) :: LIST INTEGER :: I,J,K,L,N CHARACTER(LEN=LEN(LIST(1))) :: TEMP ! N = SIZE(LIST) IF (N<=1) RETURN L = 1+N/2 K = N DO IF (L>1) THEN L = L-1 TEMP = LIST(L) ELSE TEMP = LIST(K) LIST(K) = LIST(1) K = K-1 IF (K<=1) THEN LIST(1) = TEMP RETURN END IF END IF I = L J = L+L DO WHILE (J<=K) IF (J0) THEN K = K+1 WORD(K:K) = CHAR(J) END IF ! ! Add the current completed word to the list. ! IF (J==0.OR.I==LEN(LINE)) THEN IF (HYPHEN_ACTIVE) THEN LIST(WORDS) = TRIM(LIST(WORDS))//WORD(1:K) HYPHEN_ACTIVE = .FALSE. ELSE IF (K>0) THEN WORDS = WORDS+1 IF (WORDS>SIZE(LIST)) THEN ERROR = .TRUE. RETURN END IF LIST(WORDS) = WORD(1:K) !adds the word to the list END IF K = 0 END IF END DO ! ! Check if the last nonblank character is a hyphen. ! K = LEN_TRIM(LINE) IF (K>0) THEN HYPHEN_ACTIVE = LINE(K:K)=="-" ELSE HYPHEN_ACTIVE = .FALSE. END IF END SUBROUTINE EXTEND_WORD_LIST SUBROUTINE UPDATE_WORD_PAIR_COUNTS(LIST,LINE,WORD1,WORD2,WORDPAIR_COUNT, & WORDS,ERROR) ! ! This subroutine extracts word pairs from the current LINE and ! updates the word pair count for each pair encountered. ! IMPLICIT NONE CHARACTER(LEN=800) :: LINE CHARACTER(LEN=100) :: WORD1,WORD2 INTEGER :: A,B,ERROR_POSITION,I,J,K,LAST,WORDS LOGICAL :: ERROR CHARACTER(LEN=*), DIMENSION(WORDS) :: LIST INTEGER, DIMENSION(WORDS,WORDS)::WORDPAIR_COUNT ! ! Process the line character by character. K is the length of ! the current second word. ! K = LEN_TRIM(WORD2) ERROR = .FALSE. LAST = LEN_TRIM(LINE) !!CALL INTPR("BEGINNING DO LOOP ", -1,1,0) DO I = 1,LAST ! ! Extend the current second word. ! !!CALL INTPR("POS IN ALPH ",-1,1,0) J = POSITION_IN_ALPHABET(LINE(I:I)) !!CALL INTPR("POS IN ALPH 2",-1,1,0) IF (J>0) THEN K = K+1 WORD2(K:K) = CHAR(J) END IF !!CALL INTPR("POS IN ALPH 2.1",-1,1,0) ! ! If the last nonblank character of the line is a hyphen, then exit. ! IF (I==LAST.AND.LINE(I:I)=="-") RETURN !!CALL INTPR("POS IN ALPH 3",-1,1,0) ! ! Otherwise, if the current character is not a letter, then the second ! word has ended. ! IF (J==0.OR.I==LAST) THEN !!CALL INTPR("POS IN ALPH 4",-1,1,0) ! ! Update the count for the current pair of words. ! IF (WORD2/=" ") THEN !!CALL INTPR("WORD 2 OKAY ",-1,1,0) IF (WORD1/=" ") THEN !!CALL INTPR("LOOKING FOR WORDS ",-1,1,0) A = BISECT_STRING_LIST(LIST(1:WORDS),WORD1) B = BISECT_STRING_LIST(LIST(1:WORDS),WORD2) IF (A*B>0) THEN WORDPAIR_COUNT(A,B) = WORDPAIR_COUNT(A,B)+1 !!CALL INTPR("MID DO LOOP ",-1,1,0) ELSE ERROR = .TRUE. !PRINT*," MATCH ERROR"," WORD1 =",WORD1," WORD2 =",WORD2 !!CALL INTPR("MATCH ERROR COULD NOT FIND BOTH WORDS",-1,1,0) !stop commented out for R !STOP END IF END IF ! ! Copy the second word into the first word and reset the position in the ! second word. ! WORD1 = " " WORD1 = TRIM(WORD2) WORD2 = " " K = 0 END IF !!CALL INTPR("END IF ",-1,1,0) ! ! Check if the current character is a punctuation mark. If so, reinitialize ! both words. ! SELECT CASE(ICHAR(LINE(I:I))) CASE(ICHAR("."),ICHAR("?"),ICHAR("!"),ICHAR(":"),ICHAR(";"),ICHAR(",")) WORD1 = " " WORD2 = " " K = 0 END SELECT !!CALL INTPR("END SELECT ",-1,1,0) END IF END DO END SUBROUTINE UPDATE_WORD_PAIR_COUNTS SUBROUTINE UPDATE_ADJ_COUNTS(LIST,WORD1,WORD2,WORDPAIR_COUNT, & WORDS,ERROR) ! ! This subroutine extracts word pairs from the current LINE and ! updates the word pair count for each pair encountered. ! IMPLICIT NONE CHARACTER(LEN=100) :: WORD1,WORD2 INTEGER :: A,B,ERROR_POSITION,I,J,WORDS LOGICAL :: ERROR CHARACTER(LEN=*), DIMENSION(WORDS) :: LIST INTEGER, DIMENSION(WORDS,WORDS)::WORDPAIR_COUNT ! ! Update the count for the current pair of words. ! IF (WORD2/=" ") THEN !!CALL INTPR("WORD 2 OKAY ",-1,1,0) IF (WORD1/=" ") THEN !!CALL INTPR("LOOKING FOR WORDS ",-1,1,0) A = BISECT_STRING_LIST(LIST(1:WORDS),WORD1) B = BISECT_STRING_LIST(LIST(1:WORDS),WORD2) IF (A*B>0) THEN WORDPAIR_COUNT(A,B) = WORDPAIR_COUNT(A,B)+1 WORDPAIR_COUNT(B,A) = WORDPAIR_COUNT(B,A)+1 !!CALL INTPR("MID DO LOOP ",-1,1,0) ELSE ERROR = .TRUE. !PRINT*," MATCH ERROR"," WORD1 =",WORD1," WORD2 =",WORD2 !!CALL INTPR("MATCH ERROR COULD NOT FIND BOTH WORDS",-1,1,0) !stop commented out for R !STOP END IF END IF ! ! Copy the second word into the first word and reset the position in the ! second word. ! WORD1 = " " WORD2 = " " END IF END SUBROUTINE UPDATE_ADJ_COUNTS SUBROUTINE COUNT_LETTERPAIRS(LINE,WORD,LETTERPAIR_COUNT) ! ! Count the letter pairs within the words in LINE. WORD is the current ! partial word. ! IMPLICIT NONE CHARACTER(LEN=*) :: LINE,WORD INTEGER :: I,J,K,L,LAST,N INTEGER, DIMENSION(:,:) :: LETTERPAIR_COUNT ! ! Process the line character by character. N is the length of ! the current word. ! N = LEN_TRIM(WORD) LAST = LEN_TRIM(LINE) DO I = 1,LAST ! ! Extend the current word. ! J = POSITION_IN_ALPHABET(LINE(I:I)) IF (J>0) THEN N = N+1 WORD(N:N) = CHAR(J) END IF ! ! If the last nonblank character of the line is a hyphen, then exit with ! a partial word. ! IF (I==LAST.AND.LINE(I:I)=="-") RETURN ! ! Otherwise, if the current character is not a letter, then the current ! word has ended. ! IF (J==0.OR.I==LAST) THEN ! ! Update the letter pair counts and reinitialize the current word. ! DO L = 1,N-1 J = POSITION_IN_ALPHABET(WORD(L:L)) IF (J==ICHAR("'")) THEN J = 27 ELSE J = J-ICHAR('a')+1 END IF K = POSITION_IN_ALPHABET(WORD(L+1:L+1)) IF (K==ICHAR("'")) THEN K = 27 ELSE K = K-ICHAR('a')+1 END IF LETTERPAIR_COUNT(J,K) = LETTERPAIR_COUNT(J,K)+1 END DO N = 0 WORD = " " END IF END DO END SUBROUTINE COUNT_LETTERPAIRS ! END MODULE STRING_MANIPULATION !END MODULES SUBROUTINE propensitydecomposition(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,L2NORM,& NODES,CLUSTERS,L2I) USE MULTIGRAPH USE CONSTANTS USE TOOLS USE STRING_MANIPULATION IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L,CLUSTER_ITERATIONS=0,L2I INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM REAL(KIND=8) ::L2NORM,LOGLIK,FACTORIZABILITY,SUM_SQUARES,MEAN LOGICAL :: NOT_CONVERGED,L2 !!CALL INTPR("WORKING...",-1,1,0) IF(L2I>0) THEN L2=.TRUE. ELSE L2=.FALSE. END IF DO I=1,NODES ADJ(I,I)=0 END DO !INITIALIZING ALL THE PARAMETERS AND SUMS !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !!CALL DBLEPR("PHAT",-1,PHAT,NODES) !INITIALIZING SUMS CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) END IF !ENDING PARAMETER INITIALIZATION !BEGINNING POISSON/L2 PARAMETER UPDATES CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE propensitydecomposition SUBROUTINE propensityclustering(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,L2NORM,NODES,& CLUSTERS,L2I,INITBOOL) ! This program implements the poisson multigraph with clustering algorithm for R ! ! INPUT ! Here, the ADJ[Nodes,Nodes] (adjacency) matrix is given with values in (0,1) with zero diagonals ! Also CLUSTERS which is an integer giving the maximum number of clusters ! ! OUTPUT ! The output is TESTMODULE[Nodes] which is an integer vector in [1,#Clusters] ! Another output is PHAT[Nodes] which is the real valued estimated propensity for each node ! Also, AHAT[Clusters,Clusters] which is a real valued symmetric ! matrix giving the intercluster adjacency with 1 diagonals ! ! MAIN IDEA ! ADJ[I,J]=AHAT[TESTMODULE(I),TESTMODULE(J)]*PHAT(I)*PHAT(J) !CONTAINS USE MULTIGRAPH USE CONSTANTS USE TOOLS USE STRING_MANIPULATION IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L,CLUSTER_ITERATIONS=0,L2I,INITBOOL,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE !INTEGER, DIMENSION(NODES) :: TEMP REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM !REAL, DIMENSION(2) :: TIMEARRAYD,TIMEARRAYE REAL(KIND=8) ::L2NORM,FACTORIZABILITY,LOGLIK,SUM_SQUARES,MEAN !REAL :: TOTALTIME LOGICAL :: NOT_CONVERGED,L2,QNEWT=.FALSE.,UPHILL=.TRUE. !!CALL INTPR("INITIALIZING PARAMETERS",-1,1,0) DO I=1,NODES ADJ(I,I)=0 END DO IF(L2I>0) THEN L2=.TRUE. ELSE L2=.FALSE. END IF !INITIALIZING PSUM CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) !INITIALIZING CLUSTERS IF(INITBOOL.GE.5) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !!CALL INTPR("CLUSTERS QUICK-INITIALIZED",-1,1,0) ELSE IF(INITBOOL.GE.1) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !!CALL INTPR("CLUSTERS INITIALIZED",-1,1,0) ELSE !!CALL INTPR("USED GIVEN CLUSTER ASSIGNMENTS",-1,1,0) END IF !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) END IF !ENDING PARAMETER INITIALIZATION !BEGINNING POISSON/L2 PARAMETER UPDATES QNEWT=.FALSE. QSEC=5 MAP_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2)+NODES CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) !!CALL INTPR("PARAMETERS INITIALIZED",-1,1,0) !BEGINNING MAIN CLUSTER UPDATE LOOP NOT_CONVERGED = .TRUE. CLUSTER_ITERATIONS=0 DO WHILE (NOT_CONVERGED) !!CALL INTPR("CLUSTER ITERATION",-1,CLUSTER_ITERATIONS,1) CLUSTER_ITERATIONS=CLUSTER_ITERATIONS+1 NOT_CONVERGED = .FALSE. CALL UPDATE_CLUSTER_ASSIGNMENTS(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS, & PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) IF(CLUSTER_ITERATIONS.GE.MIN(NODES,40)) THEN NOT_CONVERGED = .FALSE. !!CALL INTPR("MAX ITERATION REACHED...RERUN USING CURRENT CLUSTERING FOR BETTER RESULTS",& ! -1,1,0) END IF END DO FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE propensityclustering SUBROUTINE propdecompaccel(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,CRITERIA,& NODES,CLUSTERS,L2I) USE MULTIGRAPH USE CONSTANTS USE TOOLS USE STRING_MANIPULATION IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L2I,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM REAL(KIND=8) ::L2NORM,LOGLIK,FACTORIZABILITY,SUM_SQUARES,CRITERIA,MEAN LOGICAL :: L2,UPHILL !!CALL INTPR("WORKING...",-1,1,0) IF(L2I>0) THEN L2=.TRUE. ELSE L2=.FALSE. END IF DO I=1,NODES ADJ(I,I)=0 END DO !INITIALIZING ALL THE PARAMETERS AND SUMS !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !INITIALIZING PSUM CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) LOGLIK=1. ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) L2NORM=1. END IF !ENDING PARAMETER INITIALIZATION UPHILL=.FALSE. QSEC=5 MAP_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2)+NODES !BEGINNING POISSON/L2 PARAMETER UPDATES CALL UPDATE_PARAMETERS_QNEWTN(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& L2NORM,LOGLIK,QSEC,UPHILL,MAP_LENGTH) FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) IF(L2) THEN CRITERIA=L2NORM !!CALL DBLEPR("L2 GIVEN",-1,L2NORM,0) ELSE CRITERIA=LOGLIK !!CALL DBLEPR("LOGLIK GIVEN",-1,LOGLIK,0) END IF DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE propdecompaccel SUBROUTINE propdecompaccelparallel(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,CRITERIA,& NODES,CLUSTERS,L2I) USE MULTIGRAPH USE CONSTANTS USE TOOLS USE STRING_MANIPULATION IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L2I,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM REAL(KIND=8) ::L2NORM,LOGLIK,FACTORIZABILITY,SUM_SQUARES,CRITERIA,MEAN LOGICAL :: L2,UPHILL !!CALL INTPR("WORKING...",-1,1,0) IF(L2I>0) THEN L2=.TRUE. ELSE L2=.FALSE. END IF DO I=1,NODES ADJ(I,I)=0 END DO !INITIALIZING ALL THE PARAMETERS AND SUMS !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !INITIALIZING PSUM CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) LOGLIK=1. ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) L2NORM=1. END IF !ENDING PARAMETER INITIALIZATION UPHILL=.FALSE. QSEC=5 MAP_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2)+NODES !BEGINNING POISSON/L2 PARAMETER UPDATES CALL UPDATE_PARAMETERS_QNEWTN_PARALLEL(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& L2NORM,LOGLIK,QSEC,UPHILL,MAP_LENGTH) FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) IF(L2) THEN CRITERIA=L2NORM !!CALL DBLEPR("L2 GIVEN",-1,L2NORM,0) ELSE CRITERIA=LOGLIK !!CALL DBLEPR("LOGLIK GIVEN",-1,LOGLIK,0) END IF DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE propdecompaccelparallel SUBROUTINE propclusttrial(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,CRITERIA,NODES,& CLUSTERS,L2I,INITBOOL) ! This program is similar to the above with the notable difference that it calculates ! parameters less often and thus should be faster. It calculates parameters once ! and assumes they are correct while reclustering nodes. Once no more nodes are reclustered ! it repeats the parameter calculation and reclustering until no nodes are moved. ! This program implements the poisson multigraph with clustering algorithm for R ! ! INPUT ! Here, the ADJ[Nodes,Nodes] (adjacency) matrix is given with values in (0,1) with zero diagonals ! Also CLUSTERS which is an integer giving the maximum number of clusters ! ! OUTPUT ! The output is TESTMODULE[Nodes] which is an integer vector in [1,#Clusters] ! Another output is PHAT[Nodes] which is the real valued estimated propensity for each node ! Also, AHAT[Clusters,Clusters] which is a real valued symmetric ! matrix giving the intercluster adjacency with 1 diagonals ! ! MAIN IDEA ! ADJ[I,J]=AHAT[TESTMODULE(I),TESTMODULE(J)]*PHAT(I)*PHAT(J) !CONTAINS USE MULTIGRAPH USE CONSTANTS USE TOOLS USE STRING_MANIPULATION IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L,CLUSTER_ITERATIONS=0,L2I,INITBOOL,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE !INTEGER, DIMENSION(NODES) :: TEMP REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM !REAL, DIMENSION(2) :: TIMEARRAYD,TIMEARRAYE REAL(KIND=8) ::L2NORM,FACTORIZABILITY,LOGLIK,SUM_SQUARES,CRITERIA,MEAN !REAL :: TOTALTIME LOGICAL :: NOT_CONVERGED,L2,QNEWT=.FALSE.,UPHILL=.TRUE. !!CALL INTPR("INITIALIZING PARAMETERS",-1,1,0) DO I=1,NODES ADJ(I,I)=0 END DO IF(L2I>0) THEN L2=.TRUE. ELSE L2=.FALSE. END IF !!CALL INTPR("INITIALIZED PARAMETERS",-1,1,0) !INITIALIZING PSUM CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) !!CALL INTPR("INITIALIZED PSUM",-1,1,0) !INITIALIZING CLUSTERS IF(INITBOOL.GE.100) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER2(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL INTPR("CLUSTERS QUICK2 INITIALIZED",-1,1,0) CALL QUICK_CLUSTER2(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ELSE IF(INITBOOL.GE.2) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL INTPR("CLUSTERS K-MEDIOID INITIALIZED",-1,1,0) CALL K_MEDIOIDS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ELSE IF(INITBOOL.GE.1) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL INTPR("CLUSTERS QUICK1 INITIALIZED",-1,1,0) CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ELSE CALL INTPR("USED GIVEN CLUSTER ASSIGNMENTS",-1,1,0) END IF !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) END IF !ENDING PARAMETER INITIALIZATION !BEGINNING POISSON/L2 PARAMETER UPDATES QNEWT=.TRUE. QSEC=5 MAP_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2)+NODES CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) !!CALL INTPR("PARAMETERS INITIALIZED",-1,1,0) !BEGINNING MAIN CLUSTER UPDATE LOOP NOT_CONVERGED = .TRUE. CLUSTER_ITERATIONS=1 DO WHILE (NOT_CONVERGED) !!CALL INTPR("CLUSTER ITERATION",-1,CLUSTER_ITERATIONS,1) !PRINT*, "CLUSTER ITERATION NUMBER: ", CLUSTER_ITERATIONS CLUSTER_ITERATIONS=CLUSTER_ITERATIONS+1 NOT_CONVERGED = .FALSE. CALL UPDATE_CLUSTER_ASSIGNMENTS2(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS, & PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) !CALL UPDATE_PARAMETERS_QNEWTN(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,& ! L2NORM,LOGLIK,QSEC,UPHILL,MAP_LENGTH) IF(CLUSTER_ITERATIONS.GE.500) THEN NOT_CONVERGED = .FALSE. CALL INTPR("MAX ITERATION REACHED...RERUN USING CURRENT CLUSTERING FOR BETTER RESULTS",& -1,1,0) END IF IF(PHAT(1)+1.EQ.PHAT(1)) THEN NOT_CONVERGED=.FALSE. CALL INTPR("SOMETHING WENT WRONG...NON-REAL RESULTS...",-1,1,0) END IF END DO CALL INTPR("ITERATIONS: ",-1, CLUSTER_ITERATIONS,1) FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) IF(L2) THEN CRITERIA=L2NORM !!CALL DBLEPR("L2 GIVEN",-1,L2NORM,0) ELSE CRITERIA=LOGLIK !!CALL DBLEPR("LOGLIK GIVEN",-1,LOGLIK,0) END IF DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE propclusttrial SUBROUTINE propclustaccel(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,CRITERIA,NODES,& CLUSTERS,L2I,INITBOOL) ! This program implements the poisson multigraph with clustering algorithm for R ! ! INPUT ! Here, the ADJ[Nodes,Nodes] (adjacency) matrix is given with values in (0,1) with zero diagonals ! Also CLUSTERS which is an integer giving the maximum number of clusters ! ! OUTPUT ! The output is TESTMODULE[Nodes] which is an integer vector in [1,#Clusters] ! Another output is PHAT[Nodes] which is the real valued estimated propensity for each node ! Also, AHAT[Clusters,Clusters] which is a real valued symmetric ! matrix giving the intercluster adjacency with 1 diagonals ! ! MAIN IDEA ! ADJ[I,J]=AHAT[TESTMODULE(I),TESTMODULE(J)]*PHAT(I)*PHAT(J) !CONTAINS USE MULTIGRAPH USE CONSTANTS USE TOOLS USE STRING_MANIPULATION IMPLICIT NONE INTEGER :: NODES,CLUSTERS,I,J,K,L,CLUSTER_ITERATIONS=0,L2I,INITBOOL,QSEC,MAP_LENGTH INTEGER, DIMENSION(NODES) :: TESTMODULE !INTEGER, DIMENSION(NODES) :: TEMP REAL(KIND=8), DIMENSION(NODES) :: PSUM REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: ASUM !REAL, DIMENSION(2) :: TIMEARRAYD,TIMEARRAYE REAL(KIND=8) ::L2NORM,FACTORIZABILITY,LOGLIK,SUM_SQUARES,CRITERIA,MEAN !REAL :: TOTALTIME LOGICAL :: NOT_CONVERGED,L2,QNEWT=.FALSE.,UPHILL=.TRUE. !!CALL INTPR("INITIALIZING PARAMETERS",-1,1,0) DO I=1,NODES ADJ(I,I)=0 END DO IF(L2I>0) THEN L2=.TRUE. ELSE L2=.FALSE. END IF !!CALL INTPR("INITIALIZED PARAMETERS",-1,1,0) !INITIALIZING PSUM CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) !!CALL INTPR("INITIALIZED PSUM",-1,1,0) !INITIALIZING CLUSTERS IF(INITBOOL.GE.100) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER2(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL INTPR("CLUSTERS QUICK2 INITIALIZED",-1,1,0) CALL QUICK_CLUSTER2(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ELSE IF(INITBOOL.GE.5) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL INTPR("CLUSTERS K-MEDIOID INITIALIZED",-1,1,0) CALL K_MEDIOIDS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ELSE IF(INITBOOL.GE.2) THEN CALL INITIALIZE_CLUSTERS(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) !CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) CALL INTPR("CLUSTERS QUICK1(OLD) INITIALIZED",-1,1,0) CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ELSE IF(INITBOOL.GE.1) THEN CALL QUICK_CLUSTER_TRIAL(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM,L2) CALL INTPR("CLUSTERS QUICK INITIALIZED",-1,1,0) ELSE CALL INTPR("USED GIVEN CLUSTER ASSIGNMENTS",-1,1,0) END IF !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) !INITIALIZING THE NORM OR LOGLIKELIHOOD IF(L2) THEN L2NORM=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) ELSE LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) END IF !ENDING PARAMETER INITIALIZATION !BEGINNING POISSON/L2 PARAMETER UPDATES QNEWT=.TRUE. QSEC=5 MAP_LENGTH=INT(CLUSTERS*(CLUSTERS-1)/2)+NODES CALL UPDATE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2,PSUM,ASUM,L2NORM,LOGLIK) !!CALL INTPR("PARAMETERS INITIALIZED",-1,1,0) !BEGINNING MAIN CLUSTER UPDATE LOOP NOT_CONVERGED = .TRUE. CLUSTER_ITERATIONS=1 DO WHILE (NOT_CONVERGED) CALL INTPR("CLUSTER ITERATION",-1,CLUSTER_ITERATIONS,1) !PRINT*, "CLUSTER ITERATION NUMBER: ", CLUSTER_ITERATIONS CLUSTER_ITERATIONS=CLUSTER_ITERATIONS+1 NOT_CONVERGED = .FALSE. CALL UPDATE_CLUSTER_ASSIGNMENTS(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS, & PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) IF(PHAT(1)+1.EQ.PHAT(1)) THEN NOT_CONVERGED=.FALSE. !!CALL INTPR("SOMETHING WENT WRONG...NON-REAL RESULTS...",-1,1,0) END IF IF(CLUSTER_ITERATIONS.GE.20) THEN NOT_CONVERGED = .FALSE. CALL INTPR("MAX ITERATION REACHED...RERUN USING CURRENT CLUSTERING FOR BETTER RESULTS",& -1,1,0) END IF END DO ! !!!!TRIAL STUFF BEGIN...----------------- ! CALL QUICK_CLUSTER(ADJ,TESTMODULE,CLUSTERS,NODES,PSUM) ! !!!!BEGINNING MAIN CLUSTER UPDATE LOOP ! NOT_CONVERGED = .TRUE. ! CLUSTER_ITERATIONS=0 ! DO WHILE (NOT_CONVERGED) ! !CALL INTPR("CLUSTER ITERATION",-1,CLUSTER_ITERATIONS,1) ! CLUSTER_ITERATIONS=CLUSTER_ITERATIONS+1 ! NOT_CONVERGED = .FALSE. ! CALL UPDATE_CLUSTER_ASSIGNMENTS(ADJ,TESTMODULE,PHAT,AHAT,NOT_CONVERGED,NODES,CLUSTERS, & ! PSUM,ASUM,L2NORM,LOGLIK,L2,QNEWT,QSEC,UPHILL,MAP_LENGTH) ! IF(CLUSTER_ITERATIONS.GE.MIN(NODES,40)) THEN ! NOT_CONVERGED = .FALSE. ! !CALL INTPR("MAX ITERATION REACHED...DID NOT CONVERGE YET...RERUN USING CURRENT & ! CLUSTERING FOR BETTER RESULTS.",-1,1,0) ! END IF ! END DO ! !!!!!TRIAL STUFF END....---------------- FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) IF(L2) THEN CRITERIA=L2NORM !!CALL DBLEPR("L2 GIVEN",-1,L2NORM,0) ELSE CRITERIA=LOGLIK !!CALL DBLEPR("LOGLIK GIVEN",-1,LOGLIK,0) END IF DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE propclustaccel ! !This analysis is commented out to appease R... ! ! ! SUBROUTINE countwords(TEXT_FILE,WORDS) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! ! IMPLICIT NONE ! CHARACTER(LEN=25) :: WORD1,WORD2 ! CHARACTER(LEN=800) :: TEXT_FILE,LINE ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT = 2,J,LINES,WORDS ! LOGICAL :: ERROR,HYPHEN_ACTIVE ! CHARACTER(LEN=25), DIMENSION(50000) :: LIST ! !! !! Set the number of words, the hyphen alert, and the first line. !! ! WORDS = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! !! TEXT_FILE = "7plays.txt" ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of words. !! ! ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! CALL PROCESS_LINE(LINE) ! CALL EXTEND_WORD_LIST(LINE,LIST,WORDS,ERROR,HYPHEN_ACTIVE) !! !! Every 100 lines sort and purge the list of words. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LIST(1:WORDS)) ! CALL PURGE_STRINGS(LIST(1:WORDS),WORDS) ! END IF ! END IF ! END DO ! !! !! Print the alphabetized list. !! ! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTemp.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique words in file = ",WORDS ! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,WORDS ! WRITE(OUTPUT_UNIT,'(1X,A)') LIST(I) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! CLOSE(INPUT_UNIT) ! ! !!CALL INTPR("FINISHED COUNTING WORDS...",-1,1,0) ! ! END SUBROUTINE countwords ! ! !(ADJ,TESTMODULE,PHAT,AHAT,FACTORIZABILITY,L2NORM,NODES,CLUSTERS,L2I,INITBOOL) ! ! SUBROUTINE wordpairclusters(TEXT_FILE,WORDPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK, & ! WORDS,CLUSTERS) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! USE MULTIGRAPH !! ! IMPLICIT NONE ! CHARACTER(LEN=100) :: WORD1,WORD2 ! CHARACTER(LEN=800) :: TEXT_FILE,LINE ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT=2,J,LINES,WORDS,CLUSTERS,WORDSMOD ! LOGICAL :: ERROR,HYPHEN_ACTIVE ! !CHARACTER(LEN=1), DIMENSION(27) :: ALPHABET ! CHARACTER(LEN=25), DIMENSION(50000) :: LIST ! INTEGER, DIMENSION(WORDS,WORDS) :: WORDPAIR_COUNT ! REAL, ALLOCATABLE, DIMENSION(:,:) :: MODWORDPAIR_COUNT ! INTEGER, DIMENSION(WORDS) :: ORDER_LIST ! INTEGER, DIMENSION(WORDS) :: TESTMODULE ! REAL(KIND=8), DIMENSION(WORDS) :: PHAT ! INTEGER, ALLOCATABLE, DIMENSION(:) :: MODTESTMODULE ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:) :: MODPHAT ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT ! !REAL(KIND=DBLE), ALLOCATABLE, DIMENSION(:) :: INCOMING_PROPENSITY,OUTGOING_PROPENSITY ! REAL :: ETIME, TOTAL ! REAL, DIMENSION(2) :: ELAPSED ! REAL(KIND=8) :: LOGLIK,FACTORIZABILITY=0. ! ! !!CALL INTPR("STARTING WORD PAIR CLUSTERS",-1,1,0) ! !! !! Set the number of words, the hyphen alert, and the first line. !! ! WORDS = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! !! TEXT_FILE = "7plays.txt" ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of words. !! ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! CALL PROCESS_LINE(LINE) ! CALL EXTEND_WORD_LIST(LINE,LIST,WORDS,ERROR,HYPHEN_ACTIVE) !! !! Every 100 lines sort and purge the list of words. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LIST(1:WORDS)) ! CALL PURGE_STRINGS(LIST(1:WORDS),WORDS) ! END IF ! END IF ! END DO !! !! Set the dimensions of the word-pair count matrix. !! ! !!CALL INTPR("MID WORD PAIR CLUSTERS...1.0",-1,1,0) ! !ALLOCATE(WORDPAIR_COUNT(WORDS,WORDS)) ! WORDPAIR_COUNT = 0 ! WORD1 = " " ! WORD2 = " " !! !! Read in text line by line and update the word-pair count matrix. !! ! !!CALL INTPR("MID WORD PAIR CLUSTERS...1.1",-1,1,0) ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! IF (LEN(LINE).GE.1) THEN ! CALL PROCESS_LINE(LINE) ! !!CALL INTPR("MID",-1,J,1) ! CALL UPDATE_WORD_PAIR_COUNTS(LIST,LINE,WORD1,WORD2,WORDPAIR_COUNT, & ! WORDS,ERROR) ! END IF ! END DO ! !!CALL INTPR("MID WORD PAIR CLUSTERS...1.2",-1,1,0) !! ! CLOSE(INPUT_UNIT) ! !! !! Print the WORD PAIR COUNTS !! !! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTempCounts.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique words in file = ",WORDS !! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,WORDS !! WRITE(OUTPUT_UNIT,'(1X,A)') LIST(I) !! END DO !! WRITE(OUTPUT_UNIT,'(/,A,/)') "WORD PAIR COUNTS" !! DO I=1,WORDS !! WRITE(OUTPUT_UNIT,'(A,A)') (WORDPAIR_COUNT(J,I),J=1,WORDS) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! ! !!CALL INTPR("FILTERING DATA...",-1,1,0) ! ! WORDSMOD=WORDS ! CALL FILTER_DATA(WORDPAIR_COUNT,LIST,WORDS,WORDSMOD,ORDER_LIST) ! ! !!CALL INTPR("NUMBER OF WORDS",-1,WORDS,1) ! WORDSMOD=FLOOR(WORDS/20.) ! ! !!CALL INTPR("NUMBER OF FILTERED WORDS",-1,WORDSMOD,1) ! ALLOCATE(MODPHAT(WORDSMOD),MODTESTMODULE(WORDSMOD),MODWORDPAIR_COUNT(WORDSMOD,WORDSMOD)) ! DO I=1,WORDSMOD ! DO J=1,WORDSMOD ! MODWORDPAIR_COUNT(J,I)=WORDPAIR_COUNT(ORDER_LIST(J),ORDER_LIST(I)) ! END DO ! END DO ! PHAT=0. ! MODPHAT=1. ! AHAT=1. ! TESTMODULE=0. ! MODTESTMODULE=1 ! LOGLIK=0. ! FACTORIZABILITY=0. ! ! !!CALL INTPR("STARTING CLUSTERING...",-1,1,0) ! CALL propclustaccel(MODWORDPAIR_COUNT,MODTESTMODULE,MODPHAT,AHAT,FACTORIZABILITY,LOGLIK, & ! WORDSMOD,CLUSTERS,0,1) ! ! DO I=1,WORDSMOD ! TESTMODULE(I)=MODTESTMODULE(I) ! PHAT(I)=MODPHAT(I) ! END DO ! WORDS=WORDSMOD ! ! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTemp.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique words in file = ",WORDSMOD ! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,WORDSMOD ! WRITE(OUTPUT_UNIT,'(A)') LIST(ORDER_LIST(I)) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! ! ! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustResults.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique words in file = ",WORDSMOD ! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,WORDSMOD ! WRITE(OUTPUT_UNIT,'(A,2X,I6)') LIST(ORDER_LIST(I)),MODTESTMODULE(I) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! ! ! OPEN(UNIT=OUTPUT_UNIT,FILE="PropClustWordPairCounts.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique words in file = ",WORDSMOD ! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,WORDSMOD ! WRITE(OUTPUT_UNIT,'(A)') LIST(ORDER_LIST(I)) ! END DO ! WRITE(OUTPUT_UNIT,'(/,A,/)') "WORD PAIR COUNTS" ! DO I=1,WORDSMOD ! WRITE(OUTPUT_UNIT,'(A,A)') (MODWORDPAIR_COUNT(J,I),J=1,WORDSMOD) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! ! ! END SUBROUTINE wordpairclusters ! ! ! ! ! SUBROUTINE countgenes(TEXT_FILE,GENES) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! ! IMPLICIT NONE ! CHARACTER(LEN=25) :: GENE1,GENE2 ! CHARACTER(LEN=800) :: TEXT_FILE,LINE ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT = 2,J,LINES,GENES,FIRST,SECOND ! LOGICAL :: ERROR,HYPHEN_ACTIVE ! CHARACTER(LEN=50), DIMENSION(20000) :: LIST ! !! !! Set the number of GENEs, the hyphen alert, and the first line. !! ! GENES = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of GENEs. !! ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=MIN(INDEX(LINE(FIRST+1:LEN(LINE))//",",","),INDEX(LINE(FIRST+1:LEN(LINE)),"|"))+FIRST ! !THIS LINE FILTERS THE RESULTS ! IF(INDEX(LINE,"(3)")>0) THEN ! GENES=GENES+1 ! LIST(GENES)=TRIM(LINE(FIRST+1:SECOND-1)) ! END IF !! CALL PROCESS_LINE(LINE) ! !! !! Every 100 lines sort and purge the list of GENEs. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LIST(1:GENES)) ! CALL PURGE_STRINGS(LIST(1:GENES),GENES) ! END IF ! END IF ! END DO ! !! !! Print the alphabetized list. !! ! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTempGenes.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENEs in file = ",GENES ! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,GENES ! WRITE(OUTPUT_UNIT,'(1X,A)') LIST(I) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! CLOSE(INPUT_UNIT) ! ! !!CALL INTPR("FINISHED COUNTING GENES...",-1,GENES,1) ! ! END SUBROUTINE countgenes ! ! ! ! ! SUBROUTINE countdisorders(TEXT_FILE,DISORDERS) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! ! IMPLICIT NONE ! CHARACTER(LEN=50) :: DIS1 ! CHARACTER(LEN=800) :: TEXT_FILE,LINE ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT = 2,J,LINES,DISORDERS,FIRST ! LOGICAL :: ERROR,HYPHEN_ACTIVE ! CHARACTER(LEN=50), DIMENSION(20000) :: LISTD ! !! !! Set the number of DISORDERs, the hyphen alert, and the first line. !! ! DISORDERS = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! !! TEXT_FILE = "7plays.txt" ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of DISORDERs. !! ! DISORDERS=0 ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=MIN(INDEX(LINE//",",","),INDEX(LINE,"|"),50) ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! DIS1=LINE(1:FIRST-1) ! CALL REMOVE_TRAILING_NUMBERS(DIS1) ! !THIS LINE FILTERS RESULTS ! IF(INDEX(LINE,"(3)")>0) THEN ! DISORDERS=DISORDERS+1 ! LISTD(DISORDERS)=TRIM(DIS1) ! END IF !! CALL PROCESS_LINE(LINE) ! !! !! Every 100 lines sort and purge the list of GENEs. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LISTD(1:DISORDERS)) ! CALL PURGE_STRINGS(LISTD(1:DISORDERS),DISORDERS) ! END IF ! END IF ! END DO ! !! !! Print the alphabetized list. !! ! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTemp.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique DISORDERs in file = ",DISORDERS ! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,DISORDERS ! WRITE(OUTPUT_UNIT,'(1X,A)') TRIM(LISTD(I)) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! CLOSE(INPUT_UNIT) ! ! !!CALL INTPR("FINISHED COUNTING DISORDERS...",-1,DISORDERS,1) ! ! END SUBROUTINE countdisorders ! ! ! ! ! ! ! ! SUBROUTINE WRITE_TO_FILE(FILENAME1,FILENAME2,ADJ,PHAT,AHAT,TESTMODULE,LIST,ORDERING,& ! LOGLIK,NODES,CLUSTERS) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! USE MULTIGRAPH ! ! IMPLICIT NONE ! ! INTEGER :: NODES,CLUSTERS,I,J,OUTPUT_UNIT=3 ! CHARACTER(LEN=*) :: FILENAME1,FILENAME2 ! REAL, DIMENSION(NODES,NODES) :: ADJ ! REAL(KIND=8), DIMENSION(NODES) :: PHAT ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT ! INTEGER, DIMENSION(NODES) :: TESTMODULE,ORDERING ! REAL(KIND=8) :: LOGLIK,MEAN ! CHARACTER(LEN=50), DIMENSION(20000) :: LIST ! ! OPEN(UNIT=OUTPUT_UNIT,FILE=TRIM(FILENAME1)) ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique NODES in file = ",NODES ! WRITE(OUTPUT_UNIT,'(A,F8.4)') "Loglikelihood = ",LOGLIK ! WRITE(OUTPUT_UNIT,'(/,A,/)') "Name 1|Name 2|Log(P)|Num Edges" ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,NODES-1 ! DO J=I+1,NODES ! IF(ADJ(I,J)>0) THEN ! MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) ! WRITE(OUTPUT_UNIT,'(A,A,A,A,F8.2,A,I6)') TRIM(LIST(ORDERING(I))), "|", & ! TRIM(LIST(ORDERING(J))), "|", & ! LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))), "|", & ! NINT(ADJ(I,J)) ! END IF ! END DO ! END DO ! ! CLOSE(OUTPUT_UNIT) ! !! !! Print the ordered list !! ! OPEN(UNIT=OUTPUT_UNIT,FILE=TRIM(FILENAME2)) ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique NODES in file = ",NODES ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique CLUSTERS in file = ",CLUSTERS ! WRITE(OUTPUT_UNIT,'(/,A,/)') "Intermodular Adjacency" ! DO I = 1,CLUSTERS ! WRITE(OUTPUT_UNIT,'(50F10.4)') (AHAT(I,J),J=1,CLUSTERS) ! END DO ! WRITE(OUTPUT_UNIT,'(/,A,/)') "Item Name|Propensity|Cluster" ! DO I = 1,NODES ! WRITE(OUTPUT_UNIT,'(A,A,F8.4,A,I6)') TRIM(LIST(ORDERING(I))),"|",PHAT(I),"|",TESTMODULE(I) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! ! END SUBROUTINE WRITE_TO_FILE ! ! ! ! ! ! ! ! SUBROUTINE omimmorbidmap(TEXT_FILE,DISORDERPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK, & ! GENES,DISORDERS,DISORDERS2,CLUSTERS) ! ! !This function uses data from OMIM to build a bipartite graph between Genes and Disorders. ! !It then creates a multigraph using Disorders as nodes and analyzes it with the developed poisson ! !method. ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! USE MULTIGRAPH !! ! IMPLICIT NONE ! CHARACTER(LEN=50) :: GENE1,GENE2,DIS1,DIS2 ! CHARACTER(LEN=800) :: TEXT_FILE,LINE,LINE2 ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT=2,J,LINES,GENES,CLUSTERS,GENESMOD,FIRST,SECOND ! INTEGER :: COUNTER,FIRST2,SECOND2,K,SHORT_NAME,DISORDERS,DISLOC,GENELOC,GENES2,DISORDERS2 ! LOGICAL :: ERROR,HYPHEN_ACTIVE,FLAG,FILTER=.TRUE. ! CHARACTER(LEN=50), DIMENSION(20000) :: LIST,LISTD ! INTEGER, ALLOCATABLE, DIMENSION(:,:) :: BIPARTITE_GRAPH ! REAL, DIMENSION(DISORDERS,DISORDERS) :: DISORDERPAIR_COUNT ! INTEGER, DIMENSION(DISORDERS) :: TESTMODULE,ORDERING ! REAL(KIND=8), DIMENSION(DISORDERS) :: PHAT ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT ! REAL(KIND=8) :: LOGLIK,FACTORIZABILITY=0.,MEAN ! ! !!CALL INTPR("OMIM disorder map clustering",-1,1,0) ! !! !! Set the number of GENEs, the hyphen alert, and the first line. !! ! GENES2 = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of GENEs. !! ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=MIN(INDEX(LINE(FIRST+1:LEN(LINE))//",",","),INDEX(LINE(FIRST+1:LEN(LINE)),"|"))+FIRST ! !THIS LINE FILTERS RESULTS ! IF(INDEX(LINE,"(3)")>0.AND.FILTER) THEN ! GENES2=GENES2+1 ! LIST(GENES2)=TRIM(LINE(FIRST+1:SECOND-1)) ! END IF !! CALL PROCESS_LINE(LINE) ! !! !! Every 100 lines sort and purge the list of GENEs. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LIST(1:GENES2)) ! CALL PURGE_STRINGS(LIST(1:GENES2),GENES2) ! END IF ! END IF ! END DO !! IF(GENES2.EQ.GENES) THEN !! !CALL INTPR("SUCCESS, GENES EQUAL!",-1,1,0) !! END IF !! !! Read the text line by line and form the list of DISORDERs. !! ! DISORDERS2=0 ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=MIN(INDEX(LINE//",",","),INDEX(LINE,"|"),50) ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! DIS1=LINE(1:FIRST-1) ! CALL REMOVE_TRAILING_NUMBERS(DIS1) ! !THIS LINE FILTERS RESULTS ! IF(INDEX(LINE,"(3)")>0.AND.FILTER) THEN ! DISORDERS2=DISORDERS2+1 ! LISTD(DISORDERS2)=TRIM(DIS1) ! END IF !! CALL PROCESS_LINE(LINE) ! !! !! Every 100 lines sort and purge the list of GENEs. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LISTD(1:DISORDERS2)) ! CALL PURGE_STRINGS(LISTD(1:DISORDERS2),DISORDERS2) ! END IF ! END IF ! END DO !! IF(DISORDERS2.EQ.DISORDERS) THEN !! !CALL INTPR("SUCCESS, DISORDERS EQUAL!",-1,1,0) !! END IF !! !! Set the dimensions of the GENE-pair count matrix. !! ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.0",-1,1,0) ! DISORDERPAIR_COUNT = 0. ! !! !! Read in text line by line and update the Bipartite graph matrix. !! ! !!CALL INTPR("UPDATING BIPARTITE GRAPH",-1,1,0) ! ALLOCATE(BIPARTITE_GRAPH(GENES,DISORDERS)) ! !!CALL INTPR("ALLOCATED",-1,1,0) ! !!CALL INTPR("GENES",-1,GENES,1) ! !!CALL INTPR("DISORDERS",-1,DISORDERS,1) ! BIPARTITE_GRAPH=0 ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.1",-1,1,0) ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! IF(MOD(J,150).EQ.0) THEN ! !CALL INTPR("LINE NUMBER: ",-1,J,1) ! END IF ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! !THIS LINE FILTERS RESULTS ! IF(INDEX(LINE,"(3)")>0.AND.FILTER) THEN ! FIRST2=MIN(INDEX(LINE//",",","),INDEX(LINE,"|"),50) ! DIS1=LINE(1:FIRST2-1) ! CALL REMOVE_TRAILING_NUMBERS(DIS1) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=MIN(INDEX(LINE(FIRST+1:LEN(LINE))//",",","),INDEX(LINE(FIRST+1:LEN(LINE)),"|"))+FIRST ! GENE1=TRIM(LINE(FIRST+1:SECOND-1)) ! DISLOC=BISECT_STRING_LIST(LISTD(1:DISORDERS),TRIM(DIS1)) ! !!CALL INTPR("DISLOC",-1,DISLOC,1) ! GENELOC=BISECT_STRING_LIST(LIST(1:GENES),TRIM(GENE1)) ! !!CALL INTPR("GENELOC",-1,GENELOC,1) ! IF(GENELOC*DISLOC.GE.1) THEN ! BIPARTITE_GRAPH(GENELOC,DISLOC)=1 ! END IF ! END IF ! END DO ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.2",-1,1,0) !! ! CLOSE(INPUT_UNIT) ! ! !!CALL INTPR("BEGIN WRITING...",-1,1,0) ! OPEN(UNIT=OUTPUT_UNIT,FILE="BipartitePairCountsDis.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENES in file = ",GENES ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique DISORDERS in file = ",DISORDERS ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,GENES ! DO J=1,DISORDERS ! IF(BIPARTITE_GRAPH(I,J)>0) THEN ! WRITE(OUTPUT_UNIT,'(A,1X,A,1X,I6)') TRIM(LIST(I)),TRIM(LISTD(J)), & ! BIPARTITE_GRAPH(I,J) ! END IF ! END DO ! END DO ! ! ! CLOSE(OUTPUT_UNIT) ! !!CALL INTPR("DONE WRITING...",-1,1,0) ! !! !CALL INTPR("BIP 8,120",-1,BIPARTITE_GRAPH(9,120),1) !! !CALL INTPR("BIP 9,120",-1,BIPARTITE_GRAPH(8,120),1) ! DISORDERPAIR_COUNT=0. ! DO I=1,DISORDERS-1 ! DO J=I+1,DISORDERS ! DISORDERPAIR_COUNT(J,I)=REAL(SUM(BIPARTITE_GRAPH(:,I)*BIPARTITE_GRAPH(:,J))) ! DISORDERPAIR_COUNT(I,J)=DISORDERPAIR_COUNT(J,I) ! END DO ! END DO ! !! DO I=1,DISORDERS !! DISORDERPAIR_COUNT(I,1)=1 !! DISORDERPAIR_COUNT(1,I)=1 !! END DO !! DISORDERPAIR_COUNT(1,1)=0 ! ! DEALLOCATE(BIPARTITE_GRAPH) ! PHAT=0. ! AHAT=1. ! LOGLIK=0. ! FACTORIZABILITY=0. !! DO I=1,10 !! DO J=1,10 !! !!CALL INTPR("GENE PAIR COUNT",-1,DISORDERPAIR_COUNT(I,J),1) !! DISORDERPAIR_COUNT(I,J)=I*J !! END DO !! END DO !! !CALL INTPR("ACADM,ACADS",-1,DISORDERPAIR_COUNT(8,9),1) ! ! !!CALL INTPR("BEGIN WRITING...",-1,1,0) ! OPEN(UNIT=OUTPUT_UNIT,FILE="DisorderADJ.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique DISORDERS in file = ",DISORDERS ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,DISORDERS ! WRITE(OUTPUT_UNIT,'(5000I6,1X)') (INT(DISORDERPAIR_COUNT(J,I)),J=1,DISORDERS) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! !!CALL INTPR("DONE WRITING...",-1,1,0) ! ! !!CALL INTPR("STARTING CLUSTERING...",-1,1,0) ! ! CALL REORDER_ADJ(DISORDERPAIR_COUNT,DISORDERS,ORDERING,DISORDERS2) ! !! OPEN(UNIT=OUTPUT_UNIT,FILE="GeneADJORDERED.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENES in file = ",GENES !! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,GENES !! WRITE(OUTPUT_UNIT,'(5000I6,1X)') (INT(DISORDERPAIR_COUNT(J,I)),J=1,GENES) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! ! ! !!CALL INTPR("NON ZERO Disorders ", -1,DISORDERS2,1) !! DO I=1,GENES !! !CALL INTPR("ORDERING ",-1,ORDERING(I),1) !! END DO ! !! CALL propclusttrial(DISORDERPAIR_COUNT(1:DISORDERS2,1:DISORDERS2),TESTMODULE(1:DISORDERS2),& !! PHAT(1:DISORDERS2),AHAT,FACTORIZABILITY,LOGLIK,DISORDERS2,CLUSTERS,0,1) ! !! CALL propclustaccel(DISORDERPAIR_COUNT(1:DISORDERS2,1:DISORDERS2),TESTMODULE(1:DISORDERS2),& !! PHAT(1:DISORDERS2),AHAT,FACTORIZABILITY,LOGLIK,DISORDERS2,CLUSTERS,0,1) ! ! !!CALL INTPR("BEGIN WRITING...",-1,1,0) ! CALL WRITE_TO_FILE("ConnectionsDis.txt","propClustTempOrderedDisorders.txt",DISORDERPAIR_COUNT,& ! PHAT,AHAT,TESTMODULE,LISTD,ORDERING,LOGLIK,DISORDERS,CLUSTERS) ! !! OPEN(UNIT=OUTPUT_UNIT,FILE="ConnectionsDis.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique DISORDERS in file = ",DISORDERS !! WRITE(OUTPUT_UNIT,'(/,A,/)') "Disorder 1|Disorder 2|Log(P)|Num Edges" !! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,DISORDERS-1 !! DO J=I+1,DISORDERS !! IF(DISORDERPAIR_COUNT(I,J)>0) THEN !! MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) !! WRITE(OUTPUT_UNIT,'(A,A,A,A,F8.2,A,I6)') TRIM(LISTD(ORDERING(I))), "|", & !! TRIM(LISTD(ORDERING(J))), "|", & !! LOG_POISSON_TAIL(MEAN,NINT(DISORDERPAIR_COUNT(I,J))), "|", & !! NINT(DISORDERPAIR_COUNT(I,J)) !! END IF !! END DO !! END DO !! !! CLOSE(OUTPUT_UNIT) !! !!! !!! Print the ordered list !!! !! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTempOrderedDisorders.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique DISORDERs in file = ",DISORDERS !! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,DISORDERS !! WRITE(OUTPUT_UNIT,'(A,A,F8.4,A,I6)') TRIM(LISTD(ORDERING(I))),"|",PHAT(I),"|",TESTMODULE(I) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! !!CALL INTPR("END WRITING...",-1,1,0) ! ! END SUBROUTINE omimmorbidmap ! ! ! ! ! ! ! SUBROUTINE omimgenemap(TEXT_FILE,GENEPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK, & ! GENES,DISORDERS,GENES2,CLUSTERS) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! USE MULTIGRAPH !! ! IMPLICIT NONE ! ! CHARACTER(LEN=50) :: GENE1,GENE2,DIS1,DIS2 ! CHARACTER(LEN=800) :: TEXT_FILE,LINE,LINE2 ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT=2,J,LINES,GENES,CLUSTERS,GENESMOD,FIRST,SECOND ! INTEGER :: COUNTER,FIRST2,SECOND2,K,SHORT_NAME,DISORDERS,DISLOC,GENELOC,GENES2,DISORDERS2 ! LOGICAL :: ERROR,HYPHEN_ACTIVE,FLAG,FILTER=.TRUE. ! CHARACTER(LEN=50), DIMENSION(20000) :: LIST,LISTD ! INTEGER, ALLOCATABLE, DIMENSION(:,:) :: BIPARTITE_GRAPH ! REAL, DIMENSION(GENES,GENES) :: GENEPAIR_COUNT ! INTEGER, DIMENSION(GENES) :: TESTMODULE,ORDERING ! REAL(KIND=8), DIMENSION(GENES) :: PHAT ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT ! REAL(KIND=8) :: LOGLIK,FACTORIZABILITY=0.,MEAN ! ! !!CALL INTPR("OMIM gene map clustering",-1,1,0) ! !! !! Set the number of GENEs, the hyphen alert, and the first line. !! ! GENES2 = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of GENEs. !! ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=MIN(INDEX(LINE(FIRST+1:LEN(LINE))//",",","),INDEX(LINE(FIRST+1:LEN(LINE)),"|"))+FIRST ! IF(INDEX(LINE,"(3)")>0.AND.FILTER) THEN ! GENES2=GENES2+1 ! LIST(GENES2)=TRIM(LINE(FIRST+1:SECOND-1)) ! END IF !! CALL PROCESS_LINE(LINE) ! !! !! Every 100 lines sort and purge the list of GENEs. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LIST(1:GENES2)) ! CALL PURGE_STRINGS(LIST(1:GENES2),GENES2) ! END IF ! END IF ! END DO !! IF(GENES2.EQ.GENES) THEN !! !CALL INTPR("SUCCESS, GENES EQUAL!",-1,1,0) !! END IF !! !! Read the text line by line and form the list of DISORDERs. !! ! DISORDERS2=0 ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=MIN(INDEX(LINE//",",","),INDEX(LINE,"|"),50) ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! DIS1=LINE(1:FIRST-1) ! CALL REMOVE_TRAILING_NUMBERS(DIS1) ! IF(INDEX(LINE,"(3)")>0.AND.FILTER) THEN ! DISORDERS2=DISORDERS2+1 ! LISTD(DISORDERS2)=TRIM(DIS1) ! END IF !! CALL PROCESS_LINE(LINE) ! !! !! Every 100 lines sort and purge the list of GENEs. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES) THEN ! CALL SORT_STRINGS(LISTD(1:DISORDERS2)) ! CALL PURGE_STRINGS(LISTD(1:DISORDERS2),DISORDERS2) ! END IF ! END IF ! END DO !! IF(DISORDERS2.EQ.DISORDERS) THEN !! !CALL INTPR("SUCCESS, DISORDERS EQUAL!",-1,1,0) !! END IF !! !! Set the dimensions of the GENE-pair count matrix. !! ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.0",-1,1,0) ! GENEPAIR_COUNT = 0. ! !! !! Read in text line by line and update the Bipartite graph matrix. !! ! !!CALL INTPR("UPDATING BIPARTITE GRAPH",-1,1,0) ! ALLOCATE(BIPARTITE_GRAPH(GENES,DISORDERS)) ! !!CALL INTPR("ALLOCATED",-1,1,0) ! !!CALL INTPR("GENES",-1,GENES,1) ! !!CALL INTPR("DISORDERS",-1,DISORDERS,1) ! BIPARTITE_GRAPH=0 ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.1",-1,1,0) ! REWIND(INPUT_UNIT) ! DO J = 1,LINES ! IF(MOD(J,150).EQ.0) THEN ! !CALL INTPR("LINE NUMBER: ",-1,J,1) ! END IF ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! IF(INDEX(LINE,"(3)")>0.AND.FILTER) THEN ! FIRST2=MIN(INDEX(LINE//",",","),INDEX(LINE,"|"),50) ! DIS1=LINE(1:FIRST2-1) ! CALL REMOVE_TRAILING_NUMBERS(DIS1) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=MIN(INDEX(LINE(FIRST+1:LEN(LINE))//",",","),INDEX(LINE(FIRST+1:LEN(LINE)),"|"))+FIRST ! GENE1=TRIM(LINE(FIRST+1:SECOND-1)) ! DISLOC=BISECT_STRING_LIST(LISTD(1:DISORDERS),TRIM(DIS1)) ! !!CALL INTPR("DISLOC",-1,DISLOC,1) ! GENELOC=BISECT_STRING_LIST(LIST(1:GENES),TRIM(GENE1)) ! !!CALL INTPR("GENELOC",-1,GENELOC,1) ! IF(GENELOC*DISLOC.GE.1) THEN ! BIPARTITE_GRAPH(GENELOC,DISLOC)=1 ! END IF ! END IF ! END DO ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.2",-1,1,0) !! ! CLOSE(INPUT_UNIT) ! ! !!CALL INTPR("BEGIN WRITING...",-1,1,0) ! OPEN(UNIT=OUTPUT_UNIT,FILE="BipartitePairCounts.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENES in file = ",GENES ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique DISORDERS in file = ",DISORDERS ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,GENES ! DO J=1,DISORDERS ! IF(BIPARTITE_GRAPH(I,J)>0) THEN ! WRITE(OUTPUT_UNIT,'(A,1X,A,1X,I6)') TRIM(LIST(I)),TRIM(LISTD(J)),& ! BIPARTITE_GRAPH(I,J) ! END IF ! END DO ! END DO ! ! ! CLOSE(OUTPUT_UNIT) ! !!CALL INTPR("DONE WRITING...",-1,1,0) ! !! !CALL INTPR("BIP 8,120",-1,BIPARTITE_GRAPH(9,120),1) !! !CALL INTPR("BIP 9,120",-1,BIPARTITE_GRAPH(8,120),1) ! GENEPAIR_COUNT=0. ! DO I=1,GENES-1 ! DO J=I+1,GENES ! GENEPAIR_COUNT(J,I)=REAL(SUM(BIPARTITE_GRAPH(I,:)*BIPARTITE_GRAPH(J,:))) ! GENEPAIR_COUNT(I,J)=GENEPAIR_COUNT(J,I) ! END DO ! END DO ! !! DO I=1,GENES !! GENEPAIR_COUNT(I,1)=1 !! GENEPAIR_COUNT(1,I)=1 !! END DO !! GENEPAIR_COUNT(1,1)=0 ! ! DEALLOCATE(BIPARTITE_GRAPH) ! PHAT=0. ! AHAT=1. ! LOGLIK=0. ! FACTORIZABILITY=0. !! DO I=1,10 !! DO J=1,10 !! !!CALL INTPR("GENE PAIR COUNT",-1,GENEPAIR_COUNT(I,J),1) !! GENEPAIR_COUNT(I,J)=I*J !! END DO !! END DO !! !CALL INTPR("ACADM,ACADS",-1,GENEPAIR_COUNT(8,9),1) ! ! !!CALL INTPR("BEGIN WRITING...",-1,1,0) ! OPEN(UNIT=OUTPUT_UNIT,FILE="GeneADJ.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENES in file = ",GENES ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,GENES ! WRITE(OUTPUT_UNIT,'(5000I6,1X)') (INT(GENEPAIR_COUNT(J,I)),J=1,GENES) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! !!CALL INTPR("DONE WRITING...",-1,1,0) ! ! !!CALL INTPR("STARTING CLUSTERING...",-1,1,0) ! ! CALL REORDER_ADJ(GENEPAIR_COUNT,GENES,ORDERING,GENES2) ! !! OPEN(UNIT=OUTPUT_UNIT,FILE="GeneADJORDERED.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENES in file = ",GENES !! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,GENES !! WRITE(OUTPUT_UNIT,'(5000I6,1X)') (INT(GENEPAIR_COUNT(J,I)),J=1,GENES) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! ! ! !CALL INTPR("NON ZERO GENES ", -1,GENES2,1) !! DO I=1,GENES !! !CALL INTPR("ORDERING ",-1,ORDERING(I),1) !! END DO ! !! CALL propclusttrial(GENEPAIR_COUNT(1:GENES2,1:GENES2),TESTMODULE(1:GENES2),PHAT(1:GENES2), & !! AHAT,FACTORIZABILITY,LOGLIK,GENES2,CLUSTERS,0,1) ! !! CALL propclustaccel(GENEPAIR_COUNT(1:GENES2,1:GENES2),TESTMODULE(1:GENES2),PHAT(1:GENES2), & !! AHAT,FACTORIZABILITY,LOGLIK,GENES2,CLUSTERS,0,1) ! ! !!CALL INTPR("BEGIN WRITING...",-1,1,0) ! CALL WRITE_TO_FILE("ConnectionsGenes.txt","propClustTempOrderedGenes.txt",GENEPAIR_COUNT,& ! PHAT,AHAT,TESTMODULE,LIST,ORDERING,LOGLIK,GENES,CLUSTERS) ! !! OPEN(UNIT=OUTPUT_UNIT,FILE="ConnectionsGenes.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENES in file = ",GENES !! WRITE(OUTPUT_UNIT,'(/,A,/)') "Gene 1|Gene 2|Log(P)|Num Edges" !! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,GENES-1 !! DO J=I+1,GENES !! IF(GENEPAIR_COUNT(I,J)>0) THEN !! MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) !! WRITE(OUTPUT_UNIT,'(A,A,A,A,F8.2,A,I6)') TRIM(LIST(ORDERING(I))), "|", & !! TRIM(LIST(ORDERING(J))), "|", & !! LOG_POISSON_TAIL(MEAN,NINT(GENEPAIR_COUNT(I,J))), "|", & !! NINT(GENEPAIR_COUNT(I,J)) !! END IF !! END DO !! END DO !! !! CLOSE(OUTPUT_UNIT) !! !!! !!! Print the ordered list !!! !! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTempOrderedGenes.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique GENEs in file = ",GENES !! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,GENES !! WRITE(OUTPUT_UNIT,'(A,A,F8.4,A,I6)') TRIM(LIST(ORDERING(I))),"|",PHAT(I),"|",TESTMODULE(I) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! !!CALL INTPR("END WRITING...",-1,1,0) ! ! END SUBROUTINE omimgenemap ! ! ! ! ! SUBROUTINE countitems(TEXT_FILE,ITEMS,POSITION) ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! ! IMPLICIT NONE ! ! CHARACTER(LEN=800) :: TEXT_FILE,LINE ! CHARACTER(LEN=50) :: NAME ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT = 2,J,LINES,ITEMS,STARTPOS,ENDPOS,POSITION ! LOGICAL :: ERROR,HYPHEN_ACTIVE ! CHARACTER(LEN=50), DIMENSION(50000) :: LIST ! !! !! Set the number of ITEMS, the hyphen alert, and the first line. !! ! ITEMS = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of ITEMS. !! ! REWIND(INPUT_UNIT) ! !skip first line ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! ! DO J = 1,LINES-1 ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! STARTPOS=1 ! DO I=1,POSITION-1 ! STARTPOS=INDEX(LINE(STARTPOS+1:LEN(LINE)),"|")+STARTPOS ! END DO ! ENDPOS=INDEX(LINE(STARTPOS+1:LEN(LINE)),"|")+STARTPOS ! ITEMS=ITEMS+1 ! NAME=LINE(STARTPOS+1:ENDPOS-1) ! CALL FIX_NAME(NAME) ! LIST(ITEMS)=TRIM(NAME) !! !! Every 100 lines sort and purge the list of ITEMS. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES-1) THEN ! CALL SORT_STRINGS(LIST(1:ITEMS)) ! CALL PURGE_STRINGS(LIST(1:ITEMS),ITEMS) ! END IF ! END IF ! END DO !! !! Print the alphabetized list. !! ! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTempItems.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') "Unique ITEMS in file = ",ITEMS ! WRITE(OUTPUT_UNIT,'(/,A,/)') "File dictionary:" ! DO I = 1,ITEMS ! WRITE(OUTPUT_UNIT,'(A)') LIST(I) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! CLOSE(INPUT_UNIT) ! ! !!CALL INTPR("FINISHED COUNTING ITEMS...",-1,ITEMS,1) ! ! END SUBROUTINE countitems ! ! ! ! ! ! SUBROUTINE clustercompanies(TEXT_FILE,COMPANYPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK, & ! COMPS,MEMBERS,COMPS2,CLUSTERS) ! ! !This clusters companies based on board membership in the text file organization_board_membershipED.txt ! !obtained from freebase.com ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! USE MULTIGRAPH !! ! IMPLICIT NONE ! ! CHARACTER(LEN=50) :: COMP1,MEM1,NAME ! CHARACTER(LEN=800) :: TEXT_FILE,LINE,LINE2 ! INTEGER :: I,IOERROR,INPUT_UNIT = 1,OUTPUT_UNIT=2,J,LINES,COMPS,CLUSTERS,COMPSMOD,FIRST,SECOND ! INTEGER :: COUNTER,FIRST2,SECOND2,K,SHORT_NAME,MEMBERS,DISLOC,GENELOC,COMPS2,MEMBERS2,THIRD ! INTEGER :: INPUT_UNIT2=3,LINES2,FORTUNE ! LOGICAL :: ERROR,HYPHEN_ACTIVE,FLAG ! CHARACTER(LEN=50), DIMENSION(50000) :: LIST,LISTD ! CHARACTER(LEN=50), DIMENSION(600) :: FORTUNE500 ! INTEGER, ALLOCATABLE, DIMENSION(:,:) :: BIPARTITE_GRAPH ! REAL, DIMENSION(500,500) :: COMPANYPAIR_COUNT ! INTEGER, DIMENSION(500) :: TESTMODULE,ORDERING ! REAL(KIND=8), DIMENSION(500) :: PHAT ! REAL(KIND=8), DIMENSION(CLUSTERS,CLUSTERS) :: AHAT ! REAL(KIND=8) :: LOGLIK,FACTORIZABILITY=0.,MEAN ! ! !CALL INTPR("Fortune 500 clustering",-1,1,0) ! !! !! Set the number of COMPS, the hyphen alert, and the first line. !! ! COMPS2 = 0 ! HYPHEN_ACTIVE = .FALSE. !! !! Find the number of lines in the text. !! ! CALL INPUT_DATA(TEXT_FILE,LINE,INPUT_UNIT,LINES) !! !! Read the text line by line and form the list of COMPS. !! TEXT IS ASSUMED TO HAVE TRASH | MEMBER | COMP | TRASH !! ! REWIND(INPUT_UNIT) ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! DO J = 1,LINES-1 ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=INDEX(LINE(FIRST+1:LEN(LINE)),"|")+FIRST ! THIRD=INDEX(LINE(SECOND+1:LEN(LINE)),"|")+SECOND ! COMPS2=COMPS2+1 ! NAME=LINE(SECOND+1:THIRD-1) ! CALL FIX_NAME(NAME) ! LIST(COMPS2) = TRIM(NAME) !! !! Every 100 lines sort and purge the list of COMPS. !! ! IF (MOD(J,100)==0.OR.J==LINES-1) THEN ! CALL SORT_STRINGS(LIST(1:COMPS2)) ! CALL PURGE_STRINGS(LIST(1:COMPS2),COMPS2) ! END IF ! END DO !! IF(COMPS2.EQ.COMPS) THEN !! !CALL INTPR("SUCCESS, COMPS EQUAL!",-1,1,0) !! END IF !! !! Read the text line by line and form the list of MEMBERS. !! TEXT IS ASSUMED TO HAVE TRASH | MEMBER | COMP | TRASH !! ! MEMBERS2=0 ! REWIND(INPUT_UNIT) ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! DO J = 1,LINES-1 ! LINE = " " ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! SECOND=INDEX(LINE(FIRST+1:LEN(LINE)),"|")+FIRST ! MEMBERS2=MEMBERS2+1 ! NAME=LINE(FIRST+1:SECOND-1) ! CALL FIX_NAME(NAME) ! LISTD(MEMBERS2)=TRIM(NAME) !! !! Every 100 lines sort and purge the list of COMPS. !! ! IF (.NOT. HYPHEN_ACTIVE) THEN ! IF (MOD(J,100)==0.OR.J==LINES-1) THEN ! CALL SORT_STRINGS(LISTD(1:MEMBERS2)) ! CALL PURGE_STRINGS(LISTD(1:MEMBERS2),MEMBERS2) ! END IF ! END IF ! END DO ! !IF(MEMBERS2.EQ.MEMBERS) THEN ! ! !CALL INTPR("SUCCESS, MEMBERS EQUAL!",-1,1,0) ! !END IF !! !! Set the dimensions of the GENE-pair count matrix. !! ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.0",-1,1,0) ! COMPANYPAIR_COUNT = 0. ! !! READ IN THE FORTUNE 500 COMPANY LIST AND CHECK TO SEE WHETHER THEY ARE ALL IN THE COMPANY LIST !! ! CALL INPUT_DATA("Fortune500ED.txt",LINE,INPUT_UNIT2,LINES2) ! FORTUNE=0 ! REWIND(INPUT_UNIT2) ! READ(INPUT_UNIT2,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! DO J = 1,LINES2-1 ! LINE = " " ! READ(INPUT_UNIT2,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! NAME=LINE(1:FIRST-1) ! CALL FIX_NAME(NAME) ! FORTUNE=FORTUNE+1 ! FORTUNE500(FORTUNE)=TRIM(NAME) ! END DO ! CALL SORT_STRINGS(FORTUNE500(1:FORTUNE)) ! CLOSE(INPUT_UNIT2) ! !! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustFortune.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique COMPS in file = ",FORTUNE !! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,FORTUNE !! WRITE(OUTPUT_UNIT,'(A)') FORTUNE500(I) !! END DO !! !! CLOSE(OUTPUT_UNIT) !! !! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustComps.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique COMPS in file = ",COMPS2 !! WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,COMPS2 !! WRITE(OUTPUT_UNIT,'(A)') LIST(I) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! ! !CHECK TO SEE WHETHER ALL FORTUNE 500 COMPANIES ARE PRESENT IN LIST ! !! DO J=1,FORTUNE !! THIRD=BISECT_STRING_LIST(LIST(2:COMPS2),TRIM(FORTUNE500(J)))+1 !! IF (LIST(THIRD).EQ.FORTUNE500(J)) THEN !! !CALL INTPR("SUCCESS! ",-1,1,0) !! ELSE !! !CALL INTPR("FAILURE... COMPANY NUMBER",-1,[J,THIRD],2) !! !!CALL INTPR("BISECT NUMBER",-1,THIRD,1) !! END IF !! END DO ! !! !! Read in text line by line and update the Bipartite graph matrix. !! ! !CALL INTPR("UPDATING BIPARTITE GRAPH",-1,1,0) ! ALLOCATE(BIPARTITE_GRAPH(FORTUNE,MEMBERS)) ! !!CALL INTPR("ALLOCATED",-1,1,0) ! !!CALL INTPR("COMPS",-1,COMPS,1) ! !!CALL INTPR("MEMBERS",-1,MEMBERS,1) ! BIPARTITE_GRAPH=0 ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.1",-1,1,0) ! !SKIP FIRST LINE ! REWIND(INPUT_UNIT) ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! ! DO J = 1,LINES-1 ! IF(MOD(J,1000).EQ.0) THEN ! !CALL INTPR("LINE NUMBER: ",-1,J,1) ! END IF ! READ(INPUT_UNIT,'(800A)',IOSTAT=IOERROR) (LINE(I:I),I=1,LEN(LINE)) ! FIRST=INDEX(LINE,"|") ! !THIS IS SET UP WITH A COMMA TO TAKE ONLY THE FIRST ALIAS ! SECOND=INDEX(LINE(FIRST+1:LEN(LINE)),"|")+FIRST ! THIRD=INDEX(LINE(SECOND+1:LEN(LINE)),"|")+SECOND ! MEM1=TRIM(LINE(FIRST+1:SECOND-1)) ! COMP1=TRIM(LINE(SECOND+1:THIRD-1)) ! CALL FIX_NAME(MEM1) ! CALL FIX_NAME(COMP1) ! DISLOC=BISECT_STRING_LIST(LISTD(1:MEMBERS),TRIM(MEM1)) ! !!CALL INTPR("DISLOC",-1,DISLOC,1) ! GENELOC=BISECT_STRING_LIST(FORTUNE500(1:FORTUNE),TRIM(COMP1)) ! !!CALL INTPR("GENELOC",-1,GENELOC,1) ! IF(GENELOC*DISLOC.GE.1) THEN ! BIPARTITE_GRAPH(GENELOC,DISLOC)=1 ! END IF ! END DO ! !!CALL INTPR("MID GENE PAIR CLUSTERS...1.2",-1,1,0) ! ! CLOSE(INPUT_UNIT) ! ! !CALL INTPR("BEGIN WRITING...",-1,1,0) ! OPEN(UNIT=OUTPUT_UNIT,FILE="BipartitePairCounts.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique COMPS in file = ",COMPS ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique MEMBERS in file = ",MEMBERS ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,FORTUNE ! DO J=1,MEMBERS ! IF(BIPARTITE_GRAPH(I,J)>0) THEN ! WRITE(OUTPUT_UNIT,'(A,1X,A,1X,I6)') FORTUNE500(I),LISTD(J),BIPARTITE_GRAPH(I,J) ! END IF ! END DO ! END DO ! ! ! CLOSE(OUTPUT_UNIT) ! !CALL INTPR("DONE WRITING...",-1,1,0) ! !! !CALL INTPR("BIP 8,120",-1,BIPARTITE_GRAPH(9,120),1) !! !CALL INTPR("BIP 9,120",-1,BIPARTITE_GRAPH(8,120),1) ! COMPANYPAIR_COUNT=0. ! DO I=1,FORTUNE-1 ! DO J=I+1,FORTUNE ! COMPANYPAIR_COUNT(J,I)=REAL(SUM(BIPARTITE_GRAPH(I,:)*BIPARTITE_GRAPH(J,:))) ! COMPANYPAIR_COUNT(I,J)=COMPANYPAIR_COUNT(J,I) ! END DO ! END DO ! !! DO I=1,COMPS !! COMPANYPAIR_COUNT(I,1)=1 !! COMPANYPAIR_COUNT(1,I)=1 !! END DO !! COMPANYPAIR_COUNT(1,1)=0 ! ! DEALLOCATE(BIPARTITE_GRAPH) ! PHAT=0. ! AHAT=1. ! LOGLIK=0. ! FACTORIZABILITY=0. !! DO I=1,10 !! DO J=1,10 !! !!CALL INTPR("GENE PAIR COUNT",-1,COMPANYPAIR_COUNT(I,J),1) !! COMPANYPAIR_COUNT(I,J)=I*J !! END DO !! END DO !! !CALL INTPR("ACADM,ACADS",-1,COMPANYPAIR_COUNT(8,9),1) ! ! !CALL INTPR("BEGIN WRITING...",-1,1,0) ! OPEN(UNIT=OUTPUT_UNIT,FILE="GeneADJ.txt") ! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique COMPS in file = ",COMPS ! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" ! DO I = 1,FORTUNE ! WRITE(OUTPUT_UNIT,'(5000I6,1X)') (INT(COMPANYPAIR_COUNT(J,I)),J=1,FORTUNE) ! END DO ! ! CLOSE(OUTPUT_UNIT) ! !CALL INTPR("DONE WRITING...",-1,1,0) ! ! !CALL INTPR("STARTING CLUSTERING...",-1,1,0) ! ! CALL REORDER_ADJ(COMPANYPAIR_COUNT,FORTUNE,ORDERING,COMPS2) ! !! OPEN(UNIT=OUTPUT_UNIT,FILE="GeneADJORDERED.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique COMPS in file = ",COMPS !! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,COMPS !! WRITE(OUTPUT_UNIT,'(5000I6,1X)') (INT(COMPANYPAIR_COUNT(J,I)),J=1,COMPS) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! ! !CALL INTPR("NON ZERO COMPS ", -1,COMPS2,1) !! DO I=1,COMPS !! !CALL INTPR("ORDERING ",-1,ORDERING(I),1) !! END DO ! ! CALL propclustaccel(COMPANYPAIR_COUNT(1:COMPS2,1:COMPS2),TESTMODULE(1:COMPS2),PHAT(1:COMPS2), & ! AHAT,FACTORIZABILITY,LOGLIK,COMPS2,CLUSTERS,0,1) ! ! !CALL INTPR("BEGIN WRITING...",-1,1,0) ! ! CALL WRITE_TO_FILE("ConnectionsComps.txt","propClustTempOrderedComps.txt",COMPANYPAIR_COUNT, & ! PHAT,AHAT,TESTMODULE,FORTUNE500,ORDERING,LOGLIK,FORTUNE,CLUSTERS) !! OPEN(UNIT=OUTPUT_UNIT,FILE="ConnectionsComps.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique FORTUNE COMPS in file = ",FORTUNE !! WRITE(OUTPUT_UNIT,'(/,A,/)') "Company 1|Company 2|Log(P)|Num Edges" !! !WRITE(OUTPUT_UNIT,'(/,A,/)') " File dictionary:" !! DO I = 1,FORTUNE-1 !! DO J=I+1,FORTUNE !! IF(COMPANYPAIR_COUNT(I,J)>0) THEN !! MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) !! WRITE(OUTPUT_UNIT,'(A,A,A,A,F8.2,A,I6)') TRIM(FORTUNE500(ORDERING(I))), "|", & !! TRIM(FORTUNE500(ORDERING(J))), "|", & !! LOG_POISSON_TAIL(MEAN,NINT(COMPANYPAIR_COUNT(I,J))), "|", & !! NINT(COMPANYPAIR_COUNT(I,J)) !! END IF !! END DO !! END DO !! !! CLOSE(OUTPUT_UNIT) !! !!! !!! Print the ordered list, propensity, and cluster membership. !!! !! OPEN(UNIT=OUTPUT_UNIT,FILE="propClustTempOrderedComps.txt") !! WRITE(OUTPUT_UNIT,'(A,I6)') " Unique COMPS in file = ",FORTUNE !! WRITE(OUTPUT_UNIT,'(/,A,/)') "Company Name|Propensity|Cluster" !! DO I = 1,FORTUNE !! WRITE(OUTPUT_UNIT,'(A,A,F8.4,A,I6)') TRIM(FORTUNE500(ORDERING(I))),"|",PHAT(I),"|",TESTMODULE(I) !! END DO !! !! CLOSE(OUTPUT_UNIT) ! ! !CALL INTPR("DONE WRITING...",-1,1,0) ! ! END SUBROUTINE clustercompanies !! SUBROUTINE singleclusterupdate(ADJ,PHAT,FACTORIZABILITY,CRITERIA,NODES,L2I) USE CONSTANTS USE TOOLS USE STRING_MANIPULATION USE MULTIGRAPH IMPLICIT NONE INTEGER :: I,J,K,ITERATION,NODES,CLUSTERS=2,L2I REAL, DIMENSION(NODES,NODES) :: ADJ REAL(KIND=8), DIMENSION(NODES) :: PHAT,PSUM INTEGER, DIMENSION(NODES) :: TESTMODULE REAL(KIND=8), DIMENSION(NODES) :: PN REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:) :: AHAT,ASUM LOGICAL :: NOT_CONVERGED,L2BOOL,L2 REAL(KIND=8) :: OLD_LOGLIK,NEW_LOGLIK,NEW_L2,OLD_L2,FACTORIZABILITY,CRITERIA,MEAN !THIS SUBROUTINE IMPLEMENTS POISSON UPDATES FOR MULTIGRAPH CLUSTERING WITH NO SELF EDGES FOR !A SINGLE CLUSTER BY ASSUMING 2 CLUSTERS WITH INTERCLUSTER ADJACENCY OF 1 IF(L2I>0) THEN L2BOOL=.TRUE. ELSE L2BOOL=.FALSE. END IF L2=L2BOOL CLUSTERS=2 TESTMODULE=1 TESTMODULE(1)=2 ALLOCATE(AHAT(CLUSTERS,CLUSTERS),ASUM(CLUSTERS,CLUSTERS)) !INITIALIZING PSUM CALL INITIALIZE_PSUM(ADJ,PSUM,NODES) !!CALL INTPR("INITIALIZED PSUM",-1,1,0) !INITIALIZING ASUM CALL INITIALIZE_ASUM(ADJ,TESTMODULE,ASUM,NODES,CLUSTERS) !INITIALIZING PHAT AND AHAT PHAT=0. AHAT=0. CALL INITIALIZE_PARAMETERS(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2) AHAT=1. ITERATION=0 NOT_CONVERGED=.TRUE. IF(L2BOOL) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) OLD_L2=NEW_L2 ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) OLD_LOGLIK=NEW_LOGLIK END IF DO WHILE(NOT_CONVERGED) ITERATION=ITERATION+1 CALL UPDATE_PARAMETERS_ONCE(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS,L2BOOL,PSUM,ASUM,& NEW_L2,NEW_LOGLIK) AHAT=1. IF(L2BOOL) THEN NEW_L2=CALC_L2NORM(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(OLD_L2,NEW_L2,ITERATION,NOT_CONVERGED) OLD_L2=NEW_L2 ELSE NEW_LOGLIK=CALC_LOGLIK(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) CALL CHECK_CONVERGENCE(OLD_LOGLIK,NEW_LOGLIK,ITERATION,NOT_CONVERGED) OLD_LOGLIK=NEW_LOGLIK END IF END DO IF(L2BOOL) THEN CRITERIA=NEW_L2 ELSE CRITERIA=NEW_LOGLIK END IF TESTMODULE=1 AHAT=1 FACTORIZABILITY=CALC_FACTORIZABILITY(ADJ,TESTMODULE,PHAT,AHAT,NODES,CLUSTERS) DO I = 1,NODES-1 DO J=I+1,NODES MEAN=PHAT(I)*PHAT(J)*AHAT(TESTMODULE(I),TESTMODULE(J)) IF(.NOT.L2) THEN IF(ADJ(I,J)>0) THEN ADJ(J,I)=LOG_POISSON_TAIL(MEAN,NINT(ADJ(I,J))) ELSE ADJ(J,I)=0. END IF ELSE ADJ(J,I)=0 END IF ADJ(I,J)=MEAN END DO END DO END SUBROUTINE singleclusterupdate ! ! PROGRAM MULTIGRAPH_CLUSTERING ! ! USE CONSTANTS ! USE TOOLS ! USE STRING_MANIPULATION ! USE MULTIGRAPH ! ! IMPLICIT NONE ! ! !(TEXT_FILE,GENEPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK,GENES,DISORDERS,GENES2,CLUSTERS) ! CHARACTER(LEN=800) :: TEXT_FILE ! INTEGER :: I,J,K,GENES,CLUSTERS,DISORDERS,GENES2,DISORDERS2 ! !REAL, ALLOCATABLE, DIMENSION(:,:) :: GENEPAIR_COUNT ! REAL, ALLOCATABLE, DIMENSION(:,:) :: DISORDERPAIR_COUNT ! INTEGER, ALLOCATABLE, DIMENSION(:) :: TESTMODULE ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:) :: PHAT ! REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:) :: AHAT ! REAL(KIND=8) :: LOGLIK=0.,FACTORIZABILITY=0. ! ! TEXT_FILE="morbidmap.txt" ! CLUSTERS=10 ! CALL countgenes(TEXT_FILE,GENES) ! CALL countdisorders(TEXT_FILE,DISORDERS) ! !! ALLOCATE(DISORDERPAIR_COUNT(DISORDERS,DISORDERS),TESTMODULE(DISORDERS)) !! ALLOCATE(PHAT(DISORDERS),AHAT(CLUSTERS,CLUSTERS)) !! !! GENES2=GENES !! DISORDERPAIR_COUNT=0 !! TESTMODULE=1 !! PHAT=1. !! AHAT=1. !! !! CALL omimmorbidmap(TEXT_FILE,DISORDERPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK, & !! GENES,DISORDERS,GENES2,CLUSTERS) ! ! ! ALLOCATE(DISORDERPAIR_COUNT(GENES,GENES),TESTMODULE(GENES)) ! ALLOCATE(PHAT(GENES),AHAT(CLUSTERS,CLUSTERS)) ! ! GENES2=GENES ! DISORDERPAIR_COUNT=0 ! TESTMODULE=1 ! PHAT=1. ! AHAT=1. ! CALL omimgenemap(TEXT_FILE,DISORDERPAIR_COUNT,TESTMODULE,PHAT,AHAT,LOGLIK, & ! GENES,DISORDERS,GENES2,CLUSTERS) ! ! ! END PROGRAM MULTIGRAPH_CLUSTERING PropClust/src/minWhichMin.h0000644000176200001440000000034413346232602015361 0ustar liggesusers#ifndef __minWhichMin_h__ #define __minWhichMin_h__ #include #include #include void minWhichMin(double * matrix, int * nRows, int * nColumns, double * min, double * whichMin); #endif PropClust/src/minWhichMin.c0000644000176200001440000000124013346232602015350 0ustar liggesusers/* Function returning the column-wise minimum and minimum index. For easier integration with R, the index will also be stored as a double. NA's are ignored. */ #include "minWhichMin.h" void minWhichMin(double * matrix, int * nRows, int * nColumns, double * min, double * whichMin) { int nrows = *nRows, ncols = *nColumns; for (int i=0; i // for NULL #include #include #include "minWhichMin.h" #include "PropClustParallelTrials.h" #define CDEF(name, n, args) {#name, (DL_FUNC) &name, n, args} static R_NativePrimitiveArgType // minWhichMin minWhich_t[] = { REALSXP, INTSXP, INTSXP, REALSXP, REALSXP }, // propclusttrial, propclustaccel, propensityclustering propclusttrial_t[] = {SINGLESXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP}, // propdecompaccel, propensitydecomposition propdecompaccel_t[] = {SINGLESXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP}, // singleclusterupdate singleclusterupdate_t[] = {SINGLESXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP}; static const R_CMethodDef R_CMethods[] = { CDEF(minWhichMin, 5, minWhich_t), {NULL, NULL, 0, NULL} }; static const R_FortranMethodDef R_FortranMethods[] = { {"propclusttrial", (DL_FUNC) &F77_NAME(propclusttrial), 10, propclusttrial_t}, {"propclustaccel", (DL_FUNC) &F77_NAME(propclusttrial), 10, propclusttrial_t}, {"propensityclustering", (DL_FUNC) &F77_NAME(propclusttrial), 10, propclusttrial_t}, {"propensitydecomposition", (DL_FUNC) &F77_NAME(propensitydecomposition), 9, propdecompaccel_t}, {"propdecompaccel", (DL_FUNC) &F77_NAME(propdecompaccel), 9, propdecompaccel_t}, {"singleclusterupdate", (DL_FUNC) & F77_NAME(singleclusterupdate), 6, singleclusterupdate_t}, {NULL, NULL, 0, NULL} }; void R_init_PropClust(DllInfo *dll) { R_registerRoutines(dll, R_CMethods, NULL, R_FortranMethods, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } PropClust/NAMESPACE0000644000176200001440000000022713346074337013437 0ustar liggesusersuseDynLib(PropClust, .registration = TRUE, .fixes = ".C_") exportPattern("^[^\\.]") import(fastcluster, dynamicTreeCut) importFrom("stats", "as.dist") PropClust/Changelog0000644000176200001440000000021513345761231014022 0ustar liggesusers2018/09/11: 1.4-5 . Removed internal function .speed1 and .speed2 and the corresponding Fortran code . Added reference to DESCRIPTION PropClust/R/0000755000176200001440000000000013346061712012411 5ustar liggesusersPropClust/R/PropClust-internal.R0000644000176200001440000001714013345760736016317 0ustar liggesusers# .is.wholenumber <-function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol .propensityClustering.internal <-function(adjacency, initialClusters, l2bool, nClusters, initbool, fastUpdates, accelerated = TRUE){ #if(l2bool>0){ # print("Using L2 updates") #}else{ # print("Using Poisson updates") # } # for(i in 1:length(initialClusters)){ # if(initialClusters[i]<1){ # stop("Initial Cluster values must be integers greater than 0.") # } # } # if(!.is.wholenumber(initialClusters[1])){ # print("Converting Cluster values to integers and storing in initialClusters.norm") # initialClusters.norm = as.numeric(as.factor(initialClusters)) # }else{ # initialClusters.norm=initialClusters # } nodes=length(initialClusters) # if(nodes!=length(adjacency[1,])|nodes!=length(adjacency[,1])){ # stop("Adjacency must have same length as initial clustering") # } # if(nClusters<2){ # stop("Number of clusters must be > 1. Use propensityDecomposition for 1 cluster.") # } Phat=rep(0.7, nodes) Ahat=matrix(0.5, nClusters,nClusters) diag(Ahat) = 1; fact=0. lnorm=0. if (accelerated) { if (fastUpdates) { results = .Fortran(.C_propclusttrial, ADJ=as.single(adjacency), Clustering=as.integer(initialClusters),Propensity=as.double(Phat), IntermodularAdjacency=as.double(Ahat),Factorizability=as.double(fact), Criteria=as.double(lnorm),Nodes=as.integer(nodes),Clusters=as.integer(nClusters), L2=as.integer(l2bool),Init=as.integer(initbool)) } else { results = .Fortran(.C_propclustaccel, ADJ=as.single(adjacency), Clustering=as.integer(initialClusters),Propensity=as.double(Phat), IntermodularAdjacency=as.double(Ahat),Factorizability=as.double(fact), Criteria=as.double(lnorm),Nodes=as.integer(nodes),Clusters=as.integer(nClusters), L2=as.integer(l2bool),Init=as.integer(initbool)) } } else { results = .Fortran(.C_propensityclustering, ADJ=as.single(adjacency), Clustering=as.integer(initialClusters),Propensity=as.double(Phat), IntermodularAdjacency=as.double(Ahat),Factorizability=as.double(fact), Criteria=as.double(lnorm),Nodes=as.integer(nodes),Clusters=as.integer(nClusters), L2=as.integer(l2bool),Init=as.integer(initbool)) } resultsmod=results[2:6] if(l2bool>0){ resultsmod$L2Norm=results$Criteria }else{ resultsmod$Loglik=results$Criteria } dim ( resultsmod$IntermodularAdjacency ) = c(nClusters, nClusters) # if(!.is.wholenumber(initialClusters[1])){ # finalClusters = .translateUsingTable(results$Clustering, # .translationTable(initialClusters.norm, initialClusters)) # }else{ # finalClusters = results$Clustering # } # # resultsmod$Clustering=finalClusters # meanMat = matrix((results$ADJ),ncol=nodes); resultsmod$MeanValues=as.dist(meanMat); resultsmod$TailPvalues = as.dist(t(meanMat)); return(resultsmod) } CPBADecomposition<-function(adjacency, clustering, nClusters = NULL, objectiveFunction = c("Poisson", "L2norm"), dropUnassigned = TRUE, unassignedLabel = 0, unassignedMethod = "average", accelerated = TRUE, parallel = FALSE) { .checkAdjMat(adjacency, min = 0, max = max(adjacency, na.rm = TRUE)); objectiveFunction = match.arg(objectiveFunction); nAllNodes = nNodes = ncol(adjacency); useNodes = rep(TRUE, nNodes); if (parallel) { warning("Parallel version does not work yet... using standard accelerated calculations."); parallel = FALSE; } if (length(clustering)!=nNodes) stop("Length of 'clustering' must be the same as the number of nodes (columns) in 'adjacency'."); if (is.null(nClusters)) { if (any(is.na(clustering))) stop("All entries in 'clustering' must be present (non-NA)"); if (all(clustering==unassignedLabel)) stop("All entries in 'clustering' are unassigned."); if (dropUnassigned) { useNodes = clustering != unassignedLabel; adjacency = adjacency[useNodes, useNodes]; clustering = clustering[useNodes]; nNodes = ncol(adjacency); } else { clustering = .assignToNearestCluster(1-adjacency, labels = clustering, method = unassignedMethod, unassignedLabel = unassignedLabel); } } else { if (nClusters != 1) stop("If given, number of clusters must be 1."); clustering = rep(1, nNodes); } nClusters = length(unique(clustering)); clustering.norm = as.numeric(as.factor(clustering)) propensity.all = rep(NA, nAllNodes); Phat=rep(0.7, nNodes) fact=0 lnorm=0 if(nClusters>1) { Ahat=matrix(0.5, nClusters,nClusters) diag(Ahat) = 1; if (accelerated) { results=.Fortran(.C_propdecompaccel, ADJ=as.single(adjacency), Clustering=as.integer(clustering.norm), Propensity=as.double(Phat), IntermodularAdjacency=as.double(Ahat), Factorizability=as.double(fact), Criteria=as.double(lnorm),Nodes=as.integer(nNodes), Clusters=as.integer(nClusters),L2=as.integer(objectiveFunction=="L2norm")) } else { results=.Fortran(.C_propensitydecomposition, ADJ=as.single(adjacency), Clustering=as.integer(clustering.norm), Propensity=as.double(Phat), IntermodularAdjacency=as.double(Ahat), Factorizability=as.double(fact), Criteria=as.double(lnorm),Nodes=as.integer(nNodes), Clusters=as.integer(nClusters),L2=as.integer(objectiveFunction=="L2norm")) } resultsmod=results[3:5] resultsmod$IntermodularAdjacency=matrix((resultsmod$IntermodularAdj),ncol=nClusters) finalClusters = .translateUsingTable(results$Clustering, .translationTable(clustering.norm, clustering)) } else { results=.Fortran(.C_singleclusterupdate, ADJ=as.single(adjacency), Propensity=as.double(Phat),Factorizability=as.double(fact), Criteria=as.double(lnorm),Nodes=as.integer(nNodes), L2=as.integer(objectiveFunction=="L2norm")) resultsmod=results[2:3] } propensity.all[useNodes] = resultsmod$Propensity resultsmod$Propensity = propensity.all if (objectiveFunction=="L2norm") { resultsmod$L2Norm=results$Criteria } else { resultsmod$Loglik=results$Criteria } meanMat = matrix(NA, nAllNodes, nAllNodes) meanMat[useNodes, useNodes] = matrix((results$ADJ),ncol=nNodes); resultsmod$ExpectedAdjacency=as.dist(meanMat); resultsmod$EdgePvalues = as.dist(t(meanMat)); return(resultsmod) } PropClust/R/blockwisePropClust.R0000644000176200001440000003717013230757624016410 0ustar liggesusers# Block-wise propensity clustering. # Basic input should be an adjacency matrix, a logical indicating whether L2 or Poisson updates should be # used, maximum block size, and most likely some pre-clustering arguments. # The function may run something like hierarchical clustering coupled with dynamic tree cut to determine # initial clusters. Then determine pairwise cluster dissimilarities and cluster the clusters. Then merge the # clusters into blocks of size not exceeding the given maximum size. # The advantage of this is relative simplicity and the fact that I get the blocks and clustering within each # block in one step. The disadvantage is that the pre-clustering may not work all that well. # The function requires WGCNA but the requirement may be removed. #============================================================================================ # # Helper functions # #============================================================================================ .spaste = function(...) { paste(..., sep="") } .checkAdjMat = function (adjMat, min = 0, max = 1) { dim = dim(adjMat) if (is.null(dim) || length(dim) != 2) stop("adjacency is not two-dimensional") if (!is.numeric(adjMat)) stop("adjacency is not numeric") if (dim[1] != dim[2]) stop("adjacency is not square") if (max(abs(adjMat - t(adjMat)), na.rm = TRUE) > 1e-12) stop("adjacency is not symmetric") if (min(adjMat, na.rm = TRUE) < min || max(adjMat, na.rm = TRUE) > max) stop("some entries are not between", min, "and", max) } .translateUsingTable = function(x, translationTable) { translationTable[ match(x, translationTable[, 1]), 2] } .translationTable = function(from, to) { if (length(from)!= length(to)) stop("Length of 'from' and 'to' must be the same."); tab = as.matrix(table(from, to)); if (ncol(tab)!=nrow(tab)) { printFlush("Error in .translationTable: table is not 1-to-1.") if (ncol(tab)<10 && nrow(tab) < 10) print(tab); stop(); } nonZeros.row = rowSums( tab!=0 ) nonZeros.col = colSums( tab!=0 ) if (any(c(nonZeros.row, nonZeros.col) != 1)) { printFlush("Error in .translationTable: table is not 1-to-1.") if (ncol(tab)<10 && nrow(tab) < 10) print(tab); stop(); } unique.from = sort(unique(from)); tt = cbind(unique.from, to [ match(unique.from, from)] ); colnames(tt) = c("from", "to"); tt; } #===================================================================================================== # # .minWhichMin: min() and which.min() of columns in a matrix # #===================================================================================================== # This is a wrapper around my C-level function. .minWhichMin = function(x) { x = as.matrix(x); nc= ncol(x); nr = nrow(x); min = rep(0, nc); which = rep(0, nc); whichmin = .C(.C_minWhichMin, as.double(x), as.integer(nr), as.integer(nc), as.double(min), as.double(which), NAOK = TRUE); cbind( min = whichmin[[4]], which = whichmin[[5]] + 1); } #=================================================================================================== # # .clusterDissimilarity # #=================================================================================================== .clusterDissimilarity = function(dissimilarity, labels, method = c("average", "complete", "single"), unassignedLabel) { levels0 = sort(unique(labels)) levels = levels0 [levels0 != unassignedLabel] nClusters = length(levels); clusterDiss = matrix(0, nClusters, nClusters); distFunctions = c("mean", "max", "min"); useFnc = match.fun(distFunctions[ match (match.arg(method), c("average", "complete", "single")) ]); if (nClusters > 1) { for (c1 in 1:(nClusters-1)) for (c2 in (c1+1):nClusters) clusterDiss[c1, c2] = clusterDiss[c2, c1] = useFnc(dissimilarity[ labels==levels[c1], labels==levels[c2]], na.rm = TRUE); } clusterDiss; } #=================================================================================================== # # .nodeClusterDissimilarity # #=================================================================================================== # To make it easier to use the result, the function returns clusters in rows and nodes in columns. .nodeClusterDissimilarity = function(dissim, labels, levels, method = c("average", "complete", "single"), unassignedLabel) { unassigned = labels == unassignedLabel method = match.arg(method); nClusters = length(levels); nUnassigned = sum(unassigned); ncd = matrix(0, nClusters, nUnassigned); for (c in 1:nClusters) { if (method == "average") { ncd[ c, ] = colMeans( dissim[labels==levels[c], unassigned ], na.rm = TRUE); } else if (method == "complete") { ncd[ c, ] = apply( dissim[labels==levels[c], unassigned ], 2, max, na.rm = TRUE); } else ncd[ c, ] = apply( dissim[labels==levels[c], unassigned ], 2, min, na.rm = TRUE); } ncd; } #=================================================================================================== # # .mergeClustersIntoBlocks # #=================================================================================================== # .mergeClustersIntoBlocks: merge clusters into blocks. This code is adapted from WGCNA function # projectiveKMeans. Assumes the labels are integers with no gaps starting from 1. .mergeClustersIntoBlocks = function(dissim, labels, maxBlockSize, method = c("average", "complete", "single"), unassignedLabel) { distFunctions = c("mean", "max", "min"); useFnc = match.fun(distFunctions[ match (match.arg(method), c("average", "complete", "single")) ]); clusterDiss = .clusterDissimilarity(dissim, labels, method = method, unassignedLabel = unassignedLabel); diag(clusterDiss) = NA; clusterSizes = table(labels[ labels != unassignedLabel ] ); nClusters = length(clusterSizes); clusterNames = names(clusterSizes); if (is.numeric(labels)) clusterNames = as.numeric(as.character(clusterNames)); small = (clusterSizes < maxBlockSize); done = FALSE; while (!done & (sum(small)>1) & nClusters > 1) { smallIndex = c(1:nClusters)[small] nSmall = sum(small); distOrder = order(as.vector(clusterDiss[smallIndex, smallIndex]))[ seq(from=2, to = nSmall * (nSmall-1), by=2)]; i = 1; canMerge = FALSE; while (i <= length(distOrder) && (!canMerge)) { col = as.integer( (distOrder[i]-1)/nSmall + 1); whichJ = smallIndex[col]; whichI = smallIndex[distOrder[i] - (col-1) * nSmall]; canMerge = sum(clusterSizes[c(whichI, whichJ)]) < maxBlockSize; i = i + 1; } if (canMerge) { labels[labels==clusterNames[whichJ]] = clusterNames[whichI]; clusterSizes[whichI] = sum(clusterSizes[c(whichI, whichJ)]); nClusters = nClusters -1; clusterSizes = clusterSizes[-whichJ]; clusterNames = clusterNames[-whichJ]; clusterDiss = clusterDiss[ -whichJ, -whichJ]; for (c in 1:nClusters) if (c!=whichI) clusterDiss[whichI, c] = clusterDiss[c, whichI] = useFnc(dissim[ labels==clusterNames[c], labels==clusterNames[whichI]], na.rm = TRUE) small = (clusterSizes < maxBlockSize); } else done = TRUE; } labels; } #=================================================================================================== # # .assignToNearestCluster # #=================================================================================================== # Caution: if all labels equal unassignedLabel it will return the labels unchanged. .assignToNearestCluster = function(dissim, labels, method, unassignedLabel) { levels0 = sort(unique(labels)); levels = levels0 [levels0 != unassignedLabel]; if (length(levels) > 0) { nodeClusterSimilarity = .nodeClusterDissimilarity(dissim, labels=labels, levels=levels, method=method, unassignedLabel=unassignedLabel); nearest = .minWhichMin(nodeClusterSimilarity); labels[ labels == unassignedLabel ] = nearest[, "which"]; } labels; } #=================================================================================================== # # main user level function propensityClustering # #=================================================================================================== propensityClustering = function(adjacency, decompositionType = c("CPBA", "Pure Propensity"), objectiveFunction = c("Poisson", "L2norm"), fastUpdates = TRUE, blocks = NULL, initialClusters = NULL, nClusters = NULL, maxBlockSize = if (fastUpdates) 5000 else 1000, clustMethod = "average", cutreeDynamicArgs = list(deepSplit = 2, minClusterSize = 20, verbose = 0), dropUnassigned = TRUE, unassignedLabel = 0, verbose = 2, indent = 0 ) { spaces = indentSpaces(indent); .checkAdjMat(adjacency, min = 0, max = max(adjacency, na.rm = TRUE)); objectiveFunction = match.arg(objectiveFunction); nAllNodes = nNodes = ncol(adjacency); useNodes = rep(TRUE, nNodes); decompositionType = match.arg(decompositionType); if (decompositionType=="Pure Propensity") { # Run propensity decomposition on a single cluster that contains all nodes. nClusters = 1; initialClusters = rep(1, nNodes); return( CPBADecomposition(adjacency, clustering = initialClusters, objectiveFunction = objectiveFunction, nClusters = nClusters) ); } if (!is.null(nClusters)) { # If the user supplies nClusters, use internal initialization. useInternalInit = TRUE # Check that the supplied nClusters makes sense. if (!is.finite(nClusters)) stop("The number of clusters 'nClusters' must be finite."); if (nClusters < 2) stop("If given, the number of clusters 'nClusters' must be at least 2."); # The following is necessary so splitting into blocks does not leave out any objects. dropUnassigned = FALSE } else { useInternalInit = FALSE; } # After this step the number of clusters is always non-null and is zero if it originally was NULL. if (!is.null(initialClusters)) { initialClusters = as.vector(initialClusters); if (length(initialClusters)!=nNodes) stop(.spaste("Length of 'initialClusters' must equal the number of nodes\n", " (i.e., number of rows or columns of 'adjacency').")); tree = NULL; } else { dissim = 1-adjacency; # Cluster tree = hclust(as.dist(dissim), method = clustMethod); # Cut the tree cutreeDynamicArgs$dendro = tree; cutreeDynamicArgs$distM = dissim; initialClusters = do.call(cutreeDynamic, cutreeDynamicArgs); unassignedLabel = 0; } if (all(initialClusters==unassignedLabel)) stop(.spaste("All initial cluster labels are 'unassigned'.\n", " Please supply a non-trivial initial clustering\n", " or change the initial clustering arguments so hierarchical clustering\n", " with Dynamic Tree Cut return clusters.")); if (!is.null(blocks)) { blocks = as.vector(blocks); if (length(blocks)!=nNodes) stop(.spaste("Length of 'blocks' must equal the number of nodes\n", " (i.e., number of rows or columns of 'adjacency').")); } if (dropUnassigned) { useNodes = initialClusters != unassignedLabel; adjacency = adjacency[useNodes, useNodes]; initialClusters = initialClusters[useNodes]; if (!is.null(blocks)) blocks = blocks[useNodes]; nNodes = ncol(adjacency); } else { initialClusters = .assignToNearestCluster(1-adjacency, labels = initialClusters, method = clustMethod, unassignedLabel = unassignedLabel); } # Note: past this point initialClusters cannot contain any unassigned labels. if (is.null(blocks)) { if (verbose > 0) printFlush(.spaste(spaces, " ..determining blocks..")); blocks = .mergeClustersIntoBlocks(1-adjacency, labels = initialClusters, maxBlockSize = maxBlockSize, method = clustMethod, unassignedLabel = unassignedLabel); } # This code assumes there are no 0 labels among blocks. blocks = as.numeric(as.factor(blocks)); nBlocks = length(unique(blocks)); blockNodes = list(); blockClusters = list(); blockLevels = sort(unique(blocks)); # Split given initial clusters by block for use below. for (b in 1:nBlocks) { blockNodes1 = c(1:nNodes)[ blocks== blockLevels[b] ]; blockNodes[[b]] = blockNodes1; blockClusters[[b]] = initialClusters[ blockNodes1 ]; } propensityClusters = initialClusters; # Run propensity clustering on each block separately propClusts = list(); if (verbose > 0) printFlush(.spaste(spaces, " ..running propensityClustering in each block with non-trivial clustering..")); for (b in 1:nBlocks) { blockNodes1 = blockNodes[[b]]; if (length(unique(initialClusters[ blockNodes1 ])) > 1 | useInternalInit) { if (verbose > 1) printFlush(.spaste(spaces, " ..running propensityClustering in block ", b)); initClust = initialClusters[ blockNodes1 ] initClust.norm = as.numeric(factor(initClust)); norm2orig = .translationTable(initClust.norm, initClust); if (useInternalInit) { ncl1 = nClusters; } else { ncl1 = length(unique(initClust.norm)); } pc1 = .propensityClustering.internal( adjacency[ blockNodes1, blockNodes1 ], initialClusters = initClust.norm, l2bool = objectiveFunction=="L2norm", nClusters = ncl1, initbool = useInternalInit, fastUpdates = fastUpdates); if (useInternalInit) { # Each block has nClusters clusters, so this calculation is easy pc1$Clustering = pc1$Clustering + (b-1) * nClusters; } else { pc1$Clustering = .translateUsingTable(pc1$Clustering, norm2orig); } propensityClusters [ blockNodes1 ] = pc1$Clustering; propClusts[[b]] = pc1; } else { propClusts[[b]] = NA; propensityClusters [ blockNodes1 ] = initialClusters [ blockNodes1 ]; } } # Run one final propensity decomposition on the full clustering. if (verbose > 0) printFlush(.spaste(spaces, " ..running final propensity decomposition..")); propensity = rep(0, nAllNodes); propensityClusters.norm = as.numeric(as.factor(propensityClusters)); pd = CPBADecomposition(adjacency, propensityClusters.norm, objectiveFunction = objectiveFunction, nClusters = NULL); propensity[useNodes] = pd$Propensity; if (verbose > 0) printFlush(.spaste(spaces, " ..done (propensityClustering).")); blocks.all = initialClusters.all = propClusters.all = rep(0, nAllNodes); propClusters.all[useNodes] = propensityClusters; blocks.all[useNodes] = blocks; initialClusters.all[useNodes] = initialClusters; # Return value c(list(Clustering = propClusters.all, Propensity = propensity, NodeWasConsidered = useNodes), pd, list(Blocks = blocks.all, InitialClusters = initialClusters.all, InitialTree = tree)); } PropClust/MD50000644000176200001440000000135213347130122012512 0ustar liggesusers64d3308d0a5f2528cf024a25aee5c241 *Changelog 9b314da8067752a19148b710dd0b491b *DESCRIPTION b73898f8ed7d80789913dda614649241 *NAMESPACE 5cc981f42a5248bb99a7a7d50ff2aaa2 *R/PropClust-internal.R 89bede3b235b8926cbec891774660a80 *R/blockwisePropClust.R 7a36e7bb4e991b298bd61f7c3851ac61 *man/CPBADecomposition.Rd 4a8f30bb1fe693e9867c8872e66a4bd7 *man/propensityClustering.Rd 93c43294d48b448e1e6737e5274dc00e *src/Makevars d41d8cd98f00b204e9800998ecf8427e *src/Makevars.win 54d809de033d75468fa8ee7f320d194a *src/PropClustParallelTrials.f90 cfb52c57826fb5fbde29df7f957679bb *src/PropClustParallelTrials.h e06fbc5b8dbcdc15b91c03be2f72de7f *src/init.c 92ec7875beefbc219418f9e1d2edb2e4 *src/minWhichMin.c e84116d66b09e028b150e48d0093995e *src/minWhichMin.h PropClust/DESCRIPTION0000644000176200001440000000161513347130122013712 0ustar liggesusersPackage: PropClust Type: Package Title: Propensity Clustering and Decomposition Version: 1.4-6 Date: 2018-09-12 Author: John Michael O Ranola, Kenneth Lange, Steve Horvath, Peter Langfelder Maintainer: Peter Langfelder Depends: R (>= 3.0.0), fastcluster, dynamicTreeCut Imports: stats Description: Implementation of propensity clustering and decomposition as described in Ranola et al. (2013) . Propensity decomposition can be viewed on the one hand as a generalization of the eigenvector-based approximation of correlation networks, and on the other hand as a generalization of random multigraph models and conformity-based decompositions. License: GPL (>= 2) LazyLoad: yes NeedsCompilation: yes Packaged: 2018-09-12 15:49:22 UTC; plangfelder Repository: CRAN Date/Publication: 2018-09-15 07:10:10 UTC PropClust/man/0000755000176200001440000000000013346076527012775 5ustar liggesusersPropClust/man/CPBADecomposition.Rd0000644000176200001440000001337012110633144016510 0ustar liggesusers\name{CPBADecomposition} \alias{CPBADecomposition} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Cluster and Propensity-based Approximation decomposition for adajcency matrixes. } \description{ Given an adjacency matrix and cluster assignments, this function calculates either the conformity factors or the propensities of each node. } \usage{ CPBADecomposition(adjacency, clustering, nClusters = NULL, objectiveFunction = c("Poisson", "L2norm"), dropUnassigned = TRUE, unassignedLabel = 0, unassignedMethod = "average", accelerated = TRUE, parallel = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{adjacency}{ A square symmetric matrix giving either the number of connections between two nodes (for Poisson objective function) or the weighted connections (between 0 and 1) between each pair of nodes. } \item{clustering}{ A vector with element per node containing the cluster assignments for each node. If a single cluster decomposition is desired, an alternative is to set \code{nClusters=1} (see below). } \item{nClusters}{ If the user wishes to input trivial clustering to calculate a "pure propensity" decomposition, this variable can be set to 1. Any other non-NULL value is considered invalid; use \code{clusters} to specify a non-trivial clustering. } \item{objectiveFunction}{ Specifies the objective function for the Cluster and Propensity-based Approximation. Valid choices are (unique abbreviations of) "Poisson" and "L2norm". } \item{dropUnassigned}{ Logical: should unassigned nodes be excluded from the clustering? Unassigned nodes can be present in initial clustering or blocks (if given), and internal pre-partitioning and initial clustering can also lead to unassigned nodes. If \code{dropUnassigned} is \code{TRUE}, these nodes are excluded from the calls to \code{\link{propensityClustering}}. Otherwise these nodes will be assigned to the nearest cluster within each block and be clustered using \code{\link{propensityClustering}} in each block.} \item{unassignedLabel}{ Label in input \code{clustering} that is reserved for unassigned objects. For clusterings with numeric lables this is typically (but not always) 0. Note that this must a valid value - missing value \code{NA} will not work. } \item{unassignedMethod}{ If \code{dropUnassigned} is \code{FALSE}, this argument sepcifies the method to assign unassigned objects to the nearest cluster. Valid values are (unique abbreviations) of "average", "single", and "complete". In analogy with hierarchical clustering, each node will be assigned to the cluster with which it has the highest average, maximum, and minimum adjacency, respectively. } \item{accelerated}{Logical: should an accelerated algorithm be used? In general the accelerated method is preferable. } \item{parallel}{Logical: should parallel calculation be used? At present the parallel calculation is not fully implemented and the function falls back to standard accelerated calculation, with a warning.} } \details{ If a single cluster is specified, the approximation is known as "Pure Propensity". If unassigned nodes are present in the clustering and they are dropped before the CPBA calculation, their propensities, mean values and tail p-values are returned as NA. } \value{ Returns the following list of items. %% \item{clustering }{This is the original clustering assignments given.} \item{Propensity }{Gives the propensities (or conformities) of each node.} \item{IntermodularAdjacency }{Gives the intermodular adjacencies or the conformities between clusters.} \item{Factorizability }{Gives the factorizability of the data.} \item{L2Norm or Loglik}{The L2 Norm (for L2 norm objective function) or the log-likelihood (for Poisson objetive function).} \item{ExpectedAdjancency}{A distance structure representing the lower triangle of the symmetric matrix of estimated values of the adjacency matrix using the Propensity and IntermodularAdjacency. If the Poisson updates are used, the returned values are the estimate means of the distribution. } \item{EdgePvalues}{ A distance structure representing the lower triangle of the symmetric matrix of the tail probabilities under the Poisson distribution.} } \references{ Ranola et. al. (2010) A Poisson Model for Random Multigraphs. Bioinformatics 26(16):2004-2001. Ranola JM, Langfelder P, Lange K, Horvath S (2013) Cluster and propensity based approximation of a network. BMC Bioinformatics, in press. } \author{ John Michael Ranola, Peter Langfelder, Steve Horvath, Kenneth Lange } %\note{ % ~~further notes~~ %} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ propensityClustering } \examples{ nNodes=50 nClusters=5 #We would like to use L2Norm instead of Loglikelihood objective = "L2norm" ADJ<-matrix(runif(nNodes*nNodes),ncol=nNodes) for(i in 1:(length(ADJ[1,])-1)){ for(j in i:length(ADJ[,1])){ ADJ[i,j]=ADJ[j,i] } } for(i in 1:length(ADJ[1,])) ADJ[i,i]=0 Results<-propensityClustering( adjacency = ADJ, objectiveFunction = objective, initialClusters = NULL, nClusters = nClusters, fastUpdates = FALSE) Results2<-CPBADecomposition(adjacency = ADJ, clustering = Results$Clustering, objectiveFunction = objective) Results3<-propensityClustering( adjacency = ADJ, objectiveFunction = objective, initialClusters = NULL, nClusters = nClusters, fastUpdates = TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ misc }% __ONLY ONE__ keyword per line PropClust/man/propensityClustering.Rd0000644000176200001440000002277313346076527017553 0ustar liggesusers\name{propensityClustering} \alias{propensityClustering} \title{ Propensity clustering } \description{ This function performs propensity clustering that assigns objects (or nodes) in a network to clusters such that the resulting Cluster and Propensity-based Approximation (CPBA) of the input adjacency matrix optimizes a specific criterion. Large data sets on which standard propensity clustering may take too long are first optionally split into smaller blocks. Propensity clustering is then applied to each block, and the clustering is used for the final CPBA decomposition. } \usage{ propensityClustering( adjacency, decompositionType = c("CPBA", "Pure Propensity"), objectiveFunction = c("Poisson", "L2norm"), fastUpdates = TRUE, blocks = NULL, initialClusters = NULL, nClusters = NULL, maxBlockSize = if (fastUpdates) 5000 else 1000, clustMethod = "average", cutreeDynamicArgs = list(deepSplit = 2, minClusterSize = 20, verbose = 0), dropUnassigned = TRUE, unassignedLabel = 0, verbose = 2, indent = 0) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{adjacency}{Adjacency matrix of the network: a square, symmetric, non-negative matrix giving the connection strengths between pairs of nodes. Missing data are not allowed. } \item{decompositionType}{Decomposition type. Either the full CPBA (Cluster and Propensity-Based Approximation) or pure propensity, which is a special case of CPBA when all nodes are in a single cluster.} \item{objectiveFunction}{Objective function. Available choices are \code{"Poisson"} and \code{"L2norm"}. } \item{fastUpdates}{Logical: should a fast, "approximate", propensity clustering method be used? This option is recommended unless the number of nodes to be clustered is small (less than 500). The fast updates may lead to slightly inferior results but are orders of magnitude faster for larger data sets (above say 500 nodes). } \item{blocks}{ Optional specification of blocks. If given, must be a vector with length equal the number of columns in \code{adjacency}, each entry giving the block label for the corresponding node. If not given, blocks will be determined automatically. } \item{initialClusters}{ Optional specification of initial clusters. If given, must be a vector with length equal the number of columns in \code{adjacency}, each entry giving the cluster label for the corresponding node. If not given, initial clusters will be determined automatically. The method depends on whether \code{nClusters} (see below) is specified. } \item{nClusters}{Optional specification of the number of clusters. Note that specifying \code{nClusters} changes the cluster initialization method. If nodes are split into blocks, the number of clusters in each block will equal \code{nClusters}, and the total number of clusters will be \code{nClusters} times the number of blocks.} \item{maxBlockSize}{Maximum block size.} \item{clustMethod}{Hierarchical clustering method. Recognized options are "average", "complete", and "single". } \item{cutreeDynamicArgs}{Arguments (options) for the \code{\link[dynamicTreeCut]{cutreeDynamic}} function from package \code{dynamicTreeCut} used in the initial clustering step. Arguments \code{dendro} and \code{distM} are set automatically; the rest can be set by the user to fine-tune the process of initial cluster identification. } \item{dropUnassigned}{ Logical: should unassigned nodes be excluded from the clustering? Unassigned nodes can be present in initial clustering or blocks (if given), and internal pre-partitioning and initial clustering can also lead to unassigned nodes. If \code{dropUnassigned} is \code{TRUE}, these nodes are excluded from the calls to \code{\link{propensityClustering}}. Otherwise these nodes will be assigned to the nearest cluster within each block and be clustered using \code{\link{propensityClustering}} in each block.} \item{unassignedLabel}{ Label in input \code{blocks} and \code{initialClustering} that is reserved for unassigned objects. For clusterings with numeric lables this is typically (but not always) 0. Note that this must a valid value - missing value \code{NA} will not work. } \item{verbose}{ Level of verbosity of printed diagnostic messages. 0 means silent (except for progress reports from the underlying propensity clustering function), higher values will lead to more detailed progress messages. } \item{indent}{ Indentation of the printed diagnostic messages. 0 means no indentation, each unit adds two spaces. } } \details{ If \code{initialClusters} are not given, they are determined from the adjancency in one of the following two ways: if \code{nClusters} is not specified, the initialization uses hierarchical clustering followed by the Dynamic Tree Cut (see \code{\link[dynamicTreeCut]{cutreeDynamic}}). Arguments and options for the \code{\link[dynamicTreeCut]{cutreeDynamic}} can be specified using the argument \code{cutreeDynamicArgs}. Some nodes may be left unassigned and their handling is described below. If \code{nClusters} is specified, an internal initialization algorithm based on connectivities is used. This second algorithm assigns all nodes to a cluster. If \code{dropUnassigned} is \code{TRUE}, nodes left unassigned by the clustering procedure are excluded from the following calculations. If \code{dropUnassigned} is \code{FALSE}, nodes left unassigned by the clustering procedure are assigned to their nearest cluster, using the clustering dissimilarity measure specified in \code{clustMethod}. In the next step, if the total number of nodes exceeds maximum block size, the initial clusters (either given or those automatically determined by hierarchical clustering) are split into blocks. Clusters bigger than maximum block size \code{maxBlockSize} are put into separate blocks (one cluster per block). Clusters smaller than maximum block size are placed into blocks such that the block size does not exceed \code{maxBlockSize} and such that clusters with high between-cluster adjacency are placed in the same block, if possible. The between-cluster adjacency is consistent with \code{clustMethod}. Note that for the purposes of splitting data into blocks, hierarchical clustering is always used. If the internal initialization of clusters is used, it is applied within each block and idependently of all other blocks. Next, propensity clustering is applied to each block. More precisely, propensity clustering is applied to the subset of nodes in each block that is assigned to an initial cluster. Some nodes may not be assigned to initial clusters and these nodes are excluded from propensity clustering. Once propensity clustering on all blocks is finished, propensity decomposition is calculated on the entire network (excluding unassigned nodes). } \value{ List with the following components: \item{Clustering}{The final clustering. A vector of length equal to the number of nodes (columns in \code{adjacency}) givig the cluster labels for each node. Clusters are labeled 1,2,3,... Label 0 is reserved for unassigned nodes.} \item{Propensity}{Propensities (or conformities) of each node.} \item{NodeWasConsidered}{Logical vector with one entry per node. \code{TRUE} if the node was part of the propensity clustering and decomposition (recall that unassigned nodes are excluded).} \item{IntermodularAdjacency}{Intermodular adjacencies or the conformities between clusters.} \item{Factorizability}{Factorizability of the data.} \item{L2Norm or Loglik}{The L2 Norm or the loglikelihood depending on l2bool.} \item{MeanValues}{A distance structure representing the lower triangle of the symmetric matrix of estimated values of the adjacency matrix using the Propensity and IntermodularAdjacency. If the Poisson updates are used, the returned values are the estimate means of the distribution. } \item{TailPvalues}{ A distance structure representing the lower triangle of the symmetric matrix of the tail probabilities under the Poisson distribution.} \item{Blocks}{Blocks. A vector with one component for each node giving the block label for each node. The blocks are labeled 1,2,3,...} \item{InitialClusters}{The initial clusters. A copy of the input if given, otherwise the automatically determined initial clutering. } \item{InitialTree}{The hierarchical clustering dendrogram (tree) used to determine initial clusters. Only present if the initial clusters were not supplied by the user.} } \references{ Ranola et. al. (2010) A Poisson Model for Random Multigraphs. Bioinformatics 26(16):2004-2001. Ranola JM, Langfelder P, Lange K, Horvath S (2013) Cluster and propensity based approximation of a network. MC Syst Biol. 2013 Mar 14;7:21. doi: 10.1186/1752-0509-7-21. } \author{ John Michael Ranola, Peter Langfelder, Kenneth Lange, Steve Horvath } \seealso{ \code{\link{CPBADecomposition}} for propensity decomposition; \code{\link[stats]{hclust}} for the hierarchical clustering function, \code{\link[dynamicTreeCut]{cutreeDynamic}} for the dynamic tree cut to identify clusters in a dendrogram } \examples{ # Simulate 50 nodes in 5 clusters nNodes=50 nClusters=5 # We would like to use L2Norm instead of Loglikelihood objective = "L2norm" ADJ<-matrix(runif(nNodes*nNodes),ncol=nNodes) ADJ = (ADJ + t(ADJ))/2; diag(ADJ) = 0; results<-propensityClustering( adjacency = ADJ, objectiveFunction = objective, initialClusters = NULL, nClusters = nClusters, fastUpdates = FALSE) table(results$Clustering) } \keyword{ cluster }% __ONLY ONE__ keyword per line \keyword{ misc }% __ONLY ONE__ keyword per line