gclus/0000755000176200001440000000000013414727711011376 5ustar liggesusersgclus/inst/0000755000176200001440000000000013414703624012350 5ustar liggesusersgclus/inst/doc/0000755000176200001440000000000013414703624013115 5ustar liggesusersgclus/inst/doc/gclus.R0000644000176200001440000000363213414705166014364 0ustar liggesusers## ----setup, include = FALSE---------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ------------------------------------------------------------------------ library(gclus) data(longley) longley.cor <- cor(longley) longley.color <- dmat.color(longley.cor) ## ----fig.width=5, fig.height=5, fig.align='center'----------------------- par(mar=c(1,1,1,1)) plotcolors(longley.color,dlabels=rownames(longley.color)) ## ----eval=F-------------------------------------------------------------- # longley.color <- dmat.color(longley.cor, byrank=FALSE) # longley.color <- dmat.color(longley.cor, breaks=c(-1,0,.5,.8,1), # cm.colors(4)) ## ----fig.width=5, fig.height=5, fig.align='center'----------------------- par(mar=c(1,1,1,1)) longley.o <- order.hclust(longley.cor) longley.color1 <- longley.color[longley.o,longley.o] plotcolors(longley.color1,dlabels=rownames(longley.color1)) ## ----fig.width=5, fig.height=5, fig.align='center'----------------------- par(mar=c(1,1,1,1)) cpairs(longley, order= longley.o,panel.color= longley.color) ## ----fig.width=8, fig.height=3, fig.align='center', out.width="100%"----- cparcoord(longley, order= longley.o,panel.color= longley.color, horizontal=TRUE, mar=c(2,4,1,1)) ## ----fig.width=6, fig.height=4, fig.align='center'----------------------- par(mar=c(1,1,1,1)) data(eurodist) dis <- as.dist(eurodist) hc <- hclust(dis, "ave") plot(hc) ## ----fig.width=6, fig.height=4, fig.align='center'----------------------- par(mar=c(1,1,1,1)) hc1 <- reorder.hclust(hc, dis) plot(hc1) ## ----fig.width=8, fig.height=3.5, fig.align='center'--------------------- layout(matrix(1:2,nrow=1,ncol=2)) 
par(mar=c(1,6,1,1)) cmat <- dmat.color(eurodist, rev(cm.colors(5))) plotcolors(cmat[hc$order,hc$order], rlabels=labels(eurodist)[hc$order]) plotcolors(cmat[hc1$order,hc1$order], rlabels=labels(eurodist)[hc1$order]) gclus/inst/doc/gclus.html0000644000176200001440000144333213414705166015135 0ustar liggesusers Clustering Graphics

Clustering Graphics

Catherine Hurley

2019-01-07

This package will order panels in scatterplot matrices and parallel coordinate displays by some merit index. The package contains various indices of merit, ordering functions, and enhanced versions of pairs and parcoord which color panels according to their merit level. For details on the methods used, consult “Clustering Visualisations of Multidimensional Data”, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004.

Displaying a correlation matrix

library(gclus)
#> Loading required package: cluster
data(longley)
longley.cor <- cor(longley)
longley.color <- dmat.color(longley.cor)

dmat.color assigns three colours to the correlations according to the correlation magnitude. High correlations are in pink, the middle third are in blue, and the bottom third are in yellow.

par(mar=c(1,1,1,1))
plotcolors(longley.color,dlabels=rownames(longley.color))

If you want to change the colour scheme:

longley.color <- dmat.color(longley.cor, byrank=FALSE)
longley.color <- dmat.color(longley.cor, breaks=c(-1,0,.5,.8,1), 
                            cm.colors(4))

The plot is easier to interpret if variables are reordered prior to plotting.

par(mar=c(1,1,1,1))
longley.o <- order.hclust(longley.cor)
longley.color1 <- longley.color[longley.o,longley.o]
plotcolors(longley.color1,dlabels=rownames(longley.color1))

Displaying a pairs plot with coloured panels

cpairs is a version of pairs where panels can be coloured. All the high-correlation panels appear together in a block.

par(mar=c(1,1,1,1))
cpairs(longley, order= longley.o,panel.color= longley.color)

If the order is not supplied, then the variables are plotted in default dataset order.

Displaying a PCP plot with coloured panels

cparcoord is a version of parcoord where panels can be coloured. Again, the pink panels have high correlation, blue panels have middling correlation, and yellow panels have low correlation.

cparcoord(longley, order= longley.o,panel.color= longley.color, 
          horizontal=TRUE, mar=c(2,4,1,1))

Plotting re-ordered dendrograms.

eurodist is a built-in distance matrix giving the distance between European cities.

par(mar=c(1,1,1,1))
data(eurodist)
dis <- as.dist(eurodist)
hc <- hclust(dis, "ave")
plot(hc)

reorder.hclust re-orders a dendrogram to improve the similarity between nearby leaves. Applying it to the hc object:

par(mar=c(1,1,1,1))
hc1 <- reorder.hclust(hc, dis)
plot(hc1)

Both dendrograms correspond to the same tree structure, but the second one shows that Paris is closer to Cherbourg than to Munich, and Rome is closer to Gibraltar than to Barcelona.

We can also compare both orderings with an image plot of the colors. The second ordering seems to place nearby cities closer to each other.


layout(matrix(1:2,nrow=1,ncol=2))
par(mar=c(1,6,1,1))
cmat <- dmat.color(eurodist, rev(cm.colors(5)))
plotcolors(cmat[hc$order,hc$order], rlabels=labels(eurodist)[hc$order])

plotcolors(cmat[hc1$order,hc1$order], rlabels=labels(eurodist)[hc1$order])

gclus/inst/doc/gclus.Rmd0000644000176200001440000000726213414704627014711 0ustar liggesusers--- title: "Clustering Graphics" author: "Catherine Hurley" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Clustering Graphics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This package will order panels in scatterplot matrices and parallel coordinate displays by some merit index. The package contains various indices of merit, ordering functions, and enhanced versions of pairs and parcoord which color panels according to their merit level. For details on the methods used, consult "Clustering Visualisations of Multidimensional Data", Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. ## Displaying a correlation matrix ```{r} library(gclus) data(longley) longley.cor <- cor(longley) longley.color <- dmat.color(longley.cor) ``` `dmat.color` assigns three colours to the correlations according to the correlation magnitude. High correlations are in pink, the middle third are in blue, and the botom third are in yellow. ```{r fig.width=5, fig.height=5, fig.align='center'} par(mar=c(1,1,1,1)) plotcolors(longley.color,dlabels=rownames(longley.color)) ``` If you want to change the colour scheme: ```{r eval=F} longley.color <- dmat.color(longley.cor, byrank=FALSE) longley.color <- dmat.color(longley.cor, breaks=c(-1,0,.5,.8,1), cm.colors(4)) ``` The plot is easier to interpret if variables are reorded prior to plotting. ```{r fig.width=5, fig.height=5, fig.align='center'} par(mar=c(1,1,1,1)) longley.o <- order.hclust(longley.cor) longley.color1 <- longley.color[longley.o,longley.o] plotcolors(longley.color1,dlabels=rownames(longley.color1)) ``` ## Displaying a pairs plot with coloured panels `cpairs` is a version of `pairs` All the high-correlation panels appear together in a block. 
```{r fig.width=5, fig.height=5, fig.align='center'} par(mar=c(1,1,1,1)) cpairs(longley, order= longley.o,panel.color= longley.color) ``` If the `order` is not supplied, then the variables are plotted in default dataset order. ## Displaying a PCP plot with coloured panels `cparcoord` is a version of `parcoord` where panels can be coloured. Again, the pink panels have high correlation, blue panels have middling correlation, and yellow panels have low correlation. ```{r fig.width=8, fig.height=3, fig.align='center', out.width="100%"} cparcoord(longley, order= longley.o,panel.color= longley.color, horizontal=TRUE, mar=c(2,4,1,1)) ``` ## Plotting re-ordered dendrograms. `eurodist` is a built-in distance matrix giving the distance between European cities. ```{r fig.width=6, fig.height=4, fig.align='center'} par(mar=c(1,1,1,1)) data(eurodist) dis <- as.dist(eurodist) hc <- hclust(dis, "ave") plot(hc) ``` `reorder.hclust` re-orders a dendrogram to improve the similarity between nearby leaves. Applying it to the `hc` object: ```{r fig.width=6, fig.height=4, fig.align='center'} par(mar=c(1,1,1,1)) hc1 <- reorder.hclust(hc, dis) plot(hc1) ``` Both dendrograms correspond to the same tree structure, but the second one shows that Paris is closer to Cherbourg than to Munich, and Rome is closer to Gibraltar than to Barcelona. We can also compare both orderings with an image plot of the colors. The second ordering seems to place nearby cities closer to each other. 
```{r fig.width=8, fig.height=3.5, fig.align='center'} layout(matrix(1:2,nrow=1,ncol=2)) par(mar=c(1,6,1,1)) cmat <- dmat.color(eurodist, rev(cm.colors(5))) plotcolors(cmat[hc$order,hc$order], rlabels=labels(eurodist)[hc$order]) plotcolors(cmat[hc1$order,hc1$order], rlabels=labels(eurodist)[hc1$order]) ``` gclus/NAMESPACE0000755000176200001440000000042313414672620012615 0ustar liggesusers# Default NAMESPACE created by R # Remove the previous line if you edit this file # Export all names exportPattern(".") # Import all packages listed as Imports or Depends import( cluster,compiler ) S3method(reorder,hclust) import(graphics) import(stats) import(compiler)gclus/data/0000755000176200001440000000000012662576360012315 5ustar liggesusersgclus/data/bank.RData0000755000176200001440000000332711772076200014142 0ustar liggesusersBZh91AY&SYCnMH]$]`ߤI I4I$I$IfI$I$I ` o ݜt WvcnwwOUHBP~)*zzFh6Pz@ L4hhJyJUި2`&0LLh 10`h1 Hj4@)=MFjz0S@v7](C88"o%~FrZRQ(IP~Kv}8ޚވvlqb.Hժ.+$UcU|ZI,]k昀*8(jЊRABߖno*>4q<2߮bx:^s@ 4TL7 V  @d+!Efj)H]PD# M áHV,X)XRl5e)bJm I1RS4Q%3(TM!eEiQ+L+UUVi-)K#VYFJ88;p8QRL䪒5]C[SXXf%T 4P΁%DRJXZHdiFi`PLEj_V!ERHo9͛6lٵ0UT#i9s*NѤLi3TaQFTaQ9999ֹ$ILBbDs9΅$ 0Ģ.I% "vf7KUD\QEQcM+N?koIyJ*<IDDM&&xyoMʙ+vM\ߪ `K\c"{^(WoFݠڏ{$I5F1cƥU]Ps9UUAd:#ZֵkI$jg9sꪪs9ΪtZF;SH.a$ Y9RE\E7Z%JbWEܜԇ'J酨EӲ0 ҂vV%D44 jK;HIfF’N2;E`W(Hp9Rnn;T4B#n9F!;jVflC99S;n;(mid%N⎸NYk( Pd):t-U*dFʤupQrTl4MtriҤWewMյd\҂s)) ArVҡ,6i+bŜ$QY"YuKPf `BTB@$$w1Ovm(Z"Cd s,Ϟi `:~sGo|(r+# nJ"*@NMK2-~mk IQ $?;xv1RE>)7"bE[LZ]}c\˫^9U+n ܌˓%k _s@ Ϻw}"'(M%2(jwr5) 3@`XM03(6- '㎠[-*\GEoq9uG|"qL`W(gw[6?ˆ/"gXc\fnwU M]+kX aX`)!`hx̄0+fcHLM$Bj0vfVpy,]*UWw 85, ^JEbA*hr:rz"'zXs H bp10IsE2d 4=aRm"= iVD91.ԥխZ*X6ȈoNwټZ !_@JCt 9'Lē@ H2b\{sU⽾#+U¥w!IJ5u-%"LTnkSxF"3r\'\rdf6eɃ=a|:nޓ"]u{˦qkޝUluwݯ: cu0Q[:?[39eV"ոob۩mjG9skhʿĖlZIg] ٺFŻ"3$:yp:N٧)>jtLWOt>./oVoa&n;=1`%3,FhjW&CD<G WQ_f%D *x-47} /ۧ5Ts7yxu`Ywu7m]b!_iZuaĉ~.=e CӼΦ}-^|MEkjBlIqn%2%V0vW/{! -1*sL2~CsM ~Hr׋n+ fj̡9֓WC`ƒi{t!d2XY$%-0!e@pmS{)ڣJ LkK%9ϴi+uRN9/ 8jumi&8,}! 
^O\}JVDuZa 8vX٘}Cm!Jtk@[9قӥlhuW|g H귅->[U-U&5&cdCО6Wg8])/;bvq|c'IP+ 7)-VlK*ŊvtZQDX(ees,؝)6En*T[ILR)&ppqd hGtP& Z_veckTvnw؛ =^UӶ _›L"t`xwJ_Sy~ `{=J}T6)&L^f_| YƏ5+v "n_oVdf4ۓ0HNf# Q3@mĬG[WL^Z:MB1.h:@{C1W\cDN.m^ѷr 70!-jk o>pӫ:IAĉU/豪.A`yj'd!ɧ-VMɹӟ4ɇ.͓g=g1RXŋeFDnE7i0.=YohM#յuhHzD>=cwDB;o+ dl@v2[-QUʌ~ؠȘ03),@&r`+eAbH4(ȅ@$iMX!HZx&#"fs_CaX\UhF0:0JF=iy!)mVi<*#іg). `5Dȷ+W-&[cHۓpXZQFY_-@"#¡$Lē,LD" +χU1 3AD#`K 0 !%R͙TMA"XH@;,P>"*r6,P] }9ܐOA Za7Ơsr U  B'P@HGT\EXJ* _]P` !'#۽j"l8(B:;95 q1.OZeDfX^BV/]=Q?B`X\,:==ސ1 d[rHzoT,i>O)J/)]@$KaÆ裸JkZֵkZc]0BI!$H@I$ $I $B 9ֵkP+)]viJNs $BI!$H@I$])s9WJӜkZְ+x@,D^@zq@@   I$I$I$c!DDF/"""""""""""""4skZֵ (Lc)O<kk[Z)JW̰akZҺ]ƺZֵjI$I$I$1~)x20+Ȧqni)ks(s!P4TCPC{smem >@3Aۈo'kA (^QKݢ񧭁1FG<$@ (e, ̨2X# L,AXb\㢆QH(h3;8Dy1A4hf 3EҐf )ƙ=҈%43JB("ISҽ*sO pJ! 0UN DDaJ a2 **N{j^ַ^ݝmkE@А1K h Z-qBH7 ˎ|wv`[i[QH/Ȑ<v(8JH0 g}BVnbaJ A}|.( 2T(s4AR;UP߹ EJ1 x%MT2sMjcR?1Y)d2L \}{/ 诋7Y_ky>Lɷ9ay)1͈$aܔDl *(d ?j8Pk'kPq͜c}Y#\rdd /f=ݞ O>Mj +̈驺pw][]n * .F0W 3>mb3ˬw("6f Gum&woyV>gCRG&LsPy Gflirw|/%)1w^4kѼR!49/i{j>+Fˮk0l[f)y 0AGbk7VVf*3b̾}ݼ<9KW u9X\6&˺:FO:NTLRU71AdY Bp)JAںBuWk,_[7\ÞO8L]s[8QLέLj9):Ef( `:(kL1 uIJtH4L c2+k&)i2HA_mcGj@-}CDE(B !D22"}3s\Γ:C<d*ѿL[*2i{o M~L1˔L4ܪp$=(ڂ5P$+ԒU^PJA:22(QVPV Afa" d!(I$d`ƕAʄ 0hHAREbF2 )IJDEDE"UP "ʑD$ `I%"dFA%H$T D$B-Bh,R1T A H( H"B'U8J  B"_P!_ljS_ɴ:YӤMs+.ns7RAH \}$`&*\!N9mZтn? @/y`WX_J'p&QfIR!fBHC79|$?%BfNPⰕv'ar G Ad5p{}zAq(? 
$2z J3 .mҫ¯#/0.\,*|ˠHfZ)eYeY(ieY+3HQ$(*օ!dHY"dDfDTH1iJN")eU%)JRjeYRI%B[@*yLکi.\8it.ZzCbN4km0ok^,(Tdc4GD]׮Ht)JUU!` ;Q yCPVUdZ88F>d!jd$ ;ґm|]/BtY (jW:*5-ziuCk$+Ej!DsiHh^ftBa#5A'ïivRt$k&f}7= SF8$uvC$NUlSzZ: ueS]t.ZGYw G0EQ}Y0_U5+$Lʎa٥_\iHmKQ-eψbم cZƚk^ t٫¿yJnpt`5=}HګQI \;q7 v|wT-ZegJFZ58dYÂ-wMڷRf׍-^EvaJec#*c0oY*ex l ʠq-uP]+t/GTH8E;:j%v!2|\(u~)wm >OjVِ\fI -êd ]YKՌ=Xe Tq/g}MvWKIK;RUabn7欼ɦToH9>q1v|X> G>ΒOFW(Qk;+U 7 ld9ypӍoϊSL[c],B0}VSB)ê~oٵTmOsݨǦ{i*R, Qx07:?5l'ytHݴzQYGp$(o]^{mPԙ3enagΎAS3vΑg&V[YeFЃlR͑[fpILf l-Leq^wfV8]H]GcP4=ϕ6aWNikorB͜^ 'hzCIr7R'REg&N HA"m9`*V5޴w0%U%Bs|BsdbNUb:Pb>~;hKMz`kv=wٻ#Fe GI 휳m@k9v/Uݻe7Ŧ tI ٳl\XxSK25r;̐K8fZqFHY%5*҈-Qiijʑ *("1$m TQJE@FiAUiUAUFRV"-P(()T"R%"mc/okr)I=-%+*)YBd6  ٺ8.F!gr$t$藮} ׏A^׵A!WyE$mi DhP0@rt 8Iۃ. IrtUNU`+Lҝ.Pfx-a(%[nqcRfPs4s` $y`yKȿKk2.XAj7UXËFK[I#PT*YaGTZGbh4xHP._1(ӈ,U3x TJTik:ZE8I "}G~֎_UG0YQ=uN!tEMnv\|hG {(,X)fG9vbMD]݁i:x{ߦmOO,գm*I "E$EAJ5.=øB{kV-$ei}j6R ^JBc[_r0No(2z:`L eM33"]PY0HfbRNR( W]?bl|\=ߚCh0H0uufFu:@ lOFd)am#2xȈPdUj5`.  6n(?-q/* ςl  @ SUfVOy^Tho! 
e-5e+cbǙ]{_30*c؝޵S@7/)8 :՝[ͨ 4g]\MlHHH$X^"%T(e]e^Nb2l5;c NOvlY`Q]AW"FФ^viR "I]B@Tgclus/data/body.RData0000755000176200001440000003426011772076200014164 0ustar liggesusers7zXZi"6!XO8q])TW"nRʟX^#&'ƯN6p>ՀNߎiOB9#5J?5Xedxy \c,pQL4H^.;Zc|> Z%_XO2Gs+;K '>srvc7GZ2|~4S%?n/حLrGa@{)+ T!\6jbES/H1SNԈU,> w }tg8tjkQ!zuT" ,b6砼\C=L–o B$w&3 ]hX{C*VkiH@x_ͦrMR'EZЫ hz4;1[fDN-dЪDc8;u/nѴXr@fN$'  \ds~"}OMXRS٭{Z9Uq=knΐ9zE!cP Ť_!%ncFɆAs P?1ihlvdBGI~mRכ"OXfX#6ӗՋ=>ӭ[4|>JYE d@PF DT`bO(5.DS^2XS|]]{ u5mE_զPW'{:K31sh׃)\yP _+Wz`^(\ӻu`[LXrs]{[~;fCt]GK79=ucřuݥyujhnlL %#ǯ啷-e qQˁ&0}6k@Ia& jXb߳ܰx#5\1UplP;o4O}joJz5 sybgٶs+\~i'ss;N]DEԱBbS $,&: H,~':vCN}c*u'C=E殮<n5v Z+VEе!84W]W$ k/ń7r{>!v&ُ8|gƑg VP(V߱.aI>A J[Qm^}b6{R HCRILOMF\< Iށ5)1ij0Ik05CEcy!i$F^g8suNkʼf݇YÉ_{u4B:EPzJg5NϺ5!Zp*\"onoʊ؜_*,hHO˱-2#[\BC;8i vg]دApra EGK2=974$ɷ> ,) ,rRHO/Ql κ4w$\77i^Y2r}KIc#P6}y;cQ Vi` ^Wsޒ:,As4xӫ'\byDQPbTR\(Tq˿o1(S 1G捹7vTgf]Qѝ@N-LUi48.'KQBwPVZNZKi[1 ĎE6|>>h\ c1N0JBtD;UW59J-nT ygx*I2Q ~a~ɸ5[3Pd1 Y2E裶 ]6n$1r0z 'F䘤*E`` gZ&*_j)uO>#V[!{:]6yVQ (0KϛFxLG):4@a1O[g_ :~[D UG* kv,5S/S)Ȃ[QQߞ5$H@@Jl{7Sp {^+~̧R`j~%mopOwnX8mr4QH`TXhzp0wdzq}$x?vϾG0i#]ew**א&(۷࿝l5Υ Kۦ߳YpFrnlWpMX-t=6K1Tӝ︟y7 ۆڥ0ΉvC9B!|%nVI H}ƀȜrO~zR8å:dloSȭH=oOʅ.1)T}i*TXW(D@fJ zdʰf"N;)Ko~X $e 3՜_`Lazs{yx%la%R 6xqi qpuIax 5TKfdI(xSaMVf˝ʶڢOQ8>ʑ D/n;@:tOQ${`~CVN+<8oNtb^]4 TsJUL0q %ocq{R& Yi^ꮯuפ8I&t9p%c /D?Yy>|OaeCKT#"N GYs/ BG_>ı%IBwfIQ.v{3ږ> amlzH:˝77N3ah2t(xL sh=dN*XV ;c+ŋ}7![ i؂VS^KI),,nC t[?YgӮn 4{퍑xDX#L㲔^pP?t!~YV7UCˮ쏘biL(0u*UI +$[K@+Jt5Pھ_+~}\Ɣ` z} s[H*Quڲ*"4^񁽛vmrT -ΘԻ*w){<á+flяUq/VڹO^*W{(:}ˡpqu7Ti$c7~:v'y+of(K`̓5E= Y/3O7v0-Κa(":s&=JPwD_͔:&}aWw`R? 1nI7+aC7ݢNa6׮Zp;"Ġ:%Mc +[fhy$O>?rוci[̻; oƫ1}F6Hfq$t!V+z%]JZ{53t՘̟M*"A| #gUuN*izOH,HD}<&Jz!&* ]sAO4 *[1sacT[!{eWS)9Ayݼ~zU6r4 E@BVvSɔwHs+0O M-9b)4w}mۋ?(:x?2gf<-Į3YyI*)~DNHE>H&F9FPѝ-S?ֲBD`@P3DT2/NFASoW:rK)aWm4o;#`i1Z/zX#7b'عybtBhk= cZ,U {oP|+MGPUg'~'3;4F;By3ҤGh Rq̣\_%M*Wٷ 5 8VP*'\yPoQ,6%GZzhhCpOIMTo"M+.C"zWwwE x;. 
6?nEq2o_i]"j9e`܈nhmBlBXBڌ ECA@61 egة5B{u}*ry&2:scDfF~GZN^q6N ExQCT]Zy{<ip3{k`Y*5RD-3-ZͲ{lU=B,`8ܦ3]')9\υVZih "[>M1ʞA\*7'+r :L8]4t" 83&3oqsf"C^L:fʩ#J.67RxGXq}]mm兌57%A7;wA `X%D,mH⻍cڜVy0GGOhӌ5%|(itpyCUFv.U!Q7?&D^h)_,b6{,%|_v$:޶*~џ\4"&\6Z{3`7װbJk@V>%+Y oȦG?ٵO?[i y;cQ/3&iABz7c-$7o@XωjsKUN͆|,rkK&`q_7i<,Lx Urvޱa7; n-UDDaSA>麠gϚES]u8z+x:.r88kt!ΈFTod {NaM_L]_%~6~"lEu<w7AУlq~%x " Hnݑa۞-x]&*-hD@Q;wT.*|[`/R>,7i>{jfҕrj֬:'7jlwǻȗ/<[ g,IH3옼>3{?],i ar\>s=SWRnk#%{Y}\-TunkPG# Iw|]z=sir"I 4/Rɀ-&۹3tmzͿeb6|H?(Rg"iA'Q+;v8A,ru#sy5*ZL&XW >L_MO$[C?0B޴D >݄Uԃ}< D;3 AaLeq`]VQzi{ 297z**)ՊysqzH1Vߢϋ~v >55EyS"R~vLc8$˩5KJʴ-j J-L\ '@rbjJ--fI2>^v)O}{HOZPἩ,x3H\QPscY=8$ DeyJm.S>4Q_ '+_' ߢ=GgAan 崩w}Os` OKct?6;n?1B˚xTT(*j]ϿyHgqoŸIJҢL,2(@^ *n#&k2qÇ\; "Ut ޷<50gqR3[J6bR&o\Wn 8 [ЗOZ-oǏKEa <*?i e7 렂pSm01lv*.cl눏?Reo]M'gW>U H40fZU-ouLgpEv0ä#٭3F  քթ)5 ~~VUkLsy) N>ՒObk?:VhiL‚=^CٮVu_Jo +%89+ @wBsXskUK' H!=,=yaMm/ C3YReO+8-̤*NiIyJ@Q/@MYX]|c }S ` jPr:ĺTq(tЗv<`uo,:zLk_t]F|REŬcxI8֗RԾd_('utWʹ$-ڻB@xC.}S',2x1߬oL< hG13~>}=WZ X9cHsN"m-/\W UC?>5FF4b7Ec%SȻ-=d9#sEPK+?.7C;<7üQ`CwzIǩ#Wkbȳv=pJ%RO)VV`(~RH^`SY{"/?/Xcޏ?>"f!FW$輛PgiVOMioEb23\,.Y5ϵ. dn8ϔ[O=>;%Xso>ίp% j%_Ɋz^ "&1@G`f2-ęS( ҈bBmWF t 'N5h!;A47tSc w&?x$NSU+V2̳3Vcm7V?̀iV(XEɶ'GH7:2Om|b5N M ?qx,zS LKw}⤹cwzOBTMx u}MAR)0/JHȺd.諓#@y^enyZO2|m \u;"@ٓRb*ء-æ[}PE葏 (i*_r=pJ$BߦCbx!ei|CXMG$ڹQ(gb/6EQJxINOAHj1o'. /M9fBބ~kXX~]_GJ@dode+K1|o}pqFc VBܳ7ĜMGbdDLߕ_0 .z  @M~:cOo8 8!"u_QyV%  ܊Gtpggsy YKn˓d&uZJEBy~̀pNZ]:F$4kF"|UM'd~ kriԙGVgr,ibϥ2 jb[QkM1}@.àT̀2@P)pgj&>) kAdCd! pZ/ u7) 2NXP H"s9Ŏ3KζuJ!B {~Ew.uwrɽJ 5"h 'ŗa_{rǸF1Cv`7;Oр:٪" Z|4|W^G@y-.vE㜠C.&Bosfn"~#*0?9wC/qHghZo3vP֖a[n2[*LRvVޝ@饝'ԘP\û9Dj4z@ 5#]oW"O7dmfƲ[؂} 3QGo;E_ WhENVFFA̵*Ti슳VVpF7m l3NY6մAp/X VEtkYj Uc4{,Pxk&ԓ#6%7Su$6nϣk06. 
svGQ Htzr=*tOE &q MjLNT 1֚6V* ^秐 -&LP#iLkSS![Gڂѓ|x9%Ul*F»k%9(ZJƝF'de$⣘Aj{Oe2.—ix$ό!tax̿OQ_S{]J跔!*.it:W@%tWy sNu=]jU4`.|=8N@,"8x]h˷o%JSyXaF;OLi%}#SfcmP d`VF_I 0`_^XI=l+Stv@c)KЋOl#kن:'KtiMѿIȆa#FpwI!6hxgo>JZiQf`9*H'Anދ!QA Vl8/ݥ#k{@r W/Qsc9R˜+sgT[շ\^IJę` jbJUɑB?&OJ(@h^dV.ɟz2S9|$MhT㵮vyl/9xy/s,^^d]fAN,iX$+Ws*N 9Ȏ)Aa.lJzo!{DEED#|G]g7DREWA2R0,uY(2OY|`|PZu0?Ti2 tzJ?Lo2$B%6Lh! 5r {$6萕5.sRbIkYŜXO*8'mGdI<ˆ R«wX=VSn(tb ,wce,U)<'n8 "*OM`,4㡒A{u;اŪ` ;p7Hiq,cȲm05kq$O O]0/= fp)Kb}ˮНu[{ˣ/>Κs7WjgԞFk~OSd:b(43Lm[;Α.XIwFƠS'iir.f[Xd䰘}իJ2%U YlCH$&ۻVȞi cTČW8Dd\'!`Ȱ/q9ۯ5V>U:$"`aWe䤯-ҵ6 BQ>*KѧXI-Ѥh6-5Yڡzx[1%#W2`"k$eM_#'mfARz*K"|{Or`7(Rҋ$GG*Gf۸a,a;d,hS/ pӤAl:_}|9UaQ$aX[twk>6q#L-dsxU2NF!Rݼ"PoRJ8Dh/cH2E}oN|R?ҟi%mTM3aҏsl6uQ68GVNhvAfOlZCh֙ H(/5SgI/`h@KV ';]꧓QI+Vk.}LuXǠ$Q WqMמx)NTC1?1:od㫸Tձrk8y9 rWᨡ4Q\~jc0Uy~B2 3-;i#1W ӷ@,Vn#L( UXRwvk"uL8] "y\X>zeKpi4h[*5R38$Ib2%zHw ]^w2_-%^j@68VA=f#N0o]x C3vQbyg%C꬟#.ZjtF LV[%,J ]! l1FjfE"{PtblTC _v%t=όpk?xhm%ƙ sdWv`F'W0_c(B&+ hmMM'󡊎ݵ.̑G7!= 3Ug >{1wFnJL*1=WHH8CL&l1+ r^Z}&5C4]$~QhQZ-7QqjH%V2ɋrk8eGxk'|r;_qaC {%O. k/l7nw=E=lPvԈ]r hqи>0 YZgclus/R/0000755000176200001440000000000013414705166011577 5ustar liggesusersgclus/R/graphclus.R0000755000176200001440000001412410001020066013671 0ustar liggesusers lower2upper.tri.inds <- #copied from lower.to.upper.tri.inds from cluster library function (n) { n1 <- as.integer(n - 1) if (n1 < 1) stop("`n' must be >= 2") else if (n1 == 1) 1:1 else rep(1:n1, 1:n1) + c(0, unlist(lapply(2:n1, function(k) cumsum(c(0, (n - 2):(n - k)))))) } vec2distm <- function(vec){ #convert from a vector to a distance matrix m <- length(vec) n <- (1+sqrt(1+8*m))/2 ans<- matrix(0,n,n) ans[lower.tri(ans)] <- vec ans[upper.tri(ans)] <- vec[lower2upper.tri.inds(n)] ans } vec2dist <- function(vec){ #convert from a vector to a "dis" as.dist(vec2distm(vec)) } # Returns a vector of off-diagonal elements in m. # The off parameter specifies the distance above the main (0) diagonal. 
diag.off <- function(m,off=1) m[col(m)==row(m)+off] #----------------------------------------------------------- # Accepts a dissimilarity matrix or "dist" m, and # returns a matrix of colors. # M values are cut into categories using breaks (ranked distances if # byrank is true) and categories are assigned the values in colors. default.dmat.color <- c("#FDFFDA", "#D2F4F2", "#F4BBDD") dmat.color <- function(m, colors = default.dmat.color,byrank=NULL, breaks=length(colors) ){ if (is.matrix(m)) m <- as.dist(m) if (is.null(byrank)) byrank <- length(breaks) == 1 if (byrank ==TRUE) m1 <- rank(as.vector(m)) else m1 <- as.vector(m) fac <- cut(m1,breaks,include.lowest=TRUE) ans <- colors[as.numeric(fac)] ans <- vec2distm(ans) diag(ans) <- NA attr(ans,"Levels") <- levels(fac) if (length(labels(m)) == nrow(ans)){ rownames(ans) <- labels(m) colnames(ans) <- labels(m)} ans } #----------------------------------------------------------- # # Extracts information from a matrix of colors suitable for use by # image. # imageinfo <- function(cmat) { n <- nrow(cmat) p <- ncol(cmat) levels <- sort(unique(as.vector(cmat))) z <- unclass(factor(cmat,levels= levels, labels=1:length(levels))) z <- matrix(z,nrow=n,p) list(x=1:p,y=1:n, z =t(z),col=levels) } # This draws the color matrix cmat. plotcolors <- function(cmat, na.color="white", dlabels = NULL, rlabels = FALSE, clabels = FALSE, ptype ="image", border.color = "grey70", pch=15,cex=3,label.cex = .6,...) { n <- nrow(cmat) p <- ncol(cmat) cmat[is.na(cmat)] <- na.color if (ptype=="image") { info <- imageinfo(cmat) image(info$x, info$y, info$z[, n:1], col = info$col, axes = FALSE, xlab = "", ylab = "", ...)} else { y <- rep(n:1,p) x <- rep(1:p,rep(n,p)) cmat <- as.vector(cmat) plot(x,y,col=cmat,cex=cex,pch=pch,axes=FALSE,xlab="",ylab="", xlim=c(.5,p+.5),ylim=c(.5,n+.5),...) 
} axis(3, at = 1:p, tick=FALSE,labels = clabels, las = 2, cex.axis = label.cex) axis(2, at = n:1, tick=FALSE,labels = rlabels, las = 2, cex.axis =label.cex) if (is.vector(dlabels)){ nl <- length(dlabels) text(1:nl,nl:1,dlabels,cex=label.cex)} box(col = border.color) } #----------------------------------------------------------- # This function draws a scatterplot matrix of data. # Order, if present, specifies the order of the variables and # panel.colors, if present should be a matrix of panel colors. # (...) are graphical parameters. cpairs <- function(data,order=NULL,panel.colors=NULL,border.color="grey70",show.points=TRUE,...) { textPanelbg <- function(x = 0.5, y = 0.5, txt, cex, font) { box(col= border.color) text(x, y, txt, cex = cex, font = font) } if (!is.null(order)) { data <- data[,order] if (!(is.null(panel.colors))) panel.colors <- panel.colors[order,order]} if (!is.null(panel.colors)) { if (ncol(data) != nrow(panel.colors) || ncol(data) != ncol(panel.colors)) stop("dimensions do not match") diag(panel.colors) <- NA panel.colors <- t(panel.colors)[!is.na(panel.colors)]} env<- new.env() assign("j",1,envir=env) pairs.default(data,...,text.panel = textPanelbg, panel = function(x,y,...){ j <- get("j",envir=env) reg <- par("usr") if (!(is.null(panel.colors))) rect(reg[1],reg[3],reg[2],reg[4],col=panel.colors[j]) box(col=border.color) j <- j+1 assign("j",j,envir=env) if (show.points == TRUE) points(x,y,...) }) } # This function draws a parallel coordinate plot of the data. # Order, if present, specifies the order of the variables and # panel.colors, if present should either be a vector of panel colors, # or a matrix whose i,j the element gives the color for the panel # showing columns i and j of data. (...) are graphical parameters. # This function is adapted from parcoord(MASS). cparcoord <- function (data, order=NULL,panel.colors=NULL,col=1,lty=1,horizontal=FALSE,mar=NULL,...) 
{ if (is.null(mar)) if (horizontal==TRUE) mar <- c(5, 2, 2, 2) + 0.1 else mar <- c(2, 8, 2, 2) + 0.1 if (!is.null(order)) { data <- data[,order] if (is.matrix(panel.colors)) panel.colors <- panel.colors[order,order]} if (is.matrix(panel.colors)) panel.colors <- diag.off(panel.colors) if (is.vector(panel.colors)) if (ncol(data) -1 != length(panel.colors)) stop("dimensions do not match") oldpar <- par(mar=mar) x <- apply(data, 2, function(x) (x - min(x))/(max(x) - min(x))) p <- ncol(x) if (horizontal==TRUE){ matplot(1:p, t(x), xlab = "", ylab = "", axes = FALSE, type="n",...) axis(1, at = 1:p, labels = colnames(x)) if (!(is.null(panel.colors))) for (i in 1:(p-1)) rect(i,0,i+1,1, lty=0,col =panel.colors[i]) for (i in 1:p) lines(c(i, i), c(0, 1), col = "grey70") matpoints(1:p, t(x), type = "l",col=col,lty = lty,...) } else { matplot(t(x), p:1, xlab = "", ylab = "", axes = FALSE, type="n",...) axis(2, at = p:1, labels = colnames(x),las=2) if (!(is.null(panel.colors))) for (i in 1:(p-1)) rect(0,i,1,i+1, lty=0,col =panel.colors[p-i]) for (i in 1:p) lines(c(0, 1),c(i, i), col = "grey70") matpoints(t(x), p:1, type = "l",col=col,lty = lty,...) } on.exit(par(oldpar)) invisible() } gclus/R/hclust.R0000755000176200001440000001302513414672735013235 0ustar liggesusers # This function accepts a "dist" or matrix of scores and # returns an ordering, based on hierarchical clustering. # If reorder is FALSE, the order returned by hclust is used, # otherwise clusters are ordered by placing the nearest end points # adjacent to each other at a merge. order.hclust <- function(merit,reorder=TRUE,...) { dis <- - merit if (is.matrix(dis)) disd <- as.dist(dis) else { disd <- dis dis <- as.matrix(dis)} n <- nrow(dis) if (n <= 2) ord <- 1:n else { hc <- hclust(disd,...) if (reorder) hc <- reorder.hclust(hc,dis) ord <- hc$order} ord } # This function accepts hc, the results of a hierarchical clustering # and a "dist" or distance matrix. 
# It returns a hierarchical clustering obtained by placing
# the nearest end points adjacent to each other at each
# merge of the hierarchical clustering
#
# Arguments:
#   x    result of a hierarchical clustering; only x$merge is read.
#        Negative entries of a merge row are single observations,
#        positive entries refer to earlier merge rows.
#   dis  a "dist" or a square matrix of distances between observations.
#   ...  not used by this method.
# Value: a copy of x whose merge and order components are replaced by
#   the re-oriented merge matrix and the leaf order it implies.
reorder.hclust <- function(x, dis, ...) {
  if (!is.matrix(dis)) dis <- as.matrix(dis)
  merges <- x$merge
  n <- nrow(merges)

  # endpoints[i, ] holds the left/right end leaves of the cluster formed
  # at merge i; dir[i, s] is set to -1 when sub-cluster s of merge i has
  # to be flipped so that the nearest end points become neighbours.
  endpoints <- matrix(0, n, 2)
  dir <- matrix(1L, n, 2)

  for (i in seq_len(n)) {
    j <- merges[i, 1]
    k <- merges[i, 2]
    if ((j < 0) && (k < 0)) {
      # two single observations
      endpoints[i, 1] <- -j
      endpoints[i, 2] <- -k
    } else if (j < 0) {
      # single observation j joined to the left of cluster k
      j <- -j
      endpoints[i, 1] <- j
      e1 <- endpoints[k, 1]
      e2 <- endpoints[k, 2]
      if (dis[j, e1] < dis[j, e2]) {
        endpoints[i, 2] <- e2
      } else {
        endpoints[i, 2] <- e1
        dir[i, 2] <- -1
      }
    } else if (k < 0) {
      # cluster j followed by single observation k
      k <- -k
      endpoints[i, 2] <- k
      e1 <- endpoints[j, 1]
      e2 <- endpoints[j, 2]
      if (dis[k, e1] < dis[k, e2]) {
        endpoints[i, 1] <- e2
        dir[i, 1] <- -1
      } else {
        endpoints[i, 1] <- e1
      }
    } else {
      # two clusters: pick the pair of end points that is closest
      ek1 <- endpoints[k, 1]
      ek2 <- endpoints[k, 2]
      ej1 <- endpoints[j, 1]
      ej2 <- endpoints[j, 2]
      d11 <- dis[ej1, ek1]
      d12 <- dis[ej1, ek2]
      d21 <- dis[ej2, ek1]
      d22 <- dis[ej2, ek2]
      dmin <- min(d11, d12, d21, d22)
      if (dmin == d21) {
        endpoints[i, 1] <- ej1
        endpoints[i, 2] <- ek2
      } else if (dmin == d11) {
        endpoints[i, 1] <- ej2
        endpoints[i, 2] <- ek2
        dir[i, 1] <- -1
      } else if (dmin == d12) {
        endpoints[i, 1] <- ej2
        endpoints[i, 2] <- ek1
        dir[i, 1] <- -1
        dir[i, 2] <- -1
      } else {
        endpoints[i, 1] <- ej1
        endpoints[i, 2] <- ek1
        dir[i, 2] <- -1
      }
    }
  }

  # Push the flips down the tree, from the last merge to the second.
  # With a single merge (two leaves) there is nothing to push; the guard
  # avoids n:2 counting upwards (1, 2) and indexing a non-existent row.
  if (n >= 2L) {
    for (i in n:2L) {
      for (side in 1:2) {
        if (dir[i, side] == -1) {
          m <- merges[i, side]
          if (m > 0) {
            # flipping a sub-cluster swaps its two children ...
            merges[m, ] <- merges[m, 2:1]
            # ... and toggles their orientation when both agree
            if (dir[m, 1] == dir[m, 2]) {
              dir[m, ] <- -dir[m, ]
            }
          }
        }
      }
    }
  }

  # Rebuild the leaf order implied by the re-oriented merge matrix.
  clusters <- vector("list", n)
  for (i in seq_len(n)) {
    j <- merges[[i, 1]]
    k <- merges[[i, 2]]
    if ((j < 0) && (k < 0)) {
      clusters[[i]] <- c(-j, -k)
    } else if (j < 0) {
      clusters[[i]] <- c(-j, clusters[[k]])
    } else if (k < 0) {
      clusters[[i]] <- c(clusters[[j]], -k)
    } else {
      clusters[[i]] <- c(clusters[[j]], clusters[[k]])
    }
  }

  x1 <- x
  x1$merge <- merges
  x1$order <- clusters[[n]]
  x1
}

# reorder.hclust<-
# function(x,dis,...) {
#   if (! is.matrix(dis)) dis <- as.matrix(dis)
#   merges <- x$merge
#   n <- nrow(merges)
#   endpoints <- matrix(0,n,2)
#   dir <- matrix(1L,n,2)
#   for (i in 1L:n) {
#     j <- merges[i,1]
#     k <- merges[i,2]
#     if ((j < 0) && (k < 0)) {
#       endpoints[i,1] <- -j
#       endpoints[i,2] <- -k}
#     else if (j < 0) {
#       j <- -j
#       endpoints[i,1] <- j
#       if (dis[j,endpoints[k,1]] < dis[j,endpoints[k,2]])
#         endpoints[i,2] <- endpoints[k,2]
#       else {
#         endpoints[i,2] <- endpoints[k,1]
#         dir[i,2] <- -1}}
#     else if (k < 0) {
#       k <- -k
#       endpoints[i,2] <- k
#       if (dis[k,endpoints[j,1]] < dis[k,endpoints[j,2]]){
#         endpoints[i,1] <- endpoints[j,2]
#         dir[i,1] <- -1 }
#       else {
#         endpoints[i,1] <- endpoints[j,1]
#       }}
#     else {
#       d11 <- dis[endpoints[j,1],endpoints[k,1]]
#       d12 <- dis[endpoints[j,1],endpoints[k,2]]
#       d21 <- dis[endpoints[j,2],endpoints[k,1]]
#       d22 <- dis[endpoints[j,2],endpoints[k,2]]
#       dmin <- min(d11,d12,d21,d22)
#       if (dmin == d21) {
#         endpoints[i,1] <- endpoints[j,1]
#         endpoints[i,2] <- endpoints[k,2]
#       }
#       else if (dmin == d11) {
#         endpoints[i,1] <- endpoints[j,2]
#         endpoints[i,2] <- endpoints[k,2]
#         dir[i,1] <- -1
#       }
#       else if (dmin == d12) {
#         endpoints[i,1] <- endpoints[j,2]
#         endpoints[i,2] <- endpoints[k,1]
#         dir[i,1] <- -1
#         dir[i,2] <- -1
#       }
#       else {
#         endpoints[i,1] <- endpoints[j,1]
#         endpoints[i,2] <- endpoints[k,1]
#         dir[i,2] <- -1}}
#   }
#   for (i in n:2) {
#     if (dir[i,1] == -1) {
#       m <- merges[i,1]
#       if (m > 0) {
#         m1 <- merges[m,1]
#         merges[m,1] <- merges[m,2]
#         merges[m,2] <- m1
#         if (dir[m,1] == dir[m,2])
#           dir[m,] <- -dir[m,]
#       }}
#     if (dir[i,2] == -1) {
#       m <- merges[i,2]
#       if (m > 0) {
#         m1 <- merges[m,1]
#         merges[m,1] <- merges[m,2]
#         merges[m,2] <- m1
#         if (dir[m,1] == dir[m,2])
#           dir[m,] <- -dir[m,]
#       }}
#   }
#   clusters <- as.list(1:n)
#   for (i in 1:n) {
#     j <- merges[i,1]
#     k <- merges[i,2]
#     if ((j < 0) && (k < 0))
#       clusters[[i]] <- c(-j,-k)
#     else if (j < 0)
#       clusters[[i]] <-
c(-j,clusters[[k]]) # else if (k < 0) # clusters[[i]] <- c(clusters[[j]],-k) # else clusters[[i]] <- c(clusters[[j]], clusters[[k]])} # x1 <- x # x1$merge <- merges # x1$order <- clusters[[n]] # x1 # } reorder.hclust <- compiler::cmpfun(reorder.hclust)gclus/R/colpairs.R0000755000176200001440000000521411376460640013543 0ustar liggesusers#Given an nxp matrix m and a function f, # returns the pxp matrix got by applying f to all pairs of columns of m. colpairs <- function(m,f,diag=0,na.omit=FALSE,...){ flocal <- function(i,j) if (!is.null(diag) && (i == j)) diag else { x <- m[,i] y <- m[,j] if (na.omit) { d <- na.omit(cbind(x,y)) x <- d[,1] y <- d[,2]} f(x,y,...) } p <- ncol(m) m1 <- matrix(rep(1:p,p),nrow=p,ncol=p) ind <- mapply("c",m1,t(m1)) ans <- apply(ind,2, function(i) flocal(i[1],i[2])) ans <- matrix(ans,nrow=p,ncol=p) colnames(ans) <- colnames(m) rownames(ans) <- colnames(m) ans } km2 <- function(x,y){ x <- x - mean(x) y <- y - mean(y) sum(x*x)+ sum(y*y) } # Computes the sum of all distances between pairs of # objects whose coordinates are contained in x and y. gtot <- function(x,y,...) 2*sum(dist(cbind(x,y),...)) # Computes the average total distance from one object to all other # objects, where x and y contain the object cordinates. gave <- function(x,y,...) 2*sum(dist(cbind(x,y),...))/length(x) # Computes the cluster diameter- the maximum distance between # objects whose coordinates are contained in x and y. diameter <- function(x,y,...){ d <- dist(cbind(x,y),...) max(d) } # Computes the cluster star distance- the minimum of the total distance from # one object to another, where x and y contain the object cordinates. star <- function(x,y,...){ d <- vec2distm(dist(cbind(x,y),...)) min(apply(d,2,sum)) } # Computes the silhouette distance of a partition of the objects in # x and y, where group contains the object memberships. sil <- function(x,y,groups,...){ # require(cluster) igroups <- unclass(factor(groups)) d <- dist(cbind(x,y),...) 
s <- silhouette(igroups,d) summary(s)$avg.width } # Computes the agglomerative coefficient, from agnes. ac <- function(x,y,...){ # require(cluster) ag <- agnes(cbind(x,y),keep.diss=FALSE,keep.data=FALSE,...) ag$ac } # Computes the total line length in a parallel coordinate plot # of x and y. pclen <- function(x,y) sum(abs(y-x)) # Computes the average (per object) line length in a parallel coordinate plot # where each x object is connected to all y objects. pcglen <- function(x,y) sum(outer(x,y,function(a,b) abs(a-b)))/length(x) # Applies the function gfun to each group of x and y values # and combines the results using the function cfun. #(...) arguments are passed to gfun. partition.crit <- function(x,y,groups,gfun= gave,cfun=sum,...){ dgroups <- unique(groups) gm <- sapply(dgroups,function(g) gfun(x[groups==g],y[groups==g],...)) cfun(gm) } gclus/R/order.R0000755000176200001440000001111607770634776013060 0ustar liggesusers# Given a list whose ith element contains the indices # of objects in the ith cluster, returns a vector whose ith # element gives the cluster number of the ith object. clus2memship <- function(clusters) { ans <- 1:length(unlist(clusters)) i <- 1 for (cl in clusters) { ans[cl] <- i i <- i+1 } ans } # Given a vector whose ith elements gives the cluster number of the # ith object, returns a list whose ith element contains the indices # of objects in the ith cluster memship2clus <- function(memship) { m <- sort(unique(memship)) index <- seq(along=memship) sapply(m, function(g) index[memship==g],simplify=FALSE) } # This function accepts a "dist" or matrix of scores and # returns an approximate Robinson ordering, used for scatterplot matrices. 
#
# Arguments:
#   merit     a "dist" or square matrix of merit scores; it is negated
#             to obtain the dissimilarities that are actually clustered.
#   clusters  NULL, or a list of index vectors giving an initial
#             grouping whose members are kept together in the result.
# Value: a vector of object indices in their new order.
order.single <- function(merit, clusters = NULL) {
  # Without an initial grouping this is single-linkage clustering with
  # end-point reordering.
  if (is.null(clusters)) {
    return(order.hclust(merit, TRUE, method = "single"))
  }

  dis <- -merit
  if (is.matrix(dis)) {
    dism <- dis
    dis <- as.dist(dis)
  } else {
    dism <- as.matrix(dis)
  }
  n <- nrow(dism)
  if (n <= 2) {
    return(seq_len(n))
  }

  # One row per object pair: (dissimilarity, row index, column index),
  # sorted so that the closest pairs are merged first.
  cind <- col(matrix(0, n, n))
  cind <- cind[lower.tri(cind)]
  rind <- row(matrix(0, n, n))
  rind <- rind[lower.tri(rind)]
  d <- cbind(as.vector(dis), rind, cind)
  d <- d[sort.list(d[, 1]), ]

  # clusters is known to be non-NULL here, so membership always comes
  # from the supplied grouping.
  memship <- clus2memship(clusters)

  for (i in seq_len(length(dis))) {
    j <- memship[d[i, 2]]
    k <- memship[d[i, 3]]
    if (j != k) {
      # always merge into the lower-numbered cluster
      if (j > k) {
        r <- j
        j <- k
        k <- r
      }
      memship[memship == k] <- j
      clusj <- clusters[[j]]
      clusk <- clusters[[k]]
      # distances between the four combinations of end points
      dll <- dism[clusj[1], clusk[1]]
      dlr <- dism[clusj[1], clusk[length(clusk)]]
      drl <- dism[clusj[length(clusj)], clusk[1]]
      drr <- dism[clusj[length(clusj)], clusk[length(clusk)]]
      mind <- min(dll, dlr, drl, drr)
      # orient both pieces so the closest end points become adjacent
      if (drl == mind) {
        NULL
      } else if (dlr == mind) {
        clusj <- rev(clusj)
        clusk <- rev(clusk)
      } else if (dll == mind) {
        clusj <- rev(clusj)
      } else {
        clusk <- rev(clusk)
      }
      clusters[[j]] <- c(clusj, clusk)
    }
    if (length(clusters[[1]]) == n) break
  }
  clusters[[1]]
}

# This function accepts a "dist" or matrix of scores and
# returns an improved ordering, for parallel coordinate displays.
# Arguments:
#   merit    - matrix of merit scores or a "dist"; its negation is used as the
#              dissimilarity between objects.
#   clusters - optional list whose ith element holds the indices of the objects
#              in the ith cluster; when NULL every object starts as its own
#              cluster.
# Membership labels: an object carries its cluster number, or -1 once it sits
# at a join between two merged clusters and may no longer start a merge.
# Value: the vector of object indices accumulated in clusters[[1]].
order.endlink <- function(merit, clusters = NULL) {
  dis <- -merit
  if (is.matrix(dis)) {
    dism <- dis
    dis <- as.dist(dis)
  } else {
    dism <- as.matrix(dis)
  }
  n <- nrow(dism)
  if (n <= 2) {
    clus <- seq_len(n)
  } else {
    # Row and column index of every lower-triangle entry, bound below to the
    # corresponding entries of as.vector(dis).
    cind <- col(matrix(0, n, n))
    cind <- cind[lower.tri(cind)]
    rind <- row(matrix(0, n, n))
    rind <- rind[lower.tri(rind)]
    # One row per object pair (dissimilarity, row, col), smallest first.
    d <- cbind(as.vector(dis), rind, cind)
    d <- d[sort.list(d[, 1]), ]
    if (is.null(clusters)) {
      memship <- 1:n
      clusters <- as.list(1:n)
    } else {
      memship <- clus2memship(clusters)
    }
    m <- n * (n - 1) / 2
    for (i in seq_len(m)) {
      j <- memship[d[i, 2]]
      k <- memship[d[i, 3]]
      if (!(j == k || j == -1 || k == -1)) {
        # Always merge the higher-numbered cluster into the lower-numbered
        # one, so the final result ends up in clusters[[1]].
        if (j > k) {
          r <- j
          j <- k
          k <- r
        }
        # Relabel every object still carrying label k (as order.single does).
        # Without this, interior members of a supplied cluster of three or
        # more objects kept the stale label k, and a later pair could merge
        # the outdated clusters[[k]] again, duplicating or dropping objects.
        memship[memship == k] <- j
        clusj <- clusters[[j]]
        clusk <- clusters[[k]]
        # Dissimilarities between the end objects of the two clusters
        # (l = first element, r = last element).
        dll <- dism[clusj[1], clusk[1]]
        dlr <- dism[clusj[1], clusk[length(clusk)]]
        drl <- dism[clusj[length(clusj)], clusk[1]]
        drr <- dism[clusj[length(clusj)], clusk[length(clusk)]]
        mind <- min(dll, dlr, drl, drr)
        # Reverse one or both clusters so that the closest pair of end
        # objects meets at the join of c(clusj, clusk).
        if (drl == mind) {
          NULL
        } else if (dlr == mind) {
          clusj <- rev(clusj)
          clusk <- rev(clusk)
        } else if (dll == mind) {
          clusj <- rev(clusj)
        } else {
          clusk <- rev(clusk)
        }
        clusters[[j]] <- c(clusj, clusk)
        # The two objects at the join are retired (-1) unless their cluster
        # was a singleton; the new right-hand end takes label j.
        if (!(length(clusj) == 1)) memship[clusj[length(clusj)]] <- -1
        if (!(length(clusk) == 1)) memship[clusk[1]] <- -1
        memship[clusk[length(clusk)]] <- j
      }
      # Stop as soon as every object has been absorbed into cluster 1.
      if (length(clusters[[1]]) == n) break
    }
    clus <- clusters[[1]]
  }
  clus
}

# This function takes a merit measure and clusters, either a vector
# giving the cluster number of the ith items, or a list whose ith element
# gives the indices of the elements in the ith cluster.
# Objects within a cluster are ordered with within.order
# and clusters are ordered with between.order.
#
order.clusters <- function(merit,clusters,within.order = order.single, between.order= order.single,...)
{ if (!is.list(clusters)) clusters <- memship2clus(clusters) if (!is.matrix(merit)) merit <- as.matrix(merit) if (!is.null(within.order)) { clusl <- lapply(clusters, function(g) within.order(merit[g,g],...)) newclusl <- lapply(1:length(clusters),function(i) clusters[[i]][clusl[[i]]]) } else newclusl <- clusters if (!is.null(between.order)) between.order(merit,newclusl) else unlist(newclusl) } gclus/vignettes/0000755000176200001440000000000013414705166013406 5ustar liggesusersgclus/vignettes/gclus.Rmd0000644000176200001440000000726213414704627015177 0ustar liggesusers--- title: "Clustering Graphics" author: "Catherine Hurley" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Clustering Graphics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This package will order panels in scatterplot matrices and parallel coordinate displays by some merit index. The package contains various indices of merit, ordering functions, and enhanced versions of pairs and parcoord which color panels according to their merit level. For details on the methods used, consult "Clustering Visualisations of Multidimensional Data", Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. ## Displaying a correlation matrix ```{r} library(gclus) data(longley) longley.cor <- cor(longley) longley.color <- dmat.color(longley.cor) ``` `dmat.color` assigns three colours to the correlations according to the correlation magnitude. High correlations are in pink, the middle third are in blue, and the bottom third are in yellow. 
```{r fig.width=5, fig.height=5, fig.align='center'} par(mar=c(1,1,1,1)) plotcolors(longley.color,dlabels=rownames(longley.color)) ``` If you want to change the colour scheme: ```{r eval=F} longley.color <- dmat.color(longley.cor, byrank=FALSE) longley.color <- dmat.color(longley.cor, breaks=c(-1,0,.5,.8,1), cm.colors(4)) ``` The plot is easier to interpret if variables are reordered prior to plotting. ```{r fig.width=5, fig.height=5, fig.align='center'} par(mar=c(1,1,1,1)) longley.o <- order.hclust(longley.cor) longley.color1 <- longley.color[longley.o,longley.o] plotcolors(longley.color1,dlabels=rownames(longley.color1)) ``` ## Displaying a pairs plot with coloured panels `cpairs` is a version of `pairs` where panels can be coloured. All the high-correlation panels appear together in a block. ```{r fig.width=5, fig.height=5, fig.align='center'} par(mar=c(1,1,1,1)) cpairs(longley, order= longley.o,panel.color= longley.color) ``` If the `order` is not supplied, then the variables are plotted in default dataset order. ## Displaying a PCP plot with coloured panels `cparcoord` is a version of `parcoord` where panels can be coloured. Again, the pink panels have high correlation, blue panels have middling correlation, and yellow panels have low correlation. ```{r fig.width=8, fig.height=3, fig.align='center', out.width="100%"} cparcoord(longley, order= longley.o,panel.color= longley.color, horizontal=TRUE, mar=c(2,4,1,1)) ``` ## Plotting re-ordered dendrograms. `eurodist` is a built-in distance matrix giving the distance between European cities. ```{r fig.width=6, fig.height=4, fig.align='center'} par(mar=c(1,1,1,1)) data(eurodist) dis <- as.dist(eurodist) hc <- hclust(dis, "ave") plot(hc) ``` `reorder.hclust` re-orders a dendrogram to improve the similarity between nearby leaves. 
Applying it to the `hc` object: ```{r fig.width=6, fig.height=4, fig.align='center'} par(mar=c(1,1,1,1)) hc1 <- reorder.hclust(hc, dis) plot(hc1) ``` Both dendrograms correspond to the same tree structure, but the second one shows that Paris is closer to Cherbourg than to Munich, and Rome is closer to Gibraltar than to Barcelona. We can also compare both orderings with an image plot of the colors. The second ordering seems to place nearby cities closer to each other. ```{r fig.width=8, fig.height=3.5, fig.align='center'} layout(matrix(1:2,nrow=1,ncol=2)) par(mar=c(1,6,1,1)) cmat <- dmat.color(eurodist, rev(cm.colors(5))) plotcolors(cmat[hc$order,hc$order], rlabels=labels(eurodist)[hc$order]) plotcolors(cmat[hc1$order,hc1$order], rlabels=labels(eurodist)[hc1$order]) ``` gclus/MD50000644000176200001440000000305113414727711011705 0ustar liggesusersa78d03cdd3ca785ac4065708a9c6e10b *DESCRIPTION 4d9123eac653d70f49ef76b86550f7ef *NAMESPACE 8ac2d8c5436ffbafd586cd3adc230396 *R/colpairs.R c4ad4ec7005990fec04ce8531de0584c *R/graphclus.R d3cfbdaf4804d03cd13a8fcbeeb496eb *R/hclust.R a3cfa8b9161f090f184b3d69710c9bdc *R/order.R 49af67ddf0cb9ccf54bb4d73f2aaff8d *build/vignette.rds 7f6f2819b1b40c16f9390ad62522eed8 *data/bank.RData 0141a7b2917a268c6075a69cb4861bcb *data/body.RData f8f9dab462a48016569f5cb0c664c87b *data/ozone.RData 9b98f0c3eed60fc94bad60512f5cca32 *data/wine.RData fa99fe23a33d02cca680eafacc1e575d *inst/doc/gclus.R e60d7145d64eff05deb9dc1b9b8c2a2d *inst/doc/gclus.Rmd d1373e106d0da91e4e682f74cdf30298 *inst/doc/gclus.html c077985a8874238b72c72d23fdd1c24c *man/ac.Rd df957acab209b8a04b13b79e909323b1 *man/bank.Rd 1be03c1f2adb4dc55d9d27326326eea2 *man/body.Rd 1c6b5789e4047f00c72111368af2f6a7 *man/colpairs.Rd 14eaed3aec495cefa0e756c19813148a *man/cpairs.Rd 3f880102b9cb133090f298c824ae5651 *man/cparcoord.Rd cdf86ca44b49f10c461472cfbbbd4fa4 *man/diameter.Rd e653be182749bededa931844eccab08a *man/dmat.color.Rd 103c6b6163c35b5debbad3b5d0cd5778 *man/hclust.Rd 
e7e8d3ab3fca2c82060eaba31e6cd4e9 *man/order.Rd 626f57d9faf09e8602d81133c2087d12 *man/order.clusters.Rd e0913d278e8a9360d4534fd25f646833 *man/ozone.Rd ea69478ba64b9acc7d86c9c07eb124bf *man/partition.crit.Rd cc57ab659dd151f3957a8e09e16956de *man/pclen.Rd ebc2e5204be1d8555aab7007e1e8616f *man/plotcolors.Rd 6a46868b4d23839aa8d7adf3b1944a1d *man/utilities.Rd fe01eafa0b29146b5fc865107d01706d *man/wine.Rd e60d7145d64eff05deb9dc1b9b8c2a2d *vignettes/gclus.Rmd gclus/build/0000755000176200001440000000000013414705166012475 5ustar liggesusersgclus/build/vignette.rds0000644000176200001440000000032013414705166015027 0ustar liggesusersb```b`feb`b2 1# 'LO)- MAve+%dd&)(Aa _, &$ey覲楀aM wjey~L6̜T!%ps QY_/( @hrNb1F$$ =gclus/DESCRIPTION0000755000176200001440000000122713414727711013111 0ustar liggesusersPackage: gclus Version: 1.3.2 Author: Catherine Hurley Date: 2019-01-07 Maintainer: Catherine Hurley Title: Clustering Graphics Description: Orders panels in scatterplot matrices and parallel coordinate displays by some merit index. Package contains various indices of merit, ordering functions, and enhanced versions of pairs and parcoord which color panels according to their merit level. Depends: R (>= 2.10), cluster License: GPL (>= 2) Suggests: knitr, rmarkdown VignetteBuilder: knitr RoxygenNote: 6.1.0 NeedsCompilation: no Packaged: 2019-01-07 17:20:54 UTC; catherine Repository: CRAN Date/Publication: 2019-01-07 20:00:09 UTC gclus/man/0000755000176200001440000000000013414705166012151 5ustar liggesusersgclus/man/wine.Rd0000755000176200001440000000275411376457634013426 0ustar liggesusers\name{wine} \alias{wine} \docType{data} \title{Wine recognition data} \description{ Data from the machine learning repository. A chemical analysis of 178 Italian wines from three different cultivars yielded 13 measurements. This dataset is often used to test and compare the performance of various classification algorithms. 
} \format{This data frame contains the following columns: \describe{ \item{Class:}{There are 3 classes} \item{Alcohol:}{Alcohol} \item{Malic:}{Malic acid} \item{Ash:}{Ash} \item{Alcalinity:}{Alcalinity of ash} \item{Magnesium:}{Magnesium} \item{Phenols:}{Total phenols} \item{Flavanoids:}{Flavanoids} \item{Nonflavanoid:}{Nonflavanoid phenols} \item{Proanthocyanins:}{Proanthocyanins} \item{Intensity:}{Color intensity} \item{Hue:}{Hue} \item{OD280:}{OD280/OD315 of diluted wines} \item{Proline:}{Proline} }} \usage{data(wine)} \source{ Forina, M. et al, PARVUS - An Extendible Package for Data Exploration, Classification and Correlation. Institute of Pharmaceutical and Food Analysis and Technologies, Via Brigata Salerno, 16147 Genoa, Italy. } \references{ Blake, C.L. and Merz, C.J. (1998), UCI Repository of machine learning databases, \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}. Irvine, CA: University of California, Department of Information and Computer Science. The database does not list the variable names. These were located at \url{http://www.radwin.org/michael/projects/learning/about-wine.html}. } \keyword{datasets} gclus/man/utilities.Rd0000755000176200001440000000351611376457616014474 0ustar liggesusers\name{vec2distm} \alias{vec2distm} \alias{vec2dist} \alias{lower2upper.tri.inds} \alias{diag.off} \alias{clus2memship} \alias{memship2clus} \title{ Various utility functions} \description{ \code{vec2distm} converts a vector to a distance matrix. \code{vec2dist} converts a vector to a \code{dist} structure. \code{lower2upper.tri.inds} is the same as \code{lower.to.upper.tri.inds} from package cluster. It computes an index vector for extracting or reordering a lower triangular matrix that is stored as a contiguous vector. \code{diag.off} returns a vector of off-diagonal elements of a matrix. \code{off} specifies the distance above the main (0) diagonal. 
\code{clus2memship} converts a list whose ith element contains the indices of objects in the ith cluster into a vector whose ith element gives the cluster number of the ith object. \code{memship2clus} converts a vector whose ith element gives the cluster number of the ith object into a list whose ith element contains the indices of objects in the ith cluster. } \usage{ vec2distm(vec) vec2dist(vec) lower2upper.tri.inds(n) diag.off(m,off=1) clus2memship(clusters) memship2clus(memship) } \arguments{ \item{vec}{is a vector. } \item{n}{is an integer > 1. } \item{m}{is a matrix.} \item{clusters}{is a list whose ith element contains the indices of the objects belonging to the ith cluster.} \item{off}{is an integer specifying the distance above the main (0) diagonal.} \item{memship}{is a vector whose ith element gives the cluster number of the ith object.} } \author{ Catherine B. Hurley} %\note{ ~~further notes~~ } \seealso{ \code{\link{dist}}, \code{\link{diag}}. } \examples{ vec <- 1:15 vec2distm(vec) vec2dist(vec) diag.off(vec2distm(vec)) lower2upper.tri.inds(5) clus2memship(list(c(1,3,5),c(2,6),4)) memship2clus(c(1,3,4,2,1,4,2,3,2,3)) } \keyword{cluster} gclus/man/cparcoord.Rd0000755000176200001440000000442713414665451014430 0ustar liggesusers\name{cparcoord} \alias{cparcoord} \title{Enhanced parallel coordinate plot } \description{ This function draws a parallel coordinate plot of data. Variables may be reordered and panels colored in the display. It is a modified version of \code{parcoord {MASS}}. } \usage{ cparcoord(data, order = NULL, panel.colors = NULL, col = 1, lty = 1, horizontal = FALSE, mar = NULL, ...) } \arguments{ \item{data}{a numeric matrix } \item{order}{the order of variables. Default is the order in data.} \item{panel.colors}{either a vector or a matrix of panel colors. If a vector is supplied, the ith color is used for the ith panel. If a matrix, dimensions should match those of the variables. Diagonal entries are ignored. 
} \item{col}{ a vector of colours, recycled as necessary for each observation. } \item{lty}{ a vector of line types, recycled as necessary for each observation. } \item{horizontal}{ If TRUE, orientation is horizontal. } \item{mar}{ margin parameters, passed to \code{par}. } \item{\dots}{ graphics parameters which are passed to matplot.} } \details{ If \code{panel.colors} is a matrix and \code{order} is supplied, \code{panel.colors} is reordered.} \references{Hurley, Catherine B. \dQuote{Clustering Visualisations of Multidimensional Data}, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. } \author{ Catherine B. Hurley } \seealso{\code{\link{cpairs}}, \code{\link{parcoord}}, \code{\link{dmat.color}}, \code{\link{colpairs}}, \code{\link{order.endlink}}.} \examples{ data(state) state.m <- colpairs(state.x77, function(x,y) cor.test(x,y,"two.sided","kendall")$estimate, diag=1) # OR, Works only in R1.8, state.m <-cor(state.x77,method="kendall") state.col <- dmat.color(state.m) cparcoord(state.x77, panel.color= state.col) # Get rid of the panels with lots of line crossings (yellow) by reordering: cparcoord(state.x77, order.endlink(state.m), state.col) # To get rid of the panels with lots of long line segments: # use a different panel merit measure- pclen: mins <- apply(state.x77,2,min) ranges <- apply(state.x77,2,max) - mins state.m <- -colpairs(scale(state.x77,mins,ranges), pclen) cparcoord(state.x77, order.endlink(state.m), dmat.color(state.m)) } \keyword{multivariate } \keyword{color } \keyword{hplot } gclus/man/dmat.color.Rd0000755000176200001440000000457310001257236014503 0ustar liggesusers\name{dmat.color} \alias{dmat.color} \alias{default.dmat.color} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Colors a symmetric matrix} \description{ Accepts a dissimilarity matrix or \code{dist} \code{m}, and returns a matrix of colors. 
Values in \code{m} are \code{cut} into categories using \code{breaks} (ranked distances if \code{byrank} is \code{TRUE}) and categories are assigned the values in \code{colors}. } \usage{ dmat.color(m, colors = default.dmat.color, byrank = NULL, breaks = length(colors)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{m}{a dissimilarity matrix or the result of \code{dist}} \item{colors}{a vector of colors. The default is \code{default.dmat.color}.} \item{byrank}{boolean; the default is \code{TRUE} unless \code{breaks} has length > 1.} \item{breaks}{the number of break points, or a vector of break points. } } \details{\code{breaks} is passed to the function \code{cut}. If \code{byrank} is \code{TRUE}, values in \code{m} are ranked before they are categorized. If \code{byrank} is \code{TRUE} and \code{breaks} is an integer, then there are \code{breaks} equal-sized categories.} \value{ Returns a matrix of colors. The matrix is symmetric, with NAs on the diagonal. } %\references{ ~put references to the literature/web site here ~ } \author{ Catherine B. Hurley} %\note{ ~~further notes~~ } % ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{cut}}, \code{\link{cpairs}}, \code{\link{cparcoord}}} \examples{ data(longley) longley.cor <- cor(longley) # A matrix with equal (or nearly equal) number of entries of each color. longley.color <- dmat.color(longley.cor) # Plot the colors plotcolors(longley.color,dlabels=rownames(longley.color)) # Try different color schemes # A matrix where each color represents an equal-length interval. 
longley.color <- dmat.color(longley.cor, byrank=FALSE) # Specify colors and breaks longley.color <- dmat.color(longley.cor, breaks=c(-1,0,.5,.8,1), cm.colors(4)) # Could also reorder variables prior to plotting: longley.o <- order.single(longley.cor) longley.color <- longley.color[longley.o,longley.o] # The colors can be used in a scatterplot matrix or parallel # coordinate display: cpairs(longley, panel.color= longley.color) cparcoord(longley, panel.color= longley.color) } \keyword{multivariate } \keyword{color } gclus/man/ac.Rd0000755000176200001440000000233207765115544013035 0ustar liggesusers\name{ac} \alias{ac} \alias{sil} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Clustering coefficients from package cluster.} \description{ Computes clustering coefficients from \code{cluster}, where \code{x} and \code{y} give the object coordinates. } \usage{ ac(x, y, ...) sil(x, y, groups, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{is a numeric vector. } \item{y}{is a numeric vector. } \item{groups}{is a vector of group memberships, used by \code{sil} only.} \item{\dots}{are passed to \code{agnes} in \code{ac} and to \code{dist} in \code{sil}.} } \details{ \code{ac} - Computes clustering coefficient from \code{agnes{cluster}}. \code{sil} - Computes the silhouette coefficient from package \code{cluster}. } \value{ The clustering coefficient is returned. } \references{ Kaufman, L. and Rousseeuw, P.J. (1990). Finding Groups in Data: An Introduction to Cluster Analysis. Wiley, New York. } \author{ Catherine B. Hurley} %\note{ ~~further notes~~ } \seealso{ \code{\link{agnes}}, \code{\link{silhouette}}, \code{\link{dist}}. 
} \examples{ x <- runif(20) y <- runif(20) g <- rep(c("a","b"),10) ac(x,y) sil(x,y,g) } \keyword{cluster} gclus/man/pclen.Rd0000755000176200001440000000302210224511144013524 0ustar liggesusers\name{pclen} \alias{pclen} \alias{pcglen} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Profile smoothness measures } \description{ Computes measures of profile smoothness of 2-d data, where \code{x} and \code{y} give the object coordinates. } \usage{ pclen(x, y) pcglen(x, y) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{is a numeric vector. } \item{y}{is a numeric vector. } } \details{ \code{pclen} computes the total line length in a parallel coordinate plot of x and y. \code{pcglen} computes the average (per object) line length in a parallel coordinate plot where all pairs of objects are connected. Usually, the data is standardized prior to using these functions. } \value{The panel measure is returned. } \references{Hurley, Catherine B. \dQuote{Clustering Visualisations of Multidimensional Data}, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. } \author{ Catherine B. 
Hurley } %\note{ ~~further notes~~ } \seealso{\code{\link{cparcoord}}, \code{\link{colpairs}}, \code{\link{order.endlink}}.} \examples{ x <- runif(20) y <- runif(20) pclen(x,y) data(state) mins <- apply(state.x77,2,min) ranges <- apply(state.x77,2,max) - mins state.m <- -colpairs(scale(state.x77,mins,ranges), pclen) state.col <- dmat.color(state.m) cparcoord(state.x77, panel.color= state.col) # Get rid of the panels with long line segments (yellow) by reordering: cparcoord(state.x77, order.endlink(state.m), state.col) } \keyword{hplot} \keyword{multivariate} gclus/man/hclust.Rd0000755000176200001440000000463511376463166013763 0ustar liggesusers\name{reorder.hclust} \alias{reorder.hclust} %\name{reorder} %\docType{methods} %\alias{reorder,hclust} \title{Reorders object order of hclust, keeping objects within a cluster contiguous to each other. } \description{ Reorders objects so that nearby object pairs are adjacent. } \usage{ %reorder.hclust(x,dis,...) \method{reorder}{hclust}(x,dis,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{is the result of \code{hclust}.} \item{dis}{is a distance matrix or \code{dist}.} \item{...}{additional arguments.} } \details{ In hierarchical cluster displays, a decision is needed at each merge to specify which subtree should go on the left and which on the right. This algorithm uses the order suggested by Gruvaeus and Wainer (1972). At a merge of clusters A and B, the new cluster is one of (A,B), (A',B), (A,B'),(A',B'), where A' denotes A in reverse order. The new cluster is chosen to minimize the distance between the object in A placed adjacent to an object from B. } \value{A permutation of the objects represented by \code{dis} is returned. } \references{Hurley, Catherine B. \dQuote{Clustering Visualisations of Multidimensional Data}, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. Gruvaeus, G. and Wainer, H. 
(1972), \dQuote{Two Additions to Hierarchical Cluster Analysis}, British Journal of Mathematical and Statistical Psychology, 25, 200-206. } \author{ Catherine B. Hurley } %\note{ ~~further notes~~ } \seealso{\code{\link{hclust}}, \code{\link{order.hclust}}.} \examples{ data(eurodist) dis <- as.dist(eurodist) hc <- hclust(dis, "ave") layout(matrix(1:2,nrow=2,ncol=1)) op <- par(mar=c(1,1,1,1)) plot(hc) hc1 <- reorder.hclust(hc, dis) plot(hc1) par(op) layout(matrix(1,1)) # Both dendrograms correspond to the same tree structure, # but the second one shows that # Paris is closer to Cherbourg than to Munich, and # Rome is closer to Gibraltar than to Barcelona. # We can also compare both orderings with an # image plot of the colors. # The second ordering seems to place nearby cities # closer to each other. layout(matrix(1:2,nrow=2,ncol=1)) op <- par(mar=c(1,6,1,1)) cmat <- dmat.color(eurodist, rev(cm.colors(5))) plotcolors(cmat[hc$order,hc$order], rlabels=labels(eurodist)[hc$order]) plotcolors(cmat[hc1$order,hc1$order], rlabels=labels(eurodist)[hc1$order]) layout(matrix(1,1)) par(op) } \keyword{multivariate } \keyword{cluster } gclus/man/partition.crit.Rd0000755000176200001440000000417210224511754015412 0ustar liggesusers\name{partition.crit} \alias{partition.crit} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Combines the results of applying an index to each group of observations } \description{ Applies the function \code{gfun} to each group of x and y values and combines the results using the function \code{cfun} } \usage{ partition.crit(x, y, groups, gfun = gave, cfun = sum, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{is a numeric vector. } \item{y}{is a numeric vector. } \item{groups}{ is a vector of group memberships. } \item{gfun}{ is applied to the \code{x} and \code{y} data in each group. } \item{cfun}{ combines the values returned by \code{gfun}. } \item{\dots}{ arguments are passed to \code{gfun}. 
} } \details{ The function \code{gfun} is applied to each group of \code{x} and \code{y} values. The function \code{cfun} is applied to the vector or matrix of \code{gfun} results. } \value{ The result of applying \code{cfun}. } \references{ See Gordon, A. D. (1999). \emph{Classification}. Second Edition. London: Chapman and Hall / CRC } \author{ Catherine B. Hurley} \seealso{ \code{\link{gave}}, \code{\link{colpairs}}, \code{\link{order.single}}} \examples{ x <- runif(20) y <- runif(20) g <- rep(c("a","b"),10) partition.crit(x,y,g) data(bank) # m is a homogeneity measure of each pairwise variable plot m <- -colpairs(scale(bank[,-1]), partition.crit,gfun=gave,groups=bank[,1]) # Color panels by level of m and reorder variables so that # pairs with high m are near the diagonal. Panels shown # in pink have the highest amount of group homogeneity, as measured by # gave. cpairs(bank[,-1],order=order.single(m), panel.colors=dmat.color(m), gap=.3,col=c("purple","black")[bank[,"Status"]+1], pch=c(5,3)[bank[,"Status"]+1]) # Try a different measure m <- -colpairs(scale(bank[,-1]), partition.crit,gfun=diameter,groups=bank[,1]) cpairs(bank[,-1],order=order.single(m), panel.colors=dmat.color(m), gap=.3,col=c("purple","black")[bank[,"Status"]+1], pch=c(5,3)[bank[,"Status"]+1]) # Result is the same, in this case. } \keyword{multivariate } \keyword{cluster } gclus/man/order.clusters.Rd0000755000176200001440000000616610001023726015412 0ustar liggesusers\name{order.clusters} \alias{order.clusters} \title{Orders clustered objects using hierarchical clustering} \description{ Reorders objects so that similar (or high-merit) object pairs are adjacent. The clusters argument specifies (possibly ordered) groups, and objects within a group are kept together. } \usage{ order.clusters(merit,clusters,within.order = order.single, between.order= order.single,...) 
} \arguments{ \item{merit}{is either a symmetric matrix of merit or similarity scores, or a \code{dist}.} \item{clusters}{specifies a partial grouping. It should either be a list whose ith element contains the indices of the objects in the ith cluster, or a vector of integers whose ith element gives the cluster membership of the ith object. Either representation may be used to specify grouping; the first is preferable for specifying adjacencies.} \item{within.order}{is a function used to order the objects within each cluster.} \item{between.order}{is a function used to order the clusters.} \item{...}{arguments are passed to \code{within.order}.} } \details{\code{within.order} may be NULL, in which case objects within a cluster are assumed to be in order. Otherwise, \code{within.order} should be one of the ordering functions \code{order.single},\code{order.endlink} or \code{order.hclust}. \code{between.order} may be NULL, in which case cluster order is preserved. Otherwise, \code{between.order} should be one of the ordering functions that uses a partial ordering, \code{order.single} or \code{order.endlink}. } \value{A permutation of the objects represented by \code{merit} is returned. } \author{ Catherine B. Hurley } %\note{ ~~further notes~~ } \seealso{\link{order.single},\link{order.endlink},\link{order.hclust}.} \examples{ data(state) state.d <- dist(state.x77) # Order the states, keeping states in a division together. state.o <- order.clusters(-state.d, as.numeric(state.division)) cmat <- dmat.color(as.matrix(state.d), rev(cm.colors(5))) op <- par(mar=c(1,6,1,1)) rlabels <- state.name[state.o] plotcolors(cmat[state.o,state.o], rlabels=rlabels) par(op) # Alternatively, use kmeans to place the states into 6 clusters state.km <- kmeans(state.d,6)$cluster # An ordering obtained from the kmeans clustering... 
state.o <- unlist(memship2clus(state.km)) layout(matrix(1:2,nrow=1,ncol=2),widths=c(0.1,1)) op <- par(mar=c(1,1,1,.2)) state.colors <- cbind(state.km,state.km) plotcolors(state.colors[state.o,]) par(mar=c(1,6,1,1)) rlabels <- state.name[state.o] plotcolors(cmat[state.o,state.o], rlabels=rlabels) par(op) layout(matrix(1,1)) # In the ordering above, the ordering of clusters and the # ordering of objects within the clusters is arbitrary. # order.clusters gives an improved order but preserves the kmeans clusters. state.o <- order.clusters(-state.d, state.km) # and replot layout(matrix(1:2,nrow=1,ncol=2),widths=c(0.1,1)) op <- par(mar=c(1,1,1,.2)) state.colors <- cbind(state.km,state.km) plotcolors(state.colors[state.o,]) par(mar=c(1,6,1,1)) rlabels <- state.name[state.o] plotcolors(cmat[state.o,state.o], rlabels=rlabels) par(op) layout(matrix(1,1)) } \keyword{multivariate } \keyword{cluster } gclus/man/body.Rd0000755000176200001440000000560613414675276013417 0ustar liggesusers\name{body} \alias{body} \docType{data} \title{Exploring Relationships in Body Dimensions} \description{ This dataset contains 21 body dimension measurements as well as age, weight, height, and gender on 507 individuals. The 247 men and 260 women were primarily individuals in their twenties and thirties, with a scattering of older men and women, all exercising several hours a week. Measurements were initially taken by Grete Heinz and Louis J. Peterson - at San Jose State University and at the U.S. Naval Postgraduate School in Monterey, California. Later, measurements were taken at dozens of California health and fitness clubs by technicians under the supervision of one of these authors. 
} \format{This data frame contains the following columns: \describe{ \item{Biacrom:}{Biacromial diameter (cm)} \item{Biiliac:}{Biiliac diameter, or "pelvic breadth" (cm)} \item{Bitro:}{Bitrochanteric diameter (cm)} \item{ChestDp:}{Chest depth between spine and sternum at nipple level, mid-expiration (cm)} \item{ChestD:}{Chest diameter at nipple level, mid-expiration (cm)} \item{ElbowD:}{Elbow diameter, sum of two elbows (cm)} \item{WristD:}{Wrist diameter, sum of two wrists (cm)} \item{KneeD:}{Knee diameter, sum of two knees (cm)} \item{AnkleD:}{Ankle diameter, sum of two ankles (cm)} \item{ShoulderG:}{Shoulder girth over deltoid muscles (cm)} \item{ChestG:}{Chest girth, nipple line in males and just above breast tissue in females, mid-expiration (cm)} \item{WaistG:}{Waist girth, narrowest part of torso below the rib cage, average of contracted and relaxed position (cm)} \item{AbdG:}{Navel (or "Abdominal") girth at umbilicus and iliac crest, iliac crest as a landmark (cm)} \item{HipG:}{Hip girth at level of bitrochanteric diameter (cm)} \item{ThighG:}{Thigh girth below gluteal fold, average of right and left girths (cm)} \item{BicepG:}{Bicep girth, flexed, average of right and left girths (cm)} \item{ForearmG:}{Forearm girth, extended, palm up, average of right and left girths (cm)} \item{KneeG:}{Knee girth over patella, slightly flexed position, average of right and left girths (cm)} \item{CalfG:}{Calf maximum girth, average of right and left girths (cm)} \item{AnkleG:}{Ankle minimum girth, average of right and left girths (cm)} \item{WristG:}{Wrist minimum girth, average of right and left girths (cm)} \item{Age:}{in years} \item{Weight:}{in kg} \item{Height:}{in cm} \item{Gender:}{1 - male, 0 - female} }} \usage{data(body)} \source{ Heinz, G., Peterson, L.J., Johnson, R.W. and Kerk, C.J. (2003), ``Exploring Relationships in Body Dimensions'', \emph{Journal of Statistics Education }, 11. 
} \references{ The data file is taken from \url{http://jse.amstat.org/datasets/body.dat.txt} This information file is based on \url{http://jse.amstat.org/datasets/body.txt} } \keyword{datasets} gclus/man/plotcolors.Rd0000755000176200001440000000421610001003752014623 0ustar liggesusers\name{plotcolors} \alias{plotcolors} \alias{imageinfo} %- Also NEED an '\alias' for EACH other topic documented here. \title{Plots a matrix of colors} \description{ \code{plotcolors} plots a matrix of colors as an image or as points. \code{imageinfo} is a utility that given a matrix of colors, returns a structure useful for the \code{image} function. } \usage{ plotcolors(cmat, na.color = "white", dlabels = NULL, rlabels = FALSE, clabels = FALSE, ptype = "image", border.color = "grey70", pch = 15, cex = 3, label.cex = 0.6, ...) imageinfo(cmat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{cmat}{a matrix of numbers, nas are allowed.} \item{na.color}{used for NAs in \code{cmat}.} \item{dlabels}{vector of labels for the diagonals.} \item{rlabels}{vector of labels for the rows.} \item{clabels}{vector of labels for the columns.} \item{ptype}{should be "image" or "points"} \item{border.color}{color of border drawn around the plot.} \item{pch}{point type used when ptype="points".} \item{cex}{point cex used when ptype="points".} \item{label.cex}{cex parameter used for labels.} \item{\dots}{graphical parameters} } \value{ \code{imageinfo} returns a list with components: \item{x}{a vector of x coordinates.} \item{y}{a vector of y coordinates.} \item{z}{a matrix containing values to be plotted.} \item{col}{the colors to be used.} } \author{ Catherine B. 
Hurley } \seealso{\code{\link{plot}}, \code{\link{image}}} \examples{ plotcolors(matrix(1:20,nrow=4,ncol=5)) plotcolors(matrix(1:20,nrow=4,ncol=5),ptype="points",cex=6) plotcolors(matrix(1:20,nrow=4,ncol=5),rlabels = c("a","b","c","d")) data(longley) longley.cor <- cor(longley) # A matrix with equal (or nearly equal) number of entries of each color. longley.color <- dmat.color(longley.cor) plotcolors(longley.color, dlabels=rownames(longley.color)) # Could also reorder variables prior to plotting: longley.o <- order.single(longley.cor) longley.color <- longley.color[longley.o,longley.o] op <- par(mar=c(1,6,6,1)) plotcolors(longley.color,rlabels=rownames(longley.color),clabels=rownames(longley.color) ) par(op) } \keyword{color } \keyword{hplot } gclus/man/ozone.Rd0000755000176200001440000000175510224511712013571 0ustar liggesusers\name{ozone} \alias{ozone} \docType{data} \title{Ozone data from Breiman and Friedman, 1985} \description{ This is the Ozone data discussed in Breiman and Friedman (JASA, 1985, p. 580). These data are for 330 days in 1976. All measurements are in the area of Upland, CA, east of Los Angeles. } \format{This data frame contains the following columns: \describe{ \item{Ozone:}{Ozone conc., ppm, at Sandbug AFB.} \item{Temp:}{Temperature F. (max?).} \item{InvHt:}{Inversion base height, feet} \item{Pres:}{Daggett pressure gradient (mm Hg)} \item{Vis:}{Visibility (miles)} \item{Hgt:}{Vandenburg 500 millibar height (m)} \item{Hum:}{Humidity, percent} \item{InvTmp:}{Inversion base temperature, degrees F.} \item{Wind:}{Wind speed, mph} }} \usage{data(ozone)} \source{Breiman, L and Friedman, J. (1985), \dQuote{Estimating Optimal Transformations for Multiple Regression and Correlation}, \emph{Journal of the American Statistical Association}, 80, 580-598. } \keyword{datasets} gclus/man/cpairs.Rd0000755000176200001440000000432013414665455013731 0ustar liggesusers\name{cpairs} \alias{cpairs} %- Also NEED an '\alias' for EACH other topic documented here. 
\title{ Enhanced scatterplot matrix } \description{ This function draws a scatterplot matrix of data. Variables may be reordered and panels colored in the display. } \usage{ cpairs(data, order = NULL, panel.colors = NULL, border.color = "grey70", show.points = TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{data}{a numeric matrix } \item{order}{the order of variables. Default is the order in data.} \item{panel.colors}{a matrix of panel colors. If supplied, dimensions should match those of the pairs plot. Diagonal entries are ignored. } \item{border.color}{used for panel border. } \item{show.points}{ If FALSE, no points are drawn. } \item{\dots}{graphical parameters passed to \code{pairs.default}. } } %\details{ %} %\value{ %} \references{Hurley, Catherine B. \dQuote{Clustering Visualisations of Multidimensional Data}, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. } \author{ Catherine B. Hurley } %\note{ ~~further notes~~ } \seealso{\code{\link{pairs}}, \code{\link{cparcoord}}, \code{\link{dmat.color}},\code{\link{colpairs}}, \code{\link{order.single}}.} \examples{ data(USJudgeRatings) judge.cor <- cor(USJudgeRatings) judge.color <- dmat.color(judge.cor) # Colors variables by their correlation. cpairs(USJudgeRatings,panel.colors=judge.color,pch=".",gap=.5) judge.o <- order.single(judge.cor) # Reorder variables so that those with highest correlation # are close to the diagonal. cpairs(USJudgeRatings,judge.o,judge.color,pch=".",gap=.5) # Specify your own color scheme judge.color <- dmat.color(judge.cor, breaks=c(-1,0,.5,.9,1), colors = cm.colors(4)) data(bank) # m is a homogeneity measure of each pairwise variable plot m <- -colpairs(scale(bank[,-1]), partition.crit,gfun=gave,groups=bank[,1]) # Color panels by level of m and reorder variables so that # pairs with high m are near the diagonal. Panels shown # in pink have the highest amount of group homogeneity, as measured by # gave.
cpairs(bank[,-1],order=order.single(m), panel.colors=dmat.color(m), gap=.3,col=c("purple","black")[bank[,"Status"]+1], pch=c(5,3)[bank[,"Status"]+1]) } \keyword{multivariate } \keyword{color } \keyword{hplot } gclus/man/colpairs.Rd0000755000176200001440000000366410001020150014234 0ustar liggesusers\name{colpairs} \alias{colpairs} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Applies a function to all pairs of columns } \description{ Given an nxp matrix \code{m} and a function \code{f}, returns the pxp matrix obtained by applying \code{f} to all pairs of columns of \code{m} . } \usage{ colpairs(m, f, diag = 0, na.omit = FALSE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{m}{ a matrix } \item{f}{ a function of two vectors, which returns a single result. } \item{diag}{ if supplied, this value is placed on the diagonal of the result. } \item{na.omit}{ If \code{TRUE}, rows with missing values are omitted for each pair of columns. } \item{\dots}{ arguments are passed to \code{f}. } } \value{ a matrix obtained by applying \code{f} to all pairs of columns of \code{m} . } %-\references{ ~put references to the literature/web site here ~ } \author{Catherine B.
Hurley } \seealso{ \code{\link{gave}}, \code{\link{partition.crit}}, \code{\link{order.single}},\code{\link{order.endlink}}} \examples{ data(state) state.m <- colpairs(state.x77, function(x,y) cor.test(x,y,"two.sided","kendall")$estimate, diag=1) state.col <- dmat.color(state.m) # This is equivalent to state.m <- cor(state.x77,method="kendall") layout(matrix(1:2,nrow=1,ncol=2)) cparcoord(state.x77, panel.color= state.col) # Get rid of the panels with lots of line crossings (yellow) by reordering cparcoord(state.x77, order.endlink(state.m), state.col) layout(matrix(1,1)) # m is a homogeneity measure of each pairwise variable plot m <- -colpairs(scale(state.x77), gave) o<- order.single(m) pcols = dmat.color(m) # Color panels by level of m and reorder variables so that # pairs with high m are near the diagonal. cpairs(state.x77,order=o, panel.colors=pcols) # In this case panels showing either of Area or Population # exhibit the most clumpiness because these variables # are skewed. } \keyword{multivariate} gclus/man/diameter.Rd0000755000176200001440000000226010001017772014221 0ustar liggesusers\name{diameter} \alias{diameter} \alias{star} \alias{km2} \alias{gtot} \alias{gave} \title{ Cluster heterogeneity of 2-d data } \description{ Computes measures of cluster heterogeneity of 2-d data, where \code{x} and \code{y} give the object coordinates. } \usage{ diameter(x, y, ...) star(x, y, ...) km2(x,y) gtot(x,y, ...) gave(x,y, ...) } \arguments{ \item{x}{is a numeric vector. } \item{y}{is a numeric vector. } \item{\dots}{are passed to \code{dist}. } } \details{ \code{diameter} computes the cluster diameter: the maximum distance between objects. \code{star} computes the cluster star distance: the smallest total distance from one object to another. \code{km2} computes the kmeans distance. \code{gtot} computes the sum of all inter-object distances. \code{gave} computes the per-object average of all inter-object distances. } \value{The cluster measure is returned.
} \references{ See Gordon, A. D. (1999). \dQuote{Classification}. Second Edition. London: Chapman and Hall / CRC } \author{ Catherine B. Hurley} \seealso{ \code{\link{colpairs}}, \code{\link{cpairs}}, \code{\link{order.single}}} \examples{ x <- runif(20) y <- runif(20) diameter(x,y) } \keyword{cluster} gclus/man/bank.Rd0000755000176200001440000000150607766360410013362 0ustar liggesusers\name{bank} \alias{bank} \docType{data} \title{Swiss bank notes data} \description{ Data from "Multivariate Statistics A practical approach", by Bernhard Flury and Hans Riedwyl, Chapman and Hall, 1988, Tables 1.1 and 1.2 pp. 5-8. Six measurements made on 100 genuine Swiss banknotes and 100 counterfeit ones. } \format{This data frame contains the following columns: \describe{ \item{Status:}{0 = genuine, 1 = counterfeit} \item{Length:}{Length of bill, mm} \item{Left:}{ Width of left edge, mm} \item{Right:}{Width of right edge, mm} \item{Bottom:}{Bottom margin width, mm } \item{Top:}{Top margin width, mm} \item{Diagonal:}{ Length of image diagonal, mm} } } \usage{data(bank)} \source{ Flury, B. and Riedwyl, H. (1988), \emph{Multivariate Statistics A Practical Approach}, London: Chapman and Hall. } \keyword{datasets} gclus/man/order.Rd0000755000176200001440000000673310224511330013547 0ustar liggesusers\name{order.single} \alias{order.single} \alias{order.endlink} \alias{order.hclust} %- Also NEED an '\alias' for EACH other topic documented here. \title{Orders objects using hierarchical clustering} \description{ Reorders objects so that similar (or high-merit) object pairs are adjacent. A permutation vector is returned. } \usage{ order.single(merit,clusters=NULL) order.endlink(merit,clusters=NULL) order.hclust(merit, reorder=TRUE,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{merit}{is either a symmetric matrix of merit or similarity score, or a \code{dist}.} \item{clusters}{if non-null, specifies a partial ordering.
It should be a list whose ith element contains the indices of the objects in the ith ordered cluster.} \item{reorder}{if TRUE, reorders the default ordering from \code{hclust}.} \item{...}{arguments are passed to \code{hclust}.} } \details{ \code{order.single} performs a variation on single-link cluster analysis, devised by Gruvaeus and Wainer (1972). When two ordered clusters are merged, the new cluster is formed by placing the most similar endpoints of the joining clusters adjacent to each other. When applied to variables, the resulting order is useful for scatterplot matrices. \code{order.endlink} is another variation on single-link cluster analysis, where the similarity between two ordered clusters is defined as the minimum distance between their endpoints. When two ordered clusters are merged, the new cluster is formed by placing the most similar endpoints of the joining clusters adjacent to each other. When applied to variables, the resulting order is useful for parallel coordinate displays. \code{order.hclust} returns the order of objects from \code{hclust} if \code{reorder} is \code{FALSE}. Otherwise, it reorders the objects using \code{reorder.hclust} so that when two ordered clusters are merged, the new cluster is formed by placing the most similar endpoints of the joining clusters adjacent to each other. \code{order.hclust(m,method="single")} is equivalent to \code{order.single} when \code{clusters} is \code{NULL}. The default method of \code{hclust} is "complete", see \code{\link{hclust}} for other possibilities. } \value{A permutation of the objects represented by \code{merit} is returned. } \references{Hurley, Catherine B. \dQuote{Clustering Visualisations of Multidimensional Data}, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004. Gruvaeus, G. and Wainer, H. (1972), \dQuote{Two Additions to Hierarchical Cluster Analysis}, British Journal of Mathematical and Statistical Psychology, 25, 200-206. } \author{ Catherine B.
Hurley } %\note{ ~~further notes~~ } \seealso{\code{\link{cpairs}}, \code{\link{cparcoord}},\code{\link{plotcolors}}, \code{\link{reorder.hclust}},\code{\link{order.clusters}}, \code{\link{hclust}}.} \examples{ data(state) state.cor <- cor(state.x77) order.single(state.cor) order.endlink(state.cor) order.hclust(state.cor,method="average") # Use for plotting... cpairs(state.x77, panel.colors=dmat.color(state.cor), order.single(state.cor),pch=".",gap=.4) cparcoord(state.x77, order.endlink(state.cor),panel.colors=dmat.color(state.cor)) # Order the states instead of the variables... state.d <- dist(state.x77) state.o <- order.single(-state.d) op <- par(mar=c(1,6,1,1)) cmat <- dmat.color(as.matrix(state.d), rev(cm.colors(5))) plotcolors(cmat[state.o,state.o], rlabels=state.name[state.o]) par(op) } \keyword{multivariate } \keyword{cluster }