mclust/0000755000176200001440000000000014525113245011563 5ustar liggesusersmclust/NAMESPACE0000644000176200001440000001443714524764227013026 0ustar liggesusersuseDynLib(mclust) # useDynLib(mclust, .registration = TRUE) # Export all names # exportPattern(".") # Import all packages listed as Imports or Depends import("stats", "utils", "graphics", "grDevices") # export(.mclust) export(mclust.options, emControl) export(em, emE, emEEE, emEEI, emEEV, emEII, emEVI, emV, emVEI, emVEV, emVII, emVVI, emVVV, emEVV, emVEE, emEVE, emVVE, emX, emXII, emXXI, emXXX) export(me, meE, meEEE, meEEI, meEEV, meEII, meEVI, meV, meVEI, meVEV, meVII, meVVI, meVVV, meEVV, meVEE, meEVE, meVVE, meX, meXII, meXXI, meXXX) export(sim, simE, simEEE, simEEI, simEEV, simEII, simEVI, simV, simVEI, simVEV, simVII, simVVI, simVVV, simEVV, simVEE, simEVE, simVVE) export(estep, estepE, estepEEE, estepEEI, estepEEV, estepEII, estepEVI, estepV, estepVEI, estepVEV, estepVII, estepVVI, estepVVV, estepEVV, estepVEE, estepEVE, estepVVE) export(mstep, mstepE, mstepEEE, mstepEEI, mstepEEV, mstepEII, mstepEVI, mstepV, mstepVEI, mstepVEV, mstepVII, mstepVVI, mstepVVV, mstepEVV, mstepVEE, mstepEVE, mstepVVE) export(mvn, mvnX, mvnXII, mvnXXI, mvnXXX) export(cdens, cdensE, cdensEEE, cdensEEI, cdensEEV, cdensEII, cdensEVI, cdensV, cdensVEI, cdensVEV, cdensVII, cdensVVI, cdensVVV, cdensEVV, cdensVEE, cdensEVE, cdensVVE, cdensX, cdensXII, cdensXXI, cdensXXX) export(bic, pickBIC, mclustBICupdate) export(mclustLoglik, print.mclustLoglik) S3method("print", "mclustLoglik") export(nVarParams, nMclustParams) export(map, unmap, partconv, partuniq, errorBars) export(mclustModel, mclustModelNames, checkModelName, mclustVariance) export(decomp2sigma, sigma2decomp) export(imputeData, imputePairs, matchCluster, majorityVote) export(mapClass, classError, adjustedRandIndex, BrierScore) export(mclust1Dplot, mclust2Dplot, mvn2plot, surfacePlot, uncerPlot) export(clPairs, clPairsLegend, coordProj, randProj, randomOrthogonalMatrix) 
export(priorControl, defaultPrior, hypvol) export(hc, print.hc, plot.hc) S3method("print", "hc") S3method("plot", "hc") export(hcE, hcEEE, hcEII, hcV, hcVII, hcVVV) export(hclass, hcRandomPairs, randomPairs, dupPartition, as.hclust.hc) S3method("as.hclust", "hc") export(mclustBIC, print.mclustBIC, summary.mclustBIC, print.summary.Mclust, plot.mclustBIC, summaryMclustBIC, summaryMclustBICn) S3method("print", "mclustBIC") S3method("summary", "mclustBIC") S3method("print", "summary.mclustBIC") S3method("plot", "mclustBIC") export(Mclust, print.Mclust, summary.Mclust, print.summary.Mclust, plot.Mclust, predict.Mclust, logLik.Mclust) S3method("print", "Mclust") S3method("summary", "Mclust") S3method("print", "summary.Mclust") S3method("plot", "Mclust") S3method("predict", "Mclust") S3method("logLik", "Mclust") export(densityMclust, plot.densityMclust, dens, predict.densityMclust, cdfMclust, quantileMclust, densityMclust.diagnostic, plotDensityMclust1, plotDensityMclust2, plotDensityMclustd) S3method("plot", "densityMclust") S3method("predict", "densityMclust") export(MclustDA, print.MclustDA, summary.MclustDA, print.summary.MclustDA, plot.MclustDA, predict.MclustDA, cvMclustDA, getParameters.MclustDA, logLik.MclustDA, classPriorProbs) S3method("print", "MclustDA") S3method("summary", "MclustDA") S3method("print", "summary.MclustDA") S3method("plot", "MclustDA") S3method("predict", "MclustDA") S3method("logLik", "MclustDA") export(MclustSSC, print.MclustSSC, summary.MclustSSC, print.summary.MclustSSC, plot.MclustSSC, predict.MclustSSC) S3method("print", "MclustSSC") S3method("summary", "MclustSSC") S3method("print", "summary.MclustSSC") S3method("plot", "MclustSSC") S3method("predict", "MclustSSC") export(MclustDR, print.MclustDR, summary.MclustDR, print.summary.MclustDR, plot.MclustDR, plotEvalues.MclustDR, projpar.MclustDR, predict.MclustDR, predict2D.MclustDR) S3method("print", "MclustDR") S3method("summary", "MclustDR") S3method("print", "summary.MclustDR") 
S3method("plot", "MclustDR") S3method("predict", "MclustDR") export(MclustDRsubsel, MclustDRsubsel_cluster, MclustDRsubsel_classif, MclustDRsubsel1cycle, MclustDRrecoverdir, print.MclustDRsubsel, summary.MclustDRsubsel) S3method("print", "MclustDRsubsel") S3method("summary", "MclustDRsubsel") export(me.weighted, covw, hdrlevels, dmvnorm) export(icl, mclustICL, print.mclustICL, summary.mclustICL, print.summary.mclustICL, plot.mclustICL) S3method("icl", "Mclust") S3method("icl", "MclustDA") S3method("print", "mclustICL") S3method("summary", "mclustICL") S3method("print", "summary.mclustICL") S3method("plot", "mclustICL") export(mclustBootstrapLRT, print.mclustBootstrapLRT, plot.mclustBootstrapLRT) S3method("print", "mclustBootstrapLRT") S3method("plot", "mclustBootstrapLRT") export(MclustBootstrap, print.MclustBootstrap, summary.MclustBootstrap, print.summary.MclustBootstrap, plot.MclustBootstrap) S3method("print", "MclustBootstrap") S3method("summary", "MclustBootstrap") S3method("print", "summary.MclustBootstrap") S3method("plot", "MclustBootstrap") export(as.Mclust, as.Mclust.default, as.Mclust.densityMclust) S3method("as.Mclust", "default") S3method("as.Mclust", "densityMclust") export(as.densityMclust, as.densityMclust.default, as.densityMclust.Mclust) S3method("as.densityMclust", "default") S3method("as.densityMclust", "Mclust") export(clustCombi, print.clustCombi, summary.clustCombi, print.summary.clustCombi, plot.clustCombi, combiPlot, entPlot, combiTree, combMat, clustCombiOptim) S3method("plot", "clustCombi") S3method("print", "clustCombi") S3method("summary", "clustCombi") S3method("print", "summary.clustCombi") export(gmmhd, print.gmmhd, summary.gmmhd, print.summary.gmmhd, plot.gmmhd, gmmhdClusterCores, gmmhdClassify) S3method("print", "gmmhd") S3method("summary", "gmmhd") S3method("print", "summary.gmmhd") S3method("plot", "gmmhd") export(crimcoords, print.crimcoords, summary.crimcoords, print.summary.crimcoords, plot.crimcoords) S3method("print", 
"crimcoords") S3method("summary", "crimcoords") S3method("print", "summary.crimcoords") S3method("plot", "crimcoords") # deprecated functions export(cv.MclustDA, cv1EMtrain, bicEMtrain) mclust/data/0000755000176200001440000000000014525075362012503 5ustar liggesusersmclust/data/thyroid.rda0000644000176200001440000000377014525075362014664 0ustar liggesusersZMOW~a:ꀊBQDjx0 1VQ*&( 6o+CWIrѸ#1!B0 <ǜgM7Mڅ$Ù{~d/[ 'חDA;Rj6kϿ>u-ƹٟgKߒVUz7_4O[ rsh5FfDHã"e'G/KnsBP }ړB$-62Ag^T˰ : k3l^?3g} =e\(7+fD~||h?L޷'$/@i'$&/~Z@?Ӂח}=?̈)K{Ǥā8 kuZpMKSB ǪGC(gRpy񘊈WFՄamLKadm>l&a(la`z¸y#o_u 1g}'珬nH<6 J&(Yrv}1u>ƿ.bjՋ^զ~w$Ny_TU?oj-?G{d~^ojG{w;j `[pGYT[zGQ0~gyUcK&`̏o]pa{xA4LB1Y' 9qnsCB{+čM#~~D?W;~ RG|gYo_c?W{oر%_5~6A=EYR?$nz!/?Q7_婟M)Ez\m ň=orN ek>I]Zj_ e~Muhk;bJߪ'%ϻ?kw-Gq5b,JtDRgoEct?|s]Z5bz\:7,s1{cR|~4ǯa(律%V_v9-%7nrpOs3ISl[%cmc ƹU rntؿQҊ^!c߶Κ|S}85_vM9ڸ=?v?xA96ϩ/^+t{{s{5(qq|. yc}x;:wDSC|8Qo}z |G?߱=y,шѧ/>]oH+mbwou}_IlO~F8"7dRh^e7V_~ooo9:&mclust/data/cross.rda0000644000176200001440000001750014525075361014326 0ustar liggesusersSO.i] D+""" ( JK C|$39?^{m}g. C[5[222 2*][Ȩ(v 9. xh~s.Du_]f2N$(exL$l=sP-1`, '/0 D=o%r"jl6En8}CjPHx4gJ2ˏCvj5=hsr"!ZWsղtE/'M˴ xxx_򱬑4F'yяNK` U[&\̅?f^ #Gs{?Ed=xTpZ3-w HFu|܏~Pz}9x'̷ ?@mGzJ7mj͋EOWbyۉ XVQ_47: F8k=?R5#uQ"X=5J7ŝ#< `j+<`MNB#*r3AZë7sC0|o", A/b*zI"'XF|L߂KYgdW{01~zܯ^BSXhïތ>VPY|9qYLj>9QX]6CYShy v2/X]emuMc]hxu~!jڈwSk{?F;c;0s@vm0bcͿ[wDÇ۞bJfGmќ䱦=v`!7cx{Zp:Z<Ϗ-:X H@ߊ>~^{ޏ_e\-U+]=7O#F[(/SR[^QS&& ~4Z%z $-ȷ^l廉<.̀| c;X eň@Ӧ05e)B5OށŤ{b(RuR *pWA2Ohh\NJWPQdyⱃ%k3hک>@M!̥C1ZR#gZv0? 
@6\jƬ [A&h嫤DTq P"6ؖ+) mЇ5^=X&˝@fK V}6dį54M}b"f,oUJ\zR{ϓ:s$(#G :r>U߿DV4= ]*2s>h<\vlo˃^_ 0ue^)v]8x)-KE62hQeʨd(+pĬY c^`爤U(̨\} rѲxbqt,@䋩\=&̯ŷwҖhE=@:uSPAQj"fHjJ;%Maf]EIר~4X{}k-Tz^ܱr~dAmno2Zh[b+|r݌UPKkhL^u?,'ÎP%Fu|<;'4H:3+\8O"P)cme23(_g\w$UxV8YZ.cҘS)=Q"j}ӰMc(|l=*ȶtA so-&t4Ս|&4+Kku(뉾[w3b::S/N@NqPfn<m̏ ddɾP<$ <y;|8j_coGcZb{![V]0|zAMx&&OcDfݎ9*b,ݬ{)u̟+)}_R\X 9t& ’;&P"ͅ3ΗJz?u+R$/\F933HHBo}ʩ7_HKb[[) sK?ocTk ]'TSsL*fL}V$mnv{% @ȭ0e`_ybpe"1 ˤ5f $afW s[K6Tr7V9Dmqg$OK@@ 5G[,+Bѷҏz B80N)B^9]FsVLyveWc鋣Z21 6e;e=g-:DŽaM|cAH?5%JC@5:fZ  jƴůR\cf5^jX0N)B@A%' UIhCϱW h ~H1)8s rDӸſk|~6Mo|AX.|B\D00ϠcI(& c2`mCb5 _S|R9 Ţlx w s PSw,\[hnE/J N9h)c0h_37B&?̥aGCRJө4qWpBeazMEi/~ڽ'97g؂ʁW(A#ڰX0ũ:̑m8=Vv|CSOLdfBX8rb([3reeR`SBqsd}<51. \iߩc/>7 a߆\iۅ"wB)-(; ̢Wi|V;XEiL5Z'jYe%r@`c, A(Z-J+jb10Iɡ6˓3\jd)2,D =nvohh!O)<` -2ωd&EMh Tg0{lWVzbp&{L}YT r܂#NCU8 ,`K"5]@ Wβ=2bIgx(P_!]d~;C7},[0ˣSğ=XKi[8:UUH\bՖY?$"]c< OX,ecXyNa&G ݗU`i9;b=zzJ%;({I{sDO@g'waN gjOqsX4 䬷brfKIJ,S./?1l͔vU,AFXzɫ E}t)};?(Jjm7{TT,2uSztK󄒉w;4IaG|>SOYViLHVأvR=mrB6PؓybM?\~SZ55A2!zZߦ"B+FBKPc1wY}g5tv7}t"Q75mѦF){6mE z TiT#'$!եh2.|sA܂C4X w&C#6]BS/r&9lf7ь g]Ho8.TJGDkZ9~pYq>/&4:)j#)7E;WxQES7ja]rީ|*4gO"`V_0#b5Heݲgm1fgTl"f 9ߙ푵2jkJ3F[kQ^-`T/VZvN#2:ZjQ10O}l~gDVw}tjZƯ,4ӯZbXf+$z8jcC2`4}u`:/9;.s`:l\%Q_ЦԫF-'7k.taw}#)qa}X '&3!G 7"8>nA$4(4<-l ,])$Uchҩ_W "dQXr[a*e^{qJ.s349ָ`bPi B*XC|}e`2 5h7}rtn# >Oi=Hy^e 'h[$9fp>)踗FyPK?/ rehqkx[ƾ"VؙAb۠rj ʆ? 
mW/WP'omZV)㈔1u0'H&jBR'K@m5_W,IUj&KsFt aO5j)[2U8pLl~h Ӱ-k;H- F|nP)OcshK`]@SΓ7ytn|Lz\ ^>m"9!;\[m_ NW f[SOlU="QkMN0Z}Bef!w) Fģswo LU-,Tn~Mw+Vw+`Nq$)Ɨ|GGęQEEֲa-Jj&78g޷z+Y`T&r[}<bQm#<]*3=q,u<K=c"}'KfUitϲ I gMBsSGkc J<QI4^>ev8 t_|UTwѽ W&t#[O`KJ* ֿX~6OZml u pLy}Z@y-UYòA?[@[Ī -y ^>Li{X{'uikvwp!WkXԮ$}qj>{9;2i%ڛ>sEFk1MԨ,K"m|*`&^8ֹ҇6֡+ >H9THXOt mL_l( chUCԗ'ۥtЄ^ӚOInx1Z~'Iv,烑*x{W}յ;GϣO?Ӗ6Er0_tZH G) :SWPgx Mp,+BMsRN[ T])C5`c'DtG냒uژ~P!Z6/{?xGwg 뿓v(lUR8zg.8pT蹻Ny^v6.U 41:/mclust/data/diabetes.rda0000644000176200001440000000274414525075361014761 0ustar liggesusers=l\E%K K4.(N I Νw}w݇sBA.))(((^I%%%U%?S@Aofv{1c *UnSޟ>Szr%DJ l's>CD?O+Z)m45Bjo7 ӫJR#ڬU6˥Y}֕OەfS~o>ÅI_a9ȩD[UHNxRL*)Onj8TӅם-ȸ(Oqn^I.XDe6<~7,H?T$mknVa|!+\/fr Hh&yh6xw0C._Jh"I&C&_|ᯉy;1v'ɏg?/lץ;?B-bwؐoB"6WҎC1d6noqD/D֏֣q*ׅd v߅E|{.<wۥbN;HA<;֙R'=u;>Lhpqt! FɻRi?FuC~?,)/mGXgKӕL>D~g;ƺ< 3q!c*MB]qGG}pF<3y8x?$,'rr](M߇X&x7{͋3|f3g >3|f0#`F3f2#dFȌ!3Bf2#dFyf䙑gFyf䙑gFyb XZjkѪUOVZ4K,M44K,M44K,M54[lM54[lM54GMs44GMs44GMs54W\Ms5w*d3)mclust/data/acidity.rda0000644000176200001440000000224414525075361014622 0ustar liggesusersEyPSW#;h^$VRZQ &BQV," AE*JFNAq[! :"V˨*P@(d)t|{;wABBҜ!01+yܰ~Cby*2Gl79S9-\w_tP[7<QN㍦)5\"పbWE,ĉ3XdIıNŻDϏ69jzp ؖܡi@hb q]fv@a]!a3 [.*zIAInō‘ ˲Jyjǀ( (&< 8O(~;튫WS@nKv}vĂjR^i=`fj՜#5$,y.ʴu-bU(s2!}}c $q @o< oi ء֞o ?+Aˬ*te u u2MN  H=[sAЫ}ujڣ qyf 2*a|;vɣN7y;7>ω~^_vD[@I˞Ϩi/Qddߦ=W@O?s@9\nb7T&i e὾mySKp9X畒% Rx>:iIoAݏfEnAf{?T^q$ѩWh3$:B{5iO4 o8o;>$d uMYUvAnԩf4r$f?Xc|sxq/B_e,7J3)xBnNp]{o^Rwo \F эVҹY;*>¦lRx$̮lcATɥ! N8k.*gAj/ U- 3*R&MQgwLvp`C3߹!iG,`ǘ(4/C!$ORp=82|S} qZovAV`n&ƹ89OCqkԳ8:i,(g=u`xll_r$hF=mclust/data/banknote.txt.gz0000644000176200001440000000334214525075362015466 0ustar liggesusersYn7 +p]vq vg($ꦋ MQۿ/ϯo?_Ͽϟ޿}~|凷׏o ~8찕iE|詳Kϥ]m7ƶgmv;ܦf3K{`]p;oɸmA/ 빥;<~輅뿴y]Pѣg1Dd򄌗12L7KoLP&DX41-[Qݗ7|0^:$cNclS*O S7ޒS7Ђ$/ior湻&HĿpQ Z2@E ruP@"0KN"3nZ+f0N0_=% CFYEMl:th♶SSy9]#]w$v [O oFvtvEըf ]vd++йK\c3GEx T G-S:%jf.@Yܟ n3skJ3 ;&Ij VsS W T64 |!XYFԙ$Z\IR _D$"lCwh{bI.$C!DU1'3ԡ°D.B[3Cw7,t;* 0x6\ëǶۗWb׼<6' sf(^`Ѐ4&KumǠE'UG̡+n. 
gV2J7Fn+<; AՈyTcBD&.:[D/.ك`NIW0 Z٤ٹT*?}<^cb|6Kd\ A $-mXض_6F15s\n;PpELǬUa]mygѓz-+|m[LT߫u„0E*;Wd݄xl"sc^`v3Gs+ZЃ9IBq62g# ĢZ"ެv lТ&XȐeTN-pS~jL"E#I԰RI8loq irA'eS UTք )k2*JՌC/-vMyWo-p@l-ǜr|CNaipd#+#GM VU Z-`RE938es-}7't,-J5<]lE! I}oVݯ/U\RPȵ/[&+XdJ-xzPd~;{6<Ɋml3KZ}v.T5I-]vaͶuZ2${v_e.lB# ER;Zg%O*SbHV31-U+* s5^`i+r7{vv&z;=z<{Z,5p3+x5',H,(Z:lf^|1 gqfoY"g%=6{rQc~z*Ŵ[4YAo&(&7=/'C|' 5־_48o{m)3"r +?xv e͓F}_F~{/.p ,?CEæP?;6uE/|9lq/w}aW+ieB_ǜO'HO XG/ U%I5tE>m'=藆? Rc݂_o5cz̝D9Gs/|'a}0CO*t=\uj6US0Qce.O<KnB"9pc[]gbژwn[U*aI]rm劀"'D ndGr^QYR%DT@ȘB%6ye.`l${'--يjpٛ^[ 4C]: \q#۽c>k)YbXq$y",kso-QN huDWvX̱'߮z mclust/data/chevron.rda0000644000176200001440000003733614525075361014652 0ustar liggesuserseXT1@10Dٽb``;;Pncح|fy\s]uޞ-c23337(ocfanO93 3KñRN̬|-å [|||||_~|||||||||||4KnpV?[vBUh-￿2>Cv~=~>Ր n\㳧Y@DOJm `]x1fi? /R Ȗ Gała ǰ|or 1~@}~Uφc@eRQAf^# ǝ5\ m67A{B9bo8eޞb8&5JpT[ r}J;b8&o5MJNJǯT2_y~+`nu3rD4bN *r78v!C_oZ$}q^Kn7~]>nW G,ߛ{iru~t8.P%Z9#[^GsG}3)v3˴iz=`"y־hKO'ԣbŨ{^@}'Sb5}z%ߢϻBv`^:E1~]QOh_Rm<]X1xncmVeijwQhuïjQXwՄIhK7~}V6BDT 1}^W΂ GGsҠ)h輲=oξ\/ւcзm;QY[Y^$両=Nnzr vj"׀۸85omC^b+یE, ڇ(O.8CbЏ!cQ߰~#qܘ{ ZW(4t'|춦)ګQXOuGn]w55z>@LA}̻C{zX>;Z{_=v ͢/]E]v]%Uv^}6b_ކ i0 =W_׏&z}y~6_]ơ#g[az}]xǿXvi9 1;x@U"g8KsʦBIvڎgA>\:q-x TWu,N?ut'aDi ߝ vܬ-Kէ>}o,7g'7($3rqwq7MȈ?F}&]u;c[$9~s՝IvP#*P#ub>$\~aoඡ3о]]W܎/3'6jGv8 9qa'~E"$E1d,Yp_ K*m%OBn-~I%[' e9M=禯#ca[8S ?J,SP~Q/ ^5>uٝ?r שvD;N癞kk@'eЌ&qxd>?N&SWD?kQgWwJGaqhwgvtS8OҳFWv`[.2,>v,> rCB"5c/Rc$Tl;c >˔~EWoTc-ֈ;F H6[cpv ʟm5´! vu1~5ug?:yS W⪹'DŽF@-+;(g#qg-;\'a\$m~߼:Ê?9mP3gZF9vTW}G{Xn}%5$.x'6Eup3p;طf1fo6bueo4Oys{?ʗ4fkcŸPn:Z'l; ;[OH:Y؅&zCynO{q|G2<p? 
Kq uܟʿl8mg?}1S\6܌=r `R&|yky-2^IǼ!c[ַ+z?XÝ7h+^hg8Z`ס27'`ϗ%vuyc]߳)2[[&մ`Wfz~';ЯF_*jy9ݩ)/~w?)+ꂃ#`WZ>F~߱&}.-^8mvOǡ_CgLVݫQ qM`GMZ]lt{BОF8vi}x n\)z_^G"mZ<7h֊8k9p͏n?;JaYMvBg?-wh8Ky淕mg0{YW;%f=qvw-B \r`7J QA7CKS~7ĥǴQn'׃߈tˇݤՂ ۰< `ģGVCO'3RӪ D,r}]LJv]mNjy/x_:uj(̗ҚHI5`׉+|mq4qaꓹ;n6}v2 (_\]kUIK~ڐ}џ]Z"Kp˼tzR/p|//`^`C1؞_0Q>FjW[!nW'3<=QJ|4|+}r+/΁=@<~y|33Pscj73<۰nj3 9 :^8& .vd>t<[C挬 ^o|;J)=adY}pH ?,Qa=P4cЍ|jJ=4'_y~()AA G{~rgC/@^(·ڃaܾqO:n=r씚JO*1ӕY=E(3;v'SlPvڥٹM?ѯ1|M|,MjbAE?֠9hj#+‚O5^ 4G.ߋEڃ^;D)ZtQɵ^솶M ` CfT Qk(\a^R2ꟴS⑌[,c/ oˬ&N o865츚hn‽#+?`C^m8&hbBg~0 shC*:ne5'B{NeNQ36ߋ~jCr>h٧ȏ8,EX9L .0 [šb=W"Ncw2חYþrJt4w<0TWZg¾޼SD_ԢYrP[W⫤;3NeN{~F{Z`/Rum~WWmc'8&s=0~ֿ5Cqu8q76\sKcXuA'w O+vՍ<:av ABOZNEa>߮7P[zdtm1 w~aPgl}em`[U[[ 1x50N3})˥q}r{]ETJn}>ʉ0N8@q/Vze1Sqp]8 V7ztt0|%A;v{+~*3>z*d fh_ccя 'MkNVV5;5} ) 7! 0Y]P7R{8p=O|h~5xﵚ 8.n&cN]GО6^  ,:m|ec1e} 8M %ωQؼXةcj?O|'G_M8A yyOuW}H.a#dh' ˔JାU=nG--!k2!^dJȿUmL|3 88zZel;T_mj)Ԭ}5P? oXywHp{yԘBW6iA+bG9S#.7owqk{(oz$k仁[.ly1nVu 4dψO3_7)xl>o b  _]sdTNoܶ(LC˳{p.0MhR܍YuJ﯉dĒa5< i \z:u=:92-w;u@w鰻:5(GC ryf^߭+}Uq-< y!9J}G'_!4wW +53F^wr)͂31}Zov0U~0u;-||Vc/* ~oeYկg?#~Nlwp+d' U.5"9pa'B rQW8:q3%x4y߳_HkeXx ѪW2 c:^p^ïǽtz˿lm`߻#N󭴲TtbCJ {29䤿ڢ.X==/.aξFybFoaZpzް!Na&ha? ǤfODnNy<l 7"AI|VgYzwtק;a=:SK/x=?alXb|^IW5OdO6ݴ`Gw66xZm_a\vj=(i]K1ppc77[Ypw-Nf#:x3dh?f'flv]^QjEl`^ܪh ⥬ᗢ*SS؇x~˻1oׯ-]<>~ n.l6O/ =zg{K_Ru+^M`u2aOƼ&bpMX ^qG9yQqXgJ⦰`<71q)5m eʸO]IZM5Pރ hu8ϳ}k̫</l&:F9ԌPJ~]~826off}(A1}# I qVzQI4*zg9O8^QYi۴(9Pĵ#E[`<x7;y˕>`~U.>[N_A6. 
=&b_+83,7p{dU%^?Sӕ`]ܼՐuF~go!Gs?QO̝YqV~tpUeCXh⬐_eSf PR= 8*f%xTG(OdK xE' VzjQdIfRA5#n ~'ptQ::|b|7blE+0v[QvY{k7`׷FbֳXOTU9{2i٦ A_z95*e ~Mvvs ;]^=8 ȸcmII)y+qofa%?r&:.o)`O+C}xbE3$s4ǀMf_  ر{5qX{p oil;oͲ`W!;]:R G?B)_#W\k4݃}US*MFhÖo0/zv7l|¡QӽZ AJwKZi7aW~Cڃn0>vi]z4Xa"oVnTMW\ަYcĵW!~ UT;w@wl8N)_u2J/RÖ>矇8;Xv6}̻6zt@;\>SϠ"n˷ ~X]'4m& µ3{P0"_\ٔn|?}FXUI*1t| Jq3o{);JDC{4oS1NU~j}7hf)SOş}lg>6KvaSFOyJ~bmQb=ڵJɛl2q_-_ibŰl XzXMXl k86ŖYƍ3n}Zy/:Z {+ w"^l0qIQ~(f0_ X :9[|K_ 2s.+`/F^/DSa>t=|:yHG_swMusYjC_(S:`]6]e-3?޵7zߤ7+"?g$=O* ^YnOGGzO;Զihyj1bhAW}yvP:U骘E;kz~DH[P} Һsa(z({/G{G`(וS'*<-zqM]q^.}kb;2D_:ԁ>qEF{ lV}ec$V/h|{'_Mri,˼c AF"}~# A9T\&ؚްƩ?1S>{ͷ!=Yk#~Nʐ+yQWt<;Mwe[9#^MW> ooe{f. Nޮ~Gw4Unwx^/&?a} #-*ޞ@0q %7ʺ ?Ļ6@!I)лgh؇߉Eܗn`+%_iY7{z%o(㡨.Mk#Oҭb_F*VXT94Mč<{S:_a'vR4}b~T}x)}Vdg)~^qa׈7`)\:6DyCϾPm'v2ofج,Fyl8!w<+`hԉ~_Ro H5,ym!6<vXNoF\ FD+Vҗ]yEYn:\Xr0eHol!a|41ž\8Jwbֻ5U}E^+aw܂~e[ Wљ8h+b4ׇoA͚}آ=tk~&oTn3)הwv{펡R: }.qX sЯV#Ɔ ߎ>:Ig+v/֖O[GzCm>EiW5~>vW`{Tҗاa4 47z`r̅kmOXvqߦ: t_fz#N=t&~?cmGyvqb~5hN,Vg7 ?[씃]#/(lilGwMé%z&?ײME|>fh[#i3衛~WxJʾ* _JA{wʃPQ6zNjW!MIXT9a'}|*Zb#,?}u &?M˅N6f=[RKhyaW-i7!~ɛW8\reR_'ZxaJ&_THiJ\*DU&?zdk̫FJ3~+)qtmf>:7} Yc/#mԠjhW{s8Icq/ה*:G6qUּZ[򌇽瀧Gzfȥ櫪:i%C,ym%{O2:e2]@( |8s[vh1XeM'b])C= ?9wG]x^cK=HF8KvYKQ.-5]垁^n6~| / rTA swx£P7+1 ^)Kf/>}sa7x-t;}Ԝ1-;?%' sVNEoqHʺe}Xwн:m ,ٺs]&V򨁸-d~e-띃}}7yQ跌Von`_F\#>~z`<WodR7>D)Ykm/ט{͐*ʛ앧)GެGX `|8eCqEڐOȣx]ΎەuF(o 0H|fW fXxwdl I~7d#/(-夼/p2;:{MqLkuqfXuڣJsŷ0b^vtvbxu؃./`0"_?ՖM -}z{X)-z}=So)]z'r)}Q^MXѡڡogF\:wwSG_wՂ6Mo*y+ 7OD#y[vt8f2O ?\z%o٩Iq>Cy/:is0JyaCyn uXhma1WHnk~Xϰ|Wvpڒ@6gmYWX9oތq4SS=ĕO>n͡u_63E3p;8d]ĵƾ/ݻ*.:?=nF" kzG@h[=FaCj%>Пp&*p3^vx6qGzhq;iqU߶G{Ovnpn⾵u>İ/x_E-v~ 7 n5/"?-I}vfzߓ8VdO=Ǻ=xF+{h*? 
5QC)x_"iӮaMXR7k2sΫvW6qzұfپ0Jݫ~1[qt?n;`Y:|z7UsV7ȴׅ Դq>lErÛ֌YWvAXJ^jjxOrb8Vg-OV9*5;*y%a8NRR-8ުd%6Ske?H UW;JE}G;,;3뉸>A!K1/v:w@& B8 <<dni9*3y Ponѿڏ7!N|f)y{+(ﯞ޺Nf~/Aw0^m.n83m%{ϧ>7UsQn'뷖}zhe{ w=a=a======\ilWsÿ>_\~}hW |xKms}o9_N{w~w]~~slw}5ÿos߰~n {~<߳5|η?~aO>|29?d'nG}0xW>wyg[??{6y 3i{ ~ϟg}]o y?6vzen{sk{ǕWvGos_{N|aW>s[ϽzJ׍i~{akn7\?6: ?Q~]v1a]ֆ?w&=Ox68/N@ް;o{?絃wu=ß&9XG<8/mG}6޽󛼺:r;gJϹ1<ʨ?0 ~u{=eؿYC{{gX3zy?=~m |f>[?Iz<|OG=] X_xa/:n=|{w;x<>_<f=p8&>=8Ҽǯkfo}˃X>x6m)y9w#p~5 !.Nwt؟}<GX~z߹' <Ͼ؃ژv}<8=oz5cQk=Gӽy/GWwL=_雞壕# wV]yQyy'~n׽A5}{eW'=Uv|omG xu#y~4׷v{=y7$UTO⃖{>=>e^|!~W[tks5|ӨE_zcݎ[N)^wեWv?|ŷnvm-|8A=|g藣7s/oz=|.Hu?%siqW\{2yǽGRG}S~#yD;??w_sC>^>Ѥ{xQn3sχ>78k'H`5ϯ~7zX0 ~[pv=|6vg~$noFq_|K;;^ԁy# U7AWػ W[xRq{8_o ܽmw<^/xFۋ#<r>/ߏ=/k\\P_[/_{]n.3=JۓoʼxEOs"7^W=:v__n{QWw}"|}>ϫsɣ;{G^0^>F=~,^=۝x%4Ο^H@2|A)|ym v>z' 8$xtzz~]95o|o?;>v{/!>Gdÿ_8.v;|y΋߿>j(q;xb}Ώ~z~/=_^<;Wot{f˽.`p/~ȓ[_-]N}~Ϧ"E|~oGpOC< ȓxWB?K?S3_t(~EzyyX::;ݞ=uH{=|Z7q~:NF=3 1O:L>.Uq|g? t7uJw>|8Eo.YCQWyyT/.R'uȋΓdzx|~|>^Nxs)t8 #gn'[OF\@|^u;ا{7{[#~g=<t#8V͈VosAXV/Ϩow=ql4yl9W|CzB<7 ~eB+h~}>ۡO/D=nN;'ng?~}/b? U? ݚvo#eoNW?)|,pOV:NꀣiBS#3~/F]tc|.T^Wž? =a(x5ţT1-7X:oσWR{|U_T%ɫw΢_CW{ӟ[< Kr>KЅW/O=._yP?~ A`]Q^ޟws|t~j2MOpO`-t :|GQ~9~>A? _Y cSGɗ~}^7y0GgJ>:x~xoNs굗3Ne?Z|?uOVozQs0FOYnqWu;8U+nrɟ>Su'{sqcs{8I{=ѸUu7xPLyNux(+.^^_@9no\ }:-}\ 9◺#؟No+eYz?sfx Vۧz}zeq8CNc;?])[ߵ%W?/~V=ըm(F'%Գ^sw=6q'Ux3t'=wK~@îc_z&{; zҷ> \oǧW6WD}n]'_#ûօx3qk3ĕⓆr7)NuGxG ;6~@})ޏK,Lդ瓚K:=Zd]9Gk>/@uC#//"o+՟x:9?35_oOVC'׽!^ uNo8<=/Ӈv{['>]?}<?zzv߱_9S Ӿ?cE^>o睗peDQο:T|ݮsG? # x/#O;[m7өf+>t^LN[tP\O7)K o#-s xWpVmg1?Un_۳n]3-^37F>'<9'j"i#ixZIw <)^Pv7 \y?ߚ34vco)iUu< V-pA*OUy|>uL_y"o0yͺz*X]džœ:zG Tss{(qpE>C\#ԜECЙ?_˸:br: ßG9FoS^u7 Nxx\4qys}umq< yoD=Օꕚ{s֯1Tu}?Jwivmn<;< ]_<:xy}%_߉8"U|gw煏䵝e"0~p 휖Q}H>O۩< ?n?t-}un1_Ou>o*;>~_P}_wEK52/,_Dw asޟ.o]@}g?OIui7#O;NbЗQ'ʗ/OOҗ? 
ܩT^w73qY@5} S\z'sF /W/XF+{|o=Nh{=;u,lOyd?Bw|NPKgi_ʗsҧ5Bs7IVsy-yςg8ϱWW-x ר[/p:ksς7o7Wvm܈z{cxVcGtx:eu{t@Ws%p^?Wۢ㈋oI,t,Y>XN.1U⪼|8o|/0ηL󻚏0GևwX'}(KK}~yUf}uN~'߷s܍a;\~yy;=> u?g?Wz_x7it'_P >t'gxg/"dtd^G/Ꞵqw97Ino7QxիyE}KQUđEOoԁ -}΁oj[0k:H:fv5?)p;pL'oyXݯ~{8?.xgx#>T=FW~;?WRS}_s =ou,xMO"[~E]Vy܇z,E9#.9Tk7mp&\ n9sG.p 3݇<UHz;?q1FqNwpau޴(x%V>z4M?9=v#xrϵ5WG5\t^k7'=QC?ɻqWZJ~F<;^QykC`x}wC/W)|4ǁ_?GT.:~ 38l5YtBOi޼3<Ziw b/BqQ_{P7 syGПPφW͹p>izKߟ%?)z.|럆eY\sK󕰗$mZdt<w?B݃{E_Eu{N-KZs|:w_`_/ފ7M.(s_=u}щ?K'|(ObE+{3U1ܫQϿ%ǕX/8z_}u'+@es=ZYYgnٌuha.;fSukEUoXxչIur筜[󯖻 Cssuߴ}_u!G/qn`>E-IpSݏv oӱ<Η㧢ާj~QT|ߕ2~T}ՓVx*qg~_wj84xjMW~~s﫻Baʳ=Oxrcz @usoΉs{~^ݯu/xâ>_U\{,'RӯdžojJR|?|HXܭzd?kj^ۙߦ; P|5`yǑ/3?w|ɎY/ N^܊/.^xֽFݻ7'^jfpx]k. ~[sc:Q}g[/}9/Oϟk^;> .WWלݵlGr'tų˱[gvUuuLN_Ĺ:>y͑]/3~tIy85_;}~|y=꧗7>un/ώϜWc5Ohyu/)p}KFpovHߝ|4qO?#s"sBpxb?>؛WnX?Cf??{֗9q~3kO/8;c788o+MkЇNBys#ZW~cn0ԼA%{qN1)~T w48:6/9gy9@nJ>oq.؜Pչyǡ~:8n3x;?QsU}~K]~g&ag_\x>~sEs9_Շ^R,XתVޡ3yDJKwս]ū{W=Џ ?ouxW\\tf}X|sF^£չW=_߫W-ԜQ+%t?uNU~/'/u~N5W0ڴxާ>G=n|;}<տuCYn}^^w.tYq׹sy(| Ŭ' :xdaxm͕K)U'tuY~:xx:yRuyr9۟.Lx3mYǟWv} \.?yaAz|c]oG(t||)BW}xoGaOxUϥd=bM[>zZx!p6xjۡ}v[{O{nB7ǧ+xQ/=~hTap/mlssIu`x}p~LߤS>*t:^ ԁxOEWǏw~u^3;??X~,:׀js~v.:ZC_gEY:AtWJqÿiuwɈ돃2Co;~+~,?}wE/.Ԝy#p~f8|8@1Ǽ \zv}{g=wYgov\w!^Mr7xawuzn{C|8{=u/xu wo >j݇ Uz:YyU^g7{1/Q=Uo{ U>17RBiO!}F~FMuN y?u ],(ԽKO;?JfΧU"tICo-opz-]ƽuRǧ]gCsg:yo>;7_zymu>_?x<]׹^/x^|Q?uwBOnG= yNDm׽nk_Q:ڴ\9#>='珅,3#RIxaO:^-w7խ[ˈvWW^{M'׽Njy_z7]Uvx/Cv+݈d?::FQyM-~yxi;u6\/zͳƿ<'P{="n9~g8S\G[<|ocO<Η7;/o~n>W9qNY^e].>u1oC >UWB|7cc4~;uJQ?{x ][U=< }/=uq;zC_{* f_Iͱxxng~!/uc~z#|~fy-^L2I6t`Oys {kr_kS?swŽ?~ xXݺۉ՗WȢ%Ϩؽuu.|foQZ7;\:q+tU_>kD!*q<搈O G[uЁ<>Muès&7#nT?F=Tsst7}=x{^B&˷^yeRx/Ǟ=zG,3摗?|OWs<$ώ+mgooa_<;z7=o{N:}}k}y}CQ}폺y8I~Ca峈~OFͩNGeKVbsa df<%>~:𸾙9/ xG c/ t/<*> |/t.u>sgt+}$C_p¾?^O<:Y_7=J9~x᪚ Fp;ێ#z^Grn#-Mw4u)}=]}G>^8#Y5dlk^z(>ZgyN wVSr%3ш+nG<ȣzj U8/lHW ; ]s&Tw>gI83)YQ!X])>=^gO/5"QQ\G쀽8t݋(/kyz+N[;8zLKYS}ǹQKOvŗgv'f%x(8=C1z׽,+=ܘ^D_j^ܤۍ1y{?~rħw=T g8.q+|)o8?-OE/yQFNkٻxQ2z;&|1ybқꌏŻ3֯gϺ=w=F#ߏox3|_#|kCqY^+s1^qW._$ߊ[+uI~y6u;{KU}oGOC7z^|>P}˽.'; /U~cm{;.~~y_C;Ş#lŗՓŻ/ω珂oW+ 
}s&v=Q| ;|k|/*n{'3|U/z~9|||s뾠ݞG;̡~| пw|~b+s<]JނxQ:Tsw{3I"3ɗ= >_=s>|yck[=O_m,O} xu n@&V:[|iʃxs~7AG#??_˺gtEǫB-^rx8G+=.E꽎J8V@_tx4 3YsCv;-|[u5Xݨ/-~OgyJ?Z|ϭ?IIǡ:=~j_eⅹYx_} x*u(?{|G \]M=. }byJ~I#W tdvBQ<08X^6<x>x9odڗw3-~`x.}^sfJ q.vś3îտP/<:ԝs#W#.ȗd5lvBW~~#puͅ< VBqtGa_ʹб/w';Eҗ`}͍!pޯ=uS.>վSßz8iGCɮNz>  t?OÃ5_w0v:zs'ǯ~^[O's%~U_G=^赫Q'\ ~ '>!E{#u?CWut.?Y7QӰu0B~oS/z3]TL`ъ n􆺇vx^duAp:I5zWS>~dq3tN|4aȥ>:%S}y\ y/>~^N~k~ȨۅCč'w;NS_ ~N4tMϫu]v.i<$oW %,z^T_>@/{?PLv\sX=/7^>^OTs zhk| Rs`=8QwQ]{x'{W؛|ܚxwwT_A.wM䃺t1: WG;2x!8\uT&]ZK=/[׳wuQ?wev}8(ޣ{tiw7} 8|ĺל>|Uޏq;-iOslyJ9(~A}:V;|9{>GC<9# m,~k4-:'%ÇW<:sSwKÿ>\NrUu>:y_uk{? {[~f؉FtG8xu}>7x/,ʮ>wL^6q_Ӊ_ѩwung8~#nKov~CyJn_` QSjA?jPdU_ȯ/o5c<<NK^Xo8Wcp*\~bz zS+K}xKo} mF}XO{:~{z?Q= ?x:a_:블NsC<x~z~_: ^d?K. N?|}9b6$!T|c OvX5/mGҥÙz+N7b^DTawzU~='T K/fΌcOY2]B/:s}{7{J4!SO~k#u?~u ;w-:Ny4~;/לqϛ[ҩ&]/Ro0%n}-|-N _K'ݏ܏[~Poz+~xl uuy+=>9ڏsu^<2[^^S#= Oۗoɜ~葉_78U].yM:{,x<,-<{8UW>x8Z\W~Kg~y¿{_OS9е~(pJ,tkZ͈:t'q |[֜й>Ǐ@7>_Pw]|+9g3v\b _>y}q :w?.]q,oלI5W'^c"?xYݟ}FsnǀO:u^sx!x(]|aoq^e{{KS|,;} x%,Sꕟ.N[?ν+}Լ݈곃!~!nO?_6v4xV|6]˼v.nԹ؃ ϫOQ':qbl:T߼{3ǣor(\NxջE\+8AԽΏKq~A Y%ҟq7=Oo5G-ouAREM>G<~~-.ӅE+ZnÞ߮_\֭pԢ/O7E{kQ< ݾ=ws}h~_9x'uG=ΛB}<އ[}w}]'~iz}=8p*uUQxZ^gw?YA,w}1x |^8}WzF>?G> ;OϕЋ+; y;v99 _|˽ypyf<|Ww/aO<=GD?8,~IkџnY<=|r|;Oqj.~E}^_|}_>Iԛ.qÞ B?C)=eqQ́]ts)kٞ<>5?Z\{Ϻv 3{@C?:yG5o'C}#!d2~R=?^:'ؕ}ƇԜG9yp+_ꞗO^>X|^zJ|>oO{'>܈8Y<_(=ϋlg_w{. L\X R)3ϿiK?t?3׹ݎO>w;Y}!IϷ5wv'o5_,$}bkϴ-9yYV]] \3RiࡺoWYJyߠ=8w-yv^8G}1ɿ;a>0Giq.Z^Js"Lُ:k|#85okH륿j#xA*^ A机guWݷ!óGֹEשx>O{gy(OCW䳟Ps=vopmz| =^q~@|q_t){#:gty#ϫ_5aDz\??s˽^eWn':u'aGxʝ#}R'% XGwu8_sZo/u\y-ppR5Kֹݘw}fu~iqS?T;:%>h5tW>kNI _qM%pV͕_qQ=5uiG闢2L>σqn,;ӻ|Ay?*q1||8v9p#q?:]Wo'.vPD]$ꛞߓǑs/C|v^Cdۡרozܭ_o(^Yӫ?oznkt??[({| 9IlS>_|}!u4|^xiv,.u|ۉy ߈Xl?isWvf=?uN9ىz{'{w;q:xy\PݟgUsBus{aQ[Kfݟj.rj|7Q׏#?r;y׵yĿE'ԙ:輔7;OlN 7w߼}߾׿Gݮt3ycӷǫKܧjnco<Y tHS͹^ս ߧgK2ܮhпT@ͱ[t<=jN`;5m%> z5/yS|<~xе+&ؗOjuŝ+׽\/ڗ7~NK9{8~v\Smo>iTx lA|y?x<ۡ;uu~ W##}b : 9iaPyTτI^p'd0?>i 7zr?WY#~R urk~CG.YKqP|/.\ISr.|џ9<ru$vYkO|)vȯ>W,xu8B\_;սp(nN.W^޴^f~v*p*޼x3A;F:4FWמ ].@~:A]xѷz/55e?xڥpB݇1k f*~Z>ax.Hm9?^S? 
~J\.V֝> ]⇯G]VWzT>'.pmο\iģKqW/=ߋC~&;|{5TF43x~ )NzSk뤿WxRy4?J9A}R㘚יuJԸO[z=\g;{u__? =^=RC} \x3co<'1xCDGGW~v/ޓ ''j~!?nt^;`~qCO"K<@wnWs:N+i\O{>' W[ [2imk~`K×ʯ{ѭt|,Lqv^|=T?釄FN³y~|Ty/] {];b$x:^ jxX^_C_k?\(pn/Wz>x=o5q`y:G1d7|+'ᙪNZpB~߹XPu:o]uX~}0 ]3tV=8tύ:Sw n􋮏scunWw+nG9?|+^ Ĺpsz(E;!p|h~[}s&KЫQ{oaD lnVoWyǑ__t'? ~[g}^tLQzׅt^asϞru9zБ?Ǥ//:{їů/>.năw]џ9: uQ^^֙&_SKA?Ko_]'r7汾ѹ>V%:~-#>bu?YwGO w9S&]m9#=?/+Qog =_~.eP<{]s㎧%&Nq>k5~fW|ôכ cQo9/j;ܩJY'}G.nx_DY=^˝7 x /uǟ񟲳9VۤUG}Ƭ8ܩr9\=^ ܡY⹈7C/yسw7U^ܩ)>}"휫}"p>6U}ߋn{GU7/|t$E_f7}cQ= .pNqoNo7s/s͚8Üz8Ow¾_u*}T~:зw랼ykx>>Wve$w\z1peώ}}cN8 x8x?@SpueI?CO7G͏+D];πWyOx:e#{Kչ_u1w}P=cr}bq :xǁoyF;Ǚy[xEܟ.#|pI|L'qn4 c^|s y/8=toX^jʴIY^{FG]g>du|+=?]5qT?;; >lJ!] ^Jƾa /mէ9sw } >O{?tqp8WQ-uZGwOnJYɮL8HHl:^zM~~keZzt~5騻¯iף}^=J :(=aaĻЭėռ7=>x}vR ]sE'9Zݏ89$/̚w=E٧ NunmZA}z:[~~7\&|zeg_2xŽ#OCw 9[:J_\}D/ױίk;&oG؝Ss*+Qǜ ^BuOǰN/#}eO=cQt"^W'a厏_.,ߨS񡳎n"oD~{lo ݖs_FUKǥ8%8}&WgsOQ?/!J7B߽|•yko{o>{Aȫn|?;өwD^_Mï?{ײI_ ޑ޶W#G]V#wv>t׳n'prՏ2rs^e/w{ܠ?L#zy_jW_͏͛?_߉|>i>οٿ9I}O3">~8]su^g[ܯ_6[us|[7aE8Xu &p>oOo=mq~f[F=_Kߛ< ^?nssLzxNǯ=;[L=Zp-T7-'ҟov\ȟss;;yy".K8q ®كxixho͈KvyHѧ1 }݊{\;xyԟ秱B_j8GR|߫O\˰a'}>-x,\s3\^>ukɫO{<[ ~Z w%ϙ)oxA|R<<^s^g~_#NSv򓈗pF±Ӯè|uݣw4˃Wppޝ说n3h~d~Q?/uD}֡XeG/G:tu.ǣNW]M]fj4xgߧtv|׵oϛw{7Qǵ_ttA}泌{^zdcQsOF9.ãAJs{N+ [|Ƹoz_Ct9X\rǵx{2gq iN8ljZ}ߵIw*>EaZ| dpsx W7?zC"e <,}- ?ݳsw߃ 7~*s?^#s=OC՗q*x> Rҕ}|Ow\ ~ϫ _/j/~|ĕX5u飨 dzewC&ө3Q[WqsG]7:8-_'%|WGz}~$;{u'< #~}^4: Yݷ9qƛZp50]_r4׼xBd 绮G=xd~JWeU.ŧz1gQOF~3~QKG 'o}?8A| >3MoU}^|YE|&p~q7s㮳w<)^g[8ޟs~x" ,ֽv Gx//_/D> {Dž|)'S>5/k _u֗Ts:zkvϕCş>wlu/`[=WYt|Z\bݽ7܁7xjgI{yqT^}t[z.DUN_?}n[gQ?]ƺ Ft.[w&xUs(Q}o#?_ >ϵ{ᇯ";~8zskY;{*m|*x'=~;x v[ϻ?n{5`'_7W? 佨Sg7]LM|99~9NVzAoj׻YG~? |?்8ZfN?_GS>kqw5Wo~*p/\ 㜊3[ :D!޽ DgsV[<ĵGj vO}p%pJy\GLZ ꯹sw|$x>/CWU7|q9CS?^u=~~'.;硷8ݝSWu'zV_ȸǑ1|u:{uO~9pgQ7۬|?|j ߟVNս^ Wיn⦺?gŷDޛ8Dx6f<׋V }Τ/w*.~kO{إw8J}/t335_/_ h%%_uk9j ^+h;w!\y58.'?y'5wvu07~#p/CD'"  z}fpתްua{SU?1xT$VTJWߣߟ8hy9_Dԃ8Gߟn$ Cm)}v=.!yOqF?^-ȹfzjwq[c:#zCX~n> #0xsß>W<*D"sֹq//߬σ3g8P^S }]oz>X{$ mߎ;np$hAj;?zQ7hE/^—9qe#q؏ӁisuEkE/C?{^ᯋkyS؜az|^5\5˞W)a o7 ?`zwϙ[Qt8E]>. 
ěۻ}x˖|~Z ݸ[Ü G'N Cu/ۨz%9FW蟢,_ N=Y>˩8y3t:z~7F/;|"r q%p>SNvw؇uI_<{ϋ4x)3Н/!F<_~q;"qOE]ݣ~o'7mGo8#xX x209Ix5Rvz5. IWgۋ ? Ey~^p7T?"'>g#{}|yGoaf ᯅ+Go%>JgާI gW0ݨOE_sRulēo7.E}d_K:掕:> ] ]|c^A_oG7͢>{x;ߋEu:v{]).u? ~B~_^{_*u@WIݷn.֋gW6¿7ānC'(a-d:ߟ]] Y߶goNG }Mgýp?F}u#t{_ yk뾈O¿_wB:|ՋnGu^; kezN>ϥtnڷc3zAKG9_qRV7?<%r&eIm~s/Wt9to.$ZpWa79Ћ?{ƍ%鵨?'(g ΢w#['kNߝZ88T_;zޒ?Fߊ[G鷅7J>|ߗzϣ.ΞՑu/^#b 'QǺko{V |>ts~8k>QWE~MQ48{3~õȗ5#pЭcp"D:+}^߽:s×pfi~N9NO]}W8Oʋ~?/_/ߵzGpEMy<:qFxkV"QcFoCOs:8l#^ ~Ģ۳Ǘk^ ۭdN{n?qA=mݬ9G# èC˫ۋuΥc~?ع+Wk|swWB2{~O|ot*pGo8hh<ɧ??Cxyh-W<՚Ϲ|ƷBoM5Hׇ s3/RYw_jNθ'{yI󓡃8 /j#_^ =\?}柃7&ܛA~% n g tLƋ%P&Ou1xck#9%}>==]}K\g^ν\s|F]򌂷#EStX'),tFu^՟z<Ï=R*ڼlx/:>ܹQ턾÷WyGy p9'G| Ot£Oo4tkuŇEC: u7>LW|RDy*i /D~Sٿ5uJ#I<ըNF]>e$!O$^xg wG|>kJ~#N;JڇGF|o㜺3St!x>>nOuog*y8Jϯc|>EϿ[ ;2in}u/{|:<8m%jq<t ?:4x!g\̏ ԑW sXCwS 5_aZy6핎_NΟ?zlY}7\k S8ZL:n ^D<;tA nۯS}?#k{|N;SNG:>:|繎}u%̫|p.s.⻑WzĹ3}i= ~\7婼R_|VWE=\ YkI߇yZ~ W B䉨f=N#oD70u:_?޾W>{a+R}p"/=ED= ž:vk?}yAꓨwӨ#CgxO7\>CHO8n|&^MNE̡Өk =g[ Y]B60 nV-Gzbnq&=˦W^ lw☺r9F]԰ߚݎཏߵaN1èoGw;.I>Btpkts/~)z)~$u_> >|Wj_Oٓ> ; s'QW=n5tb?W7_||;ggidW"kRw~J[űE[  p1⾺Ѥ?v2Fqho깶c:T<xOxJJ~SvקտuZs潞9g[c~u)]} g_KyMv>^|S< ;J|W柗##﫾yBĥ+G+R'2Ws^_ "N,:߽ås:s||x+Un^xn%/^^_ ^]Yp45m긯g廽g3Yk;v*tyrcQo]>t;B,;(&#ԼoҜAw۲SjO>|by_ IqagCd^y~ 5zoٵ߬_{8z~Uw+ւRט}=tIg*?S\ <)?urF=קCOW&Ϥ*σ:rwx:aq'?[Y~9s?}'A옎ͯ#n$ГS.U$ݗ_\ \^[_xTc=}?~{%t,?:xT]P}:|us븏{ ).~߻NY1[?5j+^xW=w&=3ۑuuVz2 n>~ܗW{<:Rяy!tu2n%42꬚3q:lAÿǽw^`WQ}Z W5x8I^ND"T>zSA?|Ǣ+//;p5tus.p[ߧqm=Nj7O}7eNĎ/'F}v*>J'zOs^׈?_ rT\?7|xӏBw>[`by~3 *T]_d'5:]Tx(Gs؋t(}7f{!yl"_.Iޤt?8_ \p:t<9ͷ/4E9b=/$$tHqBi,^o/B'Ǻ;g~/6>6s2~JQP}8=OB?z;U=^~Fϲނ2_3./΄;j]kKO7-:/YO ~jԭtoDcwoSw\~8}#> Hcuͼ{Ϗ}oίǺW1ߊ"o׼k>O?Uw>M^Ow>􎣡M#N "+K~^M}_FsϱsxduuspXGG}y$=7v~#=I?3z;wǸ>E%ߓiO݉G# =v[Ngo~/V<]p#x|j5/89o(}'ًcST;SֹQx Qs9V .D]g,0p7"}5Kq7 /^ ]1]*OT^o~'K=F^st9?U^ G>yN\^ԟVjWoꮭ7w|ׁ ϼxcE}~W|{;xv߾>߹_MQ<?T>|[uhQӅ׼ݿÓd&nyS^Q~߉i: x9W ?IGG~?8`q>OYG5k+ϻxy{uS緣~2z/y/b엑oE/^9|i}zzNFq3Cd݇wBF]ob~؟G_S\<*tWvb]~uR |۷k1_9?Z׾̗ޏ ~[sgbq.*<3uu_:yUu_OÏK~6zs'C9x`GOnhx:_:w:!n7\(WypȾ;}z?G-ڽ^gQP;C<'Pׁ{/<~ L¿.|SK> anH~2x]=]5W2߆=^ |8|cxe=>I~;u2EE / 
Ug湮_Eܩsb˓yx84&q_|r^?O}oG=d..?uj;U|XOսv_c>lF|;X/#F¿r}BgUc|Ź \oΐ&/wxqC?φKQ \xOMr 4fy?Z5o5pu#/px>ۛWu\͇U|#Ok/s:xO?w9ϳ8ȏ>Q%4t8YO˯$rGQWu9>I}٧cBO v=N"oo3<ˈuj3vչ_nCC=zZϢ>9zϹz垪m/?FJ/Ov"nF^75}"{Nj?׃'|YeN;t'x}Q_.ۼYxoƺ܊zb%s-M#}q]];J/979{.|}:pt}bwxZ{GZp##tOMqKRgw_*u|Cw"N' =݈Qyߋw \z3p]F׎Mbߏ 27zQ:_=vz56"_1) /׃_n;s=ץw~:C\ ^}(eӾ.;apz9s֩~Oߚw0OYwfpo=?/>O3uasw>x }x}nyG] '|.9|oĵ?wpx)_ a}ͩvzԵxSߓ|ߐpPt6"/^ӾJg>f?Εos4xla^$>F^[ũNsG])䗍oW}+x-qŹSΣ_OF|(pۋ>Wu^ DŽ]5w:< w GS?/$sѮDPy? ~Jka{1p˝7ᯚ3m&=~㱿W#~'~{x~ NӛW;+>{u :K='k[qJ8/=罖pV#V>3u.soogQ\Sss=+Q'VCzqc=u_qĺ>$8UɟMqYtW!k%c{=ϟ_uIUt }qY?-zݬ^/Bga?/d>?n:.X ow.{W>F=\ J'=| J<qٴzmگ=Y_}})~;?Pָxt=j.~~Yԛ\sPOX_u _%<>H/7/B<U/hu׽vM`Udӧb](99{W@MǫG9Ewïq%IenwS6WoIDf2v_-:xotX] "x["oMiQ׉Z\ $,(^祍 r;~ռ|O'ǕoփtNa"_+1|:NF>qı_uwv=FBLuo9c##]S?F藷$xK[ʷwσ!Ƿ|wtxxW|?/~k.OKB'λ^ ƒ|~;/ٗ껞w _ d<X#>vw+ըk> oY^k$O#s|({/v7;3ÑװCǨƌ#ռwz8_xDy"c[Q3_{+ 'ePIx^ b+v׼ywւK? |3xзj}.<'aGtW!k}qbguz&|n>- ވn^}P|Zԍ7Εv9i}KXWzivy:: ^sD\k4 }x:p5tȗ>ok+uwOQ<|wxuI^DyV짼:x߇ cէknOz]LZu&vڜQٯIם.ǫSiыuxRs6e-y/ׂ7zeq5|o@gxZBcY/ݡ3u?؁>{_i_UpO]~ O= 뾾ԙzb/?-wǻ}ė'<xt Que%\-Ǯ ~7U^ ~KwNp.t~BG]7C:znr%.qCE_u|R|]YwO|'<ޓ@^?O͞)^/:ŧK9v8M2 ~?>tyxk~N;C_zwoۊGy>ܩwyħQ Nn.E; l)hsF8}U7-:"gԥw|^NƛlK/.\~^6~=@%kC=<ӵg1_7î~7zv=n׃/+ٛ:fz |g#>:>kgJ zvԣ\^v;>a7B_'~%7 x-u2DǕKVI)<е?^.}ayCß >_ϫo>}"ũ?,xVuJC>Jg(c|8s:s}7㷗o圾;a+?(O"O|3du~K*_vދS-W+~q*>悌 8nDU/~7y'x='yNoOy u ?yD.~4<^s=IyonwˠkV߃f? ~2tyH7<żثa/W֤0/<$^qw%vs$tQvPQE!^_57Nփa_gzd>{jǫ_'%{}O>Ǽ\_tt1ps^}۫K7A=AG+>׵5gj߿rQU/p$qN!gnW>.~/_D%/zxU]BS>׼aͭ^G+Q;W &ao7*}/x^ǃXͬs=Μ ^܃7]|:pݎ ܕ̋S=^Z.z>C[_aMߩ8C.GC_U'v>F7ۈҸo͏끿مGR\:_Ov|d=izTߣOï.)>Wg"?O;~U#<`?{1p&x[^̟^ۿW۱qw Qیy_7BNti8 =\x/t$^/u}=|S8y_۟o_/]bQ?vI>yOg B`xwNꃚg8E+ކֽۡыWbu%'=C/~Rov.EϏF|<Zyo >=am^?|;tgQ¡ ޸"{]  xCqS?hG ~)_,>;~5J_ yT^`#ߟG^UKu͸?G>S7w#~{?uU:qZw@KЗd=ZiuG܆(=򠿏SsC' |У8c.uyrOWE2q6pc_'g-OgEIbjr;ySt;~}Nc䓋؋qN.Εz^yc G}Է5s*Caw8}y>vu7^s-y^ռiϝ~QGvoڍ98>8F⛁k[wYN~}t(Z<??8RsʳL;sk6.]~RU>r.瞯o.? 
SE~ꧺns=tC֜Qoח:VFq˿=FV7>|sw+zk-x.s\=3Q\5a ǽmz=p_}w<<"?s=ٷe'wC_w#彦C`q4xDn_t>y-Y<^ϛ@T:Ϗ;Q|2xj WC-z+)փcR?ݏ:*'={~wxvuHsi)hO<Q珆q35zo/̓?~j>੬\I!'GCCk>Katsٜe s_kճ8Mܜ.zkR]nUvン77".:ת@W{m_PF̺q-/__泥N3?S_,= ϡe.+8RFz;xV$a~w+]l#y; {)~JċhKnE)E~|?܌u/8&ngC_\< G߫:>~S.~RͩF-܌xW{ W+L{\pŹ*F~3;sCXޏzy:bw'aꞾq7yݝ;~݊|? Noto~fLJ,9A;c.K{3F+csoQ՗﫾i7./_9tiLu_h=#>f}\գQ7/=._ s5f._3Q 듁[^kc%;?(B]y:?nCgQo5?57_oR}d8CzOҟXݓׅwdgS?ߍ:*XVf?;|9`7wɹW~8P\sN]׎_؛c׽}GܹNun=:ҕvoϗЇ~/xBͨ7 ^gy_u.x"{).OG=[cC?x&sQׯ:qDQCH߷_wq)<@ywR+z."~ZA]dͨS眧' }Y,Jr5 2UEz"tYM;3_/ϯ|4WjQ_;8?WG)yxA+^zI^8*_/:<:}w?\9'@нq>uo=9~RsǷq37/E>չjn;/_D=4O{Q-/Y[aGu_aת ߋ7vݫ[Eu=8ZԇWB'ewԳНILJ9Ux5x<"tYUs!t[cyǫԵ#S=5͉x^|*}μJ9~ũ|עo_8zCxC t70t :\ەг%wQ/ųK>8R|:u3O7L;.:z/ fwC?|gݮw{{b-t#y^ӛGnN|.^,=g3Y? Ѣ-{7ֿ ҟ/A}ƿ;yl5Gp.;\R]^Z~m~Ewφ?˫Bq߁݀v>:SQf{S'ޓoD?4_???=_Mch{yyQ7Ji~7nn.o (O=IϪ8'xw+^XUsX"?a4<#.[^u>TGñT״Ƿի&8[;w=;QWoße`ysEKS:W;s-tí53xu#xOW77Ï#[w=< ] <v \:oOYCz.gnyr/,}۟|JD}zy^~=5q7J&nE]nbΝ>ЭK{-x+Qgs2}>#t9xv\sԗտ=(N]OBH;:~AWރ?>ϯA,|>|z={W?VY?zt|i%(\=/.po1vux_=;ؚ|"]z~ɳ=qfzn|_8c= `=p/cÚNS{ݮk>xuyǍ:\oU?k}4x|;< v.O GO]_\kG0'uy4 Msqo[ ߯ըǷ\:m=Ot ŭ~quOxOu9p$sG ]yVЧ}9G+NXݛ3#n7Joy#suåGzшף>uοt_}N^ON-t? jfmoľ? n]}aO)i?S8*z5~u%zQ>x^D[?~|!3 g uaᗑi|;\[39Қ?:i+npT .W~]_dͣz^|?.4)'WtIs;}k^ѼSg}6v7e}MqQ_CG2MgQ( zݟ.^x}guh:Et\n|7IΣߌˈ[9~w{ݾj^?; {Byh?}}m+ގ$ϵ{7_~75 ^O{7[j_D޿unAG䣃O*]2"D>f?Bw{Fw,uBwyMcľ<Þ{ޝXOvYt> mwl!?JO:ojA#*?wGG}?==I[#mqn8P|´7M)|2v$x '_xsӹԫ ZՏ炗n=_xxbx#Vg)(λ/E׫ԝ񛷶o< 7= ?ZKuE/wX'.nqr=xOsV^QK3gov%I_MzN%T1uI1Q}-圔?Q({<G.Z/>n䗺7n7tIוDoiz|f_?:f_t~ojuͤu2xxwQ3wys#~u~eĚ?xqבD|N#Af\ ^~~#r&.ϖ(nңwz=v9fs(Qgj.Ai8sE ^;8OOE{{vxD=1AH=?̟9'0?g?OYWL:k߿z~Q߯9[+=׼#?U_y&ίzKkOpk3WcˊGk?:OC^˜s3=OݥN>xesw˝WwUƣOnN]/ݾ j>e?.>L#T|X/f]@?ϟ7v;#ٿzI_k]=^s|Ѕ _ =G܈ס? 
E$x4cwu|X >R]!~ ?c?\ucv4|c3t=W3?V藩S =?F0?Ꮖ>+_IW7Vw|[|Io}w>yp6?h?FӚZkTQ'uZ%XB\M{v%رPwz :״nkQ} *,ѳܳPJw'-cޫV݉PG߷^ӞE[o+Tj,|l%w{vy-z;l vϗM,C3u+'{8ꪷ}UA\-+뎺QSWM vH7 y/Yk7tb[رi/~OⷺFD`9٭.GÿtExP Z>wy1T`c/E5ӏM܉*ImT2z[TjX D5\S']_ߛ{`.8l2 4<յ\>۫Zr[MwvJsU7z\uSHߞ 4q쀪sN}iUlӕt5M:U횂`V>{<ͭID~2<.?rT&i"YMz_8]Z}W[2]!Qw`ߺzGt-{>MSoF~*7]X(ݥ#^eo}nxHz[76Z:tSsp[_ӑ᫭`;%aW#[d~bX`_E+^QSnԸћ/j~">us{ \ǟFk!vFj:r۸>tO-C^zaص&鲆^:^~(h7s_;u{bǎ9+7Ϲ}ʩR*|7E2ix:?gz/?fgvb/Buzeqqz Qq,xq3<Þ=οIæS|e]:8T4]N o*ntsdL=u>W7/Wew?/uwxtw{ǑIܳp,<x8U|à |j9֭~^ݏy v0g:N%bJ5\t6;-3W:zߺuW<:P/̿{_vw/q~ZV>:~He P7g|<y<:rut}-@~pJgU'{pnOAs3΢g~uA3|{>:7GY'q[٢-tG=/YGFikW~n)w|_C^zg.ǝP#?=Piu^M_4*]!˝g'jee8qg֣Nd?ZywÎvby+>~Oc |+mgӽ˻B.^hp)a_?|g_{Y}vy]=}~5]l߻.o_~NwK5b3vB߹P}PSF}wNK\ ]Ap|}o-fRǗOB)~b_-nR\j-<8Y]%=>HQQq+t~:>{n]խú)i#}^~ct(]NS{#OHOoϺ E<+my]Qy.zŭAw^n5R /::|^]tu4}|u2in"oF=QSX]υ{VQOtͨS6CG9Bۮd)E5zE0"q|:.<))?::u~ㅟGO5SǾN+m\xIǿ2f{{3x_n&> ]z=mYoo?g=.냟ϣ~ތ8 C=n!qG<=YX5sTE?:w`ۇ=n7h٧ߌ\:˴SQuu6N<>yC\K_I*>z L =N.ΤSzuWЉ MݲKU_TWeGG\GR?qx]?Ͽ|sϿVu*x)}ϺѕQEs&ob׆3kvx=i3xI'z;~Sq-qA>SOCo_8^aT݂>ŃvZ|S v //5qEe?ֻOr/c_"UӼDO#V_dz[n ]Iw6VIAYyM| ־ ytA_OV-_ >zCu Gn?~m{Z ǟ|nZ^e'Tܯd{M}>ƭ M/s_R}vN3xX3Vu WqnOUg O굽X'u<;ot%vEN:Ҭˑ#m~w=x'cЋgWA8V>u~y|nx|mw/ ]<:~Q}Q߈)|Sٰ;jsKY+BbWߌx?ىzO?>8U1Zշ;>٨9vScxKo<_ޞvxKy $ JNnUSʼ+|cx<مK_fW{~K?z:W|߼W@-O}VWKxQ}WB_:~"o7ދ8S<Һ<Ƶ}]; Q7;5218+VħS.=;g_z񤺏=DZoxH}Iv?5u?,~$#Suwo+2\\Oެ^Mfy1.RbqCרƧ[B~S'z8NOu/X\=פD߹S7ux.7:>>0yyx~}x. 
y/GXך8+Oks婍ܯށb+?ylnJ_yFO>H8 w~y3߷;9/9T~Z<׼:g0 7@} =nWyo=^s|2.#tiȾUzyeL+[]7+ށV}{pkz+h3;u~qם;:~z堯ݘss4C]?:d;v^ފ:+sDZ ;WxzJ鬋W V D~=tw9Gn ~C<'f_U nRU_°un#_'PGTC[qp&t}n)?tmbyKœʫoz[ʷxqQ><"-z-J^vCOW[?.]t{2ka _< ӳ /-HYሏ:gs%>Yxxo8_Mgҕ&g:Qt{5!;s-NC,|ע(<ӵk]X}V:NػsVw7Vʃ]mc {>ݎY't7y|:ȓtn5w]H?ңgCϛsXZ]C>iG\}9tq:xnSPZI5tH нzqnv˱Bovt)~E1S.?.~|a`xf]Gw{6uE_+ݾ#Miקg)7zR͡nZ덇n_zϳq^v3zFpyݭ'cvc&{ԇnVϴupI=[f}zisc;Goo:(L!x/M L:tE7K~3]7or<|d`Su+\}?u;p+M璘a'og73nn xky~>r=t7z} {ŕ_{;_eYYG~x2S^r._:MVa=~:n^}gLx|qA O݌:ƎiԹw?[~wA_/8OA> ){;=뗤#sxscg#?||y+kNMp8GV˿5qǭumv{}9nkQvQwg'ouy~py+)Cq_|Y8Q>3+oUO_xuQ1 ݮrO^ZE"Tk"p̾uӤ~_;u;Ǫ'v}:n6]KMW8_h'oٿǡO;Σȣ'.n_ m;yȎn;VH/}8|RG\ ?~O|+ydays/R{O.[__?3BHwe9&N>q.㉿[=Lh.[U<k3ݳM{|Կq-@| ]Axun^|eMۡ'gkn伯skNwJ+B>+:\-qzP\7v+9u^33}1ufGqa/.|GgoBuLu>Rw@^/"m)Γnj1CWoZu( a5W x8o#:7\t|'jnAyǬσo_uboƺY=&?F?){-tB?>O7/u>=tޭ{F=W_dG5_/Y|EK8 ΀Sn{ xk~uE{]7p?HV/^z79|Ӛ=}74j.K uKǏa:< |nTnR? gdطk+9ϟy9g;Daꍈ;NbuĨ~;4Iد Z⇂qnFxOa͑z.7oׇs>Y?|I7';_:Co׷+>-/ˢ[~;Z/9},꣺Wqټj.?:<Qֽ7oߗo{勋WG%u:y?~xYԭp8;g=.[_τw{I]wN>~^V}sW8~}>7Խ"K}~WkVs7FqU^÷ּEkX+̗z=Qs]/)^MQY5n߿9O>|7kf]`?𑹊0v8Ky_T?Y閣^޻%އO?Ň u|4/ǹ^~x2<"-W{gPVJ}BDžO8pY{ޯQ[ݘw /4q Pa'.X|KG?O }*Yv@v)]l/>gQ?K ݳγG79؍f;Oz53'~= ]z ~n若$8qzG~%6穏o9S՗5swžG>?ΏyY'qKEWY)>5E=*nչuܡyϋc5z׵:+t?W^;||\{^zȨp|y(Vo_Ck7$T?x{_u}A/Ԭ9*:x!ϯ}gBUs : jnߍI> 'D\oyoE=|f}~YkG9 (sEC]aGS|w ߻-^/.WyU㮟^]'~4y՟~Y!߮tESsvӥF^{7uü=%Wwy#kN~9h :<%#TqnQv]v=:O} a|9\I '眻:'WCxק!3DR>ysd~Nd+:}݌aŸ˅"/? X5egxv25堯}AW7?uNCQ={G=g tiǣzT\,NN7Săx.}dq#᭗p̴It*{=%<-w{{MQo;2 to裄o{S~Ӛd^>Ax^0RsV=g_xНxGD_s]#\iԷB<᥈©׆?C?axG;QUBשs~o3'?jJבw_qNؿ9悳ͨ_#t~E? }rxzc3V¤kc{74[?ʻ>CgS8S|YouFyˊn We+xyZy5b~껛v&U=N>tnb{F~~4h!#Bͥ;"}}cI3Le f4Ӥ^ 38%ʌxg9翜<=݊_տu7 /x&;ܕ59V*Y ].}Ew0_aொ}ߗO2 7:k gUu~8Rv^}Q;Z'/8Na_g}x>> V5R:Ý.to7t8C{ţa޺?(8빷7fRwzן=| 5 }jX~ /_u_owB/Wg WOu״Cނz>+sSK=TA#d]ȭOK|Bw>='g}هu3zs2os_\lEޮf"^8tM*n-?)=|Sq}x^bҡQ*VQd_f?W9GBhS'/qGSnO#.ң.O3{Nk9\ZZ?U~p-|;އz)j'8b9tWX7W]9?q{[C_N|\}GOU?Ѓij=c֟{#'=J%:܉ut߽ulip{;ν9/R)WQÏM8(=ӥIٺw#˽S]o~#xxYEAsɝCOW}|{Cwo>,uy^ tv:p{um}ɺz:.tbE;\qV߈g Yrǫ珻fԑ? 
UnGc?]|Xq3R._·|zuj]=2G\}w'ȗWߎt{K~-oC?8 1>}nT[4~yv^.)^ym;<~oy4=>U?zkU~bLi_O-~~ƭ}Z^ç/u] _0d?^;I8x;?y338WTDsV;ηnOz[ݗ:7oz8~K?x%+t[7=O'wt:slϪS?:л7+O}/yȴ$x?Uݷ|.:wT&Ocu>w^3sG'[}kwWaxQ-u|^>N>x;(}e]-z|t[ח5@=[ ~?+Fw~L'ߤvQ\go'UXlu]./Ti{o`q:s<]}x޸>Y_SaG~TU$fb<8?fO:Ok;Py :o] u8WІnX<`C)ninp^Dk^bueuNy>σ3c#pv2qb;f{YltʼT]Nja _J<'^<"_|I]Uf=Ua_'}|K㩪oE|NOVVzrs A<}qx ՚OL =Io{^Խ]kմ G+w>[e U_8[n}/y ^YX&ވD<ոO>_fO>̫iǺv>p+K:G4WDޯYqc?t O;~=dλ3J{ȿ20xɸ?:98'~ҋ>~KG}cn9~&>4.\%'.$3=}kWua_9:_~)լ x>/RT}#^['4|Ig":w$QO䡣Ug8qW^ljħoEz\}uL߉u/o;ޭjG.ωno~7𣸳:|/[_x@/=h-iݣ 4w:<\&Ӽ>ը19|x+&Ƀb_7瘗7_[eQ?hNzgY0V_gdn݈c1_>]t]% =/_Up#!սG=_I_߭{nWƗwn%Q}B?+{Gi:>.C}_] .n#/,^Y_uޥ{~q<ΓOUYZxuNs_{^ > V]〪+g)G {uK9P:qn'|70yL_:5;QIK7x(? >o8Q/-8UA?Ƌ?:͛~mt~?']g $+8|G:g:z(u]M^I; gC>Cî# Os~Cvb^ "Z'ъ;OgCuU+yW=]KI=u۾} xs8]C^<q|;OGϨk}~8zu]6txe\ɻn>M:'o]^KW]ϼx 7$,Sx!O+8 j[= ~w]Οa}| {-iߚYCPu = ֹa~EwWiu_DkAXWҭKw8fq)kGF=o{ך>pF3Upqiϟ[5{/]xAw˕~8+!ЩtT =|Xc/C>~zrG"LXwIYO~7rD]>1/8ߡ{kG/>7톏[}YBv%^K_δ<|Rn?rˬеR8JzOCw<> iw#›%:sס-<=J}ymG#%<赵G:/1yO]Nݔ.zxS>A][/m9 R~p?4=}wQ}O7tCgTu~Ӕ1zz{;8 ]yA (_/>tQԭnŕ^G3ߪϿ;ӵmgk>gK8_oT,thϗW}^4*I?qT_} z^> O>GyIWwTaU>r+S.>0po `՝YBwx!V]IȽ;{}qrͽUͯ|/O c9=^uL'U3'n?kok3߇/^O}ΣuziUpzЪW}yG'@/)1 g?V茫ts;ox9>7 mnuz؋uvw7:~}U˥7ǂk:Ӿ9u4p|h\^n w9yqGWm=CCzrnI]S?ǯ_wW|wA8k>矣nT=u>Gt9>/#_:a?Ug3[T:˹3q _{U%N/y:{UA+xuh*|.V\B'[ЧC>Eա;:~tteBێ0Oaě?e?3P|A ]?˾Yc?JE8Y`qq7szޡo5q5XU}yA࿪ <-tDz=L]quxvO ] =_cq=|'?\/=Qa o|xdGs)u~?ذd։sjb WKw+|Һx׉l:ys_h][BO:lƽMuO>A/zyOx]7]s{oT?qׅև(}W{O|~/V_DžǡWG+Fp<{GsxRR\F2t:û}_ne<"^}s/8Jsoе#_HO.xIt:`Oa'}%ޯ''=wZG񞣾I:::VPDZJ|MQ÷ϪOWx\JűIKG2_jMxυuICt9sqx6">׽Ӿ헭}7q-ϧ|t>|qwz/W4J=q:M9g /<\u_uxqz-*|> ??c) ׿>X!ȷ].1sWÇUEp'W.՝֓Q}7F]o^uw1|Yi]8[]ZQ];NiuazXKq+>f}}W}! d׿f=w:_):|KLW7ޥaW,\OT7%y{?oěGtY9I)[CCBgԡn5\t_?pǻ|C3ߗ??t>a=>NG΅\eŮCRt'=פcoξ~vط:EЫ3/ꨯ3מּ㾞wcfStN{ᷞ~9pQQ*ti|GǁKovӹ?skc[C.3[?wɿ_~03LAi޳w]w"pY_8~/`? 
|{"> ~eA[Z.}<]S>uU::zD<=|17O;^OQI[+U=ox׃GOs;:,\O%ӳŎ7BO+v^Ykq P;ݬ˫;7Q>5X>n\x2t>_E>8{>gl<:3uoLE}U6}GSKm/Yu|},:?MN_J|Iíq{u^m#Ik >9yqz0|s; Aa1^oq^^CqxA{+}_aƉ>Pa}ˡ7!^)nG<~:dՕ޳n w^'ųOBW7BO{T5 O.uYF=ُƣ-[!loYǡcL;ůлOśG0nw|f+thy;A8 U׺`J|A_?|bg#B>}[y1m=sz~|z#KJ[<!=Y|Eq1q/b:ca? ޜ ϫ7x9 #(~WT}}A. GϺQal1>piU~W?AǑv+ϷwUYlj|:#WƇ׼>^>>yf|0zyi_>W_kl{AuB=_8>^H8nی<\b"wΡ {~U~ ] \kv'7Y^܋ux-ɓ՟~}|F}.^·OMg8g~vr1x?#ߺOGs;:Y +> W?y?O< ?Fn98RGFiS^|1Ϻqn\įonwέпS.CovC/3q8-{Wz܄۫o1?'tB[N=Y_XM;GO}=7ڋ}>sm};?뾏Y{Ό:هb6'.=B:'}<Swb݌C?7z+ ߋOֽ :_<q3|nQ8vqQOչI1R`^'zK??tü#wzj t'T\\O N(X?->(?j7| v=xB ]Έgx0PnkQu_y^u.OQǫ 7uX^M ƥn-04~ģMs[MGp;g AIϧL=vv"m[¨vӾ^WCw{;y[Տu<Za8|턾7"pF{D՗߄*pzvk#ݾ< }GC_]ЧG\>qV^Id #V?Iw<'K\Y߷A^]޲fgzzAjI'cUaeuo_Køf'q:ws{f}=o} jGǾ9qϭX'CWgUߓqpzOc#Ju||%W}=aqyq6/v=yXVvQg5 Z'{.~~8Uw:ؿtc<\p3y*|3?cwy |/r-|AY >ZWU߯~HxwW.gs_|g1;<59 n'.?񣳞 = ܴ~#EQu}N>YS|2~xfշ z//xN<n}$>ПV^yAU-O.r=tNuPC}u;p/>{W|q 3xQ؏'pj[p*]i|z;'s{mԓ;|χo#3#o>R#wn<|QQ ߊ#W@хCiOR==zg#;t]n`_|9]~=nY:fmLߵBތq(xyf?YgPu'[ k9"Yn8XV~l+k77"/Uh^N{ÿ{_F~s ۡ6#K>5 cF~9Eq̹Nj}0տ>_d? 
K\2}q|s6xd{A~6ú2>x1qZcWDzJƷpߝw/o;s3HyW|Iw?#G랚EӵKKKCT'QBW@{F |W">z+>_xU=ڗˡ.ޖ}ܜCWyF oJgߨOD7~8|{;}^~Fb=~^u78Yx﫡GuNkTOb5+<{!tiq[ſK:)./ v/Fz|;|4c;ΟQ2q'oas8=;V\z+k?t/8~?~﵈K =.G䛮D>~ħ}loX@uO֕IOT?yӟqoz~$?^<:|>T}wI^>h<:֬x??}x%gGo:r̟-j |Ǻ5%NуX'~>n򃇁g;ŇNK8zN^?[%܉qq{ơkʋѸ'پ5: ߳۞ϪcuN~sԫ'̾Io})PG?tڼv+e=OXt P:5qOK{H1Sg݌C7F=?5|E܍}t;dzaz1y﫺i_ob/fs/?O~_z)Rs]ڧu?l=3v01_Ǻdu&ϏgE#_q?_H_u޴󋣎bֹI:=K ˟WC ,^ 4Ʃ/~ɇc{'}~|s1/uxסqAW|Od签fG]Oß.F\xqLw>8YՏ<|ՋsexjZSe_>R@W؍qb]V=B1#m/1ިx>ԁu%|DaQX& zy7 .)>-sk_UCWݠYSo3_^a=I޲ɪOn~clsuF{wGٯ5~V} y6A"`9ߍйw贈+ooow>Ox}@_UE[ >o>z&Aߗ։c㣟yǺη_c/qseo:f||z=:׫մW{;JwTГ*޴ ?~z'|8 u:O"'].oE|~l:>p3||.|IO$ 뎎K?ootĭ~~u/w:}@S8>3 7Y dW_Y _Eu@x!'.>NڹGy'+>@xu=<#ս1>gx_W_Ak7Tg=T۴Sv뾋Q_/!§|??/_E~aCS䬯cK7t=Fk;YUgqv⎸x-Uoݓ7ZS7ϰOCBt%p]-^ z+t?wO݂b;m~2g)=]2?-V/]_T7o؟qK B7D]xL/Kf񏟢:0>YKg^V+N{ n?Nʏhw3X.ż'} _!ս)8Ow_G(i#c-R窦=?n|ϝW=}0!Nϯތߋޗ_(O(7ӗ>m6ccG|=G/R8^O筧~}<>;\Tkb^o q˩FC)X_ _sOًqZ |.V #~ śs>.3u}?:#ʗ霴|k7K]FpyC2R/v<qGT}կx?.87|{1?'Xgᝈ!!} NsN]f̫O=Q7_=z~#/}o*Bg=x?>W_೓Q_ţЙoyeo/?ޥXg;9Rһ;x]oЇb^;kg}]qBO}1g"NG+|>W|~U# \d\_[yvyOD~"oT_IVCS6y/r?F^ Tx2'?_/vH5^Y%.˷W˚&Ix~? }~N8yv^tw_Pk?φu-Q_/t~B߮{}e|xWg5#s|OOiq $x']ى-+_~>F}n,~8 _E>:~.u=_{Gφ+#|=7=˽>KK{^TXXx|oN:.;:8tȫ߫X-QCt uwCϮ{8ŭA3zcǓ_wI>='Kpu\Р㒭[Dxzyb?oq?/&5t߹g;<{] p:Lͺ[f῎9^W w:|`Ruށ1u)_U0a|a=F3X/x~|qmaycUC>w\C7{Ł/s6t\/~=v^Y3uʗ>%Ӟ_eu?R#Rkw޳D|i۞R Z5qԺͺ<9+^ЍzyYkfq܉3ՇjKvO: q5S/s{IOգgt+/oƷyOb㿫'ō/uշVR&3u=Ffyx'xzkuOBZ'C0oy֓n#͸?Gٗ\ݾ]zܯ:Y_Sm.H =΅L;o= @&?;M_=q~#XI/\}ХX'1@+6Ot>.os\}rwy|\ia⼟=%(?/[s}] c=_8Q8$|P۩s7qzuqT]トoU88tO^QϧN>/| c=GS1Rx}_/߈ ?ot/c=nى4~]\\Uî7tLτ.QndaЩăہ卻77];[KKgs.y랖_;PL.۝O''1p8W3I}{kLO=Nw]xCDZoIוkyyzP2?t]7תKu?yK֏v#|şڍ8+O Ǔg˭799:ŕp> Зxoe˫gc=T][ 9:RWx(C^*NzS\'IqDoT=m)G|ώK-ݝ77ÿۈua|g'GWy3xםeU7a~xcnFoGEzw\O%o'otJt^?^ n4g/2^2q (<븄DkQ+x.h)po?-iqB NeDW TnwaOxOgs1鏞{'|:!p׸}f|5:y^spA_NBjO~Eusq mDgn>ö7#quO:ޭ{8ǫ{X'=:w^_oG]w>{;1?HGy@:wij¿7^sE^y99ɰϙwy#ѯvc]u_zu_Wqrv>:_]&}"{竁sC{?zi_ᕕފxTPG}=Oo#|H~=>{~vc{_nskW˛y<j߸7V|#tã݇a5xr-ō'=]Ϫ{æ=Ћ|x>~BXfs#;7ίY3x>/Oݹ}~Q,].qg#L>)/'?+_|jc 2S]Dg:nN~iURosh/.d>꾚=n|Nk#ky~9cT)(7E?9|"г刏SMˁ߬ }>=O"? 
yؿxVsxN9!?8}qo8Mx9O$=[s)?^&D vC^SOQ}' xy:SC:{!=&SO^}=Oc!W·Lxγ߭x>Pu9̷UzW{W_N:ǡe?7_~*,Fg]9x3p(.KsQݿuVO߯‡{YG8'??? ZPO{^Ǯ8ȿ7bxxΠ rq^UNz\}>zV3||sCkˍ+ޗ1]t%t),ě맩 X r!tua+pYС??<tTxNy2 υ[Ļ7W?t-N&__<ڻ#G#/y!q!5i[៬Hw.͟ei4y/`_1.? \={.>O.}jľ>}x#oOZVaimߏ:Oޏn 8ˠɇuO>_~x: nT}q<,^ =|3Nu~\xgC!wyR]C=npc 0taGzz.^S'E+3_ﻮ'~uo_Hzw;oU Ѓ/n|-|q❺{㎻;8|= (W? /7>|u%1zJywn ^R<v~|Tװc{^40}xuNodz<| :O/zuux~,8BqɶB75Я?<^\ u'B#ZyǪ_%o =.Iw/V߲aǏV&<~WOz(aOZ}#C'*'W=WBO_ܣzZ 4^\t򤏫yJ?~nI:Lo>yZK~ OOԇ^*ϪOͰ>IϜ(z+Zo}u?q ]~p>N1|zOw蟥ϟg)O:[ }XfW'~{&(E G|kq4Pd׎;"xW=JG]o9ˍ;NRsjYzg<"^W7ߌ^[弓=DO[g*Y9|'\ϭzf ?}NRnXq=[?IG'73K;텞'ߛWRǑ?}"ϥ1(yNO8Ty7|6 V>^2^ ǹnx{'F8VYׁ|iQi8W<ǭ WyIMaש.D뾷Yu)8߰Nԯ5?F/K}~OzdzGWC?܇En%pTu6z5{so}Iꓥo~sww}[¿َ8<->O=S|SHQ@ǹ>7bC3Cu?>{|[OBCo>}pryz|Y}WCG{=-_y2d=:ˈcu/B[u-ޮޝ?ot_ixǸ>_?du8#ωKBOOܟg/bYMy]M{[/:WL9Wk7R7~r:}}Kt|ߛ{&⸾q ]w}cn|p, ~VTudlߪ}u ~ppg ԧw(w*}1^]ңΛ= ߁XN}z9·T_4y6|`A׉º^f\z|u#B%FOuS-P7:?7N7".zS',O-5.>Cwq/Q]qv$ey?/9_NagCyqXx6bqsO?sKǁ/ç:Psc~G~ߴ3_SS/>W#ފЁs|SzGwq?=A'78~2|_}(=[g9 qO].vnD}c{0M/ p< A?w81|:>108C/mܗw;|$~COu:(?UPWbY< }h3~n}K'V~>b՗j ΋yӮ#c&9 UGGĿ/oԟ3ώOuC|m<_<8{#'>G4麸z5 ~:>3~TS&cn|KIw?Eς7w:ӈq#t:.}Z^= `.@tuk?/x*;K\tZҺ>Wam}Iqۓz+K~`x?G=>G:Wy;.G]3^.8e?zqʟt~'g{sz $}ˡMxAp6_ == }R}{p <_o⍸K7CoO[3 Yo]K?sB/r 8F<\OvI[0-<+ñW֟ CI?W𓌋}>᜘s7oGc㾾x?ϼ¯q=0:uWu7|;}>իg"]n/ڪ"nogg'/v|mnFpހ+ni~{y܎un{ĺsۭWM/xopI_?ޛ>B3{Sk^o'˾Wa&{^5xk"FϾ.|/<<}>w֭ 􉥈֩zC`;t/srWb ߬68֗yQ/.u/z%|u_O"/6_FϴI|\ G~|i<>go-_}-+woTǰ_*|SGu'>t#R/{ޠ95 ~<ըj7t,#.ït½O;x|IYxvcG>GABw2'4f TO/W볭OW8+x¥> 'Wg_=ꟃ?zfP]~:zJ]z(7v_QZb~zp='Gu7W/z+~Np5pϏ"~]/\/¼ ]|yuHQZ}}ӎ__?o_EONzS:|>I4Pwusx\hϺ ˆ}ё1ι9yzֽC1q쯂YjWC_z}}5}y/޿t#\ Z~7|`2V]{}O3|R}b}Co߯".[zS y|h)p^X$ˡ~K).>ۯ/m :}sbgc]:7}<:~w"u^zu:Ow\|Gy? ,8?˫ַ:1qs:1p~l/׭Su3@ݫ~W|Ҥ'8m>u~?tw =w˼<^>Əws<7~_ԗ7C]R4_c~. 
ݞ=^ |q4tjqd9x+~_"n_y"ֱZO6k++֯>IU>7<_#~Wn F̃+}bǭGo%?kMQ恮U~>87Q5`>uٯB_Q~-xӹ>9c΃ąnnPp/osQYux7Ot]D>_WB'΄h>q^Ta.@7A|wW>3t^Bk.Np;F=n}> s)p<_䤿ЁXp?L>_g|7K#p6t ˑg틕|E}[cK c#p:?K8:&uy^}#o&Г iK__xp>v訞33>˯ǫ~qZt};D7 9~>,ۡ ]%N?nzV[:_þލ:7>zuȣe/x >I혗wmqGCe c]xByW?VKe^Oi_a\Lہ/̋xx|j&:$o~Sֿq,f\{]I襛=yU G@s5%_N2ZauH~>/?~*ͺ;WCs݈֯rO|"N{{'] L)khYWsۯEs=ﱩ׷F1ٺ:n;~uyPݯ>]S ^Y_t(γf|*xqA[>+??|C‚g_{: _W_͟1K^ X#mN;r1 ^꼡{'އ'7+Ky:BB;(k[qs\7Ѝ9}E^_$xjVpгr.೩W뼞{-~  =x>>~+H/ O᭣uߠ·Y3q_| =5ttŘ׬3{:?ôǷ{]6L>?}G gz~Ӻyst?C'Lz>C'\>|?3'{?W}{ި>߃7/ź2x>wy?|So)C;W/< {_F~,vO/unzÏB'yu-x2oÿX/w'{_yk/|[[uΟo*97[PxЃAz׳]7q6| Wu,fy(<>|w]? ? V<{ǿ;t٥G> w/~gkyp`CcE> |Aǟc_i-t"~?N g ?L/~8|GsJG}~%gçx8=zC4M_w/x!Ru6u+3Sݜ؇Bơw[꾢ۯ Os]=ZWw{wr/㑅=Y+GKq-fߟޯu*|/y݌K8]]>n_jL_3v)pOO-F_Z |_snw=wnDNeq+=[/<煕w+K裾nz">~[}N8nwxuQ}:O;un5}lA8Nv)p:z_ UoWݣx}zU{A܇W7>*PZ[MoX7l3q8O)Ny{G}?ΟMv鳾Ih֑s/ߓs)x^ _~m1:1~+\?uoǰxyOWNѥX?z^:wT'GCg?#^sz!%YY|9?w6yjҗB(q++cܟ N;)+7ا^\ =| T$.yO胎> ~j*8뺍y''>~{Uf=\ \G:Dg7z=? >{R7D/FyyQgcޜ;^\N.`ݛb:'x x>:JS84 .[I<<?gsw3x}U5y^_ x7|97CY _@oPQ8不<*<\~l@I^P_lU<" n?$KzD_u Ʃ'-'ݷyGi-a\zDs.W?嫝u$SYu͓ 6^f6{36B0{/O5xd_>~.\!Ӟg:7Fpϝ?G${A~_Asz}i\p^N~P' g0_}]c'u_w:/C/] /=zU:._!~^9N;?קK3 ^ Z4N\yrKŸϯ߫ ÷Gw;:)>ֽ`k_ lf~KYg?V#Wg1<~ ӡ ]@&<>Ax_ :"^СG<U]4h puC':~u>u ]^r?u_ W_Qߗx1;uRON/qO+Q3Cg{q~5ty8xOvdJ'6|a+^"yZg&sJ:SN c>u;x`y_I9Nމ8չs6_~s/s^NW[?  _Ll~yv_շJ8Gv=tw!_Jָ'[M]N!hՍ ;ߡ3/gc<ߎx2n?s6 ]c!tI=T=>#Pz~دClq}qM8 `}w~oCɺxo\-/}WW'뺾8YW'\ӺIh_î=͓{9~ˈ7b ^}+wi\pX>G}(1=؇T358>VXG?Yutzy8zRs?jN,o8ӑg}WsqBO#=79>ݸ\ ]|-)%zcx!7~_u>h>{|Q{^3]'v}1#2?>// i~Ud8g= >WЭFG^|踷Oafd5µo5N{ޭAUG:py-~׽ؗ[/Zݗ[}5OzIOEV0yCf:5sX_ӻgţAqt? p=|d~?:!~Z|:_ºO?O{% u_Ǵw;?χ?㏺ouĤ Oʋ~_KO9wlRc̋(}|q |.Wz۞w?s ?n}6]|| VӖCuz X|w=؟?S<}߃ >X+el;ԯ?w9x~ȧϣ9>|/՝NwSyuR˗X8>1~^aljU9 {߮{U=QZ\S߅g!>5}8}]<ʸp}? ~]3gqf9&}/<=CsUF`y? _&,8bq@ |knM8 ּXGG랟?t! }"⧼ 7C+\|i~"G >d>+ߎ%p>3/u'__@= ڿBޖg7t.Mty[Tmֳ߃kSg}7×. >!A#l'W_qҧ܊8⁾XRDu༸z }C;YtQz?[_[ OƓRř/V֤^f3؟_ĺ磚W:A=ԗ@|ţ}:V}tnG܎nIAC\m'y-y? 
xG]o.>o7/c=.kΡ%%'wCj}y uT}-GcǓ׹iuNy~~t]>ʗ ]~ms8u:.=DZsKƺ1k__Vw=ulD>rAGL/F[z먏k]AZv7tªwx)N#]wb|*kpZR7COƏ랼arkc,|Eq:'5q?=Os4}Nm\ '*uuy.VzBupwyz)?8tX/^T6\{*_QMe?t"t`>s#{zuUE|Z:_|ou ]0q8tD\g6}Q`N_wͤ xqJ_9uUx""!|BM:αti>_,:oZ?TV?unq.x[ y|?|>~< e &}3WopQV+դp#\WG/x^>~1}r?uXLzu"/߆D\uHpy~}oc_ >I{G^2y suuf>|7qݺ3W&=~28gޱտnFt=׾ ѥx<<?Ȥ{Ti[k]qyXT?;8F~p+'%xpR//[|6uq?xIgy>Q%^>vsg×ۓy<8|x~/|ukBMy R?`=l/Cσ'%c:wԿR_c=7c~8RaB'zYs>_ΡsDտ&|Pԫl|u/?|SGM=s*O7~s|IyR뎂ok[b]U~ti}Kg~zi_m߯gS[_Yv]sꞆQuoȰ9Q~6݊]ޠ+yPv%p'gWWo"a_ q3u2 |opy6t:Z{G.*P N>x _݌<ⅾ}{f,?x/h:CgG}<# NPF:߸@xWo?:<8՟uv&>xת_մeqp~Ϲˁ' /Oyl;xsz0^\@7z3Nm8|"#'uTWt,Wd9~;tu"[ւ?Kw#eqGCꎬOrM1v}!#~ {+>g)wyY_BM<~kc}Y$Ox'8 W Oo%pa:-nz*}jZW>^;l>GUyc ަg5Jg8o;|񋁷<7D~o|//y?uA | x$,}. {`{`bs#\>9tb}ߎ"ϭ~z}p3t/Çqd3^S|Nj|{ |(COs.fЫQظpI?E\ًB#NxNqzm ^ ì{x C7ת^jܿu%Z3Ѓ>}FWNz7tߺ۸/QQ7Bi'Kk?BwTo5W##/؏t;mϭ:>ws30:֑}ENQ:]׼Iu/^xAџ^{;DY Vo\_:Л >?:<$yƨDu>yF1_/-Po>nϺxaO񙿋ug9tY_w{B3y3<|ptՙ ߟG:YOYbw\u xyƁ ~ӾN| z|['}WCZ{}?>G/蓞c>[@V}6| I\:K w8 x-p~<]CWõ>܌G=8C2<]I?_5o > |^~8Y| ,vͧKzתY~q|y=Q<~5pY1]ܻNstG5W箇l]{§zgft@w%2ßs%tx녏_sO] Cwէy.1 ?3|A_uNk}ƒ}/TW:9?:;͹-p煳'{?uEϏ>Ga7NG 8EF{[CtNI^gy>c#~};yIU7cF۾X:R||ZwW#~j<օ|Gߔ?yz uVW_i_u/Mണ1τ^.#ˤUη"#__|ešɠ[\ py\ o=ӳ pwG/bٷ#~YxPw*/ŧ}VJc xƗWlOx,D=vq.rOďYz粒pU?>?qI\(u7UtAFl< GUoވwqIɵ?u~o\C?9>RӨvAt=?>{7xcoR< #֣(|q䕃ɇ}:AQblW}Ǹg}G{-ׂG/盡o{KG{qhyߏBw>u?I1g=._ =B+n0'TL~#7S/:>oԼ…us>gBwr.@/PyQ|O΃ϣO~з+_ꅝ7xGdO÷/>;|asGI}zǿT#u^X?B1~~| q.%]>OL_O >ͫugߋ|8}hlƾܹdﱞëC?_x_8c~uh1t͝Vps0Uo4:|vb%os9>']wƷ EzqZiS9xu? SvÇsqq7|Ss+&]@Շnj/Կ*Y叿<\~OU=Ӵ-x)v-ݏ\ =r3)}zZO}^WW#Nmj\^'tQ1tсD~q _0|/"_Yt37~s\[:}kqAo}oQwx𯞏.Re.>H,Ϫ7^ǏegI˕С pϥ7/~/{N'9=+8|T~ ><99Aط?f 'W_Kֽu*n_/??}]uI=nn}MVb p+>eu?:_}<{¯= Vwzw݋yHz>^o 7Խ˹zB&Bσ.yQɨzxE>b|9<:>îǣ Ǜ [FlG_Ρr=|U}mn>Dw>}qd9F_qyͽ˷/s\T4,_G(~:yzNGOia >DWV?aK/S~Y p5;9a7K.΃,̯}7xj?[6A_0| /B8oU\x/oCcGWjƼ囝8"_ɃgWw-Wr䩵U{և:YzGy\]ўZ0te;syWfUʺu#?wYE'%/ϙoy/o<uicAWxIܴ/|[ uh͏烃{R1>qC= e9~O#? ߕ>%_W_wyB!g"ϭOW*x//Z:xOܷS-~nU=6GquuB^g=ё"^Oۧ? 
|c== {2v%jI=[Z)n [oן7p.cw×;旁ԏǃ0uu||~;ևz9`uD%Y]uoo;RWsc5~kYǛo8s[۹dz[Е~~6Lǯ>mXV4q7*[''O;_qs|Q)穿qXci7<8ީT~2Gݿ0_BG}0꾬_u;Cn=ߠSGx>O5y/'#}Jw.8Uh8v-t*yQ=m/yқc8A%xE]^:c:x3zKc}N= ;>=yRpǧS{g}-?Fw9F>~.pJ"CBG{7|e_mSO⎼O]Juhk=Xu }O}+ᓨWya7c<qϫWqKpn#] N'!>oD4|#* z?}WB7"nWu}]}$Cǥxؿ[g ׂX^Loh57Gn9FuxyCoƝԽה>9q^^7b'V_U_~,+sd}2>/ #ȫO;zuW#{| חz~s~uN:u~w/ НE|fЫ=(|>M ~y<,njQ򌺺W|}x6G7K=/-C? x}Oz^>3~8~?Os|}=Z/|؏8BψwKW yQ7R~I;7 \7>.=cA3;뜎ʏ_}qw{t]qetJROmBw^.?c#3b>tFy<>zo_X:3o][oջ˯:]NÛ?OUGonݧt?TߖŎ+7B^S}|ꗬo䝷w^]߶|_uG=OC[IiC>,N3sVa)Գ#ڌ]Qs^yÛoݷ&b98z'| y=U7ozެB׭{y߳oMɤǷ7bǝ?O4G^Yݓ1xK=_Ne0/'}tsqU}?}z%R[!_o}Ty9 | >y@_^'|Mvڟ{~;,>'<|YwC7 A̷߫u!uq4/p܋)߽^ߋ˻O7=Y֡4{s}Q|3 ϼ߷z ˼чD6o?~oߍWݷwwy5oL]s_?tJ<>o۟g|V׋]}9 y@q\ܼyG/yuT?s?{ç(~O<ȹC[_o=ӎCЅ;|xIǡңWO u~xm֛:~(:G/1|oCps dzM|7x>8Z<&:NV_QΔ.j>=tƾy͟(?Շ{]>>gy)'.|zb~0:/v|;|1qM~~'yPx· zqf=ntAzY'OG}>RaWU4yp-C,|'-"zV uw|}b]t< !tg'ݗsg}vxQ+WC/yǮ}+=X+>b]C.}}=n%z|_u}WNt`3t Nyyycbs)?zz~a5uq>|ҙoϻjQマćoc~0?|8lG<6JՏwso>z8>=Uu:k= Rlo,t{uu\ f֟>u_gĬgu|;q/q&]gzĭg{9I3Gc+[' |->T}&Bצco89NƺG?94*>-}qҿO>Rע0xyc>b:p%C7-`WA=G|s Cݚ_u>ZEC!Vzq ՕSoOeBϡ5q;V7>x]$|m4j] <z}GcOQt]'ot{n+|y~Хf֗cɡ/'[:Б]/WL:N??A]/'=a'QYV/|7:y3>'/-}5;lsC8:՝C"+kτF=޼Fy6~|SyuֲoIsZ|/;7][~4?=ԗ8[-tp t3p YеG7 :OΝ6{x5{/9e|~Z Ƽ/ߓce8RuWq)=j?'Vo}<_~K7/x+pr78qc?x|]x< TIS[N\82tl\Ǣ9z-XA  ߢxӰ8x:]~2 2<<~UXt9qʸOmwb/ /ΙGsΫ}x鰏 }_SS;<'떯.ިUc)ǡK;RGz#/=pP5:*>sXpω#t%~}s!piMV?/Ꭵ/Aq@9.TQ[p>8ܠٸ]o: _8vbZ_/|_%qqBc:Z3"/|pn|6n^ c_VoκPyl68:yП>}_׿z%θKt9|Mװw/we[wx2_OOh:>JwW +@Wg5] ] 20k[uz}˸Ql>A/g>?}<y }|'t,s;OeiEΙTXtu~GYxoG}~_aǹBB=-SnX <4q⼂,90Ck.|P_|GM;?iU/o#^:|+&+_?_0GSn Xyt=A%§onX_2ߋ+΋GBN>n|c= c<9xtUwp;x?1֥S;oBU2>}Lu>$?ͻV^o?x5ܪߒ_>=/7=~ۋu۸/V\tu5nzYqؾ8xd>/U}u=;g\c<8!eyA|Oڋ7kx7_s禌KNt zdV%soHמl̛Qɼ?%`Cphyx~NxFݏq=u_y+xЏ?)p̓s>QUND_&:qOuߥIOϫ;o97,MfK[%^:SKx.yރ="_w>=۸W=eQo>]ﴯͻל/}>;>KOZy6c^}v y'+.=8 UCC౯B,zg8u|?^u:E/޴ߴ _B> {?*^yo:kJ8//>s랂Q[7 <uXac~/+E?4ۡ|AngB_^cz<,?p\| qib8:oכ?Oox }Vbkkb}g꒭b:IW{x}m~'y#Kyw5⊼\zG\uϽu ?u~翬7n|#6Ç~ێ7'x.K_6c߭χXOUo>'K=uW0S}~_K}}/Pon s~;^ r5!|wBs{z2}^-z\Zx2s?Ctg}gEop^:{y|'žƤB͹BYs$xu>}o+6͸nշ&o:$Do/>_|I?0.1\~e_[ 
_v:G.loD<2p/Vp#guXqn=txKee}ݠL{~ysJ;2x^U}[IS~A}UާW_]59Wu2<߻@}gw#ЭO<‡לYx[ugT,B|EH_K7^8G;@}RۜwX݃6IKwCU ~L*tX:zһ'Ky4 ἃ''[λ ^b>}^_syÓݯP.էk'WqO$&,t^^u[G=R_w :.[a 'ׂ/ˁyG_Nw?&^[|׋&߻ x[On{HOC+)u}Lox>yg7WS~g{;觬w:Ou?9'Z㎛F{'/D6?~Q!f>OCWߜsQp^YoD~:':aw;^Tn/~aG='k'.N<#>"^Tmʟ [>m;~^'^Gg>g=.N&>˷p |W獦}z}C(w:/.u?V;xg}1C39\zSu<ˣ77z=뾣oG΅d=?`=t!ԩR{gӎc/.2zFuo"M#-޸8o_o.U]~Vv9|{[;;_\ RuK}= S_騳W*M!q ,:EOߐ_C78j\7t<|+{6y=y_hLn !>ut/R/IwCVu>>~؇%?U>U0ԧWzz59q':-x?t7z5>q?|-+b^SǼލUW7~/l~suF\ _ >x5>^aƧt`5:}Gܪao< ^E?3Mݴy^S/^`ZỼqu m:y_䧨;;zACqM|U{M>j|Gӳ٠>TcCX_i\q <+G^^;X R>SgiN/.z 李g[}DƝOBowǾ>=pxu\t~o'x6l5s?§eC߲g~~R8̟>|mu֬ㆪKu9_"qt}ݏb<hTg}' _Ƽگ߄TN:綾^~?< UN[QK〼Yu>wJǟ|8_Cx{w}K&S:W}|~Szd|^|Ït,׸l+}6@F^~Qw.^@K^~+tսJo{ދu\uЛmCgo9 >6O?ϱYsկA߷lޣo9qΫ}]8ng<8 xwu/C༵/F/̤ ۡ|3Wuz1 oQ_u'7+V3Y4{]AN+{gݎQϿIBya0kY=&/?fCnzuzӥ zYpw_z1BX _b^NqwG/s8vϫr_: 9:O5|M{M߾x߸TXO{0(sv8t,xb?t}3_%uS ~}~.Rz: G4zF>m f}y_w]O}_uߤ΅ng} VGx~m}IO[tuuֿq~on|Uw{ysJg&չ%t۷䓍W:NnG|BǕ#Oŏpo`D!?}'|-.[Wg>UoJOz<oJT]xױϪSΏ7c4ǩ?~t̾hG… {ֽAȵWa|uOGuT0݋}9t:Ou|\ߍ]?} Uh.qwE^iUg0A<6Ny/t5|y/'-WGQ:5:􌪗uq@?>r9>чo~Y'ۡyo\O7uN>}d^'3xKy[pG+9;?{o|痿g} 9{}:'|տnrn>zA1cOtި^n#trsgWgy;N{ mWy8SwcuV#g=oyO^ッz.c3(O ǼK|}|eL؉c;S gQ_ͬٗp;o;BTk(RO[';>"=Nnb¬yU?wݿ{Ƀsq"ĿFƽUM;TJ?w-t|&xnVBOt]NΏW:_;H!xqY'<9C,Q8_W)?Iկ w̃q+>R4xԫT_YhOB~3[{D:i|`]ۯW ȧ'U_Iǯpy!  V`uP_A'}ցsA&Vq![UοNWՏ~;?}Q8W\PС{xTbc=:]]8?[OF</*yE68EyMu #:8kˡyQ÷: p^\OzQ'w. l9pr9<>~t'|:?8A#/}N U硷WA_t ySOw> /W_>Bsf/tLy!≺euJiyެiOgL>'q4.9}l~{b7Cy}q t}p+C{N ~i9Oɟ-~|aߍ:}!u?:>w _qa} Y~:PP~'%yo?~}QEy9|ۮku_FϗW ꞹY_u.tu|<ޝҠD'=vӎ#tb|UvPzӚu=ɼ'^ _/Տz~={x^:kӾ. o-{<>y> }h?9~Y'/e=W]|jkʷ㎯h2?F^8UQOPWVzY'GQ }w|/h9_\C_?^ m+|ԨBY*!zuG}|QR :A(7# \y7t/~1?nf~? ?sÈ PYa:O]=}w;Ws3d3Rg }\{"Ot3yW}PMz:CS_2]}.u`]Ӄ6CuIE\~>Z+<>8 [x?U}|*o/'5:ȹʺ'p}mKZz0ޏ!j4uB_0*_|qϣ;^+^XOa܋-␺]x[ {+[ӎҷ'T;7p/pj;{yvceG`o;+_~*p8aLq.:c6_ЍBx>]wb90|uuxg)_JSx;'mIW¸bR|~y ϧUkN{N_7' GuO&tI|ڧG8U}n|\\v~yĽ+3.RtZ1tUǺj<:u*}[zʾH?A^u^u) _.V8_ 790:_|ߨ>cυZ}}gKu~LIgpw>W=y_R_{ʃ\u^mn=|#>bߣNs}6o_sԹWUyD|}>xR? 
Mܿ~OGZγujl'pg-tiNɋC_}9W/_cuAzY7u%8cyKYy3Ghߝ|}K(:o1Wۋt'R:6#X_y"Uvu'aܿ_gLЙf:>)t[UߕqΓ=夿J Yspcp%)3ɬvB+7BOWgOW.?"OVmt/am }F=%/W(}7|˯֩jTp%yr<տvqNb~Ӟ/řgTzV`/BXyV z܀UKܽy~~{ gCu+'][ _CwՕO+*mEk~"=B `}.b\s3Q(|SqBK~cމ}^ssΓzAϷ.΅OүcxA[<9/п̛sտm(8&}?YUwQ*d}[OQ7 /3uJGs']yqXupyyqYxW~ⴏ[v^>_ =M=I~Yu;_y{i__ OcZgu|ߐ~Z覇{qZ&׃uKUe^] }+xu/k˧P_;_>k`QY8:|5G t!-do>Pur`{xy2>:f/wlw|nqxc|u>'ny|揮Z~8Q7oz&(b|=G8OE῍B?T?Aڌ}`p`E+_xT߯K?k^I_Vήq׭ot`x~T<Nn8ϫì̓>#<#RMz<> b/pqJOm7~wn ?^=*7C+4ܷgxz+t+_ :~8xOz< +p&U}|q 鏡w.—oG{x}b>B|fq5-%Ugs_.`gz>>?;gYwӞߧnlXS:W܉uSA'=/p:'WBዺ_f7 סGu93?q[WxORU`AtO{~x@^5?f '}pfCx"y1?t=ߟϯ~sQ-u>;o\+OQUߎYQ_ۡs.辿m Cj[×rqᯨW;߲8V]:VεOg^߉_i.ɤ|/^^~sj {^R>CUNK5(/ Gc+yϷBci:\q~ɠG|F=>po\W>z[}twʷ @oqw-x~tKzZxRF Ñt.tչ˪/:?58s?Cډ#Zuд8x/{C܇/uz[M|c;u}:Yk}^g}x@›?j7랷Q_d:X]KNZӎjO^Gs3vBGSR?u],iuHuX}BA:L\ӿJ@9`p1Uo~o~x'ǺLE:@~"~SG_[kC\ OJ:AՓӱ/Mϳċ_]mt^.tS:̨R]t\g=oS`nyyzW =غvo_;8X,_?o#pu'o.pR>Iq8Wgsuh˅ǝ:G]~~>4}ZN#?ʺob >A=4oK wޤBb'pv>Xy*^|Iб ~y}r_Kv;[=^q[|s=Y?UCo&p٠HpMzww}ГϺndÓCws?'U?Aב r~+/RV3fϿ_;  =I>jǡT}ﱏ?W/pp!=czCfi]>:útubu+{uS~2PMH?T7<:(<zmy<=:OX:YMzퟄVou/Xy…|؟+L{>OK8δ.'l9O^}]&sH}x]_~>zfu/ua>x_#<}5c^ u79W>q>~]<8vf< yvC ????^?_-Y[?/lqL!<mclust/data/wreath.rda0000644000176200001440000003623714525075362014500 0ustar liggesusers]i8Uomyy`*UQI2&hH)(I27L 4*B"!!9yֽ׋:^gi*)% $ "̿/!MP@D@?ED"=jQHp,wnMh#>Kkγdse}R%3:5tfd8N=BE{ A\T] .P["XpYn^,@+A?(6zW'aUӺldMG eEC>J>ٮ ET*.o@բU歑w:o / VffF yrT$> ɔv9@m^R%dPJxMb;?]G3r M99(E#j@۠* *ڣhC,PL)7v2go~$=$DC>?{8&5TOCF"2U[[@YBܸ So\f;1YYR_=B#7 5dV NeHXoY}rP9?$S{h}bl||,Y6S2ATtG'r?_Eomٻ$ÁZ={rcO37fm~@>u5L L>l#;4fɬ܄(~ 5y5n2)7 4 DZw3OjsO p筨S dwYfY o ȯwO#SG>+;ZO"gš@sMjg|L#PTzV-s-ɂئտSH VO@es |)7;@N44CPC8 GUt&`7h1:!}7ys͚]@xM?D݂QDdgN.3MC܏w^L"ڵpȇAf6=tJ]՝2 Ǝ/k_1i$Tfq(tEFJx9h i7PwTx8G&"Cu*y Oy\PM-ti\׷_NH;OU:VTdy6)Oqȱ/P>T!&GG[ztg@,=^\ %W^2U7r_z:@1J۝MSQ/@4dHY.3{ !L2 b -Qavۏ&hj4~]%9rhJ] {c Xaw):7m(q1*N57y]p0R.G߄`ȸrcM}@`I Oboj 7>9ǝUz6x9pno +ڢleL}F/'w1̙u ix\o|9SO?ްUA󫪙 ('{U8ti5an (zd=H.̔jV F!Ik8`>b-gmNW9"n"g_GSzLG=dfZ,B-;}6N]B{r9/smBҝQ] VGE[2+:@1fk/.wx^ǯJ!5?B5i@+=.ֳN{RPkY=ʊ:Ϙ΋ő]᤼J(N+? 
)FU;J)=+֐;*r>Eb@s*?#+KĶ[H/Gɥ_sgt7 OλSWPXB7+8v.W+z{H\-2wޒ}u* Y(.qu>7&)@??ɭѫA:GU,`Nv] 1 ) [%$Y V !2%# y6[N F- {T4wn1@sp+b|7k؟M@^ӰCFQ?mpA :27{!J%C xaLKמ{i,墝H>k;n`vE0(zHlm vwaȚF7׾ /J=F櫼W1qݑn'N*(^=u*;)Hىߝx如j&LJh|)ȦmA@%/C--߶7Vw;/wX]Jw"߰3Yޙ' 1AXj&RnpWϝ))*&"wϾPJ5^U !G$nBÙ^"(?C\>6؃@RXV Lv@I} SEN N4zԣZMxbw 0>,mJ8o86MB1UOe_#4e4n.}T$cD. 4(pE"NqU@;ηw#JVf[8؋LQ*h2NL>T9y'"ys09sZL\?ڍz߅IF-&@ kߍ &NH8x$?P P6  Ht trPWwcT=2L6xhH G@JcLw%uֶ j&XJB!KQLTPLq522xp t] _UC֟[԰8+G?.Ba@(إqCuz'wI] 6z_lAJϟ A™tX `.HĺBeBE%U:~=H߮g(5JMقJv-6\3NiRoz[[ҁ8B.3ۜs.[,R=8ܛ ;.!Q_F yT]N{r0Tm+z Ηii@H hl.$UeR8RlM_re6*V9u?)qPI|7blQmI0RǢχQ/mQeUP,!h8ms(1wyodu >ц@}]۱}/u;)ޭs, ܋좶sV! +MeM) O yxRiʯ[PA3@9CY5,0Kˊ+T@8.C 4Jh~ӕ5 w kQ>[]>c[/E x7wN_d^d]TN2~yLI3COVM qBd$w^/ike@_"i"1u} b/BGj#{-]ؕ\UVGMF"s7/~^wiE'_o#eX_f0}:E܎d|+xϋ S/z!dI}O]G ;yswSQB"Kyh z*;% qWјY[n#èt{ɤ*V>&*w؜Z|_/2ڏײ_0B0JBM*WL0PV[~=zKΟzЇʧ/D}Wg_rVtRV`9W@3ͪ$ZO'F@1`;pP&^;'"4}m_-ʕCNj%G-iUH,xը"䟮Ta ?ΡPU~gix=gGE֣(sF0j;}sM̕oP%Q uH`6KeL}QhhcY=M7l ;P~G%ѶM ]JӰh MAKxQgPbx |g_V.>iH΅P3@UxU,ןYE] P5ĹCM@pu(>)B繄v:ÄMh +ɀީ4\goܻ+w5 kߴ-tJv2gҥ^Eurvo! 
{U̔d׵FK``ބ2Sq, vpnɩǰ]0͟z١ՂL7q)i[P3҉FTODNA3w2 BWNo%#0sO]o'[(:37z%u[kGy%܉`韊vy-ft:ڐ'] &MĵiTA 58F~n" HTv{$hE q?衆uzW)B3ֶ=+ˆۃt.-e6%]&OJBځk\l{*GT\|>P]+ %C㑤ZH/:젺GYɊ<;-J7Rvnvd$*O6ɸY?r~@9;:Qp eSoz̭k:+>i<@ڏuXTd'a_4+US٪7uTy+b[r3 橫Q!cK*\P@u֤hE%:'r6阠/=ZBJ)~ OwA#3qвI\R3VCQw u{E^RۄsHlNw4/AN因w7u/R-UG'zr?x'hmWnB'~@Փ߹#[YF; X<' (藣M%_jЌXX8&(ӳt`@ o+o 99h / G  Kts<ҟ_s7r<Ғ"c &c?Ɂڟ& Ļ}kx ,llm6.ܐT( w+4F+?-`,A | "Pn8B>(vetoaT bBOOٶ&*^]hlo^z?3 pUμFMW w1Zj7lEBѽP"tP[DoC6*UB48^_l,rtYXQȖb LeV%@ 4:f!t^ ^ԓQH|We1M{{/SᾹ@S@VǻW^DmӎO#_$Ђ6@ u7FL75E4ː9G8)b>RcY/7 8r4h/r(۟Ab Z/OA}[ 5Hw bQih{֠s ,~k_Mٽ@#[w3"@S?nF -ϱ@["LPz&**|ϝ2GGQ㆜];MAq̍9|qh;X(0PO4]_ I;FB=]g7WH{VOѸ0!]-+/[S#$PyW^E>p|5GrXLNيyEey/ԪדJ#QΠ)?& 3L36TU&߉qs*Gma+[ܬa'h[M*@ZӉ Qk]fpjG =PJNXr5 &j:ݒp;uMkNDߍ dK rZ+MڡSť;CO\WUvwxsBk׮FߤtQl'%.iJ+O-%9c@Vks=62^(sVb(_֌١aOb:n"9b cр*G](ҧ(؂!>gL]|r{+65eZ7O~>ΆD̀IH} uofr^䜺 4I%N yҊW_F]厐o*] &Oe$ (;//>qѕ`':滣 'fؙH|[aL}uGH @ ]\rƗxC2 oH;wb@Y_lv49=)X+fG*_o=TL̬ v=\ d|;y jj>p9$˶[$2)Ѵ͐Wm= ( 6W@b}Ri"?T>h.YаVX(emfHv_klꯖ2ܢ nΛtec@~WG7y1svݯe[.qLsPpq?I.CiGeMިskb Ĵ2P=6f kh#&%b*.$K˺ /|kXdnnzb{٫a$KQ.C+0q{[Kg(JpjRdv)'ַ}5\nrngT!$M'a(HDڪG^h.؆xqWD~ 7se7uYFVXn n+YۋYUY }lTSț.śӖ{iPW8;\ux_ N}E_RXڳ|UT8)v?ފ^TT;\yEv[l,jjh0guUCF2uZ`R3?Uě^%rLEPPSu?9+E")~u %p egOX5ԓ113V 5;5kp*(o+7SK%A?so9x(N$La}t rL~ nCAD$N M@\ůi@HyςQj=eCrO b' ;c`1j:UX'. 
;yqSP;2ޮvv;qk>P 4Z(UƻA7H-4l3~BdPQS XPx6AVl gоFE4gvbT 2vKQV cʬ?/?fdG|&Uy > ?FvznWPq>8Կ?|il &M:wu@T_ȟ1kG}lnȭ k 3jmxbZ Xw k[D≶#ngWYO`eTAEް×E ?;5MyAߖLzT`AwdKTm~U³0Rw48fܗAMQ_xcJY[CK%vb!)[7&kji^GsMf w '`*$~ԅb(\O{/p4 =VtFA]s]+g923/EЌ"#֓?x=Ӷr)u1Q{0c Ŋ.y2ޛ ,đ(3+cx/ @՛^]  f4ޞQVH]EW7U[p>gn-DZmHFH)>We ATnzB Mu gq+ ukxʌ40&FN{t$'㫥I1ةe<=:݂PڭA75o޷j%(eDD1td<Q6\{$E>CҍcG@qAñhg Ɵ@.$e:htQu,d^ <ۃN !#T(k8p]#q 璆m@:+3xC7; ֓Ssu܏'VoAY9k5h-Rz!/v0uw";Fg?ФRQcE@!ժt3{NY5-q 8Y-n^YixP< 'ܾCZz+*&M-phtUq1p؀&_ y?`zaddR$͢#m+pT0M g{6&U^7G튚w_D+7K激fsW@4+)>aw6Fv(6H#Q$*荽F-̜ytQ%زQΠK[͏ H}kqR6MMHyȖ-pg7~]D>֢ytDC޳"ÑI3fLj$EnmI+ރ{甜 McSj&JښV~Űax@`[B97w۹_}9mA ^& @ݪksqΥUFnkg0|J]^Ir=pCyv!Jo*.Kjג]87VR4~}u pWu#j?x98uoHvۣaxү ]wb0^p ɊuM#S4|δu[T9ٴ"K:w4Pܐ|8 %ڳf@ƭ K*^Ϥ c@_vH:G\R!90ahȩ)wQ{yՒ>yA[K@P֊֓G?vJtF| 7]aaۼ?]@>%Y꫒t\*nGrTO. r{}OX+z?eɧE@|(:oqfˣR0RwM 72_{t`odh Xٷ*G^,jF6\rg^珫MfU71y3d'y>o] #K [S&Z~o[Gq1ۃ=)ϧ47{emR>BoLh)QzJ@OFg48j䕫b?ɂA϶Ҷ10FX CFuY 2!d~`dN=D-K5_x?b:bx_V:߃uON򷽨5왙|Maf}of#^Dz:TԹ6Z4 cބRK)F2`Fp$jBnGҹ>0tz,.%} <2 ={5P^Km7VHÓ>fr'dܵ^u YVhX Ց)2}k[thJFBݛ˷6lƢ8Ӊb}xX> ql7rPrs)%$6sKJ G%K1Rq^м=!PC[O[neǠЖRY>}g$n[`=ޤ] C7 %zfhZ!-s.ɘypvt ^hf/Yp;oR%j2?2m@/ܱ> \E |{m* r[F)-wy}d㙩h +8|o/!k!X>-ZְV)\0>'VK恁+Cܹ?AOqDPoTP]sݷ`-,5^X݌g;ymjrj=wcW% &i>iMn/*PFte}QɬHyBcz-Pl͎آ^~jJF0*Yt|<\B +EFdEFPzt=ZWow/MZB*L:ōcRʳH$hҽGd{CRé'}[oTs]M|y+F @q뿢QoŁ\ ĠKpxt_97AfP}ͫ Ōmj4:OdyQsݨ{)h;Q>%B:~9f䳯,0*>ɝW(34T==l Ig/'-έ7tP\H\Qjn$_}1; xLoV t_1O -*3R8wٲozYKGoNӵ\!TP}Ɂ;8'ZwM1j {j@I+ϜWXW I35. 
^qi G(+zUwHx%I $C\>3o!-F4*HwgoZgYN XJZiJtC K'FCA@8(~2}>*O[ `T/\4Nm@pad* mlo>[ p5"R]QS,RvIyR&'eLUP)1 G9@)nGH EۡH^gg ,>*= Q3]GZ:6А8}ٸ5we0'DQ{ӂ`8-$loǕG{^5ssZ*lq<>I5Ep0uio4Y8tv7{<m9E7rPjr_0>~z3ɰSY FWzN od?K$KBBw^`)Y#t}2YO?mclust/data/wdbc.txt.gz0000644000176200001440000013772414525075362014620 0ustar liggesusersˮfK)v y As"Qh" T~=#'~Zfj"?_?{/___?KoO_ӿ_OI/?[ O[}7?~[wo}{W'?ӗ??k]ֿ華h_UϽjWmo)WK{_:8vFm)w23f=~}n_jS_cZ{5?lkgƲfoYsߖ3w_,/BԦVvK۽_kOgkSKz;>{F<{^SZJmǮ;=y}uuߣz/oG'X?zjm=5+}_-*_~xOt=VȒ˘ſg{u}vOC/cշ?t/ _[.&)>|t*w=~ sTNU:з͚ᤐqY]~KƪkKFaP_=Z2"x5"6x=:oP,-fow|(Dw j={H|.~s }b݊½*F򻜫_ nq|_xڏh-wս|wDGo/|?=;{աVMЮFb5bYx @6=vv,m#B֮Cm\;I7HRx@^FvllqzsO]=!3J cV~w~XRO [E游_wܻ^-$XEowi4kr)|Gמۥ9QA1+b]'n9;S=ILD:b7-K:_%~1 = 2_8|Cx13 pwBeոV戗__Dz >fR8}xo{JA,Fr8N^)]ݬ};Fx Y9NXk9C<>E+}i g1z%meEw>̷˴m_}8}k'/R)s~a':?P[k|ujw?߃+"GT/YM "b8#'4^B{jFzW^vSfpImt_eHE>rUr^g=SCy>G Q]g-~ғx'٧JepA0© d~ƞ T?]b?N#)2|z~mU1u6Z ,p aQvmb>H 6:oય թN$ɋr|ȠF|uA.O{F ;WT}{|)$S{o5IDwϐx>iUo.qݝN7|(q&6z؊,եT#{>k4_x-߯ -uҧ0房Fr׳\QͿ>5s ]\r-rcQw66KWQcۗH"uYF: _`Ypv:{mX}a%etb=ڦxױ.m.J0xll/ҥKwBҖ-3T ^(w~Ĩ+xgM%o˽v92tS.%4Xޓc*M :g/ƉΈ:$%PKdѾ:^1H:E_oH)63` $hػ6f`.XR@S{icwժfU#i/9DUhhՐ{Dd&:|:A Y͗IVubg$ ٸCVƁr9]DA9utއҁn3:JGzaunu4 sifd͛r&/-yuaoA(vWKhJ*:l1R@kpmhAۡ.GT}Ez,`bDgw6meKQ5רNXE:)n<6ykZhv~S\'+"qGrt߀D?&>8][Ǿo=K| B dDr"[pꯛ4nvjn7Cv]Àl 0钹nJnz+hU n#1d"^Toy(qM8〽aŭ0ӭƻ} eqƙ~n)Xq[]]ͧs߹C~(*=o)񝦷 qBO z!ޡ h>P}B} .Զ,I}n|ຽU慏~^<~'.oy7s={S "iSS|mx{+6 Ibn>WIY&p֎lnꜫX16GĤ|lۥ:kdZ;HUO:_UGUR4+cY֌ 1(Tfa/N7@TekQuo ָwpj0t;6&|ߗrDxGe~tz\f|,(3|iʪ뮴q #2',Jo2#ע[^]ccM SeUWyG$uT̋"fuASl/EpBpZv]Mٸ N3 xO1,w2NAkFjLaYp :0 8hڢSv޻sA}h6GWы:H4ą$vG*- -{t/°&g;uf9]htexZ_Jl G7z$^Ю) q@u*N?:j^];s8rtks t/m:9f~ި jf~gFzGgY\w>lyF7B/X5Q2nsP33^1j2E vx2dY87~''ZUM.蘻 Si O~`ksF{ T@ ߽$hɲG+A~o-Շ`;dt&kQx,zL?GM3Wo{e =|znI -Zd|4[W"Qg\3i%GFiL:m_KL$.+.~2ѓ;9׉+'RW 6AK"!7 giW?H} Wff>̞d+/GSJY00Oza$Kg ԥ˶ךrfI:t2g~PP 6~ c6!ਟ9h8 &owB^B~鄒{YެN-z'_@NtݽZ = Ƃ2|*?YLCQwo8aOGҖ.JH񼓕7z {A%8oF㮩 /v;~ޤ8ES?24i.͝n= UyX# ^ qF.%Bl3b[֍z4;5x=^K|w_@A9?o1r1ݧna)o7ZB2bwW#Bpg9:3"e3OiqcFx8h2|Znoxm |Nk7|caH(կ}ԺN^m}0'~ZquG2LBWц) Y a6tZ q︇z 
EEu2XRlꝝuV؉äVcQnb*TxT^FGUec TEm'f(V$'^ݒ ݩe|_wֺ$k1;[/,Ɉ.\ e3W{h cJt4Brk:bNuiNd7}TxA 0k(12?d-Ld+1,]{XqBfPA;(05=@"/hv$>Q~}l3[IN_ 6xz{yƠf|)HaVL8L$4 jxa$F5v[Gxyd٥zrF#%},%f($h"i1T]e2Y#@}}.Ӵӌtxw;wd Iv=s+ ¼]/[&À2PZcd {?0y4(56UyDa.X_[~cl=זA=3V.SV\h!<(P!gtP\K}ˌ !l5ݢߘa/= fe:#vvtƏ{Q N䫠';!v@!2L (QWIlѿ^&GD%9Hأ#w208Csxgk~ *6iJѤsS*v6qǏ_lS ӓ #mǞ a83"d^vcGzDq_\]0.0ATT?p>&qMA"p>E6]bWIA"s⮫&n;HFW!S5%2k%y.@P NnՌ7Ks4S^;q3h !N eәTO%S=ڞi:s':g[Yt۾C‡ |C"#Um"P,s:Sj-u^K*5(<Ě ܥ9VS_tϻArhC&^,l$~槲T Z1i/b4QCf.k)'ȃ6dh)NpEDy%nZ/V0lq8(TbI#jW)nw0HWC[e<6S f"H*)ƞVF*RnrkCԫyMlmܩՓ̸8SRXk55)"+{qmL0h<PMuݗ2)kYҷ#xLl5Lf%\ x>m\#-b*Ři|ǘ4 O)rx*o Zp' u*jծ6+ONӶu w6 *W~ kiBK;GAj7dPdxUZWEzi Xs/q͔ЂϤSx eTӍ(HߕDș2UUKNinVjWՠi5QHaV^~Vׂ;b,Dw[/ :>:U~}vkŐ8~4M?hHXpLPO9DyH~Y/M/}b`o!crB ;k T0\6MY*+J-B*7VՠQ:G slD+ܯ=[ $޹n %=FOEyCMAVZXϚ}7έ VDBK \.uj,6CqkrUԯWQUSu7>Y2@fe3`5F, ]aH(,(Zc+ʃψQnf;WA^0e`z+iT{Pp"&E w#5EjN";T o?$sw}Hf26 h;=+U_UnAc|HG_*mCFu99b7\6N:KP90􈇠g++SVvrS T([z3OAU#0UaQŔpYuw<@j ݐk{G$'[C2亢 h HsnzBE~J6MWM@0[0v&!\J 2DnG F OLIO\`~nĻ8F#FD[|ĉ,4Uj 8 `f83fli 3N=_Xˣ*bt:߭L%yH{s},ѣ]՛3m~(GbԢy!dtx7s>M-TnNZ\Uf,PV+=-6P2ST%le9hgkNrݨ?tWǕB]hɌkk5kstͿf;ULϪ)TBgiQ0U[tCvaeR~o.rK3jH~t.5fr,u}kNVrkRq̧ViՈ<\*[sB<&w-(`z @kE!JQ֙-YZGZiIiCqv5tόAræA{PS2$`+QTfbnh>sN=5tW᠒O^{ h-$hjAlC7l㢍*oG+`#75ܔn< K_ L氮{sPj'GLӉ(v~۹2^Kr2 :oGGgat"*ook5v" 'ҐS e1U%7I\ǟ Y4;1Q74"E{]Q ?Zݫv]*!['3)/l1!n 7V/KӅbCѡE<,tLӭ#!4nֽSZ ױ ڤuS5F̭0i=\w䀐/Tl$tz4l]3~<!Ler8F6:h4x2_f%p'OWr 4x92 ێ l+Xp\TrfR?BG[%!Kv_ #-oH1csٍueRVWe@:3CUtx3&%/%;Ws^m&+R>sRX{f?-۹x=-w0e,#=22@M˗Cg_~u #x"}d UgЬ;'jtMOȱC=َT(ǚF "G [1OXŚXsU@vrBISiȦ=;JI珹2\=4LOn|83Y66UW=PCmKLfG{h>Q՗ZFkZq z;)G0rϛ|ąGjV״"E[I3'tc3}60oFZՔeuĈo֗4ZJGy[UX}+]vs] {3iS=D`3 KF/C%eBDjTox2w:L͵޶_yl^~:!jUlw Mz@k1p[jDF$iX6p o6zk~'4u&i?g׈oh]W' Jl6Qe5>xhp4E97rڪMfAk޻v?~iqАJWq\[#1- d*'5ʍG|?hɍ>Q8@ݑ_iQT&4}Pr7m]{'ȸ% =N95KU?+Z)J>C;|ăއtڞ:P |#-e5({z?EN?:Jgelfܽ ([{6\ > X15^Әr%7ǐjdgkq4b_Ǖ܆Sy4OIhG}@KdWAMlvwNUG"[!fCH)_[@WȰt nVw4Ä-LLaw$İ3^h)34=j,A%ʱ8k&%S4; 4z  CvlGoN4Hiϝ 4n4_Sx?/4D2jOSjx7).i>H["s&'7 I +~1Kk)TARhAP)f;'ø@0{h ]w d`RDF/"GP79$r bq nh³ȧ;QwYop).J^0P`VHH!B 4o 
Fih8};銵N:mYn\+j$hʰp=⭨ǂl^%ZG;˨}9kNqR>Ex6]?9gޗkh!A2 !E{2;OR]*]>ۣ@:#B}I<=KՄibL3ϯ,nor=9*I&m beh-[re[ZKaZBIDJY[s9fej/:cP;PjAjQ*f;լkrxЌG[W欷88^Ȓan덎R +B >xQ-'(K"Dw"ZJVZx>1 \4BZߓ$]0!h½hiiW9jh3`> WHIЏfNb ]$4! Y#-a=l')'k{ -8H' *?r2@۲ɽO&k0qU4<ۄyB[O5<2fSܺIvG*Ȕt$<(ىވnw =8xL? tҥf-!.ДôKiutz"7j*Vxoߐf`R9PfRkS@G>ދ`r)UuL[7k#:QaXD V'YG/z;+U/k^U<3w#+# )h U,PYN?&2ʤs=֤;#:>sꫪ&xꝢZK(x[j6Rs(@F[ :s C24 ƏWqMX:zo*xAbMW 'wkPFyNB(#ޓ&>hJ8J>$R7"2J,6 &&ފUu&(y4 \}Ve,u iPW.x{꬘nF?~Τ.d-t\Gdx%¦IQz5?@lHh9-5UԎ*FP{e/D=Rz\kyra4+Z$fN9F&To7j / 'f&eefkFn3PƓEZI}ˉ8nG0M8/ݜR6f D ԇ5 kNE|%B @.oteWޑ۟MKö3^E '5;2ͱFL}Ԃ+4'M嵎BkhAX~r?))Z_w2- =8p#7*M/JSf"+oRs֤\PlXX 1gN1y(jAj(;t: -l#4q(j}ړ⮠)7=~"X%u)-Cc!֗ޜRxSp)Y{[P^r˪:wrV8|RԛmԂ0V8Ų<^ζ~VxJ:T4cqf{T7v4&g'ϡ3H=y@շji]f~Ǜ"АWQ.oGK D>bl71;?enSB>AT,\Y'c,W.ȋ\Zိ~QvBC̡ˆ0ٲ{Uc Jedy졊nӥ$xCH'd81Qck4uEFˎn Je "EY9c[^e1ӧ b;K-z!0P,Y5K[ܡ祈$kT-xLjuEJK>+Fe<; c y@fi|[2i58,䶤3g-pW hWWq]E,ٜ̀1ˉV<҉ WH@ ѐ,weU$r$]AJ^^*qƾ3V?ڟ1iekjoWV`p ]+he-Չ;)ibilL1ֹվ?襞E!k N1ă<_6 '/kT71~B"\Fz$q<-AM&>*CuӜp T*؏Cׂ|6r#E7Ex 9mGȝs ȟlݢgo"tݬrhp۠ض!&XOLDKk=Te=QtBֿL7ZiS4'ѤdHK'5lQR쓑띴lu5.-]c%tH3Ʃ^^lꙮZJEU;CD4P1Vl{w Hr. Z;^Z"uDy< 4S~Y|4 X#C2u!(+#o/S dS.%ZlhC3+ju\DUgLzAwũ]'1 ;}\X)X7ݪi@ ]Oك`AaŤ</Jb]a?lHMNJ\>k0^zB/@bs/!Z :ڜ3i"I+WQ1 T4藖4d4bK4rfo &Uhf^֭Et_u2+oT{,!1oU>-HyDxu,^٥6fBƒoK`=AeOFI5~i} t@F&D@lKXv{A W_$rBƼy|k~pwj=!󡛱$E4PЗt8f(>:{u4oZ,|14<}5%h2Ysž"HӵAi*fks9{q<񁠶(3! 
Li Ҹ09dzPp |(InLpΚG2246Aw&OEvŜO([5Kd_M ]l%[h"-7FyOy;/<./hE5BntR:xTA6%A0TLQ0ψmCJ9#0sBۥz}p" +"֕'C˼[94Y;~ 8fΞ o_-M胾J#;VGO&7c6On, +'zKʼnڴ:&MC]/Lw(2Y[dG[6>$Aܩx{=Z _-u(;꨼\~ڈH&)19~$ uܙ!y$߉KnftjV+3p39t*#&t_wgSc&̫ S"j4Q"\]>v$R0fTǀ [dP kO"2Ee]L?0`-_jxO!bʹ7#3(aޡxm?iZ]F@?g4A;r(u{ u$)k9mi4DT}xXc :D7唕>z3Ϟjd6:\ޚvtlbuURl9nU sz=٣*XoA+!:$QґrTM6j$6?C g!9BG$P{DJ[Q6!Y4d~366ԴÐnXo]s|Jǘ2Y-њcˊjL//-js4+ Rj]o}kI mh1{ҸFk  |FOo0xR5q!לF<'Daꆉ4%BO6~KraΙ3ts,٧ !gWU aBTnKľV?Yz x8vk^7qt4}c|xi vbģ7tfܘ]K@6VIA k#HSaw{K.nĔ_N|!9;9b$Ǿg~%+UԪiɬHɼ bc{Ƽ4jWa} Zm*/"NeV‹)u*׉\lsf\ϖ vL(Sބz["E#+= 7oGrPa9UTTS&[XNO;<[iTz>wo.,M28ДѶpf\I#SR2eWPsP`Nf4Sh%YOJ<9yYNe.p4Ա($99\W2~(WӌڝdNvjoKeqVաBc~:fc ^\mfAPĎ3NrG"~9`ܔ+n E)edJ hi;yVeMQm#eP?Dٞ,G(\qj~?LA^D |Ԉa'!U'52ZM#f)TE ں8;fpdPZҧ mY9`r4j>JbCfyOOid/ݾH-)T6n^ZA1wf4+\G0?w鶜ob.x6wԬ(gJZc0!&ns&m\DA@ffu\H-.97?/4B8]v&H5,#8VM+)aՄj4#VcHA n7І&%\x@s}iKIB"O((,XR'݉{y|̔7dc{30Dz< 4۽s`)2'7XEDr+9r[LNz(%y"+ mNd ݤfUV;{0TbE1dfw)uW PL8\|UjJJ/*:4[Nj<imm |nND 2 ;9Jp ې8#9m3ˀ(MAD8ʫt1i`G)P&(mm]R_hL|کLVZJJ$ߪ@fdo2:q~C\[/ 팣ZŃ"NSGttuѱ5q$:!k9u^΀6BۭCe\ 牳6n{M lTB1Pn74*DK!O| fh)hOOdo#'tV-ܡDtt?;A\ԔCYR$l%WZ9$T>B6L_:m̱`9oyg{OA1Ժ4-8Z5j!ŠtĞaX(~Vk2뮉fje@3nI%g7 |/֢c7bܦQuƄWV71<\v[@Wh>r땅B_#'PMieJщ&E#" jbZ/Il$>c,EytA9Jvq'7/./gS\ZaH4x\i ?qC(Lٗ[Ƌ\htQ#b3*tPE0~ق"V<%45PүkaT%QǣFm}Bs fw{ItSZ9\xiy<Mg'$T[VOEA.Z+" u;Nn/F{aP8/.G)ڈdk*w4Ұcr,ȑnj= SNo&K Lyr }^TWp\oƢ8Sd!&{Rb\Ǯ6WveYl1i6")QaȆ!܎"F28qC/,7XVh5irgRslQ#l pu k;ŁEӎU\9׿ wZDx7*W(@"lx&F!A5\_M[Ҫm% ȄVsSiB!)8ѣ2/Xq23vY uYeH*[#ګ ђi7~P񴲵zx3G9dtܵCfߧcPRyNŦԈ?bVeY*!]wHZnnŸjH,G_d6OPA.`Ri&aO,z=EqVo@![´="{𩊙P *_?wۛM>MP6i_ABޤklsl6_B1CW–\#ˑ %"* oHFjݓsLRgaˠvƔ^IQTE >Qwv(v:bn{X58;"IɎ kl\(L2krmC$@7zG 4?y~\vQ:SMO:=Bym ;p2d *UE.ph0G_2YZ$X~:w0`?h7e֬ѽHx pQ67E+2ej˲q6GB@:MjֈogWq؝.[ߖwe\aZܙ8mwOVTҍGO]Q-6|HN4LO Y5n$py&u]_(o5gɴ$Y~1#sws{' T"z9FE 3ez0p" J@%0? 
RBwO]yQ,-J⛄ˆ8f;cݲgV!cxpj.\B?Taf@'R$ i LiH͎>HxZyKkI2N"g gaMDF&fc^LC}Lj(rW$"]Dw``qsvk [|VMy RR MuXnV% <,)b0Ai:o"\@W練3#ktq P(u2<)'.}ac\Mɤa3 P8Je39,Mo"ck8gH}heT(E7Ej%HlBaU-rk잔Tm9`9XMߜ˗J D3JƤ 2]BߥKkl*wëq$ɂTP/'n/[;5Xju2O~I[FS$qRAˌx*Ad&K脅mɸi4&>g|RF\oM=]c8t}8-D0-.~9uJW\az"W~{+uU$vYjYtC>^Q=A x5yUM uKds=K-Ջ8hKjv-VtF&NeifĸMLa+&90kDTkn =XrNFK-/Ҭݐm(Ecw%bFҤvTy:BmU$O+^g#N zTlgL_YxPyk'j~Vdġ_ qӀ VGPI56WZڇg#6M/K)E78Bki #wOJ֚8dB֞w77)x ,\bxi4 hP#4>+x{xb9OfWD=h' [IY,v3SuQE7kp @?`aXֆHQbک2"pGC7Z7/F`XQZtHT6xWt>YmkH.[[-s3w'渉澘F#8wf4;:抴b0-UDj0kygAM` *"@zɪJ5j0Ja*?2cBHRhڠNM RPZvЛӚ } /ysX|.4!%\%GCd7WHs1CuM "OUKZ =|/" @:8v._FԾ (-$odt!҈O%G!Ʀ;B*~/#}o7۰ܱV1+@\] jP:/VnJ {HBq&sg%J>_FѼ`pAp~%U(Ou{D}dJjR+ҜѢ QK èo'RN`OEO7GE2be)׹0nf,2İK}Ľ6\лو NME4 ]u1{vO)E#.[̡ 1ZGDGy,o&SAu6#w(qu8MBCK+η*cK9s6ylqs75g':Cɹ?Cn.3s;ىUh#MQ 8h-M=ѻ}`(guו < Ijk<+> kBu՝)mB: 2 qԖDhXI<QeߒܼПQ}A(ݝX2 =fc|5r +,sfƣEQGz72KaJ|GrhjCk%f@gROtnp].<ҊS@s8x8uMAtW֩5'"u5o$=.d#|Yd*(iZ,.hffߵS-φwkYK؝ 3j{gu*잙d_=ud'G?p ? YpCt[c .f" /j9e@>ZUZLC |Rut &3uF:Ot' ~c8v˓]WvRhY6[jPtD`C0#N]MJqkq4m+#Y'0*w?Iuy4eH5g$*XNhB6D/@;j D 3&T.$dj!Ξ4_mfBT}ӅDw IWnݙDh ψf+DŽ11j{oz7l@y !rvJ3rk[wZ*yɕ 0@91՝l "@pлwsQ$t! F<Ȳ8| WuHЂqJu̴ .=g[d/KX:£=mcRCi(jHpm1[APEGԄ, $uȜ;bhUheS&CTF*`klTiRꋟ7mMZ;t)SsBܮ2=;mJ{R :`a$*:Hy3E% -'1@c\GZaB_6:^b)jS`,8odua%z84YkhAC;p Iu .=e UDcare`ՖzF.]ajDC;<7c`&'13[ZTK$Hrcyb><~04؞1wfaK"P?ГgﱄyFB}d!3˛ԦV2Ks;M׶?HhԿa踾Eə_b2Pa1]?5zo<2F8zIviwLJLJ ^;Ey 5_K^(e3d  ۃ~}i*'ϫ"kFc#S/(AeyHӺ5gW0Vxfh9IƦ\#2] jssʷB-'kZF=-zq,tOM"y=M$R.;q%hڌalfah.,B5-uh6x#;2g44BA! R=˳{+auvLzA .ݠG,ZVߺ$R:ThÞ!&]6{h2huKYoޅ2 ;YnVbooosPbі@vҖ%NpG? TƽlN.yݔv U9UAƟ :x#BxkYmӡbea,3N&6s1o40/R| 3$wLg7 -M8dCkWG;@ m?W[`#~@r;WƍaV. 
-) y vgr-7m-~F"f' D U:1#[f$C9{3 ' jGeӖp-&j''gJo/F B5gFh}e𣱌uH*6{tŐ6ki b}%}IqX0I{&NUj)?\ 86כ]ſ @v/Nymaa{܎'7e%#{!'}ْخ Wْf2ߏ)ev5~BgoߢN\ND>Xσ*3yHJ,[#߮ZBkL8oĬQ{ՍH):?ԁJC^ouCoFM0*B1d5 LbkI}`qT塭hUq^f_g]@dFZ01_ ^ZۆԴAMnN[h 1o!Z}P6ji^}OI%Τ&UwXASD^nýt^^RpId+5jt4K,'oKVէ2Lt < $S=Q!Z_bSG{F1'FH-Cߜ@g}PRQa2j,!UK0|g2Znp9*L=5M1=6HXzIFåQKHIV9_O FY?ODx/'Eg\W;pa%g^&8Q;^)Xw6HAr \ TQGnhҺ:5[٘xA`_|^u=)-5S 7#eTO@#7Ņ: f/f 4u{W6\RȝF\M(Ѹ8WO!yrnµu-a63Dw4#8m>xR <# O|{^<߂cGx"Zsd"}0)C)ꢚ[+G^Nc`jUH"/^ s* +œu9 `rlr,1 6uAukT{ijݭ6K:B<g"+&aW:" opj9EN\χgX3Q1upP+xoqNtA5=-~[w HRlX#^rH"t1#̕d˯;#zΘ4QW0|+'<>!=ԤPD%EN`V=l"7|,! ^1, jw%bkm*[Iɵ6GuﲮtD0AQԹZf{}dZo. Lg'UKSb+κ/*G,@aӮsg(8kn sayJOB0Tx31x:-2 s{J^|cA=O18O?kWb n<+Tc_>όy0-Tv@7]xL:%'w+Xm9aSX)F_Ӎ/9N?-d3". \{ {H,' Zm=d9'`)qƷM _7ʝ$kTYjWkb7L"f8s{M4$ 4S K B8n7ɘ J7McX5 ~{&bNPg7$za;RfTL?ܵb{DBS!jRTZRV`4ɎKx$r *T(?\nV ]aؖsE{)luW2_xH}QE3Jo$ Hy+ʓ)18 EF:TN+ŬC[-3vwwљr(Y*/5p)h]g`+$sӿگ$ -J :6\@d|/+qK_wJe7[&V `A򓑭Xag(8)tJ1&j虢 ,z*<[CbK}Sgc{s Z]wX7x+5Ș€20ZM)-j&+&,8[Jjj,夜yS2 ) ٭iL߀B~LLkb{q[u_TQՂD{n{$վ_1lYx;Aܣ-v [W JYq9PSI*隳5|`RnW2Kj8{j~{<^2jZ ]x/v$a6ON6~i %x3Z1'flklh6@/1WMoMu&cdwzQ0ϻ\cU DTV:SW2cr㜸\O{wm+wU鶚J98zum ⢱E 0bWE4tg!7kD|qZw4Ge4Ý Tnx <=-ZBL4w)YHOo8VQO;&y:. p::ea98YT'Qn]!["ךe[6؍C$\7ba:aYƶCxD&ÉEQi 7qez} 2kczC͎hRdizJ2&!HۖaYv8Uǵ/fOǾr4fUl2_KIߨTBCQgTٛ!#Ѽn1,r:q3mULKzQ|[Cᐂ4_U;7S7M҃LA:)\H 7ϠŨg{ ;￝Σ|KQxuX(򭻢x,j8=\"<:> yy{+ue2OJ!?KPkėߊ ij31EMwR uxW%gN7:3уmv?p iq ^2.I`i$*y;/ΕnfY\'WnUr,SMK:f(?\A Ncz^!qD rg";  }|4^*OsЎ 5ed'sjzrL-w\z͟;SG^Iî% &d}E+i|ORٺ[vC+:~v {-|F4ToHUջ;hߌL:IaySߙ|tKjFfTYC(9OؙơkS>G9&exL75 E*@4So#UkAV Z-|d/0YI޻i㸗$S\xJ}8$R^x/SgMYX]L?X{ >M-RR` wb׻wJJL%KO*r@rA*୰D G;@pV-qdyD{#<^E f|-cUuY 9~^øiBڝi _c i!S2-s g}qtͶFu$J ?u]poYZd#I][M1 u_,qz v鼑c{>"?tAHr%M#i!c{ .%fbSgmXܩbFpX OsBRǒ-(M' ka6& ~b 1\[_N1GQoKSa_nFih!lIC~ɉm,L]sO[ۉ@1GM3 GG*h@#~V.>fvĄb\$ 2[;LUJ u="B1H~|N6r=N*m&JZXWjUx9zha#BPf}h"3MrG<Ȥd?ۢTBZN7j;ZL>yoU!GZ cH,F`yUD6"Bj+|*>ב6v1 Du B:fLM#BԪݝ3xW]%iSWR98jmo|jAO_ _9Z,Χ]a L7q. 
~P 7Ucܑ}G#ck)FݨAlY#b䲖h>A'??R裸 ,WU;UY,^hRu.Gel3qDG5JHNiydŒ6`6&qZIԕzt߽;0ckJt_$ND!VWveS<㿝qđh(^#BV79 %u® &\ԓL[?R٦d8=*qorxiMB>h%5Z$I-1ۥ9UlZMVQ=x$1{h[gPw 3|b sKŲ{FbB;h +@TJq!A>b9~;"rH]+,S-eP4FJT!p`2^"'A<$;|lFÕvץvͱrTp*S\2)4.c,i=P$V0 h[u wЋ)zM.,Vgc[{1hەE}7U݌oV!CTnQ5>U$0-R| j1STƔ aYdyBIFs/0s ;7Xմ3ʦS>yHr:ror]J*1iHcʜR͙ʘ6Ĉ&rro=Ik8HhjQw˘{; 8z'ǍB~OOxXms[MdK=Yp #>530Q#as, t9^vI*PxmܚMvgVE)L7ˠ=5r*y/8.< ݴtX_0AE8-޴H2Ϻi)7Qt0n8uviTі7O2BWCIs zaf<ٓEP򺊴XbhΛaYd/i{Bfq#]CVhuoq=5%B'R-ߛM\,xdT\wsjzd`pVUw^Tz (7yLmm=`T1"barѲ7I8iF_#30[ -+:tCy̤vD#U}ު|4ծXpTQ*c2q0a_A~+5#I}-Ԝ2jT'(!Dޅ0s݁TF37oz܈fD(IJ(fͽJP>9꘡t{K/cs޵;EZ¶ZWU[~1H&zW]6P2Wr$=Z'k,7nt wSgֈo턁p| fuM-B;DӍz\$Yv_{u߸a6R>yd;KE6! tSRQ7tI:T5Z:B`*N x;ÊJ$c&Xq=9tߍT@7͎);bL r+Dޓ iC~ƂAP:hHDaf ]@W*|ksͫQwCHI؊쭩|{X6A/7cnř0\85;/^%94b *! 1Hᮊ|掀IDjeR5MSM$M]c5.+i#RƐBݟy'*ÃH1V52d$QK`lR3ŵ MSR[#Uk<³MMͽ7Ίѱ}W/T~rTעCUQ e 'f,0{Âj* **qW ZB; \l\mҫ Q( )ZnEd$5i3l"$w_4-ldvrl9 %j%^ky6?PomOz djq.YC={@Wi e}DPOnM6xEOOr f߯rL GVShW@ľ,{/9 p5y]֠»[t.wo7$>Wvv4]ɷL۽&,N^ki&n{ #hmurwO ĭ'6-$~*2:db+c>2 +1 Z@V݆N r݃EX+θ3Z!/:Sj`0TvQE ;CGZ"PiN&8Az|e^ig㈁:hP?m>qLmk"qֹ@_&zkSF0|:XZruՂ/|0}3M lZ[[y0D2FW {9}Kpޏ@\kj1hƩ(-ACfS z4lk@ۦVJQ"ykozB x|]EnVb:^㘒+G15/gv^?FͳgMhoksEr8?|vf $k s-þT(] !E0N]a(t}tZzi-SuxlmYk.+U zS#ghV<9DbmUg^sj/Ş:an֦x 1΄uthBLi a"OZlW%/m:O2ꃒ)2i}!X2W*]ݲ:RMi{!脸J\ Mo /.#ju9.fj64 _9{ASxJM2sT`L2=~O!K{l.{.F+@ p1CDT.ȫydB^gyjlb6 TEaI]cZ- ՁTZytM^MvV4A]͗F&ʙ%&ѥȣ s- mXr>PoVHl,ihTlL 80D7V璥` ϻ]u|,`iJ[̲M8z.qAT5ȘcHsx|E#}DDLYߺUn@=LfiR䁠Lgvٮ;lJ̞6ð&TAbmJRm2YxG#Zbc\sC5jT}%p)*-X1<OL \+U~0Jv:ǢZ\%ݻ#XmG̮-3&H''k5gYxe4?|k/&zTkC%OA!|)%:)VJ9$c;Z_eP72x nߑ$sDyN s֞N9XҬz RMuv:x""5GN{Ly늧IqD畹]=zCxK[4Pͮs$U@`qӪ!SbCԬTN8lOR:K?̾>g?* 3ǃrLڭ#G^e@h+ DQB#F`KD؎N"AM+쫉M>oqQ̴w[pQ#HAauy-*\#~kZ¯)w!q.ŵCp\Hҗ 6_l㴲OmKVxMdz-T 1,3~20DR jb].E܉ (4!& Z+Reu;N:dØ%í/;v\{%-$kP[27dF+;0bn9RP̗]Ŗ0t@lK'^wKg0oc@Tɦ>?AwRvcZi bX̒Nn6 qHO/:z&ׁ˛fϏ,i!@ozLdؽkR#5jk&}pL^=K9L$-_KK 1ͧ֙5x k7PEհSX>&Y]5h3ܔM?ڇNV9x/R V{ވV"j)_eh7̍1%(8 23N4mvʋgQdsXYp F:#F 7M&oDYu M #}+NKD_< Mgm1e3$a(`Mpi+ћ:Zݻla J+gRӐ;*]JjG-M@3i^Ӆ*MݻqE҅f R=Wfdd3Io26|ˏ/뢅9tT.Km~G/4X6fcq( ZIfd;qm c21=Jp$, =7!HBg̊fH%= 
hQ~Fv.K2!V_CjdWP~ѸN.ZjBL>=t B~:i,Vā8wrZ_ڟޗbH%%hG䊩ؽm^ϹLYTk,.܎q2Kۤw ns+"c+I8.C+1 i 4(wPȕIs_4}pG%ڊCq&;ŞS<(^E%p$n\׋!:ZTu0΀Eh? _WiLF0ifsfKq#vttCk+&!'WLEhqbTQYԭi’Q*D$aޗٷ^i;`F p*B79\kFpZ})JJth Z .QQUgEu=F=)^U)I}` ┭:p=']iBSKQ}6rw~HVd|eáʟ|iIKn dD4.,8`ֽ+K \UKB_k{+#}{'GfM]|8hӘ[,WL_q%uN?Ob2ժ`I2,=[0Oe_f1rOΎfxtf:SB}1$yuiא欙I&y(#4!닅NV"4F~FeC Q=7wj7M^tNvp3MkQ4u ~OwXxO9h5VcvYy>O}pW!-@%kj+KcVL;7HZIqֵeMe DV672NfӀk6ݬc QD/EYv# "`q61l@%;Mi!ם 2(cR#ug-RpbNcRPх0Z ja9ޖB")7uٟaŖQ6wuYU."Frq.By0תVsfe1 G8 wtPsS,'u)tUq]Wi=ԓO`Ix)YS7h̶lί?q~EF !zoq#GNFtr +7mNؚ i ͑\/`23d*Q;2v ƻ'1$Qd.z1^g]-+^UcŴ'(|l% 3SEZlTv(7W7R2 (z.!ͷgK􈮮xP! jwT1uDo $BiH[7;F\jsy_^3--$!ARz H*_kFp[iU(*KYL#<6T Q_f0n&Ŭ(Wv)ܫC jrl\MS':Z60>WES"npRѴ3i rlדMϐ%"I.d\T},H/Q)]EziJ$T.a`8`ÒY20v|ʶ&&z8RvJ@*^|ӿ∹i:dqpHm2`Z[U۪сͪr8@qqi DOӟZ7)}OO!^%K?Rc[ުԞ9v= mQ)$_ ke&~5.mRq7Njjtu;YX_ 8Zwyvgf꼔'J ib)o"k6|] !z |ra3RDH%uBDޖ zÇz kiDiÛ^ `G5s) b;Ut:=q#0%_ڊQ КIW}ѓa Ӹ`n^FMDM> TQ[Iv&R4uʧD=mp~Bw#/haJ+ݼsv+DKߞ9!l耞8ZvRz#t?^Ryt4{- iR$Vd NllJ*Mbˡ4CyabAjmeޝAec2JV]'o=T}g6qÌO?vmx_XL˳o=gg:V*H3a{)WK\$٘3}A@450ӆU@L[ص3MBkү0M>hj.Wxp8ws[ͫ+3وUh$Jg޷ .C6o@ ;M͠^ڟw0vչwNJ+m FrȘn_h K m..Ciu֕SMN9Er/"m$7iHUᅮCѻalMq.B" W>틘#@CeyOD%KKA ;\=i"Q4y?q6 C^Y`>faG"k| @ "JEZ޸Cfz݋Z NJII@4L@͆;ņJ 2Fb.h{#c+G445 /jߒsבe4UjNw:r,&%IPTvŋL!Yă] l/ȏnqbau!.0iG7ޙk&;gt /PŒq9.b+p<..7/6ː a#f#m!Bx|_LTebPRU\A{J1yq8LPuQܽA1Ң :CH%3u~\58 5<\, =\yJ4#6Pt6vY-㎝oQ툄zYb(w*`=vgYe»Cs[ttgaٺwScVɸ#Z}k BT`ΡVH|FwCDdV2]D?~~|ѹy{z^kmK9p111ihtLtgf7MUu::F7О g?Rmh 4m#qkǶN"no3 4qO>ZP,i߱[V5H&xKeoc a@9>l D,'U̮čx+#lψI~C2(_aձf :޸Fٌy@%ddwԒE$3Vs46p`\dx$_lȰVC#OoQɽP@E>A-+ypY4!5h\c'/^64[Ri# B!nxds X2X|[l C-\1qFՇQnB^@93X}p.24J^9 Rc~"j[aχ @RHո@NUcnY<-o|rs}&H?iDKvy hb}˗@?'-m=[F]o/ A:{dyAv g}FC.E^zwRm2>'PfwQT疁J4s*m H̟Mf/&дrx@SS Am%z^*>d?иN@g#ф{QGq?MZL5ϸ\mh*ҙn?rOI@Q1$t fѬmh2(V9~7s6)Z:^GUM@n?jYo$ sd$VN_GU_ۢٷJeA7]#OcX|LIKkY~^ѾSŀtעD [@PK4=OWJ 6IT l-_ۋ.n@2ޔT[ lp'L]\sC(E4HcRQڬ2&m'DC:bjTFAlf+ɶoD玂gri8h.MmR.&]Bsm׮|U~α˟4B4q`͗u/h%$t‹ Ќ oZhl2UY1P(̟&ANu4B4 .W թUՀ2[#z](Bgn-cМ-juK Y2cb@Cg|i;>8c6LnvR@)tdf@{OgGc9У: b@]JiSQqL~ s?뇆H/qAK@ϻPDe[ݟ)LK|=;\o(}lpE+- ™hfC@װI󏵴3E "ݾ 
/MҮ~(mx'NwWe.^& ZVPDCa{F6g@l֗$W*St*8}qT'u e(u>=7AW]@S @%,µZiK<@jyXYB9[OrnkuߡV;m^xNß uqP%Su/cB [2.6# *?]DJh>vc(e*)Ь *A6{%2΂0QmļWڎ{vM4[?dFӃ$V ر~3p=$`Rrioɻ7 Ț; f*y*c{}R:^e'\⅝dlAegm - }PϖZ!샞wf#)pV&jU[[p $?R@wӪ|i^ߵNsGq3.3ܻ辬HYܰ2xJ+P6 ;cU^\f ~/GMsRk.ꮊX Ƕؤ%lqg=~:Hp}K"u nsq<;wI)2CN4"i*dß1Y N $ 6SBx,"JawAn 5 ] ك&<Y;ZQ7~$0Yޓ K6'T%a>v(u8pIMqXLB)/Zmʦ4Vé$r5T?Q..<ĕD@l: /ާ3ЪBӯV0<шCA_FۃNnc i\űg|^6 LU@<0xǏtDDDcmI:xM U*9)lyV~A3u(+ Md't_:[oS.ʕGmHƼ~i~3'Kxwf4}ٽ Iv4sk"NܝS+=K@u~z gLq ?_o׻L>5,c"tAf)9_D-qɺGMw'h~4ql!{:~_Ԩƛ3hνfy ~\MҙX_<0d2d H>0lK0iK;?߇3ϼi~S*vxR ]1UrD@zs~ z~>%U.!hL&e7oONyܦ&(I'H 15FA5wN Hww4e}:hί}f zb qAm~pU)gHob$1Kkk[Rޜk{Q5̳IӠ^7fo ςnXo7F3ߺRPY]xL ʊzyQoC+*+וET1|s`G,c'+b, QkЍ ו?bI ]6g?j6X| m oԂ% "c+b6QsL}|e $Cuwv;X H§1F'g@zGr?KvoOnnE#^ NGY).є-yY߰Z:컯Kǩ:}T:a[38wJHY&}gXβynZcOtxtm8'Z^^^\S]Zh2NGr/*oJomp~/ -t#I'K_/3iggF?rsw.}E߱L"nV(9$2}{6&Ds M,4__#/KQ Dѳ r" =e1O0CM3? n Mn0yW $ǧz6 ﵯl{(G4:P=rc=n]cώF{,IFE%A /\Xx2#E+';= =oj$WkW24lYܕ (^Pg_n+.- +ۡmg86@HxNߦd/O=Ow@H\\Rd)&-aul\ TLvIstx68d!U0rsj* b DEkλ'hq2ް @>|5s* tǍf`Јt! $~pfB݋3iS4f~JL}4y'1܊$?}FYFAL(A7Zq;).@n;z07H|+ݒ|A-=<$mR/B?]c^qimMsYwpZ}^ uT\ֲ.AF~ P$4z G^51ÿ@ 7EYOAۆ' ƒON|•{ h6q2i>d~ճ?{n`9v$eNiKW/vg@7H2 `h+1TwC5~ {MHQ^y=jxltqϚ>=a_*.oKuA#( ݭiۊag{lnjBa)ҨNJL y* ONPsy]ij]Cz5uKD_Sν2|cnos n_Í$8g,g<Κq3'eD Wgseh7+7LݲR ;tV Ycx:{V4NQjx*#9/ZP躞@?v>$($>N,SbUAȶ"޼~s0A;ƣcI&㥀ȳ9hR9^^Eݧk=F8(ZU r䣈ˌ#Z\e Ȁ#?BSY=AXgMl5/qHyK)ߘBЧp< tm:rk:#+tDLJ&4,bqVk{vV#`[`?eFm{*jp{t@fg@ z q$•'Y| gX0=2ޑ M `t&*B˸XvO@\ -RcI> "Eg0du?tͥ#h^?J5 '1I9,뭵8h8#O) /M ٢?#Eyy)8%"v>)zkO4M}EЧUE*/cMۗyO\@5]6>|y BߞJ4๠"5V #zN ,.@3˻4by;eÌ{{nXGmjh }د\]:FR6ޟZΧݼi'ߋ _r"ygch_og?wWiɴ̱;s ږ}]DIy[4?Ӂekv[zq. n{H LJ iuHZrj[OIӱPZuFHijp{_a^-vYs N?\ z4P4x|Wa#JDK(`VQ0ҎQ? 
\bSp+RLo'9\vsto,?q-Z5@wY2 ރ \ex5U<SB!=9@)é= P45u,; 4prq~m#HbL7ΠòhFU;H>}!sv{biwATs>n% Ɩ!qU-' (v9=NI/b*@+hk (<#p|N JTKI*9 dڮL mY v⋦~ e(hL?zAKXxrH$,2'6.qg#g@T}\i9rv+܁Vgmat>h t^(fNɒEpMz_:/3TF4=-dJ;FΌ_ߧ?).86(h iCTe=rEz,b/* vH)}vAS ܴN0.E$sg$~9~) ` ?^m:A 링oeDx⯟.ſs;6>R _5ƣ믹c7t ?g:}ԒSeߺ?gIO jMPUILP6b<$1yHJjhBg,-ّh?yx\-Ϟ7f#^92=bU18gJV҈.~M=P"h25!c(]?M"ڻXV?>>~ MpACSg@ƾ4) ≻nI KCC}{_1F۟M*>rO9(61kE%{гA؈*r$,~^.+he{tL ͢'\_kMh8Ԃ䶍5:&|ݐ a/1pVZz8S?''YzmJY`ZS}ΣcucjhR\/ Hķ;J ghu-&m@u2kфYSumǝ(bh<})( s9*JQıG!,$z(NkGf<^8UϕJ4>OJm Gshqh2N7>Md57ɣZ>P S?.",|hN0+e aQa)*w4+U~U?B_ [L48u?+z5,[KsA27:1S6'd^׽ 5nof| >[ b^Ǡjj@|ϥr.MRƸf’!KQ49ukm Ԭ"(۞~c2V*ݿ &%KQiq;C?}iH^oA d4HM Y(rN IxrL]T{\NY8O\'y7c7ş^?دChȹAh.AqVRi> f3u؏er&m޸kFB[ѧ*bXKDEB4qXz Hn~!ɏ:=Jw)"#Vft|NWZ 4w_++^tg9:ե2wc}mj QI;PǼwh6dYUUkMW_/ Dh󨿆VR "CAIY?AB0g λ(7&*Pcι?NݞCS~zmdz8_+Rx4ɵwzl"?ɩqG)PۓJ*eTew!Zv{ӚKÿu3hEh4JW[+BSͷ.~ (;%!.@%fx8a:(<'H[~+/C%=j_LՂ;+ +I:/LK"=@9pd-nLz͂Hg~3$EN< ǟE=CM#/l K=P5ʞ <ݵ׃}\/*~隰MvdGtkfjW4Η)&}W_ҞEac>lBlZPWb1u4 ViW8P83;WIh2zVqxPJ^BG*;:hwf_}1Ѻ'߶)Ol-#>q⮃M.ETyEh/.L/]Rqd; 㝙D[v> Kiv QErK2 y`G:KO0]݃٫9(-͞D=hۻFmALw? fh*s@)nby2$ڔ" "^}'S-^Gt@щ[{AY4l(.}{.D4T\Y"u-uQ[E,r;ҋ+Lzzu f1WvYJ(v쮌&Hj' c7*eKظ3Q{01Yk)N_sLvuw}'#|\ ~%n3RWZ}ϑ)烫cBoXByk;̝ɣGQʾQϠC605h0 `#ڱjuow=d%h"xkW4LWW^4}f&(!2Qmqx{ĭ;h}+H0Qhdx|$M>m`nt˕*PiIhYGhk4iw +}6TFy,޷C2s4\. 9rެh_9H nPF<ƨS $,NBbۙwpCAꁇcIԼ &/p :ukX1y^}OvU:za4. 6>tnSCb ō]аqߛS?ៗQ|6KƵN4+z:>V* 8+Y H>pw~;O `'8Ĝ>nh~^ΥRZ5G/~˨pr }{Rٲ-ueniD u۫qWJ;:'*tJ) 1һ(~ F {.Mey3jT:;NB^~d]PbԻ|n1KG{ f 6¡? 
}wn89% Dƕ @f5LknۛJ \{{*#~6#;\xg+T?vflLs_]CMI5]pYH+Y10G[՗hw1سQ ī?DE g.ޞw+@f)Hhz;ݮlxT~4٘;B:j(Z6" jTdb޵@ouYUJe۬QyG BWyx D]җi+ %S%Yퟒ@ L|;5o@blHcJ /f{"kV, ǃpTNT~ӏn d%uy{]PVTzwvqxcu gTܤ!!AlRϢ2 y :K 9Ϡf?B"xKs8IdS/ ቧ 2).QT[Y:WtA@UQ+?T}mĎ@y}q4 %Swe_õn("4YaΞ5#U 6؟6??H=yA1Єӎ=ֲo@ 9[,8  /җg5h$ %<fg/ ɈDor=y5O{˿U3vrKe@[a;7U|5g~Neݮ,2czKM/{je>Gh)Id#F7(pbB@' Fx7 ==Bϲwgf)rj3 4VRnmjx2@MC˗@ޙ*ȇdX/덹!)ki@[4Hl-f|^2ko dkze4)<Ά#ʮ78a^7{9n&7JQz:ܹ(qNԱ^5-[<~ ^B:7\̷Vco0;6r(񨫱{,Z@?8<'Ƃo 5:)} >0/MP:qپ6; rTX.IG#pj 8s Kn 6ImH 0|[]齿w_^q+R[HCqgs_{drJT(|(t,F9Qz Hv^7-.fQ_WS0;"Z=6 [LQ}år{R<&%,E]"FcB@'`ɫU/8땡ptͽŁF$՚%'IFxFk)kW˝tN>|@8GR SUk @qS֎+;KN Á(ovot  [A,3Y+?e?^&@r L;EeXrPi#@+jX*zu R_mXCT]:Z}Cf0]3d)*sgn B?>6'F\0&؏F&Yh|_g>i 獳Jw>7 )p8$ wu{ LdcXu, 37%oEu1 _Z"K;qF;쿿7.*hkCD@DIϯocƀ~hPZ>yuY7 Yὥ®o%9-q}*ۨCp+)CM(tZfG5;ުs<O<+&h|%wZ$v\m;)T݇pq˹җNO}]POmp=>PBvա g,`sr%jG`5a pk@i l]ۈUp9hxPt}87krUX7:Cg:r <,1"UT;x[TL˖'ok @FΣ:.o :WV =C @]*d-d10_4?JQߝGPՀ.Ih~ LXk|ݝ5`Ϭ9ez'eOEiAW`:~~syֶB}SA^3xEh; 58q'wf? V>=~xYD]'JX{nqΪLjOn/x"K.`y۬ ϿbxqTOYku)kC %]UμA h$hw.5Oh[]ј:hoo<|fA`>PFdAǥPq9ꜥ "OF<Fij*{ֆ `fZ QxxP\8 u_(<j4ǛHYCG?C3.Ot~o9r;}܈=2a%z4j>7^߅׏ޓ?6 ޟ2x4oqټ Xn!439Si`\9mή铦hZáPm,< LylrV_ꪲ{mR!zz E'oǝ:wPžN^n뇿|''K4a( LS@˿&z讱~ٽFh5ﰽۈ_P|eӇRTfUMNvxtoo*7fTZEۉ "Xbm0 9g"6һu=~xg}zPBGwO48./s ?dc o+WPj:VnCeTeل_CUL't D 6fqUKO[,Q}<bd`|>y؈_uƌQ{xJI`UPP sP׏Suxo鲊'm|rqT4YǩVh3KM3r1v<Zo(Yح(2ob ph?ho#;UNmRW)4;Pm|f_T|,*i ؃MLe˯bqb褆n|W"8<Ͼ: yfqt|E.u_'h.qKh;!GDU}k"YyFxGH*6,̚]F-VuOrRY+FL To{Rfx4b7ʸ9%Gq׾-i|&<+Oc#:WN۔PA;o{*~cF{ N&THxϾʡu9WuVfK?4gݔW9N>PMcx>~Zy% ooV!c9To-@п z1J0nt)p'xjM[OIlhȇƍ^єCP0Z붚7peϤ5XhQYDoi4BJ! 
?}^{-c+u LF;8iN<#}˩o>@pl>=W;ظW@L.<׵}W}5Ӡ&G}9ktnWuP SsԿOK90VbfL\KQYSh4H:].H h]Omm7dA?#B5!gh(?DX ɖD~ I03Pin*Aw53]`?>r/ī*wTX)N?@W} .,vH 'q ic%%3?{6lDוT9 je\=w TU6 |v2/Ubb?>;jSX/1W>ҳ3kjx6ānI7:@Wss\e>J*wJ ]J`}n)ݱ N]wD^ywHi(B_EF69S|gqO2FmMBO}Otm;T *dz~C  Sx}  ya 悃ՌC~\^ZϚvD>Y%Ӏ| ]@hbGjRhpznv#V6\250<{ُQ|)pmDў!?֛#~qh"Kr !//*k}N&ȷfLξ^qu~f),Hd`U7}|.U83 _IYo@cnqqX{SBǻx|w6&@{,ZcY ۂJCdob;Hl4 +@NZgnµ&K=@o<Y k;kєQ-n ɟ|B`~"jOn:\""čؕ7eq-ٝOC6 2ic\xG+H;Y.2vƂ_`8Kc:\=%4{<+U,>fp׊wՙeWk͗m qfďX1Ua 1i}W~{on 9t6e @Tr\I3PIM p=JStTÀ[IOPjLG'IR~Ɛ-i.\.KobXo40}_ [X`=UPs&a5i/uȢ+w~WSYbN*OvAxZxI9c/37q &WDţ7ab6}@<~) p5]Q3]$m}i4<B MJa^rTXy[nx]oc? _Qy1h?'yU0H˿I]YƮFl=ݭwlA "m~}f3RA{Cy tL~&+=] H5*xÞ=|J5=/&aEej~48S;5]Ā?o8scw S)}:þ UGU@ H39N;"|woz] s?/x.-֊͹/)/ :}0NďCMA1`j $ kwy6j9N:h=Uȯ>qwEd.BGGDȕk;@t~oC jIsJ&Im rt6&8˭Mo?s}~ 9Lج3tlrdDt`;xHu>[[gݝ-ā;Ώ R( {>z$[ܱcΥh{dS))S_0'uc'Rw~F>Wi'{$O1 NX$]”a+ܷyaI9%+vٳC٫ \}|NM+g[u56Gg9es &_KF7gxlK&w~kٱ/Yir}gt7p+ bLw paœ^p69 VuNB뗐iw#J8% trFLD Uo@u[z>N qk9^H.q|%Ce>!T]ms[fy˯M; Da 1K\&w z3Xcv!ޕBJij3!AHF$KיQ<6ؔ.` $+ 9nD7$KKU^Z͔oQFQ>,t2* J:xz7(np53 ]ͯ ؾ{!fb;$Ь›젅E\8},*m@vI{i7^+$f@,cuxn0_|g"?pQORZ4v+ >x=@(\$+j|J-[^>h?:YnSn|7dk#i:3ϩ{2U_= <,{ƅԉ\MV%qhs]vk/_N~̜࿀fvV3)aJ A$_gkjOgzIRpR9fo)@fEeIΘ 8Hsʠlf-ZndD,`]n?R7n0gy c!H. 
QywrH{Pb_NmDdOZ'R* v_C۝PM#܀Š_[l7MܰA_k/۰No9 h@ sj/jZ<*Cf6*EG~Ȩ _LZ " ֌hѹÞ()t٦u('귦gO/+D,ځN񉯆xȬaI2_Z)!t ^>:H~os)@* :C O M_ՄX ^I86HMEir{d`@>*O]<$+^uy,;C~LWiLŻEE 7mRd015UcNGGm]Z9B|r7@tP5lW9V7 zޓ !u;c|tk{o ck,1JwIق jXv:Yܬڷ_1WIĺx4^[NMj-,}t=dG-ku~Cs8A9št#p6e%DuV gMŏWi 9mĆۋXdUDMRV bqD<l_hx뒳 t=8sbSqcAi|E$fnsҽR֓xKcRA q9WFΠ'^;ǘ2Q#o= ܻw8bȆ5AMs7-\ 3sHoM lc9F [Ӭd%vsi=fG-q Vz\B0ݑu(*@} .qZ86 rmrV Z^d aA@ w{PHZ<] +׼A%Q~`&YD565%fRm_vkwe Uf3ƆW[ s{WPWOWEN޷&~Vܒ=-Vx iVWpώ0w ] TE'F}CuV_cA^ '}oDxҙiޒ@b_t;^Ϳy$9 \|=:qM׉Vie z~x UV WKW5$ԁ|]V?'f}t-?w創Y/g;Hh_7?[/v~ =ۆHm^yxOt_'V8 n{$ gW,/!dNThRk/hv8b<7>I%G-ѠFEgs7#(|77x~iOWhw#4JtaCwʦA`:=@\f"k}@>å`FsB-_dD8n4 jG_\:AaO_ ӂ*`iiG [`z_-'-*R~|{;Zap]w~ҝi{[JhqOqG)mwh2(k>FX冹\ؾu\80M]ބjs7y]-Ш+QYͣky@tB}*@]ZP8Rj 9 !;3@ : m_y|ېQî?@a갍15_/sv~zGFhvW>Ϟ2v7:ǡ-GG,~WҖש{^v %`{MT.*E*X /z5B]9!5% \8 <طzq=!;H|^~Ryd\YS]v̀eRX|Q6y\! !L.JbMd(}+]7aO RgACjNb^4i*y+@⒯Wy;΀j̶*oY4rS/ oRw7+Neׂvi]̽zz'p2ju帲#u&skd+{$?"'|ݑqfx%+O6=;ɼd$EcD)@%5P~$PߟYFz*T#5T(H_Дd5"օK~4NU<L%! ýq4ϝ)GPnh].Ó coFsFw&V]zTAsg5kA2q%GCO) ;2-Mi #&~jYhR0u+@5O_ @z: U/ q5JoMK{Nܪ#9T0z!QiHFL@Hm9h]B鯑=韀xkt(|34lui3>U]dԶq1cm^T~4:5?J< @ѹSѢ[d SHeׂ@.V7Nǵ(b כ /AWOS 锂j [g=dY5Ѣd]6 K~>+M=':3S˽uHh?j :gSJ k!w#~~3>_`w#rEy\tJ; T ~#/OEk^žFTbսJošY[^ rGnsH7׉i$`44RkuO>/Xi&.Y:'\(m cV&Ϸ"Co 5C#Yٻv!uZ~s6PTӕ}ASty*nY%e {=KB北GW柛K+[o@ iJnJ\m,,ToC]+ 2;7HcA-GhCvjޝNԸr^i EY}{0}ݪ1~CEj?;$SLTT6N\ qC_pKYum_<ABZړ35bGs>\G?_DRVF\ijopb92u&\+#kwS: .CkqRFn|箤ˢjDx/Tp"pj5\9+zd/*?+Yxi;n>ޟ?75L:3?)U$@Wrz=HG=Hz(R OLGz ΂N5ܗ%\᪃te?-D *)^HCFY-rQK[FvL?QErOp‡/o0І[+ ^~w4!).˼+oꀴyJ"HKGvRAƥw E Yy"7J]{f(wps |,,`#1g>]GòBj0Fă8R_E#O$GlE0ҸZx@x}/Hag-ɳ67n)Hfڔ +) )o"S NlͽmϻEZn+ OWYm :fR)U 3?u͐f ji`{{=6xhSZ'j)jI|@?miػ B}ڑS à5@x9{sDǫ@&9\o^t F;a<0=%Bg݈2_1s@yl[nKu|+J$zEh=JlO?oe@t /ty ?dlQ 5[ga/XH7 ICPOɈy@.z׹Orx1K @CqO* }}|-~Ωdw;:: xxn`>n`ɬ>?:Jen_n{d׷? 
[r9JW+:p.ww4PyX<{>O|QStuPR $7h (-x5./ P6xL@d:kum{m;0+%6G CT=A`p_^_oTLTAߏ VǷ%T ?3cy>T47v0'@l3I51ٜW LЀP0wFoBeJ3r5|1;/FczPl~Ѓ,?::`?(}<7mT;Hn2)}h6.?7V~ckF~H˵į0wTF7o,mWy4kNFaOF85%4$!qڛ[2w<Ǻ:9_]>uZ)h0&˱koE >?Rh`eVu)\% Xx>:-ܬ?\!3WjG?~`Ous[sul6It/yFսJUO&̞?%;`|Se9HukGZ^D /ͼ3cn@Uq/倮 #8ZaOh_k zRk-n˚@uIuY@vI)kUpf U^եK 6b kn+1t/.H@\)  =k%x=V}%6X l~ӏ9ݭT-RNr@ay6=vK5<.VyPVBv;&Z-{[ ùE 0K^Q`pc¬v6<=dGm;g[g}v;?O8f^ 0eE 1¸Z(k[J^rC3m {Ʊ4tv9S}ouW';+ؿӶ%YewP|g慯P$ >Fe63T0r*w( W+Lu)hx$!Z:^;z(Q+ nP1g& Ô//vhଡ଼-i&BGvK0~y@0$Lj)P ,Qάqzj'Д>In ~(LӭZf{b|?iFeWwPö= $y`u/zUfg=kcN21X*}<0r}r a@mh{5-#Қ]@^Zr [ޔ+ a6jl:׾ 5}*K0n!l2zq<Pw1/ہ%='X1R7[^+Nvu{"śeE\yAgvS.iat|PLxU71 (MܟN۠)?i2IH @hpҗ; mAB]v{ 4{lA'`Xg;pᥝ0.(k[}fr@.VTK0]@N2 c509*4~an{ +TSܵI%0ɋzB5cRb9!nq(B5qI&yZP};h'b._د @Y ắh V#C`@a+Z_lٵ~y& [w h mƛcKӁy>3C*{i^U>susAİloVkTfa|bKmjw. B@#zr‡]O7D:tNIW\Rl' FwgaPR0y₎*_uD:t&4 cO;@~Ф*;M;@q?L_ ,utٶ! ]yx3U^/G>݌:X kU٢or-@U2_Rƞ0vSj0M'FQ3Pڒeű= ÌSCӌtKM?wMߓ@}X~8KNV^G ?l| XÛ^{L-ʏݛ)!/z|VfB_5' 1#1BeGo..J{d$׃`E j!wEO~ïQx`fnAMsM7>= Y+`-S^@@*zPk/) ΘIe МS``ѫnڣ:i w]]ȑUJsڛ ̌6Il4*5ׂvyb/ O Ył}o)}0ST$fS)0gTOzk@>1m2ݤvD[G'\\ Peݸ@38`a 4qC)vP]/ 7/O~)YasY0&w#3tzi&ȥWS ʕI@JTPZWZr(1~?dq b\$`SH>\fی84~tLFmtË{ih, #y[ϸ@ٛrnՔ8kH2xTzvO$bW@ǭȃok(>ܮ/3m@֓rhU+Yh#7%J߈ HoTWYrBz%.W[Co`_I'-cFUL`sG$ V5'mCHQ,D4j=vҳЊ?yD{wƩtU2 ?wUOWF :\ #_&1NOw]49yWi@1kW\ ؝y %"[aCD6g]EqMJf+fK1 wnÜ)["bz.4L̟,RQgD(cs1 DG~׌0>`ȶry`xmk@i n/lt]%`$_)?R i_^kGǽ'_1h?~e PDGAAof2N(aC1[lh{YPRS8<<+v&ݰRBYO}X^*/O~ޤdYBIU*PF\@ OyW&r ~Շ\TސHq@Tw1K8,y؝J@OY~qj rp~$̞Oqh\@ jR#0T;餅l K,^ 2󉗖Pp"Ffm6>LP ̀ھ;M)z& ({k8埗Omފ;ZZoq[E ID?գsrozi ``O1{q nJ@ }b| X& @P:쮷ԄqD)0޿=uKaO``sMi Qmk/Ȉ{:'fe+u_h/ OzY!v^/'@@A޻+a z6ԶLJe8/I*U>c| ~>/md`T5SaF-D<ЮrƆۭ?YŻXo~Q xu1+I@UAUz* !~gfw[4 as0@ܧ vۗ2MtΠyvLU Y؅l;vfj}) GzrxqtTe gۮyh1UI ,]'eX`J G(P+T˘s3~7sz{^,{·W`PNU:} ([v`|z([rጤPiZ$0B)e? ;ՋC)#zi0ql[/Eϟ <'{ͭ=wVڙ@mZ^l, ҟ đtMVLWKa2Q/~t֯*[HAF&:'򋃡n}W@RףMt䎜Цų3Hȓ i`d>-@!~D٩" ۼd3u}HwjF?3Y#d i$!J"m7e k4Y}d]MQbMk#+6߃_9i{gx).fՌ_1S<}Dw뿊yt-#"کjdQ> )&6켊$'MoyP E7;[,_xZ ߸w #md󱸅eH7rG2HxnRI gCyuvGe3CdݾO?7'=^UȺR" HWOiC6BGnNA1k! 
TmZLY|9$|B=lVnz ɋ)#mf4w4 Xpx,Nik"]C7ע ѢȦiLt 2t/>F*6V\2DgNd@Ӄr/܀_c)mYHrԖiԃ,!YY|V2 2(FG[Į)LA^:t EBlŽ,pIx"sב `v;r8ve$ciJ42m-ԑvD8cy13EGڊ9=27i&&G12z@S;.dv.PkrbKdB牞bI§D}+M&m7S}`GUh",yIɴO $2;< /zb*e_Rrw^zoP 4,K mđ<#%2LcRx!4AȺsφMHFv)DHmv\6nrj~HGG{*N#gW#G&,U]CH@'R|žײhSD˱Sʁmclust/man/0000755000176200001440000000000014241635115012336 5ustar liggesusersmclust/man/priorControl.Rd0000644000176200001440000000321513752165066015333 0ustar liggesusers\name{priorControl} \alias{priorControl} \title{ Conjugate Prior for Gaussian Mixtures. } \description{ Specify a conjugate prior for Gaussian mixtures. } \usage{ priorControl(functionName = "defaultPrior", \dots) } \arguments{ \item{functionName}{ The name of the function specifying the conjugate prior. By default the function \code{\link{defaultPrior}} is used, and this can also be used as a template for alternative specification. } \item{\dots}{ Optional named arguments to the function specified in \code{functionName} together with their values. } } \value{ A list with the function name as the first component. The remaining components (if any) consist of a list of arguments to the function with assigned values. } \details{ The function \code{priorControl} is used to specify a conjugate prior for EM within \emph{MCLUST}.\cr Note that, as described in \code{\link{defaultPrior}}, in the multivariate case only 10 out of 14 models may be used in conjunction with a prior, i.e. those available in \emph{MCLUST} up to version 4.4. } \references{ C. Fraley and A. E. Raftery (2007). Bayesian regularization for normal mixture estimation and model-based clustering. \emph{Journal of Classification 24:155-181}. 
} \seealso{ \code{\link{mclustBIC}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{defaultPrior}} } \examples{ # default prior irisBIC <- mclustBIC(iris[,-5], prior = priorControl()) summary(irisBIC, iris[,-5]) # no prior on the mean; default prior on variance irisBIC <- mclustBIC(iris[,-5], prior = priorControl(shrinkage = 0)) summary(irisBIC, iris[,-5]) } \keyword{cluster} mclust/man/meE.Rd0000644000176200001440000001216113752164615013344 0ustar liggesusers\name{meE} \alias{meE} \alias{meV} \alias{meX} \alias{meEII} \alias{meVII} \alias{meEEI} \alias{meVEI} \alias{meEVI} \alias{meVVI} \alias{meEEE} \alias{meVEE} \alias{meEVE} \alias{meVVE} \alias{meEEV} \alias{meVEV} \alias{meEVV} \alias{meVVV} \alias{meXII} \alias{meXXI} \alias{meXXX} \title{EM algorithm starting with M-step for a parameterized Gaussian mixture model} \description{ Implements the EM algorithm for a parameterized Gaussian mixture model, starting with the maximization step. } \usage{ meE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meX(data, prior = NULL, warn = NULL, \dots) meEII(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVII(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEEI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVEI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEVI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVVI(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEEE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVEE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEVE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVVE(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEEV(data, z, prior=NULL, control=emControl(), Vinv=NULL, 
warn=NULL, \dots) meVEV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meEVV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meVVV(data, z, prior=NULL, control=emControl(), Vinv=NULL, warn=NULL, \dots) meXII(data, prior = NULL, warn = NULL, \dots) meXXI(data, prior = NULL, warn = NULL, \dots) meXXX(data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{Vinv}{ An estimate of the reciprocal hypervolume of the data region, when the model is to include a noise term. Set to a negative value or zero if a noise term is desired, but an estimate is unavailable --- in that case function \code{hypvol} will be used to obtain the estimate. The default is not to assume a noise term in the model through the setting \code{Vinv=NULL}. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is given by \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. 
} \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations.\cr } } \seealso{ \code{\link{em}}, \code{\link{me}}, \code{\link{estep}}, \code{\link{mclust.options}} } \examples{ meVVV(data = iris[,-5], z = unmap(iris[,5])) } \keyword{cluster} mclust/man/estep.Rd0000644000176200001440000000620414124774626013762 0ustar liggesusers\name{estep} \alias{estep} \title{ E-step for parameterized Gaussian mixture models. } \description{ Implements the expectation step of EM algorithm for parameterized Gaussian mixture models. } \usage{ estep(data, modelName, parameters, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. 
} \item{parameters}{ A names list giving the parameters of the model. The components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. If set to NULL or a negative value, the default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ The input parameters. } \item{loglik}{ The log-likelihood for the data in the mixture model. } \item{Attributes}{ \code{"WARNING"}: an appropriate warning if problems are encountered in the computations. 
} } \seealso{ \code{\link{estepE}}, \dots, \code{\link{estepVVV}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{mclust.options}} \code{\link{mclustVariance}} } \examples{ \donttest{ msEst <- mstep(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) names(msEst) estep(modelName = msEst$modelName, data = iris[,-5], parameters = msEst$parameters)} } \keyword{cluster} mclust/man/MclustDR.Rd0000644000176200001440000001157313760217372014337 0ustar liggesusers\name{MclustDR} \alias{MclustDR} \alias{print.MclustDR} \title{Dimension reduction for model-based clustering and classification} \description{ A dimension reduction method for visualizing the clustering or classification structure obtained from a finite mixture of Gaussian densities. } \usage{ MclustDR(object, lambda = 1, normalized = TRUE, Sigma, tol = sqrt(.Machine$double.eps)) } \arguments{ \item{object}{An object of class \code{'Mclust'} or \code{'MclustDA'} resulting from a call to, respectively, \code{\link{Mclust}} or \code{\link{MclustDA}}.} \item{lambda}{A tuning parameter in the range [0,1] as described in Scrucca (2014). The directions that mostly separate the estimated clusters or classes are recovered using the default value 1. Users can set this parameter to balance the relative importance of information derived from cluster/class means and covariances. For instance, a value of 0.5 gives equal importance to differences in means and covariances among clusters/classes.} \item{normalized}{Logical. If \code{TRUE} directions are normalized to unit norm.} \item{Sigma}{Marginal covariance matrix of data. If not provided is estimated by the MLE of observed data.} \item{tol}{A tolerance value.} } \details{ The method aims at reducing the dimensionality by identifying a set of linear combinations, ordered by importance as quantified by the associated eigenvalues, of the original features which capture most of the clustering or classification structure contained in the data. 
Information on the dimension reduction subspace is obtained from the variation on group means and, depending on the estimated mixture model, on the variation on group covariances (see Scrucca, 2010). Observations may then be projected onto such a reduced subspace, thus providing summary plots which help to visualize the underlying structure. The method has been extended to the supervised case, i.e. when the true classification is known (see Scrucca, 2014). This implementation doesn't provide a formal procedure for the selection of dimensionality. A future release will include one or more methods. } \value{ An object of class \code{'MclustDR'} with the following components: \item{call}{The matched call} \item{type}{A character string specifying the type of model for which the dimension reduction is computed. Currently, possible values are \code{"Mclust"} for clustering, and \code{"MclustDA"} or \code{"EDDA"} for classification.} \item{x}{The data matrix.} \item{Sigma}{The covariance matrix of the data.} \item{mixcomp}{A numeric vector specifying the mixture component of each data observation.} \item{class}{A factor specifying the classification of each data observation. For model-based clustering this is equivalent to the corresponding mixture component. For model-based classification this is the known classification.} \item{G}{The number of mixture components.} \item{modelName}{The name of the parameterization of the estimated mixture model(s). 
See \code{\link{mclustModelNames}}.} \item{mu}{A matrix of means for each mixture component.} \item{sigma}{An array of covariance matrices for each mixture component.} \item{pro}{The estimated prior for each mixture component.} \item{M}{The kernel matrix.} \item{lambda}{The tuning parameter.} \item{evalues}{The eigenvalues from the generalized eigen-decomposition of the kernel matrix.} \item{raw.evectors}{The raw eigenvectors from the generalized eigen-decomposition of the kernel matrix, ordered according to the eigenvalues.} \item{basis}{The basis of the estimated dimension reduction subspace.} \item{std.basis}{The basis of the estimated dimension reduction subspace standardized to variables having unit standard deviation.} \item{numdir}{The dimension of the projection subspace.} \item{dir}{The estimated directions, i.e. the data projected onto the estimated dimension reduction subspace.} } \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. Scrucca, L. (2014) Graphical Tools for Model-based Mixture Discriminant Analysis. \emph{Advances in Data Analysis and Classification}, 8(2), pp. 147-165. } \author{Luca Scrucca} %\note{} \seealso{ \code{\link{summary.MclustDR}}, \code{\link{plot.MclustDR}}, \code{\link{Mclust}}, \code{\link{MclustDA}}. 
} \examples{ # clustering data(diabetes) mod <- Mclust(diabetes[,-1]) summary(mod) dr <- MclustDR(mod) summary(dr) plot(dr, what = "scatterplot") plot(dr, what = "evalues") dr <- MclustDR(mod, lambda = 0.5) summary(dr) plot(dr, what = "scatterplot") plot(dr, what = "evalues") # classification data(banknote) da <- MclustDA(banknote[,2:7], banknote$Status, modelType = "EDDA") dr <- MclustDR(da) summary(dr) da <- MclustDA(banknote[,2:7], banknote$Status) dr <- MclustDR(da) summary(dr) } \keyword{multivariate} mclust/man/MclustDA.Rd0000644000176200001440000002066514516406613014316 0ustar liggesusers\name{MclustDA} \alias{MclustDA} \alias{print.MclustDA} \title{MclustDA discriminant analysis} \description{ Discriminant analysis based on Gaussian finite mixture modeling. } \usage{ MclustDA(data, class, G = NULL, modelNames = NULL, modelType = c("MclustDA", "EDDA"), prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), verbose = interactive(), \dots) } \arguments{ \item{data}{ A data frame or matrix giving the training data. } \item{class}{ A vector giving the known class labels (either a numerical value or a character string) for the observations in the training data.} \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the BIC is to be calculated within each class. The default is \code{G = 1:5}.\cr A different set of mixture components for each class can be specified by providing this argument with a list of integers for each class. See the examples below. } \item{modelNames}{ A vector of character strings indicating the models to be fitted by EM within each class (see the description in \code{\link{mclustModelNames}}). A different set of mixture models for each class can be specified by providing this argument with a list of character strings. See the examples below. 
} \item{modelType}{ A character string specifying whether the models given in \code{modelNames} should fit a different number of mixture components and covariance structures for each class (\code{"MclustDA"}, the default) or should be constrained to have a single component for each class with the same covariance structure among classes (\code{"EDDA"}). See Details section and the examples below. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{\link{priorControl}}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{hc}. The default is to compute a hierarchical clustering tree by applying function \code{hc} with \code{modelName = "E"} to univariate data and \code{modelName = "VVV"} to multivariate data or a subset as indicated by the \code{subset} argument. The hierarchical clustering results are used as starting values for EM.} \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase.} } } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when estimation fails. The default is controlled by \code{\link{mclust.options}}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots }{Further arguments passed to or from other methods.} } \value{ An object of class \code{'MclustDA'} providing the optimal (according to BIC) mixture model. 
The details of the output components are as follows: \item{call}{The matched call.} \item{data}{The input data matrix.} \item{class}{The input class labels.} \item{type}{A character string specifying the \code{modelType} estimated.} \item{models}{A list of \code{\link{Mclust}} objects containing information on fitted model for each class.} \item{n}{The total number of observations in the data.} \item{d}{The dimension of the data.} % \item{BIC}{All BIC values.} \item{bic}{Optimal BIC value.} \item{loglik}{Log-likelihood for the selected model.} \item{df}{Number of estimated parameters.} } \details{ The \code{"EDDA"} method for discriminant analysis is described in Bensmail and Celeux (1996), while \code{"MclustDA"} in Fraley and Raftery (2002). } \references{ Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. Bensmail, H., and Celeux, G. (1996) Regularized Gaussian Discriminant Analysis Through Eigenvalue Decomposition.\emph{Journal of the American Statistical Association}, 91, 1743-1748. 
} \author{Luca Scrucca} \seealso{ \code{\link{summary.MclustDA}}, \code{\link{plot.MclustDA}}, \code{\link{predict.MclustDA}}, \code{\link{classError}} } \examples{ odd <- seq(from = 1, to = nrow(iris), by = 2) even <- odd + 1 X.train <- iris[odd,-5] Class.train <- iris[odd,5] X.test <- iris[even,-5] Class.test <- iris[even,5] # common EEE covariance structure (which is essentially equivalent to linear discriminant analysis) irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA", modelNames = "EEE") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # common covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # general covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train) summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) plot(irisMclustDA) plot(irisMclustDA, dimens = 3:4) plot(irisMclustDA, dimens = 4) plot(irisMclustDA, what = "classification") plot(irisMclustDA, what = "classification", newdata = X.test) plot(irisMclustDA, what = "classification", dimens = 3:4) plot(irisMclustDA, what = "classification", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "classification", dimens = 4) plot(irisMclustDA, what = "classification", dimens = 4, newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 4) plot(irisMclustDA, what = "error") plot(irisMclustDA, what = "error", dimens = 3:4) plot(irisMclustDA, what = "error", dimens = 4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 3:4) 
plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 4) \donttest{ # simulated 1D data n <- 250 set.seed(1) triModal <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5)) triClass <- c(rep(1,n), rep(2,n), rep(3,n)) odd <- seq(from = 1, to = length(triModal), by = 2) even <- odd + 1 triMclustDA <- MclustDA(triModal[odd], triClass[odd]) summary(triMclustDA, parameters = TRUE) summary(triMclustDA, newdata = triModal[even], newclass = triClass[even]) plot(triMclustDA, what = "scatterplot") plot(triMclustDA, what = "classification") plot(triMclustDA, what = "classification", newdata = triModal[even]) plot(triMclustDA, what = "train&test", newdata = triModal[even]) plot(triMclustDA, what = "error") plot(triMclustDA, what = "error", newdata = triModal[even], newclass = triClass[even]) # simulated 2D cross data data(cross) odd <- seq(from = 1, to = nrow(cross), by = 2) even <- odd + 1 crossMclustDA <- MclustDA(cross[odd,-1], cross[odd,1]) summary(crossMclustDA, parameters = TRUE) summary(crossMclustDA, newdata = cross[even,-1], newclass = cross[even,1]) plot(crossMclustDA, what = "scatterplot") plot(crossMclustDA, what = "classification") plot(crossMclustDA, what = "classification", newdata = cross[even,-1]) plot(crossMclustDA, what = "train&test", newdata = cross[even,-1]) plot(crossMclustDA, what = "error") plot(crossMclustDA, what = "error", newdata =cross[even,-1], newclass = cross[even,1]) } } \keyword{multivariate} mclust/man/logLik.Mclust.Rd0000644000176200001440000000151514124774626015331 0ustar liggesusers\name{logLik.Mclust} \alias{logLik.Mclust} \title{Log-Likelihood of a \code{Mclust} object} \description{ Returns the log-likelihood for a \code{'Mclust'} object.} \usage{ \method{logLik}{Mclust}(object, \dots) } \arguments{ \item{object}{an object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{Returns an object of class 
\code{'logLik'} with an element providing the maximized log-likelihood, and further arguments giving the number of (estimated) parameters in the model (\code{"df"}) and the sample size (\code{"nobs"}).} \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}.} \examples{ \donttest{ irisMclust <- Mclust(iris[,1:4]) summary(irisMclust) logLik(irisMclust) } } \keyword{multivariate} mclust/man/densityMclust.Rd0000644000176200001440000000617014516406574015502 0ustar liggesusers\name{densityMclust} \alias{densityMclust} \title{Density Estimation via Model-Based Clustering} \description{ Produces a density estimate for each data point using a Gaussian finite mixture model from \code{Mclust}. } \usage{ densityMclust(data, \dots, plot = TRUE) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{\dots }{ Additional arguments for the \code{\link{Mclust}} function. In particular, setting the arguments \code{G} and \code{modelNames} allow to specify the number of mixture components and the type of model to be fitted. By default an "optimal" model is selected based on the BIC criterion. } \item{plot}{ A logical value specifying if the estimated density should be plotted. For more controls on the resulting graph see the associated \code{\link{plot.densityMclust}} method. } } \value{ An object of class \code{densityMclust}, which inherits from \code{Mclust}. This contains all the components described in \code{\link{Mclust}} and the additional element: \item{density}{The density evaluated at the input \code{data} computed from the estimated model.} } %\details{} \references{ Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}.
Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. } \author{Revised version by Luca Scrucca based on the original code by C. Fraley and A.E. Raftery.} \seealso{ \code{\link{plot.densityMclust}}, \code{\link{Mclust}}, \code{\link{summary.Mclust}}, \code{\link{predict.densityMclust}}. } \examples{ dens <- densityMclust(faithful$waiting) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "BIC", legendArgs = list(x = "topright")) plot(dens, what = "density", data = faithful$waiting) dens <- densityMclust(faithful, modelNames = "EEE", G = 3, plot = FALSE) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "density", data = faithful, drawlabels = FALSE, points.pch = 20) plot(dens, what = "density", type = "hdr") plot(dens, what = "density", type = "hdr", prob = c(0.1, 0.9)) plot(dens, what = "density", type = "hdr", data = faithful) plot(dens, what = "density", type = "persp") \donttest{ dens <- densityMclust(iris[,1:4], G = 2) summary(dens, parameters = TRUE) plot(dens, what = "density", data = iris[,1:4], col = "slategrey", drawlabels = FALSE, nlevels = 7) plot(dens, what = "density", type = "hdr", data = iris[,1:4]) plot(dens, what = "density", type = "persp", col = grey(0.9)) } } \keyword{cluster} mclust/man/hcE.Rd0000644000176200001440000000604314124774626013342 0ustar liggesusers\name{hcE} \alias{hcE} \alias{hcV} \alias{hcEII} \alias{hcVII} \alias{hcEEE} \alias{hcVVV} \title{Model-based Hierarchical Clustering} \description{ Agglomerative hierarchical clustering based on maximum likelihood for a Gaussian mixture model parameterized by 
eigenvalue decomposition. } \usage{ hcE(data, partition = NULL, minclus=1, \dots) hcV(data, partition = NULL, minclus = 1, alpha = 1, \dots) hcEII(data, partition = NULL, minclus = 1, \dots) hcVII(data, partition = NULL, minclus = 1, alpha = 1, \dots) hcEEE(data, partition = NULL, minclus = 1, \dots) hcVVV(data, partition = NULL, minclus = 1, alpha = 1, beta = 1, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{partition}{ A numeric or character vector representing a partition of observations (rows) of \code{data}. If provided, group merges will start with this partition. Otherwise, each observation is assumed to be in a cluster by itself at the start of agglomeration. } \item{minclus}{ A number indicating the number of clusters at which to stop the agglomeration. The default is to stop when all observations have been merged into a single cluster. } \item{alpha, beta}{ Additional tuning parameters needed for initialization in some models. For details, see Fraley 1998. The defaults provided are usually adequate. } \item{\dots}{ Catch unused arguments from a \code{do.call} call. } } \value{ A numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of agglomerative hierarchical clustering. } \details{ Most models have memory usage of the order of the square of the number groups in the initial partition for fast execution. Some models, such as equal variance or \code{"EEE"}, do not admit a fast algorithm under the usual agglomerative hierarchical clustering paradigm. These use less memory but are much slower to execute. } \references{ J. D. Banfield and A. E. Raftery (1993). Model-based Gaussian and non-Gaussian Clustering. \emph{Biometrics 49:803-821}. C. Fraley (1998).
Algorithms for model-based Gaussian hierarchical clustering. \emph{SIAM Journal on Scientific Computing 20:270-281}. C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. } \seealso{ \code{\link{hc}}, \code{\link{hclass}} \code{\link{hcRandomPairs}} } \examples{ hcTree <- hcEII(data = iris[,-5]) cl <- hclass(hcTree,c(2,3)) \donttest{ par(pty = "s", mfrow = c(1,1)) clPairs(iris[,-5],cl=cl[,"2"]) clPairs(iris[,-5],cl=cl[,"3"]) par(mfrow = c(1,2)) dimens <- c(1,2) coordProj(iris[,-5], classification=cl[,"2"], dimens=dimens) coordProj(iris[,-5], classification=cl[,"3"], dimens=dimens) } } \keyword{cluster} mclust/man/summary.mclustBIC.Rd0000644000176200001440000000760413175055217016161 0ustar liggesusers\name{summary.mclustBIC} \alias{summary.mclustBIC} \alias{print.summary.mclustBIC} \alias{summary.mclustBIC} \alias{summaryMclustBIC} \alias{summaryMclustBICn} \alias{printSummaryMclustBIC} \alias{printSummaryMclustBICn} \title{Summary function for model-based clustering via BIC} \description{ Optimal model characteristics and classification for model-based clustering via \code{mclustBIC}. } \usage{ \method{summary}{mclustBIC}(object, data, G, modelNames, \dots) } \arguments{ \item{object}{ An \code{'mclustBIC'} object, which is the result of applying \code{mclustBIC} to \code{data}. } \item{data}{ The matrix or vector of observations used to generate `object'. } \item{G}{ A vector of integers giving the numbers of mixture components (clusters) from which the best model according to BIC will be selected (\code{as.character(G)} must be a subset of the row names of \code{object}). The default is to select the best model for all numbers of mixture components used to obtain \code{object}. 
} \item{modelNames}{ A vector of character strings giving the model parameterizations from which the best model according to BIC will be selected (\code{as.character(model)} must be a subset of the column names of \code{object}). The default is to select the best model for parameterizations used to obtain \code{object}. } \item{\dots}{ Not used. For generic/method consistency. } } \value{ A list giving the optimal (according to BIC) parameters, conditional probabilities \code{z}, and log-likelihood, together with the associated classification and its uncertainty. The details of the output components are as follows: \item{modelName}{ A character string denoting the model corresponding to the optimal BIC. } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of mixture components in the model corresponding to the optimal BIC. } \item{bic}{ The optimal BIC value. } \item{loglik}{ The log-likelihood corresponding to the optimal BIC. } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th class. } \item{classification}{ \code{map(z)}: The classification corresponding to \code{z}. } \item{uncertainty}{ The uncertainty associated with the classification.
} \item{Attributes:}{ \code{"bestBICvalues"} Some of the best bic values for the analysis.\cr \code{"prior"} The prior as specified in the input.\cr \code{"control"} The control parameters for EM as specified in the input.\cr \code{"initialization"} The parameters used to initial EM for computing the maximum likelihood values used to obtain the BIC. } } \seealso{ \code{\link{mclustBIC}} \code{\link{mclustModel}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) summary(irisBIC, iris[,-5]) summary(irisBIC, iris[,-5], G = 1:6, modelNames = c("VII", "VVI", "VVV")) } \keyword{cluster} % docclass is function mclust/man/covw.Rd0000644000176200001440000000210513465001006013572 0ustar liggesusers\name{covw} \alias{covw} \title{Weighted means, covariance and scattering matrices conditioning on a weighted matrix} \description{ Compute efficiently (via Fortran code) the means, covariance and scattering matrices conditioning on a weighted or indicator matrix } \usage{ covw(X, Z, normalize = TRUE) } \arguments{ \item{X}{A \eqn{(n x p)} data matrix, with \eqn{n} observations on \eqn{p} variables.} \item{Z}{A \eqn{(n x G)} matrix of weights, with \eqn{G} number of groups.} \item{normalize}{A logical indicating if rows of \code{Z} should be normalized to sum to one.} } \value{A list with the following components: \item{mean}{A \eqn{(p x G)} matrix of weighted means.} \item{S}{A \eqn{(p x p x G)} array of weighted covariance matrices.} \item{W}{A \eqn{(p x p x G)} array of weighted scattering matrices.} } %\seealso{} \author{M. Fop and L. 
Scrucca} \examples{ # Z as an indicator matrix X <- iris[,1:4] Z <- unmap(iris$Species) str(covw(X, Z)) # Z as a matrix of weights mod <- Mclust(X, G = 3, modelNames = "VVV") str(covw(X, mod$z)) } \keyword{multivariate} mclust/man/randProj.Rd0000644000176200001440000001460514124774626014415 0ustar liggesusers\name{randProj} \alias{randProj} \title{Random projections of multidimensional data modeled by an MVN mixture} \description{ Plots random projections given multidimensional data and parameters of an MVN mixture model for the data. } \usage{ randProj(data, seeds = NULL, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), quantiles = c(0.75, 0.95), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, cex = 1, PCH = ".", main = FALSE, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{seeds}{ An integer value or a vector of integer values to be used as seed for random number generation. If multiple values are provided, then each seed should produce a different projection. By default, a single seed is drawn randomly, so each call of \code{randProj()} produces different projections. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification.
See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following three options: \code{"classification"} (default), \code{"error"}, \code{"uncertainty"}. } \item{quantiles}{ A vector of length 2 giving quantiles used in plotting uncertainty. The smallest symbols correspond to the smallest quantile (lowest uncertainty), medium-sized (open) symbols to points falling between the given quantiles, and large (filled) symbols to those in the largest quantile (highest uncertainty). The default is \emph{(0.75,0.95)}. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances in case of \code{"classification"} or \code{"uncertainty"} plots. } \item{fillEllipses}{ A logical specifying whether or not to fill ellipses with transparent colors when \code{addEllipses = TRUE}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). 
The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{scale}{ A logical variable indicating whether or not the two chosen dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. Default: \code{scale=FALSE} } \item{xlim, ylim}{ Optional arguments specifying bounds for the ordinate, abscissa of the plot. This may be useful for when comparing plots. } \item{xlab, ylab}{ Optional arguments specifying the labels for, respectively, the horizontal and vertical axis. } \item{cex}{ A numerical value specifying the size of the plotting symbols. The default value is 1. } \item{PCH}{ An argument specifying the symbol to be used when a classification has not been specified for the data. The default value is a small dot ".". } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing a random two-dimensional projection of the data, together with the location of the mixture components, classification, uncertainty, and/or classification errors. The function also returns an invisible list with components \code{basis}, the randomly generated basis of the projection subspace, \code{data}, a matrix of projected data, and \code{mu} and \code{sigma} the component parameters transformed to the projection subspace.
} \seealso{ \code{\link{clPairs}}, \code{\link{coordProj}}, \code{\link{mclust2Dplot}}, \code{\link{mclust.options}} } \examples{ \donttest{ est <- meVVV(iris[,-5], unmap(iris[,5])) par(pty = "s", mfrow = c(1,1)) randProj(iris[,-5], seeds=1:3, parameters = est$parameters, z = est$z, what = "classification", main = TRUE) randProj(iris[,-5], seeds=1:3, parameters = est$parameters, z = est$z, truth = iris[,5], what = "error", main = TRUE) randProj(iris[,-5], seeds=1:3, parameters = est$parameters, z = est$z, what = "uncertainty", main = TRUE) } } \keyword{cluster} mclust/man/MclustBootstrap.Rd0000644000176200001440000001066314241634641016003 0ustar liggesusers\name{MclustBootstrap} \alias{MclustBootstrap} \alias{print.MclustBootstrap} \title{Resampling-based Inference for Gaussian finite mixture models} \description{Bootstrap or jackknife estimation of standard errors and percentile bootstrap confidence intervals for the parameters of a Gaussian mixture model.} \usage{ MclustBootstrap(object, nboot = 999, type = c("bs", "wlbs", "pb", "jk"), max.nonfit = 10*nboot, verbose = interactive(), \dots) } \arguments{ \item{object}{An object of class \code{'Mclust'} or \code{'densityMclust'} providing an estimated Gaussian mixture model.} \item{nboot}{The number of bootstrap replications.} \item{type}{A character string specifying the type of resampling to use: \describe{ \item{\code{"bs"}}{nonparametric bootstrap} \item{\code{"wlbs"}}{weighted likelihood bootstrap} \item{\code{"pb"}}{parametric bootstrap} \item{\code{"jk"}}{jackknife} } } \item{max.nonfit}{The maximum number of non-estimable models allowed.} \item{verbose}{A logical controlling if a text progress bar is displayed during the resampling procedure. 
By default it is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise.} \item{\dots}{Further arguments passed to or from other methods.} } \details{For a fitted Gaussian mixture model with \code{object$G} mixture components and covariances parameterisation \code{object$modelName}, this function returns either the bootstrap distribution or the jackknife distribution of mixture parameters. In the former case, the nonparametric bootstrap or the weighted likelihood bootstrap approach could be used, so the bootstrap procedure generates \code{nboot} bootstrap samples of the same size as the original data by resampling with replacement from the observed data. In the jackknife case, the procedure considers all the samples obtained by omitting one observation at a time. The resulting resampling distribution can then be used to obtain standard errors and percentile confidence intervals by the use of \code{\link{summary.MclustBootstrap}} function.} \value{An object of class \code{'MclustBootstrap'} with the following components: \item{n}{The number of observations in the data.} \item{d}{The dimension of the data.} \item{G}{A value specifying the number of mixture components.} \item{modelName}{A character string specifying the mixture model covariances parameterisation (see \code{\link{mclustModelNames}}).} \item{parameters}{A list of estimated parameters for the mixture components with the following components: \describe{ \item{\code{pro}}{a vector of mixing proportions.} \item{\code{mean}}{a matrix of means for each component.} \item{\code{variance}}{an array of covariance matrices for each component.} } } \item{nboot}{The number of bootstrap replications if \code{type = "bs"} or \code{type = "wlbs"}.
The sample size if \code{type = "jk"}.} \item{type}{The type of resampling approach used.} \item{nonfit}{The number of resamples that did not converge during the procedure.} \item{pro}{A matrix of dimension (\code{nboot} x \code{G}) containing the bootstrap distribution for the mixing proportion.} \item{mean}{An array of dimension (\code{nboot} x \code{d} x \code{G}), where \code{d} is the dimension of the data, containing the bootstrap distribution for the component means.} \item{variance}{An array of dimension (\code{nboot} x \code{d} x \code{d} x \code{G}), where \code{d} is the dimension of the data, containing the bootstrap distribution for the component covariances.} } \references{ Davison, A. and Hinkley, D. (1997) \emph{Bootstrap Methods and Their Applications}. Cambridge University Press. McLachlan, G.J. and Peel, D. (2000) \emph{Finite Mixture Models}. Wiley. O'Hagan A., Murphy T. B., Gormley I. C. and Scrucca L. (2015) On Estimation of Parameter Uncertainty in Model-Based Clustering. Submitted to \emph{Computational Statistics}. } \seealso{\code{\link{summary.MclustBootstrap}}, \code{\link{plot.MclustBootstrap}}, \code{\link{Mclust}}, \code{\link{densityMclust}}.} \examples{ \donttest{ data(diabetes) X <- diabetes[,-1] modClust <- Mclust(X) bootClust <- MclustBootstrap(modClust) summary(bootClust, what = "se") summary(bootClust, what = "ci") data(acidity) modDens <- densityMclust(acidity, plot = FALSE) modDens <- MclustBootstrap(modDens) summary(modDens, what = "se") summary(modDens, what = "ci") } } \keyword{htest} \keyword{cluster} mclust/man/densityMclust.diagnostic.Rd0000644000176200001440000000450314516406617017631 0ustar liggesusers\name{densityMclust.diagnostic} \alias{densityMclust.diagnostic} \title{Diagnostic plots for \code{mclustDensity} estimation} \description{ Diagnostic plots for density estimation. Only available for the one-dimensional case.
} \usage{ densityMclust.diagnostic(object, type = c("cdf", "qq"), col = c("black", "black"), lwd = c(2,1), lty = c(1,1), legend = TRUE, grid = TRUE, \dots) } \arguments{ \item{object}{An object of class \code{'mclustDensity'} obtained from a call to \code{\link{densityMclust}} function.} \item{type}{The type of graph requested: \describe{ \item{\code{"cdf"} =}{a plot of the estimated CDF versus the empirical distribution function.} \item{\code{"qq"} =}{a Q-Q plot of sample quantiles versus the quantiles obtained from the inverse of the estimated cdf.} } } \item{col}{A pair of values for the color to be used for plotting, respectively, the estimated CDF and the empirical cdf.} \item{lwd}{A pair of values for the line width to be used for plotting, respectively, the estimated CDF and the empirical cdf.} \item{lty}{A pair of values for the line type to be used for plotting, respectively, the estimated CDF and the empirical cdf.} \item{legend}{A logical indicating if a legend must be added to the plot of fitted CDF vs the empirical CDF.} \item{grid}{A logical indicating if a \code{\link{grid}} should be added to the plot.} \item{\dots}{Additional arguments.} } \details{ The two diagnostic plots for density estimation in the one-dimensional case are discussed in Loader (1999, pp. 87-90). } % \value{} \references{ Loader C. (1999), Local Regression and Likelihood. New York, Springer. Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ } \author{Luca Scrucca} \seealso{ \code{\link{densityMclust}}, \code{\link{plot.densityMclust}}.
} \examples{ \donttest{ x <- faithful$waiting dens <- densityMclust(x, plot = FALSE) plot(dens, x, what = "diagnostic") # or densityMclust.diagnostic(dens, type = "cdf") densityMclust.diagnostic(dens, type = "qq") } } \keyword{cluster} \keyword{dplot} mclust/man/gmmhd.Rd0000644000176200001440000001202114124774626013730 0ustar liggesusers\name{gmmhd} \alias{gmmhd} \alias{print.gmmhd} \alias{summary.gmmhd} \alias{print.summary.gmmhd} \alias{plot.gmmhd} \alias{gmmhdClusterCores} \alias{gmmhdClassify} \title{Identifying Connected Components in Gaussian Finite Mixture Models for Clustering} \description{ Starting with the density estimate obtained from a fitted Gaussian finite mixture model, cluster cores are identified from the connected components at a given density level. Once cluster cores are identified, the remaining observations are allocated to those cluster cores for which the probability of cluster membership is the highest. } \usage{ gmmhd(object, ngrid = min(round((log(nrow(data)))*10), nrow(data)), dr = list(d = 3, lambda = 1, cumEvalues = NULL, mindir = 2), classify = list(G = 1:5, modelNames = mclust.options("emModelNames")[-c(8, 10)]), \dots) \method{plot}{gmmhd}(x, what = c("mode", "cores", "clusters"), \dots) } \arguments{ \item{object}{An object returned by \code{\link{Mclust}}.} \item{ngrid}{An integer specifying the number of grid points used to compute the density levels.} \item{dr}{A list of parameters used in the dimension reduction step.} \item{classify}{A list of parameters used in the classification step.} \item{x}{An object of class \code{'gmmhd'} as returned by the function \code{gmmhd}.} \item{what}{A string specifying the type of plot to be produced. See Examples section.} \item{\dots}{further arguments passed to or from other methods.} } \details{ Model-based clustering associates each component of a finite mixture distribution to a group or cluster. 
An underlying implicit assumption is that a one-to-one correspondence exists between mixture components and clusters. However, a single Gaussian density may not be sufficient, and two or more mixture components could be needed to reasonably approximate the distribution within a homogeneous group of observations. This function implements the methodology proposed by Scrucca (2016) based on the identification of high density regions of the underlying density function. Starting with an estimated Gaussian finite mixture model, the corresponding density estimate is used to identify the cluster cores, i.e. those data points which form the core of the clusters. These cluster cores are obtained from the connected components at a given density level \eqn{c}. A mode function gives the number of connected components as the level \eqn{c} is varied. Once cluster cores are identified, the remaining observations are allocated to those cluster cores for which the probability of cluster membership is the highest. The method usually improves the identification of non-Gaussian clusters compared to a fully parametric approach. Furthermore, it enables the identification of clusters which cannot be obtained by merging mixture components, and it can be straightforwardly extended to cases of higher dimensionality. } \value{ A list of class \code{gmmhd} with the following components: \item{Mclust}{The input object of class \code{"Mclust"} representing an estimated Gaussian finite mixture model.} \item{MclustDA}{An object of class \code{"MclustDA"} containing the model used for the classification step.} \item{MclustDR}{An object of class \code{"MclustDR"} containing the dimension reduction step if performed, otherwise \code{NULL}.} \item{x}{The data used in the algorithm. 
This can be the input data or a projection if a preliminary dimension reduction step is performed.} \item{density}{The density estimated from the input Gaussian finite mixture model evaluated at the input data.} \item{con}{A list of connected components at each step.} \item{nc}{A vector giving the number of connected components (i.e. modes) at each step.} \item{pn}{Vector of values over a uniform grid of proportions of length \code{ngrid}.} \item{qn}{Vector of density quantiles corresponding to proportions \code{pn}.} \item{pc}{Vector of empirical proportions corresponding to quantiles \code{qn}.} \item{clusterCores}{Vector of cluster cores numerical labels; \code{NA}s indicate that an observation does not belong to any cluster core.} \item{cluster}{Vector of numerical labels giving the final clustering.} \item{numClusters}{An integer giving the number of clusters.} } \references{ Scrucca, L. (2016) Identifying connected components in Gaussian finite mixture models for clustering. \emph{Computational Statistics & Data Analysis}, 93, 5-17. } \author{ Luca Scrucca \email{luca.scrucca@unipg.it} } %\note{} \seealso{\code{\link{Mclust}}} \examples{ \donttest{ data(faithful) mod <- Mclust(faithful) summary(mod) plot(as.densityMclust(mod), faithful, what = "density", points.pch = mclust.options("classPlotSymbols")[mod$classification], points.col = mclust.options("classPlotColors")[mod$classification]) GMMHD <- gmmhd(mod) summary(GMMHD) plot(GMMHD, what = "mode") plot(GMMHD, what = "cores") plot(GMMHD, what = "clusters") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory.
Use one of RShowDoc("KEYWORDS") \keyword{cluster} mclust/man/plot.densityMclust.Rd0000644000176200001440000001160114241634613016452 0ustar liggesusers\name{plot.densityMclust} \alias{plot.densityMclust} \alias{plotDensityMclust1} \alias{plotDensityMclust2} \alias{plotDensityMclustd} \title{Plots for Mixture-Based Density Estimate} \description{ Plotting methods for an object of class \code{'mclustDensity'}. Available graphs are plot of BIC values and density for univariate and bivariate data. For higher data dimensionality a scatterplot matrix of pairwise densities is drawn. } \usage{ \method{plot}{densityMclust}(x, data = NULL, what = c("BIC", "density", "diagnostic"), \dots) plotDensityMclust1(x, data = NULL, col = gray(0.3), hist.col = "lightgrey", hist.border = "white", breaks = "Sturges", \dots) plotDensityMclust2(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, \dots) plotDensityMclustd(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, gap = 0.2, \dots) } \arguments{ \item{x}{An object of class \code{'mclustDensity'} obtained from a call to \code{\link{densityMclust}} function.} \item{data}{Optional data points.} \item{what}{The type of graph requested: \describe{ \item{\code{"density"} =}{a plot of estimated density; if \code{data} is also provided the density is plotted over data points (see Details section).} \item{\code{"BIC"} =}{a plot of BIC values for the estimated models versus the number of components.} \item{\code{"diagnostic"} =}{diagnostic plots (only available for the one-dimensional case, see \code{\link{densityMclust.diagnostic}})} } } \item{col}{The color to be used to draw the density line in 1-dimension or contours in higher dimensions.} \item{hist.col}{The color to be used to fill the bars of the histogram.} \item{hist.border}{The color of the border around the bars of the histogram.} 
\item{breaks}{See the argument in function \code{\link[graphics]{hist}}.} \item{points.pch, points.col, points.cex}{The character symbols, colors, and magnification to be used for plotting \code{data} points.} \item{nlevels}{An integer, the number of levels to be used in plotting contour densities.} \item{levels}{A vector of density levels at which to draw the contour lines.} \item{prob}{A vector of probability levels for computing HDR. Only used if \code{type = "hdr"} and supersede previous \code{nlevels} and \code{levels} arguments.} \item{gap}{Distance between subplots, in margin lines, for the matrix of pairwise scatterplots.} \item{\dots}{Additional arguments passed to \code{\link{surfacePlot}}.} } \details{The function \code{plot.densityMclust} allows to obtain the plot of estimated density or the graph of BIC values for evaluated models. If \code{what = "density"} the produced plot depends on the dimensionality of the data. For one-dimensional data a call with no \code{data} provided produces a plot of the estimated density over a sensible range of values. If \code{data} is provided the density is over-plotted on a histogram for the observed data. For two-dimensional data further arguments available are those accepted by the \code{\link{surfacePlot}} function. In particular, the density can be represented through \code{"contour"}, \code{"hdr"}, \code{"image"}, and \code{"persp"} type of graph. For \code{type = "hdr"} Highest Density Regions (HDRs) are plotted for probability levels \code{prob}. See \code{\link{hdrlevels}} for details. For higher dimensionality a scatterplot matrix of pairwise projected densities is drawn. } % \value{} \author{Luca Scrucca} \seealso{ \code{\link{densityMclust}}, \code{\link{surfacePlot}}, \code{\link{densityMclust.diagnostic}}, \code{\link{Mclust}}.
} \examples{ \donttest{ dens <- densityMclust(faithful$waiting, plot = FALSE) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "BIC", legendArgs = list(x = "topright")) plot(dens, what = "density", data = faithful$waiting) dens <- densityMclust(faithful, plot = FALSE) summary(dens) summary(dens, parameters = TRUE) plot(dens, what = "density", data = faithful, drawlabels = FALSE, points.pch = 20) plot(dens, what = "density", type = "hdr") plot(dens, what = "density", type = "hdr", prob = seq(0.1, 0.9, by = 0.1)) plot(dens, what = "density", type = "hdr", data = faithful) plot(dens, what = "density", type = "persp") dens <- densityMclust(iris[,1:4], plot = FALSE) summary(dens, parameters = TRUE) plot(dens, what = "density", data = iris[,1:4], col = "slategrey", drawlabels = FALSE, nlevels = 7) plot(dens, what = "density", type = "hdr", data = iris[,1:4]) plot(dens, what = "density", type = "persp", col = grey(0.9)) } } \keyword{cluster} \keyword{dplot} mclust/man/predict.MclustDA.Rd0000644000176200001440000000305514124774626015750 0ustar liggesusers\name{predict.MclustDA} \alias{predict.MclustDA} \title{Classify multivariate observations by Gaussian finite mixture modeling} \description{Classify multivariate observations based on Gaussian finite mixture models estimated by \code{\link{MclustDA}}.} \usage{ \method{predict}{MclustDA}(object, newdata, prop = object$prop, \dots) } \arguments{ \item{object}{an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} \item{newdata}{a data frame or matrix giving the data. 
If missing the training data obtained from the call to \code{\link{MclustDA}} are classified.} \item{prop}{the class proportions or prior class probabilities to belong to each class; by default, this is set at the class proportions in the training data.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list with the following components: \item{classification}{a factor of predicted class labels for \code{newdata}.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} } \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDA}}.} \examples{ \donttest{ odd <- seq(from = 1, to = nrow(iris), by = 2) even <- odd + 1 X.train <- iris[odd,-5] Class.train <- iris[odd,5] X.test <- iris[even,-5] Class.test <- iris[even,5] irisMclustDA <- MclustDA(X.train, Class.train) predTrain <- predict(irisMclustDA) predTrain predTest <- predict(irisMclustDA, X.test) predTest } } \keyword{multivariate} mclust/man/acidity.Rd0000644000176200001440000000230714327442052014256 0ustar liggesusers\name{acidity} \alias{acidity} \docType{data} \title{Acidity data} \description{ Acidity index measured in a sample of 155 lakes in the Northeastern United States. Following Crawford et al. (1992, 1994), the data are expressed as log(ANC+50), where ANC is the acidity neutralising capacity value. The data were also used to fit mixtures of Gaussian distributions by Richardson and Green (1997), and by McLachlan and Peel (2000, Sec. 6.6.2). } \usage{data(acidity)} \source{\code{https://www.stats.bris.ac.uk/~peter/mixdata}} \references{ Crawford, S. L. (1994) An application of the Laplace method to finite mixture distribution. \emph{Journal of the American Statistical Association}, 89, 259--267. Crawford, S. L., DeGroot, M. H., Kadane, J. B., and Small, M. J. (1994) Modeling lake chemistry distributions: Approximate Bayesian methods for estimating a finite mixture model. 
\emph{Technometrics}, 34, 441--453. McLachlan, G. and Peel, D. (2000) \emph{Finite Mixture Models}. Wiley, New York. Richardson, S. and Green, P. J. (1997) On Bayesian analysis of mixtures with unknown number of components (with discussion). \emph{Journal of the Royal Statistical Society, Series B}, 59, 731--792. } \keyword{datasets} mclust/man/plot.MclustDR.Rd0000644000176200001440000001142514124774626015315 0ustar liggesusers\name{plot.MclustDR} \alias{plot.MclustDR} \alias{plotEvalues.MclustDR} \title{Plotting method for dimension reduction for model-based clustering and classification} \description{ Graphs data projected onto the estimated subspace for model-based clustering and classification. } \usage{ \method{plot}{MclustDR}(x, dimens, what = c("scatterplot", "pairs", "contour", "classification", "boundaries", "density", "evalues"), symbols, colors, col.contour = gray(0.7), col.sep = grey(0.4), ngrid = 200, nlevels = 5, asp = NULL, \dots) } \arguments{ \item{x}{ An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}. } \item{dimens}{ A vector of integers giving the dimensions of the desired coordinate projections for multivariate data. } \item{what}{ The type of graph requested: \describe{ \item{\code{"scatterplot"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} and with data points marked according to the corresponding mixture component. By default, the first two directions are selected for plotting.} \item{\code{"pairs"} =}{a scatterplot matrix of data projected onto the estimated subspace and with data points marked according to the corresponding mixture component. 
By default, all the available directions are used, unless they have been specified by \code{dimens}.} \item{\code{"contour"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} (by default, the first two directions) with density contours for classes or clusters and data points marked according to the corresponding mixture component.} \item{\code{"classification"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} (by default, the first two directions) with classification region and data points marked according to the corresponding mixture component.} \item{\code{"boundaries"} =}{a two-dimensional plot of data projected onto the first two directions specified by \code{dimens} (by default, the first two directions) with uncertainty boundaries and data points marked according to the corresponding mixture component. The uncertainty is shown using a greyscale with darker regions indicating higher uncertainty. } \item{\code{"density"} =}{a one-dimensional plot of estimated density for the first direction specified by \code{dimens} (by default, the first one). A set of box-plots for each estimated cluster or known class are also shown at the bottom of the graph. } } } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique mixture component. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique cluster or known class. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotColors")}. 
} \item{col.contour}{ The color of contours in case \code{what = "contour"}. } \item{col.sep}{ The color of classification boundaries in case \code{what = "classification"}. } \item{ngrid}{ An integer specifying the number of grid points to use in evaluating the classification regions. } \item{nlevels}{ The number of levels to use in case \code{what = "contour"}. } \item{asp}{For scatterplots the \eqn{y/x} aspect ratio, see \code{\link{plot.window}}. } \item{\dots}{further arguments passed to or from other methods.} } %\details{} %\value{} \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. } \author{Luca Scrucca} %\note{} \seealso{\link{MclustDR}} \examples{ \donttest{ mod <- Mclust(iris[,1:4], G = 3) dr <- MclustDR(mod, lambda = 0.5) plot(dr, what = "evalues") plot(dr, what = "pairs") plot(dr, what = "scatterplot", dimens = c(1,3)) plot(dr, what = "contour") plot(dr, what = "classification", ngrid = 200) plot(dr, what = "boundaries", ngrid = 200) plot(dr, what = "density") plot(dr, what = "density", dimens = 2) data(banknote) da <- MclustDA(banknote[,2:7], banknote$Status, G = 1:3) dr <- MclustDR(da) plot(dr, what = "evalues") plot(dr, what = "pairs") plot(dr, what = "contour") plot(dr, what = "classification", ngrid = 200) plot(dr, what = "boundaries", ngrid = 200) plot(dr, what = "density") plot(dr, what = "density", dimens = 2) } } \keyword{multivariate} mclust/man/clustCombi-internals.Rd0000644000176200001440000000034612460535131016727 0ustar liggesusers\name{clustCombi-internal} \title{Internal clustCombi functions} \alias{combi} \alias{pcws2_reg} \alias{pcws3_reg} \alias{xlog} \description{ Internal functions not intended to be called directly by users. 
} \keyword{internal} mclust/man/partconv.Rd0000644000176200001440000000164012460535131014460 0ustar liggesusers\name{partconv} \alias{partconv} \title{Numeric Encoding of a Partitioning} \description{ Converts a vector interpreted as a classification or partitioning into a numeric vector. } \usage{ partconv(x, consec=TRUE) } \arguments{ \item{x}{ A vector interpreted as a classification or partitioning. } \item{consec}{ Logical value indicating whether or not consecutive class numbers should be used. } } \value{ Numeric encoding of \code{x}. When \code{consec = TRUE}, the distinct values in \code{x} are numbered by the order in which they appear. When \code{consec = FALSE}, each distinct value in \code{x} is numbered by the index corresponding to its first appearance in \code{x}. } \seealso{ \code{\link{partuniq}} } \examples{ partconv(iris[,5]) set.seed(0) cl <- sample(LETTERS[1:9], 25, replace=TRUE) partconv(cl, consec=FALSE) partconv(cl, consec=TRUE) } \keyword{cluster} mclust/man/mclustModelNames.Rd0000644000176200001440000000373713752164012016112 0ustar liggesusers\name{mclustModelNames} \alias{mclustModelNames} \title{ MCLUST Model Names } \description{ Description of model names used in the \emph{MCLUST} package. 
} \usage{ mclustModelNames(model) } \arguments{ \item{model}{A string specifying the model.} } \details{ The following models are available in package \pkg{mclust}:\cr \bold{univariate mixture} \cr \describe{ \item{\code{"E"}}{equal variance (one-dimensional)} \item{\code{"V"}}{variable/unequal variance (one-dimensional)} } \bold{multivariate mixture}\cr \describe{ \item{\code{"EII"}}{spherical, equal volume} \item{\code{"VII"}}{spherical, unequal volume} \item{\code{"EEI"}}{diagonal, equal volume and shape} \item{\code{"VEI"}}{diagonal, varying volume, equal shape} \item{\code{"EVI"}}{diagonal, equal volume, varying shape} \item{\code{"VVI"}}{diagonal, varying volume and shape} \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation} \item{\code{"VEE"}}{ellipsoidal, equal shape and orientation (*)} \item{\code{"EVE"}}{ellipsoidal, equal volume and orientation (*)} \item{\code{"VVE"}}{ellipsoidal, equal orientation (*)} \item{\code{"EEV"}}{ellipsoidal, equal volume and equal shape} \item{\code{"VEV"}}{ellipsoidal, equal shape} \item{\code{"EVV"}}{ellipsoidal, equal volume (*)} \item{\code{"VVV"}}{ellipsoidal, varying volume, shape, and orientation} } \bold{single component}\cr \describe{ \item{\code{"X"}}{univariate normal} \item{\code{"XII"}}{spherical multivariate normal} \item{\code{"XXI"}}{diagonal multivariate normal} \item{\code{"XXX"}}{ellipsoidal multivariate normal} } (*) new models in \pkg{mclust} version >= 5.0.0. 
} \value{Returns a list with the following components: \item{model}{a character string indicating the model (as in input).} \item{type}{the description of the indicated model (see Details section).} } \seealso{ \code{\link{Mclust}}, \code{\link{mclustBIC}} } \examples{ mclustModelNames("E") mclustModelNames("EEE") mclustModelNames("VVV") mclustModelNames("XXI") } \keyword{cluster} mclust/man/banknote.Rd0000644000176200001440000000141012501077123014416 0ustar liggesusers\name{banknote} \alias{banknote} \docType{data} \title{Swiss banknotes data} \description{ The data set contains six measurements made on 100 genuine and 100 counterfeit old-Swiss 1000-franc bank notes.} \usage{data(banknote)} \format{A data frame with the following variables: \describe{ \item{Status}{the status of the banknote: \code{genuine} or \code{counterfeit}} \item{Length}{Length of bill (mm)} \item{Left}{Width of left edge (mm)} \item{Right}{Width of right edge (mm)} \item{Bottom}{Bottom margin width (mm)} \item{Top}{Top margin width (mm)} \item{Diagonal}{Length of diagonal (mm)} } } \source{Flury, B. and Riedwyl, H. (1988). \emph{Multivariate Statistics: A practical approach.} London: Chapman & Hall, Tables 1.1 and 1.2, pp. 5-8.} \keyword{datasets} mclust/man/wreath.Rd0000644000176200001440000000103113175055360014115 0ustar liggesusers\name{wreath} \alias{wreath} \title{Data Simulated from a 14-Component Mixture} \usage{data(wreath)} \description{ A dataset consisting of 1000 observations drawn from a 14-component normal mixture in which the covariances of the components have the same size and shape but differ in orientation. } \references{ C. Fraley, A. E. Raftery and R. Wehrens (2005). Incremental model-based clustering for large datasets with small clusters. \emph{Journal of Computational and Graphical Statistics 14:1:18}. 
} \keyword{datasets} mclust/man/mclustICL.Rd0000644000176200001440000001044214124774626014500 0ustar liggesusers\name{mclustICL} \alias{mclustICL} \alias{print.mclustICL} \alias{summary.mclustICL} \alias{print.summary.mclustICL} \title{ICL Criterion for Model-Based Clustering} \description{ ICL (Integrated Complete-data Likelihood) for parameterized Gaussian mixture models fitted by EM algorithm initialized by model-based hierarchical clustering. } \usage{ mclustICL(data, G = NULL, modelNames = NULL, initialization = list(hcPairs = NULL, subset = NULL, noise = NULL), x = NULL, \dots) \method{summary}{mclustICL}(object, G, modelNames, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the criteria should be calculated. The default is \code{G = 1:9}. } \item{modelNames}{ A vector of character strings indicating the models to be fitted in the EM phase of clustering. The help file for \code{\link{mclustModelNames}} describes the available models. The default is: \describe{ \item{\code{c("E", "V")}}{for univariate data} \item{\code{mclust.options("emModelNames")}}{for multivariate data (n > d)} \item{\code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")}}{the spherical and diagonal models for multivariate data (n <= d)} } } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{hc}. For multivariate data, the default is to compute a hierarchical clustering tree by applying function \code{hc} with \code{modelName = "VVV"} to the data or a subset as indicated by the \code{subset} argument. The hierarchical clustering results are to start EM. 
For univariate data, the default is to use quantiles to start EM. } \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase. } } } \item{x}{ An object of class \code{'mclustICL'}. If supplied, \code{mclustICL} will use the settings in \code{x} to produce another object of class \code{'mclustICL'}, but with \code{G} and \code{modelNames} as specified in the arguments. Models that have already been computed in \code{x} are not recomputed. All arguments to \code{mclustICL} except \code{data}, \code{G} and \code{modelNames} are ignored and their values are set as specified in the attributes of \code{x}. Defaults for \code{G} and \code{modelNames} are taken from \code{x}. } \item{\dots}{ Further arguments used in the call to \code{\link{Mclust}}. See also \code{\link{mclustBIC}}. } \item{object}{ An object of class \code{'mclustICL'} resulting from a call to \code{mclustICL} to be summarized. } } \value{ Returns an object of class \code{'mclustICL'} containing the ICL criterion for the specified mixture models and numbers of clusters. The corresponding \code{print} method shows the matrix of values and the top models according to the ICL criterion. The \code{summary} method shows only the top models. } \references{ Biernacki, C., Celeux, G., Govaert, G. (2000). Assessing a mixture model for clustering with the integrated completed likelihood. \emph{IEEE Trans. Pattern Analysis and Machine Intelligence}, 22 (7), 719-725. Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. 
} \seealso{ \code{\link{plot.mclustICL}}, \code{\link{Mclust}}, \code{\link{mclustBIC}}, \code{\link{mclustBootstrapLRT}}, \code{\link{bic}}, \code{\link{icl}} } \examples{ data(faithful) faithful.ICL <- mclustICL(faithful) faithful.ICL summary(faithful.ICL) plot(faithful.ICL) \donttest{ # compare with faithful.BIC <- mclustBIC(faithful) faithful.BIC plot(faithful.BIC) } } \keyword{cluster} mclust/man/hc.Rd0000644000176200001440000001260614525074174013233 0ustar liggesusers\name{hc} \alias{hc} \alias{print.hc} \alias{as.hclust.hc} \title{Model-based Agglomerative Hierarchical Clustering} \description{ Agglomerative hierarchical clustering based on maximum likelihood criteria for Gaussian mixture models parameterized by eigenvalue decomposition. } \usage{ hc(data, modelName = "VVV", use = "VARS", partition = dupPartition(data), minclus = 1, \dots) \method{as.hclust}{hc}(x, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations (\eqn{n}) and columns correspond to variables (\eqn{d}). } \item{modelName}{ A character string indicating the model to be used in model-based agglomerative hierarchical clustering.\cr Possible models are: \describe{ \item{\code{"E"}}{equal variance (one-dimensional);} \item{\code{"V"}}{spherical, variable variance (one-dimensional);} \item{\code{"EII"}}{spherical, equal volume;} \item{\code{"VII"}}{spherical, unequal volume;} \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation;} \item{\code{"VVV"}}{ellipsoidal, varying volume, shape, and orientation (default).} } If \code{hc()} is used for initialization of EM algorithm then the default is taken from \code{mclust.options("hcModelName")}. See \code{\link{mclust.options}}. 
} \item{use}{ A character string specifying the type of input variables/data transformation to be used for model-based agglomerative hierarchical clustering.\cr Possible values are: \describe{ \item{\code{"VARS"}}{original variables (default);} \item{\code{"STD"}}{standardized variables (centered and scaled);} \item{\code{"SPH"}}{sphered variables (centered, scaled and uncorrelated) computed using SVD;} \item{\code{"PCS"}}{principal components computed using SVD on centered variables (i.e. using the covariance matrix);} \item{\code{"PCR"}}{principal components computed using SVD on standardized (center and scaled) variables (i.e. using the correlation matrix);} \item{\code{"SVD"}}{scaled SVD transformation.} } If \code{hc()} is used for initialization of EM algorithm then the default is taken from \code{mclust.options("hcUse")}. See \code{\link{mclust.options}}.\cr For further details see Scrucca and Raftery (2015). } \item{partition}{ A numeric or character vector representing a partition of observations (rows) of \code{data}. If provided, group merges will start with this partition. Otherwise, each observation is assumed to be in a cluster by itself at the start of agglomeration. Starting with version 5.4.8, by default the function \code{\link{dupPartition}} is used to start with all duplicated observations in the same group, thereby keeping duplicates in the same group throughout the modelling process. } \item{minclus}{ A number indicating the number of clusters at which to stop the agglomeration. The default is to stop when all observations have been merged into a single cluster. } \item{\dots}{ Arguments for the method-specific \code{hc} functions. See for example \code{\link{hcE}}. } \item{x}{ An object of class \code{'hc'} resulting from a call to \code{hc()}. 
} } \value{ The function \code{hc()} returns a numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of agglomerative hierarchical clustering. Several other informations are also returned as attributes. The method \code{as.hclust.hc()} can be used to convert the input object from class \code{'hc'} to class \code{'hclust'}. } \details{ Most models have memory usage of the order of the square of the number groups in the initial partition for fast execution. Some models, such as equal variance or \code{"EEE"}, do not admit a fast algorithm under the usual agglomerative hierarchical clustering paradigm. These use less memory but are much slower to execute. } \note{ If \code{modelName = "E"} (univariate with equal variances) or \code{modelName = "EII"} (multivariate with equal spherical covariances), then underlying model is the same as that for Ward's method for hierarchical clustering. } \references{ Banfield J. D. and Raftery A. E. (1993). Model-based Gaussian and non-Gaussian Clustering. \emph{Biometrics}, 49:803-821. Fraley C. (1998). Algorithms for model-based Gaussian hierarchical clustering. \emph{SIAM Journal on Scientific Computing}, 20:270-281. Fraley C. and Raftery A. E. (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association}, 97:611-631. Scrucca L. and Raftery A. E. (2015). Improved initialisation of model-based clustering using Gaussian hierarchical partitions. \emph{Advances in Data Analysis and Classification}, 9/4:447-460. 
} \seealso{ \code{\link{hcE}}, \dots, \code{\link{hcVVV}}, \code{\link{plot.hc}}, \code{\link{hclass}}, \code{\link{mclust.options}} } \examples{ hcTree <- hc(modelName = "VVV", data = iris[,-5]) hcTree cl <- hclass(hcTree,c(2,3)) table(cl[,"2"]) table(cl[,"3"]) \donttest{ clPairs(iris[,-5], classification = cl[,"2"]) clPairs(iris[,-5], classification = cl[,"3"]) } } \keyword{cluster} mclust/man/imputeData.Rd0000644000176200001440000000326014124774626014736 0ustar liggesusers\name{imputeData} \alias{imputeData} \alias{matchCluster} \title{Missing data imputation via the \pkg{mix} package} \description{ Imputes missing data using the \pkg{mix} package. } \usage{ imputeData(data, categorical = NULL, seed = NULL, verbose = interactive()) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations containing missing values. Categorical variables are allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{categorical}{ A logical vectors whose \emph{i}th entry is \code{TRUE} if the \emph{i}th variable or column of \code{data} is to be interpreted as categorical and \code{FALSE} otherwise. The default is to assume that a variable is to be interpreted as categorical only if it is a factor. } \item{seed}{ A seed for the function \code{rngseed} that is used to initialize the random number generator in \pkg{mix}. By default, a seed is chosen uniformly in the interval \code{(.Machine$integer.max/1024, .Machine$integer.max)}. } \item{verbose}{ A logical, if \code{TRUE} reports info about iterations of the algorithm. } } \value{ A dataset of the same dimensions as \code{data} with missing values filled in. } \references{ Schafer J. L. (1997). Analysis of Imcomplete Multivariate Data, Chapman and Hall. 
} \seealso{ \code{\link{imputePairs}} } \examples{ \donttest{ # Note that package 'mix' must be installed data(stlouis, package = "mix") # impute the continuous variables in the stlouis data stlimp <- imputeData(stlouis[,-(1:3)]) # plot imputed values imputePairs(stlouis[,-(1:3)], stlimp) } } \keyword{cluster} mclust/man/crimcoords.Rd0000644000176200001440000000764414317554640015003 0ustar liggesusers\name{crimcoords} \alias{crimcoords} \alias{print.crimcoords} \alias{summary.crimcoords} \alias{print.summary.crimcoords} \alias{plot.crimcoords} \title{Discriminant coordinates data projection} \description{ Compute the discriminant coordinates or crimcoords obtained by projecting the observed data from multiple groups onto the discriminant subspace. The optimal projection subspace is given by the linear transformation of the original variables that maximizes the ratio of the between-groups covariance (which represents groups separation) to the pooled within-group covariance (which represents within-group dispersion).} \usage{ crimcoords(data, classification, numdir = NULL, unbiased = FALSE, \dots) \method{summary}{crimcoords}(object, numdir, \dots) \method{plot}{crimcoords}(x, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{classification}{ A vector (numerical, character string, or factor) giving the groups classification (either the known class labels or the estimated clusters) for the observed data.} \item{numdir}{ An integer value specifying the number of directions of the discriminant subspace to return. If not provided, the maximal number of directions are returned (which is given by the number of non-null eigenvalues, the minimum among the number of variables and the number of groups minus one). 
However, since the effectiveness of the discriminant coordinates in highlighting the separation of groups is decreasing, it might be useful to provide a smaller value, say 2 or 3.} \item{unbiased}{ A logical specifying if unbiased estimates should be used for the between-groups and within-groups covariances. By default \code{unbiased = FALSE} so MLE estimates are used. Note that the use of unbiased or MLE estimates only changes the eigenvalues and eigenvectors of the generalized eigendecomposition by a constant of proportionality, so the discriminant coordinates or crimcoords are essentially the same.} \item{object, x}{ An object of class \code{crimcoords} as returned by \code{crimcoords()} function.} \item{\dots}{further arguments passed to or from other methods.} } \value{ A list of class \code{crimcoords} with the following components: \item{means}{A matrix of within-groups means.} \item{B}{The between-groups covariance matrix.} \item{W}{The pooled within-groups covariance matrix.} \item{evalues}{A vector of eigenvalues.} \item{basis}{A matrix of eigenvectors specifying the basis of the discriminant subspace.} \item{projection}{A matrix of projected data points onto the discriminant subspace.} \item{classification}{A vector giving the groups classification.} } \references{ Gnanadesikan, R. (1977) \emph{Methods for Statistical Data Analysis of Multivariate Observations}. John Wiley & Sons, Sec. 4.2. Flury, B. (1997) \emph{A First Course in Multivariate Statistics}. Springer, Sec. 7.3. 
} \author{ Luca Scrucca \email{luca.scrucca@unipg.it} } %\note{} \seealso{\code{\link{MclustDR}}, \code{\link{clPairs}}.} \examples{ # discriminant coordinates for the iris data using known classes data("iris") CRIMCOORDS = crimcoords(iris[,-5], iris$Species) summary(CRIMCOORDS) plot(CRIMCOORDS) # banknote data data("banknote") # discriminant coordinate on known classes CRIMCOORDS = crimcoords(banknote[,-1], banknote$Status) summary(CRIMCOORDS) plot(CRIMCOORDS) # discriminant coordinates on estimated clusters mod = Mclust(banknote[,-1]) CRIMCOORDS = crimcoords(banknote[,-1], mod$classification) summary(CRIMCOORDS) plot(CRIMCOORDS) plot(CRIMCOORDS$projection, type = "n") text(CRIMCOORDS$projection, cex = 0.8, labels = strtrim(banknote$Status, 2), col = mclust.options("classPlotColors")[1:mod$G][mod$classification]) } \keyword{multivariate} mclust/man/mstepE.Rd0000644000176200001440000001171514124774626014102 0ustar liggesusers\name{mstepE} \alias{mstepE} \alias{mstepV} \alias{mstepEII} \alias{mstepVII} \alias{mstepEEI} \alias{mstepVEI} \alias{mstepEVI} \alias{mstepVVI} \alias{mstepEEE} \alias{mstepEEV} \alias{mstepVEV} \alias{mstepVVV} \alias{mstepEVE} \alias{mstepEVV} \alias{mstepVEE} \alias{mstepVVE} \title{M-step for a parameterized Gaussian mixture model} \description{ Maximization step in the EM algorithm for a parameterized Gaussian mixture model. 
} \usage{ mstepE( data, z, prior = NULL, warn = NULL, \dots) mstepV( data, z, prior = NULL, warn = NULL, \dots) mstepEII( data, z, prior = NULL, warn = NULL, \dots) mstepVII( data, z, prior = NULL, warn = NULL, \dots) mstepEEI( data, z, prior = NULL, warn = NULL, \dots) mstepVEI( data, z, prior = NULL, warn = NULL, control = NULL, \dots) mstepEVI( data, z, prior = NULL, warn = NULL, \dots) mstepVVI( data, z, prior = NULL, warn = NULL, \dots) mstepEEE( data, z, prior = NULL, warn = NULL, \dots) mstepEEV( data, z, prior = NULL, warn = NULL, \dots) mstepVEV( data, z, prior = NULL, warn = NULL, control = NULL,\dots) mstepVVV( data, z, prior = NULL, warn = NULL, \dots) mstepEVE( data, z, prior = NULL, warn = NULL, control = NULL, \dots) mstepEVV( data, z, prior = NULL, warn = NULL, \dots) mstepVEE( data, z, prior = NULL, warn = NULL, control = NULL, \dots) mstepVVE( data, z, prior = NULL, warn = NULL, control = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. In analyses involving noise, this should not include the conditional probabilities for the noise component. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is given by \code{mclust.options("warn")}. } \item{control}{ Values controlling termination for models \code{"VEI"} and \code{"VEV"} that have an iterative M-step. This should be a list with components named \emph{itmax} and \emph{tol}. 
These components can be of length 1 or 2; in the latter case, \code{mstep} will use the second value, under the assumption that the first applies to an outer iteration (as in the function \code{me}). The default uses the default values from the function \code{emControl}, which sets no limit on the number of iterations, and a relative tolerance of \code{sqrt(.Machine$double.eps)} on successive iterates. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{Attributes:}{ \code{"info"} For those models with iterative M-steps (\code{"VEI"} and \code{"VEV"}), information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \note{ This function computes the M-step only for MVN mixtures, so in analyses involving noise, the conditional probabilities input should exclude those for the noise component. \cr In contrast to \code{me} for the EM algorithm, computations in \code{mstep} are carried out unless failure due to overflow would occur. 
To impose stricter tolerances on a single \code{mstep}, use \code{me} with the \emph{itmax} component of the \code{control} argument set to 1. } \seealso{ \code{\link{mstep}}, \code{\link{me}}, \code{\link{estep}}, \code{\link{mclustVariance}}, \code{\link{priorControl}}, \code{\link{emControl}}. } \examples{ \donttest{ mstepVII(data = iris[,-5], z = unmap(iris[,5]))} } \keyword{cluster} mclust/man/surfacePlot.Rd0000644000176200001440000001234314515765116015130 0ustar liggesusers\name{surfacePlot} \alias{surfacePlot} \title{Density or uncertainty surface for bivariate mixtures} \description{ Plots a density or uncertainty surface given bivariate data and parameters of a MVN mixture model for the data. } \usage{ surfacePlot(data, parameters, what = c("density", "uncertainty"), type = c("contour", "hdr", "image", "persp"), transformation = c("none", "log", "sqrt"), grid = 200, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), col = gray(0.5), col.palette = function(...) hcl.colors(..., "blues", rev = TRUE), hdr.palette = blue2grey.colors, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, main = FALSE, scale = FALSE, swapAxes = FALSE, verbose = FALSE, \dots) } \arguments{ \item{data}{ A matrix, or data frame of bivariate observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. 
} } } \item{what}{ Choose from one of the following options: \code{"density"} (default), \code{"uncertainty"} indicating what to plot. } \item{type}{ Choose from one of the following four options: \code{"contour"} (default), \code{"hdr"}, \code{"image"}, and \code{"persp"} indicating the plot type. } \item{transformation}{ Choose from one of the following three options: \code{"none"} (default), \code{"log"}, \code{"sqrt"} indicating a transformation to be applied before plotting. } \item{grid}{ The number of grid points (evenly spaced on each axis). The mixture density and uncertainty are computed at \code{grid x grid} points to produce the surface plot. Default: \code{200}. } \item{nlevels}{ The number of levels to use for a contour plot. Default: \code{11}. } \item{levels}{ A vector of levels at which to draw the lines in a contour plot. } \item{prob}{ A vector of probability levels for computing HDR. Only used if \code{type = "hdr"} and supersedes the \code{nlevels} and \code{levels} arguments. } \item{col}{ A string specifying the colour to be used for \code{type = "contour"} and \code{type = "persp"} plots. } \item{col.palette}{ A function which defines a palette of colours to be used for \code{type = "image"} plots. } \item{hdr.palette}{ A function which defines a palette of colours to be used for \code{type = "hdr"} plots. } \item{xlim, ylim}{ Optional argument specifying bounds for the abscissa and ordinate of the plot. This may be useful when comparing plots. } \item{xlab, ylab}{ Optional argument specifying labels for the x-axis and y-axis. } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{scale}{ A logical variable indicating whether or not the two dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. The default is not to scale.
} \item{swapAxes}{ A logical variable indicating whether or not the axes should be swapped for the plot. } \item{verbose}{ A logical variable telling whether or not to print an indication that the function is in the process of computing values at the grid points, which typically takes some time to complete. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing (a transformation of) the density or uncertainty for the given mixture model and data. The function also returns an invisible list with components \code{x}, \code{y}, and \code{z} in which \code{x} and \code{y} are the values used to define the grid and \code{z} is the transformed density or uncertainty at the grid points. } \details{ For an image plot, a color scheme may need to be selected on the display device in order to view the plot. } \seealso{ \code{\link{mclust2Dplot}} } \examples{ \donttest{ faithfulModel <- Mclust(faithful) surfacePlot(faithful, parameters = faithfulModel$parameters, type = "contour", what = "density", transformation = "none", drawlabels = FALSE) surfacePlot(faithful, parameters = faithfulModel$parameters, type = "persp", what = "density", transformation = "log") surfacePlot(faithful, parameters = faithfulModel$parameters, type = "contour", what = "uncertainty", transformation = "log") } } \keyword{cluster} mclust/man/predict.densityMclust.Rd0000644000176200001440000000326014241634527017134 0ustar liggesusers\name{predict.densityMclust} \alias{predict.densityMclust} \title{Density estimate of multivariate observations by Gaussian finite mixture modeling} \description{Compute density estimation for multivariate observations based on Gaussian finite mixture models estimated by \code{\link{densityMclust}}.} \usage{ \method{predict}{densityMclust}(object, newdata, what = c("dens", "cdens", "z"), logarithm = FALSE, \dots) } \arguments{ \item{object}{an object of class \code{'densityMclust'} resulting from a call to \code{\link{densityMclust}}.} \item{newdata}{a vector, 
a data frame or matrix giving the data. If missing the density is computed for the input data obtained from the call to \code{\link{densityMclust}}.} \item{what}{a character string specifying what to retrieve: \code{"dens"} returns a vector of values for the mixture density; \code{"cdens"} returns a matrix of component densities for each mixture component (along the columns); \code{"z"} returns a matrix of conditional probabilities of each data point to belong to a mixture component.} \item{logarithm}{A logical value indicating whether or not the logarithm of the density or component densities should be returned.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a vector or a matrix of densities evaluated at \code{newdata} depending on the argument \code{what} (see above). } \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}.} \examples{ \donttest{ x <- faithful$waiting dens <- densityMclust(x, plot = FALSE) x0 <- seq(50, 100, by = 10) d0 <- predict(dens, x0) plot(dens, what = "density") points(x0, d0, pch = 20) } } \keyword{multivariate} mclust/man/map.Rd0000644000176200001440000000225413175052667013417 0ustar liggesusers\name{map} \alias{map} \title{Classification given Probabilities} \description{ Converts a matrix in which each row sums to 1 to an integer vector specifying for each row the column index of the maximum. } \usage{ map(z, warn = mclust.options("warn"), \dots) } \arguments{ \item{z}{ A matrix (for example a matrix of conditional probabilities in which each row sums to 1 as produced by the E-step of the EM algorithm). } \item{warn}{ A logical variable indicating whether or not a warning should be issued when there are some columns of \code{z} for which no row attains a maximum. } \item{\dots }{ Provided to allow lists with elements other than the arguments to be passed in indirect or list calls with \code{do.call}. 
} } \value{ An integer vector with one entry for each row of z, in which the \emph{i}-th value is the column index at which the \emph{i}-th row of \code{z} attains a maximum. } \seealso{ \code{\link{unmap}}, \code{\link{estep}}, \code{\link{em}}, \code{\link{me}}. } \examples{ emEst <- me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) map(emEst$z) } \keyword{cluster} % docclass is function mclust/man/EuroUnemployment.Rd0000644000176200001440000000144314516420360016155 0ustar liggesusers\name{EuroUnemployment} \alias{EuroUnemployment} \docType{data} \title{Unemployment data for European countries in 2014} \description{ The data set contains unemployment rates for 31 European countries for the year 2014.} \usage{data(EuroUnemployment)} \format{A data frame with the following variables: \describe{ \item{TUR}{Total unemployment rate, i.e. percentage of unemployed persons aged 15-74 in the economically active population.} \item{YUR}{Youth unemployment rate, i.e. percentage of unemployed persons aged 15-24 in the economically active population.} \item{LUR}{Long-term unemployment rate, i.e. percentage of unemployed persons who have been unemployed for 12 months or more.} } } \source{Dataset downloaded from EUROSTAT \url{https://ec.europa.eu/eurostat}.} \keyword{datasets} mclust/man/logLik.MclustDA.Rd0000644000176200001440000000201014124774626015525 0ustar liggesusers\name{logLik.MclustDA} \alias{logLik.MclustDA} \title{Log-Likelihood of a \code{MclustDA} object} \description{ Returns the log-likelihood for a \code{MclustDA} object.} \usage{ \method{logLik}{MclustDA}(object, data, \dots) } \arguments{ \item{object}{an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} \item{data}{the data for which the log-likelihood must be computed. 
If missing, the observed data from the \code{'MclustDA'} object is used.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{Returns an object of class \code{'logLik'} with an element providing the maximized log-likelihood, and further arguments giving the number of (estimated) parameters in the model (\code{"df"}) and the sample size (\code{"nobs"}).} \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDA}}.} \examples{ \donttest{ irisMclustDA <- MclustDA(iris[,1:4], iris$Species) summary(irisMclustDA) logLik(irisMclustDA) } } \keyword{multivariate} mclust/man/MclustSSC.Rd0000644000176200001440000001344513742016673014463 0ustar liggesusers\name{MclustSSC} \alias{MclustSSC} \alias{print.MclustSSC} \title{MclustSSC semi-supervised classification} \description{ Semi-Supervised classification based on Gaussian finite mixture modeling. } \usage{ MclustSSC(data, class, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), warn = mclust.options("warn"), verbose = interactive(), \dots) } \arguments{ \item{data}{ A data frame or matrix giving the training data. } \item{class}{ A vector giving the known class labels (either a numerical value or a character string) for the observations in the training data. Observations with unknown class are encoded as \code{NA}. } \item{G}{ An integer value specifying the numbers of mixture components or classes. By default is set equal to the number of known classes. See the examples below. } \item{modelNames}{ A vector of character strings indicating the models to be fitted by EM (see the description in \code{\link{mclustModelNames}}). See the examples below. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{\link{priorControl}}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. 
} \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when estimation fails. The default is controlled by \code{\link{mclust.options}}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots }{Further arguments passed to or from other methods.} } \value{ An object of class \code{'MclustSSC'} providing the optimal (according to BIC) Gaussian mixture model for semi-supervised classification. The details of the output components are as follows: \item{call}{The matched call.} \item{data}{The input data matrix.} \item{class}{The input class labels (including \code{NA}s for unknown labels).} \item{modelName}{A character string specifying the "best" estimated model.} \item{G}{A numerical value specifying the number of mixture components or classes of the "best" estimated model.} \item{n}{The total number of observations in the data.} \item{d}{The dimension of the data.} \item{BIC}{All BIC values.} \item{loglik}{Log-likelihood for the selected model.} \item{df}{Number of estimated parameters.} \item{bic}{Optimal BIC value.} \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the test data belongs to the \emph{k}th class. 
} \item{classification}{ The classification corresponding to \code{z}, i.e. \code{map(z)}. } \item{prior}{ The prior used (if any). } \item{control}{ A list of control parameters used in the EM algorithm. } } \details{ The semi-supervised approach implemented in \code{MclustSSC()} is a simple Gaussian mixture model for classification where at the first M-step only observations with known class labels are used for parameters estimation. Then, a standard EM algorithm is used for updating the probability of class membership for unlabelled data while keeping fixed the probabilities for labelled data. } \references{ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. } \author{Luca Scrucca} \seealso{ \code{\link{summary.MclustSSC}}, \code{\link{plot.MclustSSC}}, \code{\link{predict.MclustSSC}} } \examples{ # Simulate two overlapping groups n <- 200 pars <- list(pro = c(0.5, 0.5), mean = matrix(c(-1,1), nrow = 2, ncol = 2, byrow = TRUE), variance = mclustVariance("EII", d = 2, G = 2)) pars$variance$sigmasq <- 1 data <- sim("EII", parameters = pars, n = n, seed = 12) class <- data[,1] X <- data[,-1] clPairs(X, class, symbols = c(1,2), main = "Full classified data") # Randomly remove labels cl <- class; cl[sample(1:n, size = 195)] <- NA table(cl, useNA = "ifany") clPairs(X, ifelse(is.na(cl), 0, class), symbols = c(0, 16, 17), colors = c("grey", 4, 2), main = "Partially classified data") # Fit semi-supervised classification model mod_SSC <- MclustSSC(X, cl) summary(mod_SSC, parameters = TRUE) pred_SSC <- predict(mod_SSC) table(Predicted = pred_SSC$classification, Actual = class) ngrid <- 50 xgrid <- seq(-3, 3, length.out = ngrid) ygrid <- seq(-4, 4.5, length.out = ngrid) xygrid <- expand.grid(xgrid, ygrid) pred_SSC <- predict(mod_SSC, newdata = xygrid) col <- mclust.options("classPlotColors")[class] pch <- class pch[!is.na(cl)] = 
ifelse(cl[!is.na(cl)] == 1, 19, 17) plot(X, pch = pch, col = col) contour(xgrid, ygrid, matrix(pred_SSC$z[,1], ngrid, ngrid), add = TRUE, levels = 0.5, drawlabels = FALSE, lty = 2, lwd = 2) } \keyword{classification} mclust/man/summary.MclustSSC.Rd0000644000176200001440000000221413742016253016141 0ustar liggesusers\name{summary.MclustSSC} \alias{summary.MclustSSC} \alias{print.summary.MclustSSC} \title{Summarizing semi-supervised classification model based on Gaussian finite mixtures} \description{Summary method for class \code{"MclustSSC"}.} \usage{ \method{summary}{MclustSSC}(object, parameters = FALSE, \dots) \method{print}{summary.MclustSSC}(x, digits = getOption("digits"), \dots) } \arguments{ \item{object}{An object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}.} \item{x}{An object of class \code{'summary.MclustSSC'}, usually, a result of a call to \code{summary.MclustSSC}.} \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } % \details{} \value{The function \code{summary.MclustSSC} computes and returns a list of summary statistics of the estimated MclustSSC model for semi-supervised classification.} \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustSSC}}, \code{\link{plot.MclustSSC}}.} \keyword{classification} mclust/man/MclustDRsubsel.Rd0000644000176200001440000001226214234443102015536 0ustar liggesusers\name{MclustDRsubsel} \alias{MclustDRsubsel} \alias{print.MclustDRsubsel} \alias{MclustDRsubsel_classif} \alias{MclustDRsubsel_cluster} \alias{MclustDRrecoverdir} \alias{MclustDRsubsel1cycle} \alias{print.MclustDRsubsel} \alias{summary.MclustDRsubsel} \title{Subset selection for GMMDR directions based on BIC} \description{ Implements a subset selection method for selecting the relevant directions spanning the dimension reduction subspace for 
visualizing the clustering or classification structure obtained from a finite mixture of Gaussian densities.} \usage{ MclustDRsubsel(object, G = 1:9, modelNames = mclust.options("emModelNames"), \dots, bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) } \arguments{ \item{object}{An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}.} \item{G}{An integer vector specifying the numbers of mixture components or clusters.} \item{modelNames}{A vector of character strings indicating the models to be fitted. See \code{\link{mclustModelNames}} for a description of the available models.} \item{\dots}{Further arguments passed through \code{\link{Mclust}} or \code{\link{MclustDA}}.} \item{bic.stop}{A criterion to terminate the search. If maximal BIC difference is less than \code{bic.stop} then the algorithm stops. \cr Two typical values are: \tabular{ll}{ \code{0}: \tab algorithm stops when the BIC difference becomes negative (default);\cr \code{-Inf}: \tab algorithm continues until all directions have been selected. } } \item{bic.cutoff}{A value specifying how to select simplest ``best'' model within \code{bic.cutoff} from the maximum value achieved. Setting this to \code{0} (default) simply selects the model with the largest BIC difference.} \item{mindir}{An integer value specifying the minimum number of directions to be estimated.} \item{verbose}{A logical or integer value specifying if and how much detailed information should be reported during the iterations of the algorithm. 
\cr Possible values are: \tabular{ll}{ \code{0} or \code{FALSE}: \tab no trace info is shown;\cr \code{1} or \code{TRUE}: \tab a trace info is shown at each step of the search;\cr \code{2}: \tab a more detailed trace info is shown.} } } \details{ The GMMDR method aims at reducing the dimensionality by identifying a set of linear combinations, ordered by importance as quantified by the associated eigenvalues, of the original features which capture most of the clustering or classification structure contained in the data. This is implemented in \code{\link{MclustDR}}. The \code{MclustDRsubsel} function implements the greedy forward search algorithm discussed in Scrucca (2010) to prune the set of all GMMDR directions. The criterion used to select the relevant directions is based on the BIC difference between a clustering model and a model in which the feature proposal has no clustering relevance. The steps are the following: 1. Select the first feature to be the one which maximizes the BIC difference between the best clustering model and the model which assumes no clustering, i.e. a single component. 2. Select the next feature amongst those not previously included, to be the one which maximizes the BIC difference. 3. Iterate the previous step until all the BIC differences for the inclusion of a feature become less than \code{bic.stop}. At each step, the search over the model space is performed with respect to the model parametrisation and the number of clusters. } \value{ An object of class \code{'MclustDRsubsel'} which inherits from \code{'MclustDR'}, so it has the same components of the latter plus the following: \item{basisx}{The basis of the estimated dimension reduction subspace expressed in terms of the original variables.} \item{std.basisx}{The basis of the estimated dimension reduction subspace expressed in terms of the original variables standardized to have unit standard deviation.} } \references{ Scrucca, L. 
(2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. Scrucca, L. (2014) Graphical Tools for Model-based Mixture Discriminant Analysis. \emph{Advances in Data Analysis and Classification}, 8(2), pp. 147-165 } \author{Luca Scrucca} %\note{} \seealso{ \code{\link{MclustDR}}, \code{\link{Mclust}}, \code{\link{MclustDA}}. } \examples{ \donttest{ # clustering data(crabs, package = "MASS") x <- crabs[,4:8] class <- paste(crabs$sp, crabs$sex, sep = "|") mod <- Mclust(x) table(class, mod$classification) dr <- MclustDR(mod) summary(dr) plot(dr) drs <- MclustDRsubsel(dr) summary(drs) table(class, drs$classification) plot(drs, what = "scatterplot") plot(drs, what = "pairs") plot(drs, what = "contour") plot(drs, what = "boundaries") plot(drs, what = "evalues") # classification data(banknote) da <- MclustDA(banknote[,2:7], banknote$Status) table(banknote$Status, predict(da)$class) dr <- MclustDR(da) summary(dr) drs <- MclustDRsubsel(dr) summary(drs) table(banknote$Status, predict(drs)$class) plot(drs, what = "scatterplot") plot(drs, what = "classification") plot(drs, what = "boundaries")} } \keyword{multivariate} mclust/man/mclustBootstrapLRT.Rd0000644000176200001440000001111714124774626016430 0ustar liggesusers\name{mclustBootstrapLRT} \alias{mclustBootstrapLRT} \alias{print.mclustBootstrapLRT} \alias{plot.mclustBootstrapLRT} \title{Bootstrap Likelihood Ratio Test for the Number of Mixture Components} \description{Perform the likelihood ratio test (LRT) for assessing the number of mixture components in a specific finite mixture model parameterisation. 
The observed significance is approximated by using the (parametric) bootstrap for the likelihood ratio test statistic (LRTS).} \usage{ mclustBootstrapLRT(data, modelName = NULL, nboot = 999, level = 0.05, maxG = NULL, verbose = interactive(), \dots) \method{print}{mclustBootstrapLRT}(x, \dots) \method{plot}{mclustBootstrapLRT}(x, G = 1, hist.col = "grey", hist.border = "lightgrey", breaks = "Scott", col = "forestgreen", lwd = 2, lty = 3, main = NULL, \dots) } \arguments{ \item{data}{A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables.} \item{modelName}{A character string indicating the mixture model to be fitted. The help file for \code{\link{mclustModelNames}} describes the available models.} \item{nboot}{The number of bootstrap replications to use (by default 999).} \item{level}{The significance level to be used to terminate the sequential bootstrap procedure.} \item{maxG}{The maximum number of mixture components \eqn{G} to test. If not provided the procedure is stopped when a test is not significant at the specified \code{level}.} \item{verbose}{A logical controlling if a text progress bar is displayed during the bootstrap procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise.} \item{\dots}{Further arguments passed to or from other methods. 
In particular, see the optional arguments in \code{\link{mclustBIC}}.} \item{x}{An \code{'mclustBootstrapLRT'} object.} \item{G}{A value specifying the number of components for which to plot the bootstrap distribution.} \item{hist.col}{The colour to be used to fill the bars of the histogram.} \item{hist.border}{The color of the border around the bars of the histogram.} \item{breaks}{See the argument in function \code{\link[graphics]{hist}}.} \item{col, lwd, lty}{The color, line width and line type to be used to represent the observed LRT statistic.} \item{main}{The title for the graph.} } \details{The implemented algorithm for computing the LRT observed significance using the bootstrap is the following. Let \eqn{G_0} be the number of mixture components under the null hypothesis versus \eqn{G_1 = G_0+1} under the alternative. Bootstrap samples are drawn by simulating data under the null hypothesis. Then, the p-value may be approximated using eq. (13) on McLachlan and Rathnayake (2014). Equivalently, using the notation of Davison and Hinkley (1997) it may be computed as \deqn{\textnormal{p-value} = \frac{1 + \#\{LRT^*_b \ge LRTS_{obs}\}}{B+1}}{% p-value = (1 + #{LRTS*_b \ge LRT_obs}) / (B+1)} where \cr \eqn{B} = number of bootstrap samples \cr \eqn{LRT_{obs}}{LRT_obs} = LRTS computed on the observed data\cr \eqn{LRT^*_b}{LRT*_b} = LRTS computed on the \eqn{b}th bootstrap sample. } \value{An object of class \code{'mclustBootstrapLRT'} with the following components: \item{G}{A vector of number of components tested under the null hypothesis.} \item{modelName}{A character string specifying the mixture model as provided in the function call (see above).} \item{obs}{The observed values of the LRTS.} \item{boot}{A matrix of dimension \code{nboot} x the number of components tested containing the bootstrap values of LRTS.} \item{p.value}{A vector of p-values.} } \references{ Davison, A. and Hinkley, D. (1997) \emph{Bootstrap Methods and Their Applications}. 
Cambridge University Press. McLachlan G.J. (1987) On bootstrapping the likelihood ratio test statistic for the number of components in a normal mixture. \emph{Applied Statistics}, 36, 318-324. McLachlan, G.J. and Peel, D. (2000) \emph{Finite Mixture Models}. Wiley. McLachlan, G.J. and Rathnayake, S. (2014) On the number of components in a Gaussian mixture model. \emph{Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery}, 4(5), pp. 341-355. } \seealso{\code{\link{mclustBIC}}, \code{\link{mclustICL}}, \code{\link{Mclust}}} \examples{ \donttest{ data(faithful) faithful.boot = mclustBootstrapLRT(faithful, model = "VVV") faithful.boot plot(faithful.boot, G = 1) plot(faithful.boot, G = 2) } } \keyword{htest} \keyword{cluster} mclust/man/plot.MclustBoostrap.Rd0000644000176200001440000000420014124774626016572 0ustar liggesusers\name{plot.MclustBootstrap} \alias{plot.MclustBootstrap} \title{Plot of bootstrap distributions for mixture model parameters} \description{ Plots the bootstrap distribution of parameters as returned by the \code{\link{MclustBootstrap}} function. } \usage{ \method{plot}{MclustBootstrap}(x, what = c("pro", "mean", "var"), show.parest = TRUE, show.confint = TRUE, hist.col = "grey", hist.border = "lightgrey", breaks = "Sturges", col = "forestgreen", lwd = 2, lty = 3, xlab = NULL, xlim = NULL, ylim = NULL, \dots) } \arguments{ \item{x}{Object returned by \code{MclustBootstrap}.} \item{what}{Character string specifying if mixing proportions (\code{"pro"}), component means (\code{"mean"}) or component variances (\code{"var"}) should be drawn.} \item{show.parest}{A logical specifying if the parameter estimate should be drawn as vertical line.} \item{show.confint}{A logical specifying if the resampling-based confidence interval should be drawn at the bottom of the graph. 
Confidence level can be provided as further argument \code{conf.level}; see \code{\link{summary.MclustBootstrap}}.} \item{hist.col}{The color to be used to fill the bars of the histograms.} \item{hist.border}{The color of the border around the bars of the histograms.} \item{breaks}{See the argument in function \code{\link[graphics]{hist}}.} \item{col, lwd, lty}{The color, line width and line type to be used to represent the estimated parameters and confidence intervals.} \item{xlab}{Optional label for the horizontal axis.} \item{xlim, ylim}{A two-values vector of axis range for, respectively, horizontal and vertical axis.} \item{\dots}{Other graphics parameters.} } \value{ A plot for each variable/component of the selected parameters. } \seealso{ \code{\link{MclustBootstrap}} } \examples{ \donttest{ data(diabetes) X <- diabetes[,-1] modClust <- Mclust(X, G = 3, modelNames = "VVV") bootClust <- MclustBootstrap(modClust, nboot = 99) par(mfrow = c(1,3), mar = c(4,2,2,0.5)) plot(bootClust, what = "pro") par(mfrow = c(3,3), mar = c(4,2,2,0.5)) plot(bootClust, what = "mean") } } \keyword{cluster} mclust/man/mstep.Rd0000644000176200001440000000671314124774626013777 0ustar liggesusers\name{mstep} \alias{mstep} \title{M-step for parameterized Gaussian mixture models} \description{ Maximization step in the EM algorithm for parameterized Gaussian mixture models. } \usage{ mstep(data, modelName, z, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. 
In analyses involving noise, this should not include the conditional probabilities for the noise component. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is given by \code{mclust.options("warn")}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{Attributes:}{ \code{"info"} For those models with iterative M-steps (\code{"VEI"} and \code{"VEV"}), information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \note{ This function computes the M-step only for MVN mixtures, so in analyses involving noise, the conditional probabilities input should exclude those for the noise component. \cr In contrast to \code{me} for the EM algorithm, computations in \code{mstep} are carried out unless failure due to overflow would occur. 
To impose stricter tolerances on a single \code{mstep}, use \code{me} with the \emph{itmax} component of the \code{control} argument set to 1. } \seealso{ \code{\link{mstepE}}, \dots, \code{\link{mstepVVV}}, \code{\link{emControl}}, \code{\link{me}}, \code{\link{estep}}, \code{\link{mclust.options}}. } \examples{ \donttest{ mstep(modelName = "VII", data = iris[,-5], z = unmap(iris[,5]))} } \keyword{cluster} mclust/man/hclass.Rd0000644000176200001440000000212214124774626014112 0ustar liggesusers\name{hclass} \alias{hclass} \title{ Classifications from Hierarchical Agglomeration } \description{ Determines the classifications corresponding to different numbers of groups given merge pairs from hierarchical agglomeration. } \usage{ hclass(hcPairs, G) } \arguments{ \item{hcPairs}{ A numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of agglomerative hierarchical clustering. } \item{G}{ An integer or vector of integers giving the number of clusters for which the corresponding classfications are wanted. } } \value{ A matrix with \code{length(G)} columns, each column corresponding to a classification. Columns are indexed by the character representation of the integers in \code{G}. } \seealso{ \code{\link{hc}}, \code{\link{hcE}} } \examples{ hcTree <- hc(modelName="VVV", data = iris[,-5]) cl <- hclass(hcTree,c(2,3)) \donttest{ par(pty = "s", mfrow = c(1,1)) clPairs(iris[,-5],cl=cl[,"2"]) clPairs(iris[,-5],cl=cl[,"3"]) } } \keyword{cluster} mclust/man/adjustedRandIndex.Rd0000644000176200001440000000266113752165071016237 0ustar liggesusers\name{adjustedRandIndex} \alias{adjustedRandIndex} \title{ Adjusted Rand Index } \description{ Computes the adjusted Rand index comparing two classifications. } \usage{ adjustedRandIndex(x, y) } \arguments{ \item{x}{ A numeric or character vector of class labels. } \item{y}{ A numeric or character vector of class labels. 
The length of \code{y} should be the same as that of \code{x}. } } \value{ The adjusted Rand index comparing the two partitions (a scalar). This index has zero expected value in the case of random partition, and it is bounded above by 1 in the case of perfect agreement between two partitions. } \references{ L. Hubert and P. Arabie (1985) Comparing Partitions, \emph{Journal of Classification}, 2, pp. 193-218. } \seealso{ \code{\link{classError}}, \code{\link{mapClass}}, \code{\link{table}} } \examples{ a <- rep(1:3, 3) a b <- rep(c("A", "B", "C"), 3) b adjustedRandIndex(a, b) a <- sample(1:3, 9, replace = TRUE) a b <- sample(c("A", "B", "C"), 9, replace = TRUE) b adjustedRandIndex(a, b) a <- rep(1:3, 4) a b <- rep(c("A", "B", "C", "D"), 3) b adjustedRandIndex(a, b) irisHCvvv <- hc(modelName = "VVV", data = iris[,-5]) cl3 <- hclass(irisHCvvv, 3) adjustedRandIndex(cl3,iris[,5]) irisBIC <- mclustBIC(iris[,-5]) adjustedRandIndex(summary(irisBIC,iris[,-5])$classification,iris[,5]) adjustedRandIndex(summary(irisBIC,iris[,-5],G=3)$classification,iris[,5]) } \keyword{cluster} mclust/man/me.Rd0000644000176200001440000001074114124774626013244 0ustar liggesusers\name{me} \alias{me} \title{EM algorithm starting with M-step for parameterized MVN mixture models} \description{ Implements the EM algorithm for MVN mixture models parameterized by eigenvalue decomposition, starting with the maximization step. } \usage{ me(data, modelName, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. 
} \item{z}{ A matrix whose \code{[i,k]}th entry is an initial estimate of the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{prior}{ Specification of a conjugate prior on the means and variances. See the help file for \code{priorControl} for further information. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{Vinv}{ If the model is to include a noise term, \code{Vinv} is an estimate of the reciprocal hypervolume of the data region. If set to a negative value or 0, the model will include a noise term with the reciprocal hypervolume estimated by the function \code{hypvol}. The default is not to assume a noise term in the model through the setting \code{Vinv=NULL}. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is set in \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of mixture components. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. 
If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{control}{ The list of control parameters for EM used. } \item{prior}{ The specification of a conjugate prior on the means and variances used, \code{NULL} if no prior is used. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{meE}}, \dots, \code{\link{meVVV}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{estep}}, \code{\link{priorControl}}, \code{\link{mclustModelNames}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}} } \examples{ \donttest{ me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5]))} } \keyword{cluster} mclust/man/predict.Mclust.Rd0000644000176200001440000000317113175055063015532 0ustar liggesusers\name{predict.Mclust} \alias{predict.Mclust} \title{Cluster multivariate observations by Gaussian finite mixture modeling} \description{Cluster prediction for multivariate observations based on Gaussian finite mixture models estimated by \code{\link{Mclust}}.} \usage{ \method{predict}{Mclust}(object, newdata, \dots) } \arguments{ \item{object}{an object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.} \item{newdata}{a data frame or matrix giving the data. 
If missing the clustering data obtained from the call to \code{\link{Mclust}} are classified.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list with the following components: \item{classification}{a factor of predicted cluster labels for \code{newdata}.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th cluster.} } \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}.} \examples{ model <- Mclust(faithful) # predict cluster for the observed data pred <- predict(model) str(pred) pred$z # equal to model$z pred$classification # equal to model$classification plot(faithful, col = pred$classification, pch = pred$classification) # predict cluster over a grid grid <- apply(faithful, 2, function(x) seq(min(x), max(x), length = 50)) grid <- expand.grid(eruptions = grid[,1], waiting = grid[,2]) pred <- predict(model, grid) plot(grid, col = mclust.options("classPlotColors")[pred$classification], pch = 15, cex = 0.5) points(faithful, pch = model$classification) } \keyword{multivariate} mclust/man/plot.MclustSSC.Rd0000644000176200001440000000342113742016344015424 0ustar liggesusers\name{plot.MclustSSC} \alias{plot.MclustSSC} \title{Plotting method for MclustSSC semi-supervised classification} \description{ Plots for semi-supervised classification based on Gaussian finite mixture models. } \usage{ \method{plot}{MclustSSC}(x, what = c("BIC", "classification", "uncertainty"), \dots) } \arguments{ \item{x}{ An object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}. } \item{what}{ A string specifying the type of graph requested. Available choices are: \describe{ \item{\code{"BIC"} =}{plot of BIC values used for model selection, i.e. 
for choosing the model class covariances.} \item{\code{"classification"} =}{a plot of data with points marked based on the known and the predicted classification.} \item{\code{"uncertainty"} =}{a plot of classification uncertainty.} } If not specified, in interactive sessions a menu of choices is proposed. } \item{\dots}{further arguments passed to or from other methods. See \code{\link{plot.Mclust}}.} } %\value{} %\details{} \author{Luca Scrucca} \seealso{ \code{\link{MclustSSC}} } \examples{ X <- iris[,1:4] class <- iris$Species # randomly remove class labels set.seed(123) class[sample(1:length(class), size = 120)] <- NA table(class, useNA = "ifany") clPairs(X, ifelse(is.na(class), 0, class), symbols = c(0, 16, 17, 18), colors = c("grey", 4, 2, 3), main = "Partially classified data") # Fit semi-supervised classification model mod_SSC <- MclustSSC(X, class) summary(mod_SSC, parameters = TRUE) pred_SSC <- predict(mod_SSC) table(Predicted = pred_SSC$classification, Actual = class, useNA = "ifany") plot(mod_SSC, what = "BIC") plot(mod_SSC, what = "classification") plot(mod_SSC, what = "uncertainty") } \keyword{multivariate} mclust/man/mclust-deprecated.Rd0000644000176200001440000000135513405515075016241 0ustar liggesusers\name{mclust-deprecated} \alias{cv.MclustDA} \alias{cv1EMtrain} \alias{bicEMtrain} \title{Deprecated Functions in mclust package} \description{ These functions are provided for compatibility with older versions of the \pkg{mclust} package only, and may be removed eventually. } \usage{ cv.MclustDA(\dots) cv1EMtrain(data, labels, modelNames=NULL) bicEMtrain(data, labels, modelNames=NULL) } \arguments{ \item{\dots}{pass arguments down.} \item{data}{A numeric vector or matrix of observations.} \item{labels}{Labels for each element or row in the dataset.} \item{modelNames}{Vector of model names that should be tested. 
The default is to select all available model names.} } \seealso{\code{\link{deprecated}}} mclust/man/clustCombiOptim.Rd0000644000176200001440000000415313475242100015741 0ustar liggesusers\name{clustCombiOptim} \alias{clustCombiOptim} \title{Optimal number of clusters obtained by combining mixture components} \description{ Return the optimal number of clusters by combining mixture components based on the entropy method discussed in the reference given below. } \usage{ clustCombiOptim(object, reg = 2, plot = FALSE, \dots) } \arguments{ \item{object}{ An object of class \code{'clustCombi'} resulting from a call to \code{\link{clustCombi}}. } \item{reg}{ The number of parts of the piecewise linear regression for the entropy plots. Choose 2 for a two-segment piecewise linear regression model (i.e. 1 change-point), and 3 for a three-segment piecewise linear regression model (i.e. 3 change-points). } \item{plot}{ Logical, if \code{TRUE} an entropy plot is also produced. } \item{\dots}{Further arguments passed to or from other methods.} } \value{ The function returns a list with the following components: \item{numClusters.combi}{The estimated number of clusters.} \item{z.combi}{A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th cluster.} \item{cluster.combi}{The clustering labels.} } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. 
Scrucca } \seealso{ \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{clustCombi}} } \examples{ data(Baudry_etal_2010_JCGS_examples) output <- clustCombi(data = ex4.1) combiOptim <- clustCombiOptim(output) str(combiOptim) # plot optimal clustering with alpha color transparency proportional to uncertainty zmax <- apply(combiOptim$z.combi, 1, max) col <- mclust.options("classPlotColors")[combiOptim$cluster.combi] vadjustcolor <- Vectorize(adjustcolor) alphacol = (zmax - 1/combiOptim$numClusters.combi)/(1-1/combiOptim$numClusters.combi) col <- vadjustcolor(col, alpha.f = alphacol) plot(ex4.1, col = col, pch = mclust.options("classPlotSymbols")[combiOptim$cluster.combi]) } \keyword{ cluster } mclust/man/summary.MclustDR.Rd0000644000176200001440000000214713175055251016024 0ustar liggesusers\name{summary.MclustDR} \alias{summary.MclustDR} \alias{print.summary.MclustDR} \title{Summarizing dimension reduction method for model-based clustering and classification} \description{Summary method for class \code{"MclustDR"}.} \usage{ \method{summary}{MclustDR}(object, numdir, std = FALSE, \dots) \method{print}{summary.MclustDR}(x, digits = max(5, getOption("digits") - 3), \dots) } \arguments{ \item{object}{An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}.} \item{x}{An object of class \code{'summary.MclustDR'}, usually, a result of a call to \code{summary.MclustDR}.} \item{numdir}{An integer providing the number of basis directions to be printed.} \item{std}{if \code{TRUE} the coefficients basis are scaled such that all predictors have unit standard deviation.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } %\details{} %\value{} \author{Luca Scrucca} %\note{} \seealso{ \code{\link{MclustDR}}, \code{\link{plot.MclustDR}} } %\examples{} %\keyword{} mclust/man/wdbc.Rd0000644000176200001440000000660714163460525013561 0ustar liggesusers\name{wdbc} 
\alias{wdbc} \docType{data} \title{UCI Wisconsin Diagnostic Breast Cancer Data} \description{ The data set provides data for 569 patients on 30 features of the cell nuclei obtained from a digitized image of a fine needle aspirate (FNA) of a breast mass. For each patient the cancer was diagnosed as malignant or benign.} \usage{data(wdbc)} \format{A data frame with 569 observations on the following variables: \describe{ \item{\code{ID}}{ID number} \item{\code{Diagnosis}}{cancer diagnosis: \code{M} = malignant, \code{B} = benign} \item{\code{Radius_mean}}{a numeric vector} \item{\code{Texture_mean}}{a numeric vector} \item{\code{Perimeter_mean}}{a numeric vector} \item{\code{Area_mean}}{a numeric vector} \item{\code{Smoothness_mean}}{a numeric vector} \item{\code{Compactness_mean}}{a numeric vector} \item{\code{Concavity_mean}}{a numeric vector} \item{\code{Nconcave_mean}}{a numeric vector} \item{\code{Symmetry_mean}}{a numeric vector} \item{\code{Fractaldim_mean}}{a numeric vector} \item{\code{Radius_se}}{a numeric vector} \item{\code{Texture_se}}{a numeric vector} \item{\code{Perimeter_se}}{a numeric vector} \item{\code{Area_se}}{a numeric vector} \item{\code{Smoothness_se}}{a numeric vector} \item{\code{Compactness_se}}{a numeric vector} \item{\code{Concavity_se}}{a numeric vector} \item{\code{Nconcave_se}}{a numeric vector} \item{\code{Symmetry_se}}{a numeric vector} \item{\code{Fractaldim_se}}{a numeric vector} \item{\code{Radius_extreme}}{a numeric vector} \item{\code{Texture_extreme}}{a numeric vector} \item{\code{Perimeter_extreme}}{a numeric vector} \item{\code{Area_extreme}}{a numeric vector} \item{\code{Smoothness_extreme}}{a numeric vector} \item{\code{Compactness_extreme}}{a numeric vector} \item{\code{Concavity_extreme}}{a numeric vector} \item{\code{Nconcave_extreme}}{a numeric vector} \item{\code{Symmetry_extreme}}{a numeric vector} \item{\code{Fractaldim_extreme}}{a numeric vector} } } \details{ The recorded features are: \itemize{ \item \code{Radius} 
as mean of distances from center to points on the perimeter \item \code{Texture} as standard deviation of gray-scale values \item \code{Perimeter} as cell nucleus perimeter \item \code{Area} as cell nucleus area \item \code{Smoothness} as local variation in radius lengths \item \code{Compactness} as cell nucleus compactness, perimeter^2 / area - 1 \item \code{Concavity} as severity of concave portions of the contour \item \code{Nconcave} as number of concave portions of the contour \item \code{Symmetry} as cell nucleus shape \item \code{Fractaldim} as fractal dimension, "coastline approximation" - 1 } For each feature the recorded values are computed from each image as \code{_mean}, \code{_se}, and \code{_extreme}, for the mean, the standard error, and the mean of the three largest values. } \source{The Breast Cancer Wisconsin (Diagnostic) Data Set (\code{wdbc.data}, \code{wdbc.names}) from the UCI Machine Learning Repository \url{https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)}. Please note the UCI conditions of use.} \references{ Mangasarian, O. L., Street, W. N., and Wolberg, W. H. (1995) Breast cancer diagnosis and prognosis via linear programming. \emph{Operations Research}, 43(4), pp. 570-577. 
} \keyword{datasets} mclust/man/summary.Mclust.Rd0000644000176200001440000000245114241635166015600 0ustar liggesusers\name{summary.Mclust} \alias{summary.Mclust} \alias{print.summary.Mclust} \title{Summarizing Gaussian Finite Mixture Model Fits} \description{Summary method for class \code{"Mclust"}.} \usage{ \method{summary}{Mclust}(object, classification = TRUE, parameters = FALSE, \dots) \method{print}{summary.Mclust}(x, digits = getOption("digits"), \dots) } \arguments{ \item{object}{An object of class \code{'Mclust'} resulting of a call to \code{\link{Mclust}} or \code{\link{densityMclust}}.} \item{x}{An object of class \code{'summary.Mclust'}, usually, a result of a call to \code{summary.Mclust}.} \item{classification}{Logical; if \code{TRUE} a table of MAP classification/clustering of observations is printed.} \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } % \details{} % \value{} \author{Luca Scrucca} % \note{} \seealso{\code{\link{Mclust}}, \code{\link{densityMclust}}.} \examples{ \donttest{ mod1 = Mclust(iris[,1:4]) summary(mod1) summary(mod1, parameters = TRUE, classification = FALSE) mod2 = densityMclust(faithful, plot = FALSE) summary(mod2) summary(mod2, parameters = TRUE) } } \keyword{cluster} mclust/man/entPlot.Rd0000644000176200001440000000540214124774626014266 0ustar liggesusers\name{entPlot} \alias{entPlot} \title{ Plot Entropy Plots } \description{ Plot "entropy plots" to help select the number of classes from a hierarchy of combined clusterings. } \usage{ entPlot(z, combiM, abc = c("standard", "normalized"), reg = 2, \dots) } \arguments{ \item{z}{ A matrix whose \code{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th class, for the initial solution (ie before any combining). 
Typically, the one returned by \code{Mclust}/BIC. } \item{combiM}{ A list of "combining matrices" (as provided by \code{clustCombi}), ie \code{combiM[[K]]} is the matrix whose \emph{k}th row contains only zeros, but in columns corresponding to the labels of the classes in the \emph{(K+1)}-classes solution to be merged to get the \emph{K}-classes combined solution. \code{combiM} must contain matrices from \code{K} = number of classes in \code{z} to one. } \item{abc}{ Choose one or more of: "standard", "normalized", to specify whether the number of observations involved in each combining step should be taken into account to scale the plots or not. } \item{reg}{ The number of parts of the piecewise linear regression for the entropy plots. Choose one or more of: 2 (for 1 change-point), 3 (for 2 change-points). } \item{\dots}{ Other graphical arguments to be passed to the plot functions. } } \details{ Please see the article cited in the references for more details. A clear elbow in the "entropy plot" should suggest the user to consider the corresponding number(s) of class(es). } \value{ if \code{abc = "standard"}, plots the entropy against the number of clusters and the difference between the entropy of successive combined solutions against the number of clusters. if \code{abc = "normalized"}, plots the entropy against the cumulated number of observations involved in the successive combining steps and the difference between the entropy of successive combined solutions divided by the number of observations involved in the corresponding combining step against the number of clusters. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. 
Scrucca } \seealso{ \code{\link{plot.clustCombi}}, \code{\link{combiPlot}}, \code{\link{clustCombi}} } \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) # run Mclust to get the MclustOutput output <- clustCombi(data = ex4.2, modelNames = "VII") entPlot(output$MclustOutput$z, output$combiM, reg = c(2,3)) # legend: in red, the single-change-point piecewise linear regression; # in blue, the two-change-point piecewise linear regression. } } \keyword{ cluster } mclust/man/cdens.Rd0000644000176200001440000000600513766613336013736 0ustar liggesusers\name{cdens} \alias{cdens} \title{ Component Density for Parameterized MVN Mixture Models } \description{ Computes component densities for observations in MVN mixture models parameterized by eigenvalue decomposition. } \usage{ cdens(data, modelName, parameters, logarithm = FALSE, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{logarithm}{ A logical value indicating whether or not the logarithm of the component densities should be returned. The default is to return the component densities, obtained from the log component densities by exponentiation. 
} \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric matrix whose \code{[i,k]}th entry is the density or log density of observation \emph{i} in component \emph{k}. The densities are not scaled by mixing proportions. } \note{ When one or more component densities are very large in magnitude, it may be possible to compute the logarithm of the component densities but not the component densities themselves due to overflow. } \seealso{ \code{\link{cdensE}}, \dots, \code{\link{cdensVVV}}, \code{\link{dens}}, \code{\link{estep}}, \code{\link{mclustModelNames}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}}, \code{\link{do.call}} } \examples{ z2 <- unmap(hclass(hcVVV(faithful),2)) # initial value for 2 class case model <- me(modelName = "EEE", data = faithful, z = z2) cdens(modelName = "EEE", data = faithful, logarithm = TRUE, parameters = model$parameters)[1:5,] data(cross) odd <- seq(1, nrow(cross), by = 2) oddBIC <- mclustBIC(cross[odd,-1]) oddModel <- mclustModel(cross[odd,-1], oddBIC) ## best parameter estimates names(oddModel) even <- odd + 1 densities <- cdens(modelName = oddModel$modelName, data = cross[even,-1], parameters = oddModel$parameters) cbind(class = cross[even,1], densities)[1:5,] } \keyword{cluster} mclust/man/classPriorProbs.Rd0000644000176200001440000001050214124774626015765 0ustar liggesusers\name{classPriorProbs} \alias{classPriorProbs} % R CMD Rd2pdf classPriorProbs.Rd \title{Estimation of class prior probabilities by EM algorithm} \description{ A simple procedure to improve the estimation of class prior probabilities when the training data does not reflect the true a priori probabilities of the target classes. 
The EM algorithm used is described in Saerens et al (2002).} \usage{ classPriorProbs(object, newdata = object$data, itmax = 1e3, eps = sqrt(.Machine$double.eps)) } \arguments{ \item{object}{ an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}. } \item{newdata}{ a data frame or matrix giving the data. If missing the train data obtained from the call to \code{\link{MclustDA}} are used. } \item{itmax}{ an integer value specifying the maximal number of EM iterations. } \item{eps}{ a scalar specifying the tolerance associated with deciding when to terminate the EM iterations. } } \details{ The estimation procedure employs an EM algorithm as described in Saerens et al (2002). } \value{A vector of class prior estimates which can then be used in the \code{\link{predict.MclustDA}} to improve predictions.} \references{ Saerens, M., Latinne, P. and Decaestecker, C. (2002) Adjusting the outputs of a classifier to new a priori probabilities: a simple procedure, \emph{Neural computation}, 14 (1), 21--41. } \seealso{\code{\link{MclustDA}}, \code{\link{predict.MclustDA}}} \examples{ \donttest{ # generate data from a mixture f(x) = 0.9 * N(0,1) + 0.1 * N(3,1) n <- 10000 mixpro <- c(0.9, 0.1) class <- factor(sample(0:1, size = n, prob = mixpro, replace = TRUE)) x <- ifelse(class == 1, rnorm(n, mean = 3, sd = 1), rnorm(n, mean = 0, sd = 1)) hist(x[class==0], breaks = 11, xlim = range(x), main = "", xlab = "x", col = adjustcolor("dodgerblue2", alpha.f = 0.5), border = "white") hist(x[class==1], breaks = 11, add = TRUE, col = adjustcolor("red3", alpha.f = 0.5), border = "white") box() # generate training data from a balanced case-control sample, i.e. 
# f(x) = 0.5 * N(0,1) + 0.5 * N(3,1) n_train <- 1000 class_train <- factor(sample(0:1, size = n_train, prob = c(0.5, 0.5), replace = TRUE)) x_train <- ifelse(class_train == 1, rnorm(n_train, mean = 3, sd = 1), rnorm(n_train, mean = 0, sd = 1)) hist(x_train[class_train==0], breaks = 11, xlim = range(x_train), main = "", xlab = "x", col = adjustcolor("dodgerblue2", alpha.f = 0.5), border = "white") hist(x_train[class_train==1], breaks = 11, add = TRUE, col = adjustcolor("red3", alpha.f = 0.5), border = "white") box() # fit a MclustDA model mod <- MclustDA(x_train, class_train) summary(mod, parameters = TRUE) # test set performance pred <- predict(mod, newdata = x) classError(pred$classification, class)$error BrierScore(pred$z, class) # compute performance over a grid of prior probs priorProp <- seq(0.01, 0.99, by = 0.01) CE <- BS <- rep(as.double(NA), length(priorProp)) for(i in seq(priorProp)) { pred <- predict(mod, newdata = x, prop = c(1-priorProp[i], priorProp[i])) CE[i] <- classError(pred$classification, class = class)$error BS[i] <- BrierScore(pred$z, class) } # estimate the optimal class prior probs (priorProbs <- classPriorProbs(mod, x)) pred <- predict(mod, newdata = x, prop = priorProbs) # compute performance at the estimated class prior probs classError(pred$classification, class = class)$error BrierScore(pred$z, class) matplot(priorProp, cbind(CE,BS), type = "l", lty = 1, lwd = 2, xlab = "Class prior probability", ylab = "", ylim = c(0,max(CE,BS)), panel.first = { abline(h = seq(0,1,by=0.05), col = "grey", lty = 3) abline(v = seq(0,1,by=0.05), col = "grey", lty = 3) }) abline(v = mod$prop[2], lty = 2) # training prop abline(v = mean(class==1), lty = 4) # test prop (usually unknown) abline(v = priorProbs[2], lty = 3, lwd = 2) # estimated prior probs legend("topleft", legend = c("ClassError", "BrierScore"), col = 1:2, lty = 1, lwd = 2, inset = 0.02) # Summary of results: priorProp[which.min(CE)] # best prior of class 1 according to classification error 
priorProp[which.min(BS)] # best prior of class 1 according to Brier score priorProbs # optimal estimated class prior probabilities } } \keyword{classif} mclust/man/chevron.Rd0000644000176200001440000000107612460535131014273 0ustar liggesusers\name{chevron} \alias{chevron} \title{Simulated minefield data} \usage{data(chevron)} \description{A set of simulated bivariate minefield data (1104 observations).} \references{ A. Dasgupta and A. E. Raftery (1998). Detecting features in spatial point processes with clutter via model-based clustering. \emph{Journal of the American Statistical Association 93:294-302}. C. Fraley and A.E. Raftery (1998). \emph{Computer Journal 41:578-588}. G. J. McLachlan and D. Peel (2000). \emph{Finite Mixture Models}, Wiley, pages 110-112. } \keyword{datasets} mclust/man/plot.MclustDA.Rd0000644000176200001440000001453414124774626015300 0ustar liggesusers\name{plot.MclustDA} \alias{plot.MclustDA} \title{Plotting method for MclustDA discriminant analysis} \description{ Plots for model-based mixture discriminant analysis results, such as scatterplot of training and test data, classification of train and test data, and errors. } \usage{ \method{plot}{MclustDA}(x, what = c("scatterplot", "classification", "train&test", "error"), newdata, newclass, dimens = NULL, symbols, colors, main = NULL, \dots) } \arguments{ \item{x}{ An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}. } \item{what}{ A string specifying the type of graph requested. Available choices are: \describe{ \item{\code{"scatterplot"} =}{a plot of training data with points marked based on the known classification. 
Ellipses corresponding to covariances of mixture components are also drawn.} \item{\code{"classification"} =}{a plot of data with points marked based on the predicted classification; if \code{newdata} is provided then the test set is shown otherwise the training set.} \item{\code{"train&test"} =}{a plot of training and test data with points marked according to the type of set.} \item{\code{"error"} =}{a plot of training set (or test set if \code{newdata} and \code{newclass} are provided) with misclassified points marked.} } If not specified, in interactive sessions a menu of choices is proposed. } \item{newdata}{ A data frame or matrix for test data. } \item{newclass}{ A vector giving the class labels for the observations in the test data (if known). } \item{dimens}{ A vector of integers giving the dimensions of the desired coordinate projections for multivariate data. The default is to take all the available dimensions for plotting. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{factor}). The default is given by \code{mclust.options("classPlotColors")}. } \item{main}{ A logical, a character string, or \code{NULL} (default) for the main title. If \code{NULL} or \code{FALSE} no title is added to a plot. If \code{TRUE} a default title is added identifying the type of plot drawn. If a character string is provided, this is used for the title. 
} \item{\dots}{further arguments passed to or from other methods.} } %\value{} \details{ For more flexibility in plotting, use \code{mclust1Dplot}, \code{mclust2Dplot}, \code{surfacePlot}, \code{coordProj}, or \code{randProj}. } \author{Luca Scrucca} \seealso{ \code{\link{MclustDA}}, \code{\link{surfacePlot}}, \code{\link{coordProj}}, \code{\link{randProj}} } \examples{ \donttest{ odd <- seq(from = 1, to = nrow(iris), by = 2) even <- odd + 1 X.train <- iris[odd,-5] Class.train <- iris[odd,5] X.test <- iris[even,-5] Class.test <- iris[even,5] # common EEE covariance structure (which is essentially equivalent to linear discriminant analysis) irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA", modelNames = "EEE") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # common covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train, modelType = "EDDA") summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) # general covariance structure selected by BIC irisMclustDA <- MclustDA(X.train, Class.train) summary(irisMclustDA, parameters = TRUE) summary(irisMclustDA, newdata = X.test, newclass = Class.test) plot(irisMclustDA) plot(irisMclustDA, dimens = 3:4) plot(irisMclustDA, dimens = 4) plot(irisMclustDA, what = "classification") plot(irisMclustDA, what = "classification", newdata = X.test) plot(irisMclustDA, what = "classification", dimens = 3:4) plot(irisMclustDA, what = "classification", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "classification", dimens = 4) plot(irisMclustDA, what = "classification", dimens = 4, newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 3:4) plot(irisMclustDA, what = "train&test", newdata = X.test, dimens = 4) plot(irisMclustDA, what = "error") plot(irisMclustDA, what = "error", dimens = 3:4) 
plot(irisMclustDA, what = "error", dimens = 4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 3:4) plot(irisMclustDA, what = "error", newdata = X.test, newclass = Class.test, dimens = 4) # simulated 1D data n <- 250 set.seed(1) triModal <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5)) triClass <- c(rep(1,n), rep(2,n), rep(3,n)) odd <- seq(from = 1, to = length(triModal), by = 2) even <- odd + 1 triMclustDA <- MclustDA(triModal[odd], triClass[odd]) summary(triMclustDA, parameters = TRUE) summary(triMclustDA, newdata = triModal[even], newclass = triClass[even]) plot(triMclustDA) plot(triMclustDA, what = "classification") plot(triMclustDA, what = "classification", newdata = triModal[even]) plot(triMclustDA, what = "train&test", newdata = triModal[even]) plot(triMclustDA, what = "error") plot(triMclustDA, what = "error", newdata = triModal[even], newclass = triClass[even]) # simulated 2D cross data data(cross) odd <- seq(from = 1, to = nrow(cross), by = 2) even <- odd + 1 crossMclustDA <- MclustDA(cross[odd,-1], cross[odd,1]) summary(crossMclustDA, parameters = TRUE) summary(crossMclustDA, newdata = cross[even,-1], newclass = cross[even,1]) plot(crossMclustDA) plot(crossMclustDA, what = "classification") plot(crossMclustDA, what = "classification", newdata = cross[even,-1]) plot(crossMclustDA, what = "train&test", newdata = cross[even,-1]) plot(crossMclustDA, what = "error") plot(crossMclustDA, what = "error", newdata =cross[even,-1], newclass = cross[even,1]) } } \keyword{multivariate} mclust/man/hcRandomPairs.Rd0000644000176200001440000000224113750455151015362 0ustar liggesusers\name{hcRandomPairs} \alias{hcRandomPairs} \alias{randomPairs} \title{Random hierarchical structure} \description{Create a hierarchical structure using a random hierarchical partition of the data.} \usage{ hcRandomPairs(data, seed = NULL, \dots) } \arguments{ \item{data}{ A numeric matrix 
or data frame of observations. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{seed}{ Optional single value, interpreted as an integer, specifying the seed for random partition. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric two-column matrix in which the \emph{i}th row gives the minimum index for observations in each of the two clusters merged at the \emph{i}th stage of a random agglomerative hierarchical clustering. } \seealso{ \code{\link{hc}}, \code{\link{hclass}} \code{\link{hcVVV}} } \examples{ data <- iris[,1:4] randPairs <- hcRandomPairs(data) str(randPairs) # start model-based clustering from a random partition mod <- Mclust(data, initialization = list(hcPairs = randPairs)) summary(mod) } \keyword{cluster} mclust/man/summary.MclustDA.Rd0000644000176200001440000000262313465000766016005 0ustar liggesusers\name{summary.MclustDA} \alias{summary.MclustDA} \alias{print.summary.MclustDA} \title{Summarizing discriminant analysis based on Gaussian finite mixture modeling} \description{Summary method for class \code{"MclustDA"}.} \usage{ \method{summary}{MclustDA}(object, parameters = FALSE, newdata, newclass, \dots) \method{print}{summary.MclustDA}(x, digits = getOption("digits"), \dots) } \arguments{ \item{object}{An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} \item{x}{An object of class \code{'summary.MclustDA'}, usually, a result of a call to \code{summary.MclustDA}.} \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} \item{newdata}{A data frame or matrix giving the test data.} \item{newclass}{A vector giving the class labels for the observations in the test data.} \item{digits}{The number of significant digits to use when printing.} \item{\dots}{Further arguments passed to or from other methods.} } % \details{} \value{The function \code{summary.MclustDA} computes 
and returns a list of summary statistics of the estimated MclustDA or EDDA model for classification.} \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDA}}, \code{\link{plot.MclustDA}}.} \examples{ mod = MclustDA(data = iris[,1:4], class = iris$Species) summary(mod) summary(mod, parameters = TRUE) } \keyword{multivariate} mclust/man/estepE.Rd0000644000176200001440000000752714124774626014100 0ustar liggesusers\name{estepE} \alias{estepE} \alias{estepV} \alias{estepEII} \alias{estepVII} \alias{estepEEI} \alias{estepVEI} \alias{estepEVI} \alias{estepVVI} \alias{estepEEE} \alias{estepEEV} \alias{estepVEV} \alias{estepVVV} \alias{estepEVE} \alias{estepEVV} \alias{estepVEE} \alias{estepVVE} \title{ E-step in the EM algorithm for a parameterized Gaussian mixture model. } \description{ Implements the expectation step in the EM algorithm for a parameterized Gaussian mixture model. } \usage{ estepE(data, parameters, warn = NULL, \dots) estepV(data, parameters, warn = NULL, \dots) estepEII(data, parameters, warn = NULL, \dots) estepVII(data, parameters, warn = NULL, \dots) estepEEI(data, parameters, warn = NULL, \dots) estepVEI(data, parameters, warn = NULL, \dots) estepEVI(data, parameters, warn = NULL, \dots) estepVVI(data, parameters, warn = NULL, \dots) estepEEE(data, parameters, warn = NULL, \dots) estepEEV(data, parameters, warn = NULL, \dots) estepVEV(data, parameters, warn = NULL, \dots) estepVVV(data, parameters, warn = NULL, \dots) estepEVE(data, parameters, warn = NULL, \dots) estepEVV(data, parameters, warn = NULL, \dots) estepVEE(data, parameters, warn = NULL, \dots) estepVVE(data, parameters, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. 
} \item{parameters}{ The parameters of the model: %\itemize{ %\item An argument describing the variance (depends on the model): \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mu}}{ The mean for each component. If there is more than one component, this is a matrix whose columns are the means of the components. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. If not supplied or set to a negative value, the default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } %} } \item{warn}{ A logical value indicating whether or not certain warnings should be issued. The default is given by \code{mclust.options("warn")}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ Character string identifying the model. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ The input parameters. } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attribute}{ \code{"WARNING"}: An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{estep}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{do.call}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}}. 
} \examples{ \donttest{ msEst <- mstepEII(data = iris[,-5], z = unmap(iris[,5])) names(msEst) estepEII(data = iris[,-5], parameters = msEst$parameters)} } \keyword{cluster} mclust/man/mvnX.Rd0000644000176200001440000000572514124774626013601 0ustar liggesusers\name{mvnX} \alias{mvnX} \alias{mvnXII} \alias{mvnXXI} \alias{mvnXXX} \title{ Univariate or Multivariate Normal Fit } \description{ Computes the mean, covariance, and log-likelihood from fitting a single Gaussian (univariate or multivariate normal). } \usage{ mvnX(data, prior = NULL, warn = NULL, \dots) mvnXII(data, prior = NULL, warn = NULL, \dots) mvnXXI(data, prior = NULL, warn = NULL, \dots) mvnXXX(data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not a warning should be issued whenever a singularity is encountered. The default is given by \code{mclust.options("warn")}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \details{ \describe{ \item{\code{mvnXII}}{computes the best fitting Gaussian with the covariance restricted to be a multiple of the identity.} \item{\code{mvnXXI}}{computes the best fitting Gaussian with the covariance restricted to be diagonal.} \item{\code{mvnXXX}}{computes the best fitting Gaussian with ellipsoidal (unrestricted) covariance.} } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{mean}}{ The mean for each component. 
If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{mvn}}, \code{\link{mstepE}} } \examples{ \donttest{ n <- 1000 set.seed(0) x <- rnorm(n, mean = -1, sd = 2) mvnX(x) mu <- c(-1, 0, 1) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% (2*diag(3)), MARGIN = 2, STATS = mu, FUN = "+") mvnXII(x) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% diag(1:3), MARGIN = 2, STATS = mu, FUN = "+") mvnXXI(x) Sigma <- matrix(c(9,-4,1,-4,9,4,1,4,9), 3, 3) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% chol(Sigma), MARGIN = 2, STATS = mu, FUN = "+") mvnXXX(x) } } \keyword{cluster} mclust/man/dupPartition.Rd0000644000176200001440000000126414124774626015325 0ustar liggesusers\name{dupPartition} \alias{dupPartition} \title{Partition the data by grouping together duplicated data} \description{ Duplicated data are grouped together to form a basic partition that can be used to start hierarchical agglomeration. } \usage{ dupPartition(data) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. If a matrix or data frame, rows correspond to observations (\eqn{n}) and columns correspond to variables (\eqn{d}). } } \value{ A vector of indices indicating the partition. 
} \seealso{ \code{\link{hc}} } \examples{ \donttest{ dupPartition(iris[,1:4]) dupPartition(iris) dupPartition(iris$Species) } } \keyword{cluster} mclust/man/coordProj.Rd0000644000176200001440000001271114124774626014603 0ustar liggesusers\name{coordProj} \alias{coordProj} \title{ Coordinate projections of multidimensional data modeled by an MVN mixture. } \description{ Plots coordinate projections given multidimensional data and parameters of an MVN mixture model for the data. } \usage{ coordProj(data, dimens = c(1,2), parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, cex = 1, PCH = ".", main = FALSE, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{dimens}{ A vector of length 2 giving the integer dimensions of the desired coordinate projections. The default is \code{c(1,2)}, in which the first dimension is plotted against the second. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. 
Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following three options: \code{"classification"} (default), \code{"error"}, \code{"uncertainty"}. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances in case of \code{"classification"} or \code{"uncertainty"} plots. } \item{fillEllipses}{ A logical specifying whether or not to fill ellipses with transparent colors when \code{addEllipses = TRUE}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{scale}{ A logical variable indicating whether or not the two chosen dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. 
Default: \code{scale=FALSE} } \item{xlim, ylim}{ Arguments specifying bounds for the ordinate, abscissa of the plot. This may be useful when comparing plots. } \item{cex}{ A numerical value specifying the size of the plotting symbols. The default value is 1. } \item{PCH}{ An argument specifying the symbol to be used when a classification has not been specified for the data. The default value is a small dot ".". } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing a two-dimensional coordinate projection of the data, together with the location of the mixture components, classification, uncertainty, and/or classification errors. } \seealso{ \code{\link{clPairs}}, \code{\link{randProj}}, \code{\link{mclust2Dplot}}, \code{\link{mclust.options}} } \examples{ \donttest{ est <- meVVV(iris[,-5], unmap(iris[,5])) par(pty = "s", mfrow = c(1,1)) coordProj(iris[,-5], dimens=c(2,3), parameters = est$parameters, z = est$z, what = "classification", main = TRUE) coordProj(iris[,-5], dimens=c(2,3), parameters = est$parameters, z = est$z, truth = iris[,5], what = "error", main = TRUE) coordProj(iris[,-5], dimens=c(2,3), parameters = est$parameters, z = est$z, what = "uncertainty", main = TRUE) } } \keyword{cluster} mclust/man/classError.Rd0000644000176200001440000000375113467317376014765 0ustar liggesusers\name{classError} \alias{classError} \title{Classification error} \description{ Computes the error rate of a given classification relative to the known classes, and the location of misclassified data points.} \usage{ classError(classification, class) } \arguments{ \item{classification}{ A numeric, character vector or factor specifying the predicted class labels. Must have the same length as \code{class}. } \item{class}{ A numeric, character vector or factor of known true class labels. 
Must have the same length as \code{classification}. } } \value{ A list with the following two components: \item{misclassified}{ The indexes of the misclassified data points in a minimum error mapping between the predicted classification and the known true classes. } \item{errorRate}{ The error rate corresponding to a minimum error mapping between the predicted classification and the known true classes. } } \details{ If more than one mapping between predicted classification and the known truth corresponds to the minimum number of classification errors, only one possible set of misclassified observations is returned. } \seealso{ \code{\link{map}} \code{\link{mapClass}}, \code{\link{table}} } \examples{ (a <- rep(1:3, 3)) (b <- rep(c("A", "B", "C"), 3)) classError(a, b) (a <- sample(1:3, 9, replace = TRUE)) (b <- sample(c("A", "B", "C"), 9, replace = TRUE)) classError(a, b) class <- factor(c(5,5,5,2,5,3,1,2,1,1), levels = 1:5) probs <- matrix(c(0.15, 0.01, 0.08, 0.23, 0.01, 0.23, 0.59, 0.02, 0.38, 0.45, 0.36, 0.05, 0.30, 0.46, 0.15, 0.13, 0.06, 0.19, 0.27, 0.17, 0.40, 0.34, 0.18, 0.04, 0.47, 0.34, 0.32, 0.01, 0.03, 0.11, 0.04, 0.04, 0.09, 0.05, 0.28, 0.27, 0.02, 0.03, 0.12, 0.25, 0.05, 0.56, 0.35, 0.22, 0.09, 0.03, 0.01, 0.75, 0.20, 0.02), nrow = 10, ncol = 5) cbind(class, probs, map = map(probs)) classError(map(probs), class) } \keyword{cluster} mclust/man/majorityVote.Rd0000644000176200001440000000125613107132441015317 0ustar liggesusers\name{majorityVote} \alias{majorityVote} \title{Majority vote} \description{ A function to compute the majority vote (some would say plurality) label in a vector of labels, breaking ties at random.} \usage{ majorityVote(x) } \arguments{ \item{x}{A vector of values, either numerical or not.} } \value{A list with the following components: \item{table}{A table of votes for each unique value of \code{x}.} \item{ind}{An integer specifying which unique value of \code{x} corresponds to the majority vote.} \item{majority}{A string specifying 
the majority vote label.} } %\seealso{} \author{L. Scrucca} \examples{ x <- c("A", "C", "A", "B", "C", "B", "A") majorityVote(x) } mclust/man/Mclust.Rd0000644000176200001440000002063214516406600014077 0ustar liggesusers\name{Mclust} \alias{Mclust} \alias{print.Mclust} \title{Model-Based Clustering} \description{ Model-based clustering based on parameterized finite Gaussian mixture models. Models are estimated by EM algorithm initialized by hierarchical model-based agglomerative clustering. The optimal model is then selected according to BIC. } \usage{ Mclust(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive(), \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations (\eqn{n}) and columns correspond to variables (\eqn{d}). } \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the BIC is to be calculated. The default is \code{G=1:9}. } \item{modelNames}{ A vector of character strings indicating the models to be fitted in the EM phase of clustering. The default is: \itemize{ \item for univariate data (\eqn{d = 1}): \code{c("E", "V")} \item for multivariate data (\eqn{n > d}): all the models available in \code{mclust.options("emModelNames")} \item for multivariate data (\eqn{n <= d}): the spherical and diagonal models, i.e. \code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")} } The help file for \code{\link{mclustModelNames}} describes the available models. } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{\link{priorControl}}. \cr Note that, as described in \code{\link{defaultPrior}}, in the multivariate case only 10 out of 14 models may be used in conjunction with a prior, i.e. 
those available in \emph{MCLUST} up to version 4.4. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{\link{hc}}. \cr For multivariate data, the default is to compute a hierarchical agglomerative clustering tree by applying function \code{\link{hc}} with model specified by \code{mclust.options("hcModelName")}, and data transformation set by \code{mclust.options("hcUse")}.\cr All the input or a subset as indicated by the \code{subset} argument is used for initial clustering.\cr The hierarchical clustering results are then used to start the EM algorithm from a given partition.\cr For univariate data, the default is to use quantiles to start the EM algorithm. However, hierarchical clustering could also be used by calling \code{\link{hc}} with model specified as \code{"V"} or \code{"E"}. } \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase. No subset is used unless the number of observations exceeds the value specified by \code{mclust.options("subset")}, which by default is set to 2000 (see \code{\link{mclust.options}}). Note that in this case to guarantee exact reproducibility of results a seed must be specified (see \code{\link{set.seed}}).} \item{\code{noise}}{ A logical or numeric vector indicating an initial guess as to which observations are noise in the data. If numeric the entries should correspond to row indexes of the data. If supplied, a noise term will be added to the model in the estimation.} } } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued. The default is controlled by \code{\link{mclust.options}}. 
} \item{x}{ An object of class \code{'mclustBIC'}. If supplied, BIC values for models that have already been computed and are available in \code{x} are not recomputed. All arguments, with the exception of \code{data}, \code{G} and \code{modelName}, are ignored and their values are set as specified in the attributes of \code{x}. Defaults for \code{G} and \code{modelNames} are taken from \code{x}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ An object of class \code{'Mclust'} providing the optimal (according to BIC) mixture model estimation. The details of the output components are as follows: \item{call}{The matched call} \item{data}{The input data matrix.} \item{modelName}{ A character string denoting the model at which the optimal BIC occurs. } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The optimal number of mixture components. } \item{BIC}{ All BIC values. } \item{loglik}{ The log-likelihood corresponding to the optimal BIC. } \item{df}{ The number of estimated parameters. } \item{bic}{ BIC value of the selected model. } \item{icl}{ ICL value of the selected model. } \item{hypvol}{ The hypervolume parameter for the noise component if required, otherwise set to \code{NULL} (see \code{\link{hypvol}}). } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. 
} \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the test data belongs to the \emph{k}th class. } \item{classification}{ The classification corresponding to \code{z}, i.e. \code{map(z)}. } \item{uncertainty}{ The uncertainty associated with the classification. } } \references{ Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. C. Fraley and A. E. Raftery (2007) Bayesian regularization for normal mixture estimation and model-based clustering. \emph{Journal of Classification}, 24, 155-181. 
} \seealso{ \code{\link{summary.Mclust}}, \code{\link{plot.Mclust}}, \code{\link{priorControl}}, \code{\link{emControl}}, \code{\link{hc}}, \code{\link{mclustBIC}}, \code{\link{mclustModelNames}}, \code{\link{mclust.options}} } \examples{ mod1 <- Mclust(iris[,1:4]) summary(mod1) mod2 <- Mclust(iris[,1:4], G = 3) summary(mod2, parameters = TRUE) # Using prior mod3 <- Mclust(iris[,1:4], prior = priorControl()) summary(mod3) mod4 <- Mclust(iris[,1:4], prior = priorControl(functionName="defaultPrior", shrinkage=0.1)) summary(mod4) # Clustering of faithful data with some artificial noise added nNoise <- 100 set.seed(0) # to make it reproducible Noise <- apply(faithful, 2, function(x) runif(nNoise, min = min(x)-.1, max = max(x)+.1)) data <- rbind(faithful, Noise) plot(faithful) points(Noise, pch = 20, cex = 0.5, col = "lightgrey") set.seed(0) NoiseInit <- sample(c(TRUE,FALSE), size = nrow(faithful)+nNoise, replace = TRUE, prob = c(3,1)/4) mod5 <- Mclust(data, initialization = list(noise = NoiseInit)) summary(mod5, parameter = TRUE) plot(mod5, what = "classification") } \keyword{cluster} mclust/man/nVarParams.Rd0000644000176200001440000000225414515765000014703 0ustar liggesusers\name{nVarParams} \alias{nVarParams} \title{ Number of Variance Parameters in Gaussian Mixture Models } \description{ Gives the number of variance parameters for parameterizations of the Gaussian mixture model that are used in MCLUST. } \usage{ nVarParams(modelName, d, G, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{d}{ The dimension of the data. Not used for models in which neither the shape nor the orientation varies. } \item{G}{ The number of components in the Gaussian mixture model used to compute \code{loglik}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. 
} } \value{ The number of variance parameters in the corresponding Gaussian mixture model. } \details{ To get the total number of parameters in model, add \code{G*d} for the means and \code{G-1} for the mixing proportions if they are unequal. } \seealso{ \code{\link{bic}}, \code{\link{nMclustParams}}. } \examples{ mapply(nVarParams, mclust.options("emModelNames"), d = 2, G = 3) } \keyword{cluster} mclust/man/mclust-internal.Rd0000644000176200001440000000133414156711510015746 0ustar liggesusers\name{mclust-internal} \title{Internal MCLUST functions} \alias{pickBIC} \alias{bicFill} \alias{grid1} \alias{grid2} \alias{mvn2plot} \alias{vecnorm} \alias{traceW} \alias{qclass} \alias{unchol} \alias{shapeO} \alias{orth2} \alias{charconv} \alias{[.mclustBIC} \alias{checkModelName} \alias{balancedFolds} \alias{permuteRows} \alias{projpar.MclustDR} \alias{projdir.MclustDR} %\alias{mvdnorm} \alias{ellipse} \alias{eigen.decomp} \alias{getParameters.MclustDA} \alias{as.Mclust} \alias{as.Mclust.default} \alias{as.Mclust.densityMclust} \alias{as.densityMclust} \alias{as.densityMclust.default} \alias{as.densityMclust.Mclust} \description{ Internal functions not intended to be called directly by users. } \keyword{internal} mclust/man/em.Rd0000644000176200001440000001203514124774626013242 0ustar liggesusers\name{em} \alias{em} \title{EM algorithm starting with E-step for parameterized Gaussian mixture models} \description{ Implements the EM algorithm for parameterized Gaussian mixture models, starting with the expectation step. } \usage{ em(data, modelName, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. 
} \item{parameters}{ A names list giving the parameters of the model. The components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. If set to NULL or a negative value, the default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of mixture components. } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. 
} \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{control}{ The list of control parameters for EM used. } \item{prior}{ The specification of a conjugate prior on the means and variances used, \code{NULL} if no prior is used. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. 
} } \seealso{ \code{\link{emE}}, \dots, \code{\link{emVVV}}, \code{\link{estep}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{mclust.options}}, \code{\link{do.call}} } \examples{ \donttest{ msEst <- mstep(modelName = "EEE", data = iris[,-5], z = unmap(iris[,5])) names(msEst) em(modelName = msEst$modelName, data = iris[,-5], parameters = msEst$parameters) do.call("em", c(list(data = iris[,-5]), msEst)) ## alternative call } } \keyword{cluster} mclust/man/mclustBICupdate.Rd0000644000176200001440000000245114124774626015672 0ustar liggesusers\name{mclustBICupdate} \alias{mclustBICupdate} \title{Update BIC values for parameterized Gaussian mixture models} \description{ Update the BIC (Bayesian Information Criterion) for parameterized Gaussian mixture models by taking the best from BIC results as returned by \code{\link{mclustBIC}}. } \usage{ mclustBICupdate(BIC, \dots) } \arguments{ \item{BIC}{Object of class \code{'mclustBIC'} containing the BIC values as returned by a call to \code{\link{mclustBIC}}. } \item{\dots}{Further objects of class \code{'mclustBIC'} to be merged.} } \value{ An object of class \code{'mclustBIC'} containing the best values obtained from merging the input arguments. Attributes are also updated according to the best BIC found, so calling \code{\link{Mclust}} on the resulting ouput will return the corresponding best model (see example). } \seealso{ \code{\link{mclustBIC}}, \code{\link{Mclust}}. 
} \examples{ \donttest{ data(galaxies, package = "MASS") galaxies <- galaxies / 1000 # use several random starting points BIC <- NULL for(j in 1:100) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } pickBIC(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) } } \keyword{cluster} mclust/man/combiPlot.Rd0000644000176200001440000000473714124774626014603 0ustar liggesusers\name{combiPlot} \alias{combiPlot} \title{ Plot Classifications Corresponding to Successive Combined Solutions } \description{ Plot classifications corresponding to successive combined solutions. } \usage{ combiPlot(data, z, combiM, \dots) } \arguments{ \item{data}{ The data. } \item{z}{ A matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class, for the initial solution (ie before any combining). Typically, the one returned by \code{Mclust}/BIC. } \item{combiM}{ A "combining matrix" (as provided by \code{\link{clustCombi}}), ie a matrix whose kth row contains only zeros, but in columns corresponding to the labels of the classes in the initial solution to be merged together to get the combined solution. } \item{\dots}{ Other arguments to be passed to the \code{\link{Mclust}} plot functions. } } \value{ Plot the classifications obtained by MAP from the matrix \code{t(combiM \%*\% t(z))}, which is the matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class, according to the combined solution obtained by merging (according to \code{combiM}) the initial solution described by \code{z}. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. 
Scrucca } \seealso{ \code{\link{clustCombi}}, \code{\link{combMat}}, \code{\link{clustCombi}} } \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) MclustOutput <- Mclust(ex4.1) MclustOutput$G # Mclust/BIC selected 6 classes par(mfrow=c(2,2)) combiM0 <- diag(6) # is the identity matrix # no merging: plot the initial solution, given by z combiPlot(ex4.1, MclustOutput$z, combiM0, cex = 3) title("No combining") combiM1 <- combMat(6, 1, 2) # let's merge classes labeled 1 and 2 combiM1 combiPlot(ex4.1, MclustOutput$z, combiM1) title("Combine 1 and 2") # let's merge classes labeled 1 and 2, and then components labeled (in this # new 5-classes combined solution) 1 and 2 combiM2 <- combMat(5, 1, 2) \%*\% combMat(6, 1, 2) combiM2 combiPlot(ex4.1, MclustOutput$z, combiM2) title("Combine 1, 2 and then 1 and 2 again") plot(0,0,type="n", xlab = "", ylab = "", axes = FALSE) legend("center", legend = 1:6, col = mclust.options("classPlotColors"), pch = mclust.options("classPlotSymbols"), title = "Class labels:")} } \keyword{cluster} mclust/man/summary.MclustBootstrap.Rd0000644000176200001440000000262614241635004017471 0ustar liggesusers\name{summary.MclustBootstrap} \alias{summary.MclustBootstrap} \alias{print.summary.MclustBootstrap} \title{Summary Function for Bootstrap Inference for Gaussian Finite Mixture Models} \description{Summary of bootstrap distribution for the parameters of a Gaussian mixture model providing either standard errors or percentile bootstrap confidence intervals.} \usage{ \method{summary}{MclustBootstrap}(object, what = c("se", "ci", "ave"), conf.level = 0.95, \dots) } \arguments{ \item{object}{An object of class \code{'MclustBootstrap'} as returned by \code{\link{MclustBootstrap}}.} \item{what}{A character string: \code{"se"} for the standard errors; \code{"ci"} for the confidence intervals; \code{"ave"} for the averages.} \item{conf.level}{A value specifying the confidence level of the interval.} \item{\dots}{Further arguments passed to or from 
other methods.} } \details{For details about the procedure used to obtain the bootstrap distribution see \code{\link{MclustBootstrap}}.} %\value{} \seealso{\code{\link{MclustBootstrap}}.} \examples{ \donttest{ data(diabetes) X = diabetes[,-1] modClust = Mclust(X) bootClust = MclustBootstrap(modClust) summary(bootClust, what = "se") summary(bootClust, what = "ci") data(acidity) modDens = densityMclust(acidity, plot = FALSE) modDens = MclustBootstrap(modDens) summary(modDens, what = "se") summary(modDens, what = "ci") } } \keyword{htest} \keyword{cluster} mclust/man/figures/0000755000176200001440000000000013762445702014012 5ustar liggesusersmclust/man/figures/logo.png0000644000176200001440000020501213376734350015461 0ustar liggesusersPNG  IHDRX.' iCCPICC Profile8U]hU>sg#$Sl4t? % V46nI6"dΘ83OEP|1Ŀ (>/ % (>P苦;3ie|{g蹪X-2s=+WQ+]L6O w[C{_F qb Uvz?Zb1@/zcs>~if,ӈUSjF 1_Mjbuݠpamhmçϙ>a\+5%QKFkm}ۖ?ޚD\!~6,-7SثŜvķ5Z;[rmS5{yDyH}r9|-ăFAJjI.[/]mK 7KRDrYQO-Q||6 (0 MXd(@h2_f<:”_δ*d>e\c?~,7?& ك^2Iq2"y@g|UP`o@IDATxxGӀ$m {m!@4JBIB0Bz#|'BK# 00W;EۨI+$>,novnnwv `L 0&`L 0&`L 0&`L 0&`L 0&44L EI = %0 , !2zn]cP !QHQ4Jbo7O B V6㚅%7=, 0`-XY(p@7$%gQZ$tnV#$װy0 ~IQMcfq|T9! bL  /Y2(R~b4dR0R~]a3;0uq b]9y'`'ǑsL6;IL~lҶ'*<\n0Ԥ|N((G G~Lf Z|`n H&@wk?(U 7׵jbV4PӗflZV”RMUuWbLm0pZh=pRHT9F;>B] a­AZ1Xk30 ⻨iY<YU`@;3> !:fAdMD!I#!vнMQ".si Ph,g2`Ba # a `Vh |}efx]BܒPѦť0_l^3{^r}&'H&@EOcqx`Tݡ.javj3y0 $9oˬ\qY`a` og hLMa Ķ_u{@ATIGqy3RxeAaDir`'FgR3v鐡(jXxMJQ"1c/ ʗ2]=}*BY`aPdtqMiE 2W4{1AXQY[P67)Vrb#`[Xb=8aL6 ]L@+ߏ {{@aZiP+.RJ6Nr=[\^ ϟ-aE+F :f]8e@DIŏ}Q"*g0$:)ЏP`f YqLOa''=)c^n S(,Jeম@! @/yz#+̥Ҹ5m&1$_8x#p^QZt#ѹrs9W!3: )YYEPť aA 8OP:Z5Mh"T4QI\7AXddQU#Y{O k O°!AhcބCS1A=1!0Bk(<*{? 
N,NŘf_=n 06 |#9c~G#я@.ը` antȢtы,};|>QZRdºö ʂ`OO,wK v$Ci$ "Z!?pm[ !d:_^XQlzg fᅒW6|0A1A0Nl %"Tջ  ׎%үl9pZ?~謰~)0[=c|I> Ӑ1`u0 h*JZqpmOGa Tӽo1r\x&G\#46Jf8\XA&Alx67o]F/h)S0AQΞ_OC/*~_J0GRIS_L||Xw!:;/64:)2x KBЫ}2t5b…BPz0!BAxyƅC fILa{5E!׼^I_C ^ؖyMe=}C?ȡ 1cĩ$ *l6MȜyNQ2Y46 C %cq$c:d(kƖC{CW>|a39Ifʆ,lv~pasc鐥Eu!ޣ g.LB {7+^Y-"\~ z<+B b"(WdH=$p:dQHRa8 7 O,/OF|ǝyl)L2i|&;z DErנu}aErtL`ݮJ}ATilY&dDLD`@I#@JƂG BGC6fYK^@ws7u+:#czgVP\$.2j^98eq%F"@0 [cL"BDe9L 'a|Dw3J&5X:9F! "bjL,@ )a(Q"Y('@靧|67 I N\?x^M*.cɎbҨV5ȸP~.Lrf}O k AeC&1 0!9c~GEDjyh8u("&)%fh_;okS}v_P8.o|,sE%ޮZhq]R6a:.riicÕ:wh> 藀Lbzp6W\zgeyYCoY3_ /%Rhh\'JEJ|MaHYTKyJz-{w艋̥O5.;J?ES0-cQ5.xC>S!h[`:u5bn"靧~2w._>z%-!-6 t{j$CC!Kb7 nmcY'&kvSfoQy:'~&L( {l)Ԍ)t#pA( ȅ8 J,a>Kd1߶A,I0:Ê{{ o6 ݢzd0a3c9L)܇+ŨHAz'~>b4d)z!^΄t;A!KÓZ(;@^ȅ 0;ӿ΂w_Q^9l;k%5ʕqu2e 4i63@jڮ{J`YV6&8zF\zg^-qy̡[m7,6 _1 :$#.ts[Ã>+8s(&PF/ .&aXp4ƣVaBYO`OSFϫe0?#EC6u-:u[Ŝ( 0,u0y%6tA66 8)$V 4C ZNMibDd9L T!?[ǥg̲cK߃%K KilxN(jG¨kBv\Drq~e)IW&_3sod$_!)Azh~ $)rpgYT4(,.U;/YKhzg?T.y|{hP86 |UGEOdFQqWtȎX~&n LrqMPd;_ڎ#ąe!KL @u'!wi> /nE׺~Ƹy2CŔ0QT;>r^H\ KaDLR-6 / t.l- 16П!Br[Pz祫ܟ`xȱS8Ϩc}l=)YC$:Pa1֎CF  ~LqP<"YzhA_ɒE on1 T<P鐯ȅ aȅ 0"@? ΅%#+M`v= eA'G.sB^@,F"C0_ktȢ&[0LÚB;eN~oRM16 |&|3Cą \_\zg4 2qafo\eW)A#-0ō @n]KJ<{Sҧ%s:dm6`+靿g(E梧!pBXR8Gό4+S24K2 0,NKb@MpA6LECV`uX+0&w~~x-p&G\#e5Ӈ E ;"X TU3b:9g.L 0&p@~wu+|r7`,aE?W=l0~( 'ýbCCct}:tF C73&GN*zTY.D Ι_߸ph8# !%47pjTu`CnUOXegU`>E'.[>l6c3B]EaP~1 {ʘ٥y@й>F0n`L ,J/LL`v= JN@R,hh0_Ïqkb:Pa3N&᝹8@2niրsd;:"GAՠOz {Sr(x-*1"p§Ж+ІA! "c O L,Sq!'Ut.]$@?YˡP L@/ӗfZ4|(䋧[-& c}&u"b;qxQFAҽ>xUL |_wx鞮\#JDRִo#wi4Q+&"o 2!raL 0MHÜ24]j_F(㫴ſu3orU/0A15i:Ir&#Ʈ`8!tYW&2(aN| ~\w@Hzg*iޖͦ2gsoGסtͯQ Z)x1+.p";.58J#"|]4py$  \rN$}ܒ8ݿK(Z_n 3.œzBi.;S`p#ї#M DY/פ6 Q+t}z)>rurkܴp{|3:TBڃAH=bȰ һ% EϦ52 0&O; ,};|NTϮ%.oBq#xُzTaujl1~X<޶GNh$3CnqQV4brEq,Yo8rN ވύF#rpf,>^G 0jad| .0!wtȢpVI|0HkQgmIl8F2ΙӇV$M֮/6Q 3R"s:dAL YJX_2 עsb/Am$A_0H_7gA_|0@?gX<%H.L 0&`9=1/8x2gG)fbx忷ICwo2v%@}+d.3]dHpdph.4e4O`46 HK_p #)鋯4e c} ѡ0!_Ҁ! 
!BS*ZO(p0?oÇd0?꒱(,k]f6 :/j_GcvAQ!ֽ Ř4ą 0&7oVSդ85]]Vq3,8@zq}&?g6REwER:䉢8ga(|8&_[Fg+a\ĴYa+hdx=Vӭ],I8V7#)1:J߰~EYU@ؼ4LZJ̐£]0+vB \ 3N7uſ%̼wx0t¤Pe!lu,dХ(.L 0&'ryAq_'T; 0s~k:a%  Yh /xn#Du( ^3 dL Qgjr p7vKN*7ߘ`Bxݕ@\$_- #nFi_gYYgLtdicM\q;|V+>c/oEUQFAy Lƹ 6 Ԟ~N }O8!X'Тnu5^(smXl||ih Fϸȿ Q1! T!с~(N/sln*ViB+7 {ObpJȰ SӚХe-!Y&ڝaGY*1>OzձDh9VTOo_("NӦ`s@Ƀvb'&&2jdP/!'(N^rgr nzT wOW3Js||.'‚ :<tTC~LBt5O⫅] ^#z͋|(C/} Q7. 6ks0Pzl&o$]x—vc]tȩė{">CN7L}µS}ZF ֠+qZCSժ2S|yv1i.1Fv؉7L(zo5a:1G?YX1'L܅/qE /gXZMЏJBOԴ]F=j|/mYG+/$ck#7F^сTDn˘Ewa6z]21=eCFqNжa Kehn 5~=#4=iT4O*hI+,YhwԴa/z{.A?vmN ZţJ0VjRVP@+2wB_sC{BW1E?FjGSg aIAƕ8R}ȳ`Z\As3\ ''zF;yiXVhjb56 4TV&`;+.9T)j5c#fYӐtrpT'YM͓3NojvռWۥy4 ꖇ4 nQ YUHh1ta<]!鐫_iKy|XL>BfwyۣA뿿YWɣn}q􃞨xۓMIkPW(K 19SRl ZWotn-qݍf3fb7SEKr遉d$SL 2˦\Mjt^G.oUQKE$Q=0[6v^>h - x#*&dI 6:I<mͩ !7{oT5(xQ$SkǪjnNqUcd {#uR1Q)Ep{ՎP Qa@~42 \U}-٭[CRUXQmN V }gkZRHd jD/.Ob HWo;o rt G,4fJ6t֚`+7|\~Q)"| UX;R0pQ&D^KsdL- ҥ"5zr ?'OY%*Fw[(7 ;*ha -ym{؈ yWEխd$|3&?$HBY#̋:+d֔m#@Mo Q;JZZ. c rգPH4BI{\)o=$ΨA9߸{@P '*Zccc KR\ J4'xi5(aPVTQ)kG) `*6ʟ=G+804BJ!g,Jڤ*CLPB/bIJraZhu.oһC֭]6_ p`xŝ~ŏ Aqߎk(FV/P fe&:gzl眆lٍ6?p $KPh⭀Bjz%@Fh %ȒL KaкUB{t)M*x5.֯2HhP-REKӔrlڪԣwj 9e` .LJra`ݚKi+"+N' ӄ!?sW2҂T]9?ŽmqSTiQW[88EC_Ä?:S(٪ަ@2rE&ũ|a|+/zo|e(- wtjT]n8#i }/>M,hML{6ŤZj'9hrdHm[Y!r+  {1E>|j%̲+gNˆnk 8E=YTSCt`,(!yDžBw 4Ey3Ė G KYIZrSj LG0c΀Uw9NA5U퓡 FHT;RTp/Jx__t1hx y.wr-? X#01)32.U 1M[Qv4'gtc~x^kĈ Ym?^Z| r9 E G C>?Te&%S*erR%Vk\lOxx,ujݡ/`ΖV$pa.Ŷoa=y/(Ø WĴ;7qa a rA.h}@+$0Ghi&]T"eIo6//%8 "(:Lp UĹp!@b(q}V\L#0@& "(L;uUahYH0-(r[~2vG`8,}2gQ-jq]&`YѨ> Sq)RՄwkR(.zv?nA>B  ~hZ҃OP6 4x=~jѿ54r1@^x'Yv_6X aD. 
hɗa%e m>iecڕktڝǁ.0x@+1$:~6/T'" "Ƶcaf]&iZٟ/ OL@6 4KO'僖K~B5-,/,@R6EDaP1\ 6 :`Hf\$\@{gnDϙq6g@+ګVi]kp:%P}D<ma ܦPPbTpOBT&,-{hIYˡn}7[zQ8^M@+ 3=$/Bڵnnr֪{~sU7_R\&a?{"v kyuc\I*3QRb^'8Qɴy)dQ5ga3u7Tw6 -qO~b nw7h$PǕypGOP\lx5s0kb[  ]7I[UnhY2 La?{"@NUK`߮+;<[կ˲<:u@UE2v])j^W@X6 tvBX.ț^mpؼ6 M ~{Qys"8($5 U'>ʑ8]Ywa;5Z/~cF[*%9|JIk؅axEIk$b zȓ^R:H(U8T?g%x0xҹ LS:\ 3frؙ57.JrA:꯭Q}ľnv?C\.%raaKwrY{mNPWkQWF a(Ywj;o>s,Y rN'%IC_w,o4i:|6ͣX0^FG|z딑Ԗ0Ǚ|c@IDATEKE ~‡|{Y9|%y=chM (<3H ^!S߶Qs-}.g='! _k$xux7i* 'E>_+hӠF(O=>^D8<77>[~;UR79Њ*YL9؂ΡZ I̅N٦Yt7Jub/iUyH5C8u_j5UBgjUxM)4s_1:cHø e؄nA˔=k0pZԭ7T/O  Y2b"BJpN9=[+0Sa"X6Wc=aJ^43dd NˉׁpT=zg}TNjO#C jh ٬,t8,tRyӊQ`EGÎ^TPMɺ(gj gWZ@Vd#+9ں,mS; WhlQ-XoQٔ]#QJ Q(>!Rjct̫SXa*ݚ~|=.+I?. eNZOk[pF7ZzU-;ђF1˚$­s$E/_=dW%:ݫ.nPUFYELcXēϔ  1}>kcotsvYS* ]9us.AVǦlwOǺ4HE詟|nUa]dˆ!@sz ywo-N;7r@~> ;ShJaCk iCC 1BtxE*զJǧ9Jt"|3#gްiز\K:'ҁAӚho4szWևioONqΙ'!/iޜja H8)h1D8A ;lFG目%6Y c/zxQzh%]ލk)`|:Q" LCώ t4PC2ijp݅L)M5P(]Înd\Fy(@R3 -ky atL=}ߨR"{>(oD^C 8%A^dzb_stW%u*4.}Ev #Jcr>PFntp=S4LU4d;B 3pdlRHzU-0X1J+P,Q48 T^U",MiI]U74E/.O ? 
@7::z mL/.L@xU `LK0xn 0& adz:1&6 eL 0&GlNL 0&D /f`L z<+`L 0/`KY&`z$ `LK0xn 0& adz:1&6 eL 0&GlNL 0&D /f`L z<+`L 0/`KY&`z$ `LK0xn 0& adz:1&6 eL 0&GlNL 0&D /f`L z<+`L 0/`KY&`z$GX'&@(.M%p4@`4H} aÓ*1&ٹ󸹼Ýa}QA9Q(ܽǡF `L 8OG gGz@wA9b)>v Nᦙ`C 9Փ䧞M>_guL O%.fL 0&cxN?A{?̅VT-j aY`L@<6 3e* l(A{%6hOT>0*My L 0&6 GH82 -U[m>Q` .L 0&<6 gGBJct"p~8jWvVusպw0&6lhŵEd>B&+-[n0h@}ӫ( ȑKK\l= |qaL z:]0n=<c @i<2?=ͺ& ^^Oq*0 n1qx8, T"ptLFD?#oMt /[9bX)V9aroT1xEevлNQU.*rlc\'L J*3aS)i"+oL@'0ɉ85Z cȵŒO kreƮW\ril$)7l;j㎣ϳʼ xO%x87WF@8wBcl '$( L\|EXbVրc_0sqb[߮zBG D뱛->r7dl##FkGՅtd E<2uԺo*]{BR8sF3fzvI,M#  a/ϝ%3[i cŮB3aP[_ΝY 漲'N\Ҋ+S>>͂aOSfhhAJC?B ?n*/-/DCpmå0ln +0(yp>@l_z՘b>H3Ukv2I.鐋Ŏ>+@@/o ʍ1{NaB/ܲỌˍ:]&}]8r"?w a0VVD{X)81BV6&Ԩ?{&ys *7Cll'/u~i߶a` o6 "`n"Wρ)BàqU7w6&|%ݣ&W%rђ[PhmRTE00w0^ 襵\o(ܳG9,s} \(%GO/VAP{GF\mUM-DSaI^e$9?k~jCS\/a;*7&+Qm% ߜ 79k VBC  wqUn$`)YMeQw6 €ښaOЬAqlUG0.RC{[" ҽ}pa JR$ *thv rr `cO@Uq Ѷ-FsZ!_j^)=}\{$?$zk_Dm{D{0WI`m}I/FZ\2I;70B\$#80+3C\|4;od'eu.tX^ 'v-jvk&ˇS߄#߂ݻ@ڜ]Ҏ tg5 @ +l!xW$k..[ )@|g*%~گ]M{ˮ:=?v);1>Z/qLIJFӹs8].v,@H.85Ⱦh֤E yjN r/$e3}7㢜Nz6{ܠ[a, 8ÏZrb⒣._qJ/\SnYٔm}U4 ,͒_$;S~Jl6#{d%ye΄аa砡6k{{t,B׃vm79S? 8b`Ǹ ΖB`6T2=Uh?z釀- .)Tݏ⁧G`v["n:Zi'MЋK`JDjh8޼3^NS!Wo?Wf $phxPz"(ai! P'{|v،YT/`%Efa\PXvQ+Ifd͝Bp S@GI|j].p/S7{#W˩ӮPm5fX?%Ryod$7&wyq(xWb$0!.ʨK6(AS2x@5tS oPkwyK{Gs,nK^M#"OaC2E7z g ]tlwTŎ! 
tnkcǎ"0b͐S$a gP DE0PSK>CϿXޱmڗwCWB޼cjd쪛~^^V (P[aeqzI֬Y֭S^ّPiѢz11 L <[8F鼊ȃÎ;pa؉M$BW+, e\ׅR/jWt޽7nt~7O?\2MVV1c\s50tPCjdr#S wQ)2Ž:ҝaM7_᧏?C1ڞ< N+S 6tBNE+ 淟3.aoEjO);RCbĉ0|a`JLL( U_>^4Pq̄ȰWv`ʔ)D$AtreߙQ;(j'fR ##V>of9lA=@gR)ַTJ(z% V-Jdcv.l?z&rg) є… O?\A.]SNбcGŗ -Q={ʕ+aٲepԩr1s==DFFoI < ›4)w?VvAxsZqƐ a_ѹ:w Ch _2MzCJ 7xjԨQU1z©OحSvŕ7*A]Cpג]^}U:śo飌TY ][V^Z뮻|??PV\qwZ%8'|4Z(}BZZiW&^LJ${cg([MEF( A O4h̚5K104}y5Bї_%iQCp934Z/fϞ]iڀK 0`àZ=0=߻KaEpj1C!.yuŊd<_0>~lι^gYwgL$\.=8h B ̷o4_1Xt'w<u+bnpt"vN_v*E Yu}邩S?PiMr}X  -5)ؘmK`h 2+SdFH Tmۦ'NT#`/ pw/jS7cËqbqCi%#-(m) V+|R˃[o>{n BQZ̿>3CFkin6mT$!!AyСƲ#P)}+wT4 ,4'Ő\2mF)!Cd.]TZCys6pqO}ej[RMX KQ)tzdڑQɻղ`r-[4@\|@7] b$h^D+4E UfVT[v-EW' |D%##3gɓ'!#sr%\!ujCPLwZäK7hX -'ahJqyQ])pbXرc!GyDY:F̙k׆yzk`]{S<2 hR=P%>zpŪF \~([[\~&u:X sG;a8K98λ@E/='! (WAXUVy6Wܰ4Ȕypue܁c^񩌞^zEЪ#Na!@Yd n޼QF2+60 UhZw6\E@_#8ZPQ6tϔU4?B(fUK6\I ~ێJ,aPU{O*6m/5J,]_G_#Y΀S5DDzӾ~tsڐPhޤ|2>CNt)>z O.S(5mzX@L8K'w:64[8_wM Ν g)=zPeXߖ }Z&%rzFe!OY Z{Z+Kpi ZKd4HVBq=PT*S92L3}/+[C`]cS8y:DzŋEv|w͈.Z[ m^=}1It'i!no ~p}[Pka^h{o2]# :\1bpѳDIkׁȵA}4xܚ6׬ު5_e0{=ʥ+C(*89xp bgEҺ? 
lQemAdѶ-*30H%q{q">0*BCy A&{ĵ Wΐ7O DOdmm i up&…JU^))WD&9e0 mᡚtF:lE(/Ɓ _)w<8OEZ@EDH>W?ȬJ?5VT2 KƐC+_:yCWKЪ8u68lOUik@&CBvECOgQ :kpeԪ9#'_B؀5G}dF4j\2 NtF)1m7Kk bbbnXߢP gƨD-؞ $X1xW ;=yqAŌ5}rR|`ZK0J%zf!__OatHK8Wkެ&R5mSiEX[0,M^doZjM9o>v+gmEP 3ժQZs=Ej xa4׺VB y` ۷& tGNx~w|!GY}u17!5`A0tB= bRmS=_!/jtr|垓7?+V @OR Q4ȷ]R9ǁ=ⷸ O>Mߐ/o7gꐺc'|}QoPz`pbȞA&뮼2 hM9fΜ)5qV\ZVE p~QXw[ o!|wJ][$*UR٣d-Yqhzk^i.nqS^t/Oӹw gl>'+nR-dD17g(ݧXƋ 5豑]/`G{9.+cQ"ysdא\K^K@ER HX1@b0 @23dO[`([ V#?NdIY$A (T~9h_zk`"#S&oii-NF% AI(;-y I6y8AOS~>eU=-нwl|cjczmԫ=GL>JCdwHSkrl0RHiKCڂ;U `6cCQQd fϞ=-9{Fצ1 :;ѾaqYe(~0:aT75F:f kUp}ynґi *A'*3A9WҪ֏F?SuL2rvʔT)'cO;y -mBZ.vHPA|025hGKʱ/r.Rxue&R{j52xmU&0gc6lAD}7~zs7k\r0˛DF-E C?W<2A#9?PJPȣجIVzQM/Բ.L.B{>X<{us&WqlC?ޞ{ðՙyUp.ߛ]H}َªKp0ً| жV-OsyP7?lٗfAn spYT MxI MXlaCc2?A|Ԣۙ5of;S[AMD+~4\8RN+fS6sqtr8a~b,Q&hk.])sw@a`Ƥ)h`mAk6%+/KtaԩŋhHqQv⨣f/!jlܐEovRzKxa1zk1TFs)' 4%_Z^׳tBd B=\֑ Y;&VB;B>mԤ{}Y4=ӪdĹ jQUͅzR}o(!)O+`6+Րv pPt޳KY8f (}T樂~FFl+dhH釚6Mu8Xz6`-#ּ:nhͺϱ^ys[TΩ&CɵhN!#vrC19!)l*4؍&#;Tiz+0RNgAAX`C(r)\< >}h'黰D R'o---!bdNiVbcI-mzqgz@ O?}G(aQpޠ':VIОZ6G.y^I&qc>9zP)AJ吐e q56>JqX"-c ]B._\B!zje07%9; OAj@d7GA( kvd< 4 4X8 /DBI8g`="S>Ea4S>9'3 5$VMsg 68 ] 9֭[6MYu?*m9t?2 "%8qBJ1;vX),'zXnƓ^W&¨EJ^֖4l?‚չtmyJU+iM=zN4UiAܥ̙uRCLR6}w l[ Eh9'嵡P}H,J8vdLS4 ~ʿQ`{ + M#ܚxJdJd_U}`P?ZBӏ :FqL,˷}v#CX0]9n C̔GZ;vɓѺuk?Tߚ=˯?.v')JqhQBO[Li$k/@ynC!HR-RZ} w7ڷoݻKGW^z_?|#f諌 rțwTɷ%rFKM y|)#b|C槃ND}E@+W4I`E&ΨNlt-2qqq &wqRV1m4I0`!)d#p yIs0W%@g&Hjåi(,pgkJ۳j9TLYa:n~Ӵ|ȤWed(bQ|+Vtes۶m  R,$,\Pʮ|v3?Ld#%wS|4#0ZqpD,d/k. c)JݼhWP5f+C 3;<Эx(`U'nFQu~-@at3u+,S&#=$WZ];wb8 =nN.v@'4dOjmq(~J4}o >)^yzE0Ȥ9˓8#kJKҎQ1iFA(lw2ׯrIFdh1fgϚϜi,@WoKq&|JvX8K`|JLa $ N>`;OFjIvY  ?ápFQ E.!TN,ǎ)ݺ9VmsGn5ۀ81]LtG<PG[ T˦ik`YN.X` Tȟ{,u5.u3ݧ1iH{q?JSo#sS6HR"T2߃npd3 Gq06ڵk)Б g493IF#-^7v_s/R <' A!_J}? 
w;-?RhMρԤ[zԩc2EYouaڐ['+oAC,}gUAc  (9,HMËGVr7H`@&aXHQ?ul0`<4?jksyLN!ّԦbԐQC]6;jEdQ9xp0אf *T6¡CyUfy` đ'СCxѣҏ3^ OEݨ\hx'&wF2\t=ܣ+6b p#{Q7 >$'>˴=$%3kL[m|EA\He0l7)|_e&qߖX`MA@mc -[r/pS KzSl |q09s*c_~E>ħÑ6 5E@EEᝲOfQN `qM2*#l2ejJAd ~h[0rE)fyqX~ Ūj1]dT|˂v]aGc&''e˖;e?]/,+ŚV[`Y,ZĞl*fbU#<% sd~:D$T~qkᛸUl웸 p>11۴U`C!lccg|x05ħZ>Zx?`_Ko硚s ONkr-=VA&lUIڴSۣSVa5v깭ȽE=9 ?)R/ ^|Ag+.E&_JV u>Xw4qs+gptf@,r˘Gy4tԶ;:J3S}AJ  'P=DΜɴMp)mS O>x 0BG@#7.a2}qHġP}ST`Nӑ~iɂ\ƎQ.Mѫku!NQ^fH "iũ$P&ąsɒ_:l&٧[l)} 3)dvAgiW(97{?̙3G:{2;a;]DU A@5䃬ͷ eR7r](tz MAT\[Gk`@$oOǥ:-jR?jUE{}5~D]!L Ȁ ;.BTT)d9t$춡gqik)_̡ K.!{Ū!(j$ ToA0WSѣ١J*{ E#Mk6-;=knWtח{&&Ϋ6O!į9K26Rj%ـKmJېAp``ݹ*dё _qggGBA 4 ] Il!i>Ok۶T@(^xL:Q8dbbԪ%w8EuMD@h jZeʳrK%wANdMluETvx1_W6C˹Z'hkSħO-p`+ "0&Eg CԶ툽By 7qDFW F/4q~o⚍ $R@"uz9JmkI/toB[^uOyhbQK]$7mY@IDATŊI c09lpA7Ç k [wPr8lƠf?Z-9Z:!uIGmy'%Sr/cCxIxZ"nBiQ},Omuc73WHٴh;@ᓫCPe1q{ ix֗LlrJL0lmAQ)fQ1|FR٤p)NH+7mtSk \ȟ(r_ޤ?3*a6 ĉ-UyM7T4 !Љ޸`@c )9QqG=0/7e'm G#xyyMZBo%q~?,CC!;發c9g~2wU~?_}i۷IXLGo߾J*,ݓ'{ܥ85h`P4F#Рi}>(/= Ɛg@ Ccp!Hgt7  ʝ5%hl~bmA0!5 mz :V-En _~%o.0@Ce$P3% Q!]f0sr5v++!EB30uC!L4h04/.+ȥ`-4h-`Ȍ\~Z*~z7r72\}gѿIpIǻvR :dSi[kp0}]6zj0CyCm@~s+@ A4؎M.QY08~Ϟ޲r.\*r~bh-CZ\JSÔ)S$c.VqU&Zmޗj;kkb0\zѱD r(;eIi@{lyP31`ȑz{H mA 24s֯9FB&u_cR0YSl:4YM?͵;2]h*RhB tS~ӖCslϋc=(YM+u NquO+ 7Jҥ:*\5m2xTerXe?!wHEkER"BL)1Htbm_}1/[v4Bzl 36?U(\6o!{LQF{x饗Xη)˛xkNjVAmouqqHݱa? u7ځL@7ICSWjohv"D5tl[0Wod±B0=ɆW^qd5lU[`}E 6c.},ZO4x:qQv, m1Z4l0Őq } o,9k3XOaO\ Z :;;v(B V#KU~zA@򰯶3{ﭔ8{hh:mj^^4o*hԨtʕR wНv N%%\>3~7H>Xu  1 dmPY7@ɿI76]`|I-o1\hyJrlj'G Ykɇ¶դ1Gȵ0IoW-B=`4*؏B__)^ ̟qͦ[) 2[:W] .^FWN_KQG G66mn/C1iٚznqm@HH"mfTft,"Crwɗ>_8~ϗzCvQ|!(!l.r N[M#9@E'3otq)'*KnEvɤPZ ڬ1 Bo#7FkX8 H.Q#Hq.\ɑeJ۹ [I-^;vί{LQdN9'ztTӜ+/=wDJ"ТE ޽[ZY)+j섿'`~SVNow֛軽pBíZf‹2(TK`P-?谇#y2:+\h1o.oE4Q-PdRº@trlt%uRq/Fl/:uzeSzڒP@|o@ɘ Q>eۯd>^Wn@w-RJBd y݅ gAq~-Û eoc?Pvm/[Z@񾅵qG BG=WrdSnxz}犆YhGdMk=>\O.vz YqH>܌^Ѿ- %IHx"3[whH#PM'|{'o'e,+NWQyPVKafrz? 
͐Bu9sF$$`` 7m\,.O|80T?9saiȳ};^r8[LJ1Uɀ3( G,G\51Xv<;-J7|)VL똂4u{ES1m1x,e2qvM5d1LNQm۶a͚5w 0wQ~ewח.CϿ X~9+ m~ZbVҊ'pfCʡb=ƣ0583 ` z]GdCN G.~nSSNևY4(W ŜHΕK7b\Vəg6h޼ب\&V[+ oEm[akLٲ",# {Y5kWSꟑgo}~4 @x9o ٢%QaC--So%''K.;"N(Tw}WqMӧOr'\bꍀ wëϣlCUhߴMHO\hz36df"f<}<yZ.s>l,\*챳He"?c֣@)$D6Qř9-,:^#a Aeo'~ŊҀVrpCNd|Ȼ}Cڂ+CbwC}8rAk3uD J8etDzcm3|zYi{uDn: XxHXZj8|Dтg ]2bxogKxclp"6c`leBva}liF 9NQ!UsJuͷYWdE¨W|Ƈ"0|p)`& `^l8^@&xivgu&vpxpv1ج䟯̦={`ƌ`5Z>6l2R}{ˇg B0~YX懍 I"Ek5n}vJv),Xu{ADYri?x2BsHs̙3^9HoQO ÔEO~_y(Zlފn{)GCA1t9 U={ v9ɱv c<AS R=0޶}&J0T@}Q޼[/ח,u(8C,mO <81S'7r.]p XH ![ s& m3Wl:'M> fUKa!Ro8x/_B>gPS~ qLBAUD* i=+o:'fJ}΍|FUدS׺JW$MH$%9<62ԓfZto^XI>v4qDxא]RG'# td\LYaZ@ɾq q%E pd!pIvѪZ~5б1|tRVe/3v%+AJM Z0M63$&$$QK/;B~p HF^!BGG&_e9?P,,JC|J@@3^8;i0>wl=<ٮu\:k0nwpTD2l(! ɻCWe^+ ,ڣΤɸlE襵 vu 1]rҟ?Eɋ=1ʢ_MH7o_u S0x74#.phЃ@dDi%r* -_ͩٳ4Ԥ>h]xyiHxZ_:xX-d[s=`Reuvr{N <,MޢtQjX5Yj>k|n->wG;w+Aa |5%E Rw: 3p*빪xHE`sC; qNˠγ%!iGy9WQ:. Q[;n/^۩K' CZ :&YnwrGfݾFH4Et$S H 7N`*5ifӧUV%eʄEB#uW/BוZ Ùh+Ga5N8A5umꄞ6-)O\ж^ǧP|po'L#ʒbR8vR\染: ,[U}AJ0*T%IY}FYL̎O}wsT8I(KOxIyZ q,,H@|17WJdv~w !WHٴY{fHd9aԛo?tLh(/@_ I%fígHUww[%j j]7;\~B'V|.JQ 9T@?,a57V;,˳cG4庲 \7ޔᦈE\0\ :UqB0J ZsP r[w#쯿W%,/!XB..C j]]^}aB0R5^cY(wBA1ɉ2A{h==)DI^:*sָٌލkQM² /Q4vp'fsw[U9ךYy튟Hݲ ֑6lGFȗezX Y'Oȉr/x -ڄ``Gp#QoU`%# JQtfЃH۹ЩLyM}P(aR花22 5i_3@@ PA/&d_9R(e!5n~j~[buR5F dH1`XɃr~KlG( J J V;L9BaIzF(utGo@؈Gm8YtIlg3%@@l%,ZXE[YGZܹa)1UC @W%Xk7ȭ, hfDOB΅Ea?K` \4(u%`pպP@@ P@S A݁J7~/+Ά9W]މx yL+U}%Z_\$zOhyH2W3Bf&KT f " PtJӭr1?v?@2tWgB%* 3>Qx}ԢLYGG\O!TCM\~Dݚx7ٵQ \UNu8c?H^#MaLǜi;a|ޏl5K1:p:zvNO!56d@dGp5z bi{ŧ磎P@AE#.Vhz9e$&qȼK2>jq`ate+O'@25p"ݬlG9ԴL= )p}\p\ J(<*r~d$GLd1;z]8e_UJGIm}/7 tsQWIe7z-ì-8sK2%tHq1"|]am#**@( h:u F> }ȇZ7Whoݭ_jJUl$b0+]gܵyj{[DX hGjz-ea'JDMJr^7o/oUSr !'AZ0O hIڀm W >iC򑞓u46zފGd0TV(9,S᏶XsU.w;+z ;=%;QRb{>k#6R؉}#7@t7:/TYfEMA1 g%Ubrc6UbqeGorM-,5"ޤX>m$bJB@h * x1@*"p4 &RITw||(|&HKZmNUƺJ2g,-n%NƵ˞#:0 GGzx+ ΂c(FL,($㥨o7Qi#HG{Z6N [`T7W/ʚEk*J Pb86/6Qް*6/}fQW/( ҤUY^+{k6X H PAUDzδG4dP쁩v̪ʠr}(5Xq8\ ;.`EY3+>~=A*[+@AP+?vqӵ 3Y10v]qo 
M&:`m춗Ge>_δ;tn~fF/CmJn8/Te{s1l)kGZRA-mn8q_lc0Qh[x=ݐh]$6J˶YKhA9VCM!Rl[2omt$ä+ᯒC J~,bQC_iʲHYLx;Ɔ<>;OzKdJ||)reSqLȰZ59J$sD@5sw%5Tj8Jbm\m=:)G^Ecf6rgV 9"WժsNNE9;RТs94$'n6quD@lU ՜+ ;gȍ+dgiV_}ZBb1G8YGDcʟ!,sԆJ']8[3*#ȸqvA#$O28|ƠK㐰 MME,J8M x4`6qC1{3v`SR{̟L&·-V۰|1_?|KZ\Ql|^]TSŬU| ZW\$Oe3AN\٪_].:_~R>z F[SXHۜ{ҷ+}{ ?J߂)עPu{W|0aO oTJƠUmX0m%Ȥq%U@6LInU\^Wk倫 콠,[FFm# ݯdA4AMY\4o^( &` Bq \48R6o]B=:jA? A.Rc_N 9g"zH;Ѣ/$-L+6mB |VBb*gU-{lw `>y6ռcg!!dwe@ j񠑷&}oH2)֑{;urG\ӅcA!I |8".s,JB@z ppSGZlٳ7h ^ O}7eV ;>dŋ4899R\8n2Z}3HXB96s<8 U!TB,j"1ej-lMR:xts-l'N`ӦM8z(^zaȐ!رd `KRwdN.rC@:"qq )).%(v C:DOaPdh[:ׯڵkS%ddd`W^y ({$N+#C@ mZo1oo.Yp#u0>im#b ̍7bٲeHHH`gY)f={Jo]tA``ͺX8|0f͚.4m4L8>\eq9q d:6 ƾ!;b@e:c.CV.NKW}M^؎`ʕXd @M.#F :` Z(7oq뭷UYΙ`QIgObe8޺I\  *t1#pvO5z}2}~<ݸ;m+|4?K^!#{߾}Jn?H@iehh(._]Rnj3ߣaÆ 'dJ+'Q0:r. ]+ע BwW~؊ #s%Il(؍)këÃ<9Lx xo]` Og?~|taU丬9 o&Ezs,nf`C{OWń69xB&CXY[d!ae/k#5 Ν;ѯ_? MRysE~fΧFCZLu},E]|7M\e=b FutYgvm^"K݅NfHgM>'OɝƎӧ; on $9b宒@-3vI].Ye[5M|F($}fʧaMR6yfg#*E@eM PnN GxC|E.$V\8uT3aTBf7e >#av4Զt2?sZenO?O]pǎS^lii7Ҽ:vmdmLlz̽LC,@ 2Q !y94loo^~3k'/j ܕRHᇒj^?uQo@Od%iNdQ&*_Jg]ld18 zy*~ܡ{(O:83kDd6+_JYRqȗ,Ơ @CT.1 *r.\ŔϪ}5Vu×HpBAk/nt1]ѻwo52٫27]ED.{Op1Z HsM]Vʅؼj-c%1+qT {Dzb3UfƑĭJ JUxNZM,H'56Xh}G8e|č๼ oF.9~)'4J&`p{єȡ\ 3cۑ6NFh ZpoZ(]ӧ#+#ͅq#:/RSq~؞TIiaiϠ#@AU ĝOL,tE N.BZD#o"{[S`m$uD!|ݎTdo۟6PRiqv튶ųk3hmV y݌n ϰu&hڱqQe6n rwi;"8qYRc_N'W@@ PJ`PJDC#YE)A݅8QTF^V*Gnגx`qqc^0q+_T(5,a!/㲯Rp}¥u`ٜ ON E0kemR H'cnG6ӆнp8ٴ @X *-7@߷XSK=,甤Y\%@)^ MtT$^N(HMMm 5k&4< /Y[18;$g):Rֺ&IqJqŬ2{[!l G@h J)2|gR:d^'CX訴o߬E"vpIoh(o_L~nTr!K nL/ c`"l޼ &yuI 3Jx^&Ɏ#_Q[r͹"P73`JJJB4m|!bd@#oFG:tr]GRHl+PϹx.u]|hIܰ E'<7dg|j]%^6B"{), dMW x`Pe6NI1K9JkW9 Nglkx$`ymEġ:w9_lC;Nrrh2HrYfP0B@+/dŸrF̙3ذZիWY i$l0'zTJ+t,|2JzPwd:$إ3E,ɓ@" S&0ke 6L'+w:G_R$u\2 8"o#XZ{at}: #YHaA(ᤰqD@@ P:`P:D/@"Ɔ1_gNTӗq1"s/x <q=2 qMޘk̵tu& _}JLq޺ ),̏e,rF@ ^ Pذ(2 /}s5"r`H6$R%(j)8H&(Mj\ͽE_sﯛ l)ڜ!WJ<55l-,_+9531ō @B@UKNۊnaet~m%f=#rrq'Ҩ*M߁[q{J" .8h^L|H‚=Ay8reLI-1nq8s?LM B$8H$g@l^ eM P*zLzQAsR}7uO4#:fsϡshߔ7IDNZ%Uf;pmxaAUK1΃spp!#9\j4P1pӺKҶ`kF :nwM!=bD@0s¤~? 
6m rs-|Ь7waz98<ؖwֹO*љ8#$ĉ .th=_klo Y$M/7x ,q2(Gm}Y#~a$eJ(+$8R"O')ۘYSKU0Ü'QSekT-9$Bk>{P_(Ws/Ղޟ;6 V#5wg6$?B. B,LAduɫ1&u\'N9qbˆ0xk֕u\Oy Wr/J[O{Y {<1I zPxi,(M s6G P y&8Օ4oG _+eƊ.?xdߒZ@( ?ƟCܨ $$= #||nwyS NQXS\',5 ,oI WڴAu 9x˹>Q*1@#y:1ގ` [͖12%"M*7 ^&MBC i7OLB8|0x tYKƆs/(Z^x[:̣nL;vWng@ # *ሥl xaM&=3$E ɞ@[G}z"8Wjm.![|ի8q"z!}$h#)\L妔,@N\"s<{o}0Ƣ $mZ>ͩB}[CI Fq!T$0@IDAT8B0pńN^x6g LKAߍ1RƷ/u8øjW{r:H^ϢwGE@p&?'A@"&do`pZ"S=!e=B @4-]9@);;"S=dV?+5: UE0(`HH5GW[T!&D7z*U"rq <3?*bq0c ]VsTX< (?bwg׶6cW|>}TI999s]O?œAafW{ìz9uT *ԣla8۹(F=nMUI`w.nYWW|65Xfذa0k,1bZPbb"ܹsj5s(,xꩧJG%80, QLJ,_$nR-/.5~=s 6$%$03 X~X>sF]m)↑l@\\4iDoܸ*OV|Tí`ԏT9  Lu'\)B!{Z 38艳NWҦbAߊIVU$((HC;XYU4T<>|XV GW>dddgC@Vr'ea HytN@K`o0mq`kxlJ{NMMܻK ٻQG$R xT`Z[k 8nNйe#Du5%FCi =$ c־Ĵ;w^Uz:88? %qMU6ZđLJ~(N!(z6^#D⿵CwСCШQ#:(J/͸y:Ûj,=8'8Ha< D:Fop,|,:xRHt ^ ݐr#YK > ,_-ьtm~+k,DD$&“APοS^O,?$ | |gX0 X:e vт?\s#!nk ǠC-G_$op)VxFxD (u=/C3kn%}镚)PVa;gB]p:qirWCmto NPb.Q ,`o4 2'|y|w:J/Rh?n-̺ RE'qgruSZZ*pppŋUjC6JrQQZlq+MƋ~q ,Aw 0S^fGׄeD M߂(X{pQshAꛚ;,Yvat0k5eS0{1ifeMѪpJcÍzKz-` _7J &?W͢k"`~SyWŁ'NT~7IUOBr4AY %jAkpixs*ގnNX zK # Rj:3㺈75-V5MD d^cSn\:+o%fb˗㲰s}w=9/3dH\,Y֜; ;eW!8̈{FUdC[[-j \ &Q$,*n5ݻUNs|۰O QEw2>u!A(~/\F 6?h _cתynQw~GzD(0yԗnzA5<-[JA2!ɛP@T*p5KU~L  mVjKNNNvy'<6y*P5j>]s& !G$B !gt9C¹bham pט9.y\sΆ_|wi&.PҪQ}P"~ܩr XcnGO<[ߌ꼰@<ǿ%?{܏xs`U'5J*~sZ-Rt 3GGGU>"PA *!^;ܐ}?@] PiuIybSMJ&a?{C]=U wxuH *4 B&NZ&<<"J ȣbR2~Qw ģ?uQ"A{Zo>ؿr… 5%h N =\sxun,+uaēp"Zf }LouW?Nv}OWuU5cRJ1 mٲEb^ 6>!G㮾w+@/1P49t߷~.Bx@=^W\T tl-aO|s( Hy1)UkcΜ9۶mX;vlekHNȲMpuƠb`i-\B#D \&ng 80~~ros ؘ~!8w}v~q7ѿyI  *nAدo}W^ݓTfΝ;:zF TltA#,MBPcf-iio;;;qD-:X;Mg`S<-D/S<0Hv6y0&@ӍSҼrE/XšG.&QP) "&u99XyERZ^ML:աbWHIIׯU>}(?~T̅r\dj Š5:L-|hE 2x L\y1?|ENC/NB#נ3{3 |kzjS^2كqwсx:(thfټp jRL`č!芪PSqOŋUy9ӦMS9C %Khja8x1I,fsZ}r BFvϡCeY`rG.ú?" @@u0z^!Q [[z:75u=)l_lRن6Ns1r{{caI9B֮Дzf!sp 9=Q{f'#KClMcfۦf'ŶtBC,{x9\`%Ѐ3CU1Lw Dv1)tᅪ𽭵5 iE3]c}aAMӕ@p)*#UKa0՗C\~P"a"JNœ^ 5+׋-AR$Gs3ψEhgq(|4fUƩcT7]oh|H~rIyş?:NoMs"`R [ P@dUQq &(z_M1/B^U"aꝩlq ۸Lj3,*36 0٢AQ˳$N}Plk 4V$/s'YV,s.QրpwwSNi`,| n_Gm̀HJf i_"N[ +ph ? 
f=3:1Wlُ'n:q?\#pO:7Ϝpst:enR!<أNeؙ ߙS N߼`ycwp"OdX_w,_|7n81'gCPz[0i(Jb13 r-ϯtjݺcǎu#!C#~pU.yakgZ2iǀByg]`W=9\\ة%;gk}!KQc_: EנX&kkLw̜9*d2㧏h0e)bRW;8V鱵+`%+rƇځ]柋|we%(f%'bmLw fmi)^%q!5~WO+8kgĥX_k>{~z9xZeDGGUYĩW4qgIwnKF`^L X4}t1bF:]s#[ [Dk8l.,W: )._u/hT"].y(<>0ܻ?KZI4j 3Z ={CΏZN!n,((P&@@Q(Ȇ w.\ NE`b!\@Q_UYr*xUr&!SPTZ9Ҧww¨eD1Ps>0wUB+=?L_ST(wNQuF#M:˨g&''ĉ麜3)@I|r|ե:%&C_ S2՜i}[/pЂ@5e1`N uت0(#Z}MSU2W &Eԅ۞8p9)ZvNCB.uضʦ52CA"/5zEKMj. 1P8-l5~L;>wg2i1zc۽`i`L62ıb>c !.Pqkc=wޢ2 1ij!C 吃03YhP ]aZbǤ@emà_sڤ0/MXqt{Egg;XCvhkN6) ]~]g&6v ;65GNS'P('ԲdhmvRgy[;ٵm'}>p_ΞnGˉZF@CIoC|Z`C|-7ȇ|I^9 iy[9dám<.[ H=,%O+#zؗԓ4λI` D {(CښjZ5q6Lʔ@LC>Q9+БB. rܐuΘ(&@ХV}_`4:\t<۩)' 4X@H=V?w;VM2YpxA(|q]`Ǡ[]E孿 >VC5l{6_ԔU (=l{ADĥa ܕObPdd 7Aмgok"IA: w0pna_&?ܜj+%=Hb^ƨke_rZ q'p!dRԈݮ8Ho†Ntv>jk;x6j m:YX[9J'@Zv!|* XA>ƨt`چ1 J@Dtr7Bt"g߉dn~`?_l7GtBNf-"PR&=nBб((-cwU&W]\{ACOlߎ ,Gŵ*&[jU 3@; #){QxFxLyYue&㪥JWvWܶ5Xa=/dDC2C``L`>ѩ1X0s >Cѓ zOQwܽdk-,6[c Xam5)ˡT('3(%)E\xU@0e2OޅPͨxº*N Ĥ%ۃah(,)@, \Qȕ[̉6t>NV w/u\PG$1_|,>;tg \N#<:Z:eTEU8~q?DD 21fT>#$9B97 "~ɪEftr6o\1 UKt ^I޹"5O(*RM`^[ctc=1%UoxUp!Pk~;2eRżMʥfCK IxNA8p8[lsZ ;CƎAF`F,^\YYKasXJ0)j;˕`f;1p2끬1b`.7N*Hz,e]_OEKw8p!Vk|K>"晌 19:I?Ƒ9t^'i/MoUl2@m$b0V6b[}oЂb>lO-W| 4dkܧ[D #ӺZ%8{[|APȐd0c#:fFE10Qf /2k!_(̶:aFxآESзZ6Z0 ]7^4Sa_O[ 8{FЂsCW 2:*ܰuôhс9:ҭRY | t[{i8ФBcR6'X͆pv:ؖ7jt'AՠB5} .CN;$'X0n+?dI->T; m9lLfBAЛwzn1z߷!5<5&ygK/KYqU4ph`ހԲd۳i!P'r+1qv)A(@s+ˋ ƄL9ӈg4s{w++i <][]e09G]pHM>p5ȒM;q7GXVjnR 35BC u&`anGT׎f JZ&+?/Hs-\Dz׾׽Ȏ߀7CѶnBy0\[ [/1wltEr]028 4iK憭wS1h1pk|Zc h<ԣ=:79~3툫Ucg{#e5kQRk7} >kmA I +rCbkYqaHf#>;[ͫpwRz Z;rᖬ{ uy>7v吷rA1J) Y?fQz*$@ ˮRdaxM Ei6=#ϜOHI^hR!';bjvn{b}NO}c(LpmQ9,H䊹k'mau ǠN<&9f̻ըY8C,s碼s+FeIiU^Dw^4:i8oA#M\PdAP, ݊=wtg} c/A#9i`6\ ^asao냀EtT?v ;TOL %6'"qZsMaӈ &K E"o)^Ay{+u+Zʋڙ {m u4{Pbu%!tg9dal^woW+*r Lݧ޳w$_vZgtcΞhZסEe3NGz\[me(&?r178O!Q:@ߓ܌!!@< aV6 po?pu]?WBμp,wVM,C@9Kp?9dtq'1HYgd10'ñ]}g5~\EޙLPlOpu_"zH)*(ːZey(ώ y]:50gӭҷA@GUK';k_LHL3(9!ceB94Lr L!nߎ7RX ݾq{ ^& Z8}B952.O i *f&10k7ڎھMP2m 8Z=*ۡ@ %p+9[C[H!e҂oPPs) '@?"6`FO|םd!(dpe"W$( 8mJxAOc`r0 a 0آD&$ *ٱhLyb~?)8( [7E΋17:`SqKҧ8R=CnT?׹>f.haL޹Oq`QhO :A tts3_ E 
3M{7tse'k\% L-"qk!jH4$}#@]moΫPw#yg^XɎ/*ǣy {p;MLalr rΧ1*ҷ/ì}IEJ &|6l8tr J+3>rе0k9wiD*rbա (Ltbac8! A|8n33`O?`5Ǝķ6gy(lǻd=QGN\) %ɗ|n6F'r hսfo%Yogx1IOя3/dǸ .CK+y9 [?.Qd)1'm~9G'QIҕ-Z7q?To|';bJvC>~G9\BP qZcJ10em5fS ʹBеsFFsjН@TB:$Ihur^vɎe 2Az3`.-&]`Ł[HdF¡u5l,,,~\< SsDV#(0ͅ>J' &BO߂cנy8+M2L!@<(Slߎ,n :4wO"ygS|KX\D&C:u~h _0/d @s'0p+[E=quG{ mS'^&F!@]pKu8C,/5ȟ|īdLc`&ػ1pZP{+[*t^V+wvGygp#yg^\N 09#P18E1ƈDrPQ⽈&տuThG8Q)N|f=*IL;3/dGGLXQׁg k}/Ivcp?BoK>sMEßqʞЧSS^&/PCfQ!$((4pl%"r D @D",@:߈">ݿ8&D@xcmM$R LE\Zޝ`ʰ^ؙ䝍=` UP :(AQOnls"& '@f~LOj;O|'`k U O'our QZ.̿V{50˓vIo@*]ou! ;hC-*jсu(|/ߦTYT wQL9 -]64^}^ yP7& [X Khu09Q=)Bi՘priyPM"cXӝ88m{++[W80 _FyI8LΜi-F9|Xhq䦩AZI*c@%5kT&YC:N[8rpg`K\ǢyeN4_z3s&@|t-o9;K@X#mxuN+q̫id*䐓3!uN84L#@A$䝭m>ocHYn{n36$q5TCF] ͈ c`@dx9D*Vc$O^hs{?^\ i!M02(:sC'7d6hR_iRS،McXe^7Yx yW6}7[#Q0, Q2D9F@9 FVKlo hӞ $̋(;(-+B`UyI i)Y!I|.*@wTeI:3yw'yg^Pi!} :ׯBNMmH޹D\ĥcKp32.,L`)0Xr ,ySoM윾0&\/psRJ-2-BX8_9d|`YbѲ c`̟m}J%CyYwFBC%eru2C9F M` A&e}x;O}7 DUrgoנ2: ¼ T%]"F5tJ cT*YZQNέ??еN̵Q),z!bAx7$W^60gӥq'7ѾCZU޹O5/jmI 3`rL߀!@>(Z Yn[ 0JcF;oNRte^mecOx/Fm'吷>C+,$)4$(/޻7ŨP㈀  5t >TЗWءsxEUޙ9ca_#A>|P^64X'0flp Ք <ڹܗYY",U8%W | c'Y#!qAo^7h3S;39!qrȨw#sq W !@aU";5d?AFnZ=79(\\Z/Cl $+k!C D;r #%D(>d&ީ6(_+rE Tc!@< jБX%Xՠ!+ z#2NAc`d<8^A#*cZN8OB֌;Q=R0ʓv:Ʊe`1 Q{:Aqpo@x1xfD|~B`$pw--]zi:Q &DzXT"-@ fwGK9A$rutGe0~3z4V"~ۈZEtn an?C-N0#23 u*]nprtB cKj n ,./)x7W ]"`Fh&uEkvR+G?ryLu 6?nBz"`10J"T fSpJ.^W9#Dc`^ϓzC$Tu\ Eq8L~=tNe 23H` 2/: A!v/GP#.J$D"@ D"@ D"@ D"@ D?@2,|IENDB`mclust/man/mclust.options.Rd0000644000176200001440000001416714104430141015625 0ustar liggesusers\name{mclust.options} \alias{mclust.options} \title{Default values for use with MCLUST package} \description{Set or retrieve default values for use with MCLUST package.} \usage{ mclust.options(\dots) } \arguments{ \item{\dots}{one or more arguments provided in the \code{name = value} form, or no argument at all may be given. 
\cr Available arguments are described in the Details section below.} } \details{ \code{mclust.options()} is provided for assigning or retrieving default values used by various functions in \code{MCLUST}.\cr Available options are: \describe{ \item{\code{emModelNames}}{ A vector of 3-character strings that are associated with multivariate models for which EM estimation is available in MCLUST. \cr The current default is all of the multivariate mixture models supported in MCLUST. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{\code{hcModelName}}{ A character string specifying the multivariate model to be used in model-based agglomerative hierarchical clustering for initialization of EM algorithm.\cr The available models are the following: \describe{ \item{\code{"EII"}}{spherical, equal volume;} \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation;} \item{\code{"VII"}}{spherical, unequal volume;} \item{\code{"VVV"}}{ellipsoidal, varying volume, shape, and orientation (default).} } } \item{\code{hcUse}}{ A character string specifying the type of input variables/transformation to be used in model-based agglomerative hierarchical clustering for initialization of EM algorithm.\cr Possible values are: \describe{ \item{\code{"VARS"}}{original variables;} \item{\code{"STD"}}{standardized variables (centered and scaled);} \item{\code{"SPH"}}{sphered variables (centered, scaled and uncorrelated) computed using SVD;} \item{\code{"PCS"}}{principal components computed using SVD on centered variables (i.e. using the covariance matrix);} \item{\code{"PCR"}}{principal components computed using SVD on standardized (center and scaled) variables (i.e. 
using the correlation matrix);} \item{\code{"SVD"}}{scaled SVD transformation (default);} \item{\code{"RND"}}{no transformation is applied but a random hierarchical structure is returned (see \code{\link{hcRandomPairs}}).} } For further details see Scrucca and Raftery (2015), Scrucca et al. (2016). } \item{\code{subset}}{ A value specifying the maximal sample size to be used in the model-based hierarchical clustering to start the EM algorithm. If data sample size exceeds this value, a random sample is drawn of size specified by \code{subset}. } \item{\code{fillEllipses}}{ A logical value specifying whether or not to fill with transparent colors ellipses corresponding to the within-cluster covariances in case of \code{"classification"} plot for \code{'Mclust'} objects, or \code{"scatterplot"} graphs for \code{'MclustDA'} objects. } \item{\code{bicPlotSymbols}}{ A vector whose entries correspond to graphics symbols for plotting the BIC values output from \code{\link{Mclust}} and \code{\link{mclustBIC}}. These are displayed in the legend which appears at the lower right of the BIC plots. } \item{\code{bicPlotColors}}{ A vector whose entries correspond to colors for plotting the BIC curves from output from \code{\link{Mclust}} and \code{\link{mclustBIC}}. These are displayed in the legend which appears at the lower right of the BIC plots. } \item{\code{classPlotSymbols}}{ A vector whose entries are either integers corresponding to graphics symbols or single characters for indicating classifications when plotting data. Classes are assigned symbols in the given order. } \item{\code{classPlotColors}}{ A vector whose entries correspond to colors for indicating classifications when plotting data. Classes are assigned colors in the given order. } \item{\code{warn}}{ A logical value indicating whether or not to issue certain warnings. Most of these warnings have to do with situations in which singularities are encountered. The default is \code{warn = FALSE}. 
} } The parameter values set via a call to this function will remain in effect for the rest of the session, affecting the subsequent behaviour of the functions for which the given parameters are relevant. } \value{ If the argument list is empty the function returns the current list of values. If the argument list is not empty, the returned list is invisible. } \seealso{ \code{\link{Mclust}}, \code{\link{MclustDA}}, \code{\link{densityMclust}}, \code{\link{emControl}} } \references{ Scrucca L. and Raftery A. E. (2015) Improved initialisation of model-based clustering using Gaussian hierarchical partitions. \emph{Advances in Data Analysis and Classification}, 9/4, pp. 447-460. Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. } \examples{ opt <- mclust.options() # save default values irisBIC <- mclustBIC(iris[,-5]) summary(irisBIC, iris[,-5]) mclust.options(emModelNames = c("EII", "EEI", "EEE")) irisBIC <- mclustBIC(iris[,-5]) summary(irisBIC, iris[,-5]) mclust.options(opt) # restore default values mclust.options() oldpar <- par(mfrow = c(2,1), no.readonly = TRUE) n <- with(mclust.options(), max(sapply(list(bicPlotSymbols, bicPlotColors),length))) plot(seq(n), rep(1,n), ylab = "", xlab = "", yaxt = "n", pch = mclust.options("bicPlotSymbols"), col = mclust.options("bicPlotColors")) title("mclust.options(\"bicPlotSymbols\") \n mclust.options(\"bicPlotColors\")") n <- with(mclust.options(), max(sapply(list(classPlotSymbols, classPlotColors),length))) plot(seq(n), rep(1,n), ylab = "", xlab = "", yaxt = "n", pch = mclust.options("classPlotSymbols"), col = mclust.options("classPlotColors")) title("mclust.options(\"classPlotSymbols\") \n mclust.options(\"classPlotColors\")") par(oldpar) } \keyword{cluster} mclust/man/sim.Rd0000644000176200001440000000702314124774626013432 0ustar liggesusers\name{sim} \alias{sim} \title{ Simulate 
from Parameterized MVN Mixture Models } \description{ Simulate data from parameterized MVN mixture models. } \usage{ sim(modelName, parameters, n, seed = NULL, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{n}{ An integer specifying the number of data points to be simulated. } \item{seed}{ An optional integer argument to \code{set.seed} for reproducible random class assignment. By default the current seed will be used. Reproducibility can also be achieved by calling \code{set.seed} before calling \code{sim}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A matrix in which first column is the classification and the remaining columns are the \code{n} observations simulated from the specified MVN mixture model. \item{Attributes:}{ \code{"modelName"} A character string indicating the variance model used for the simulation. } } \details{ This function can be used with an indirect or list call using \code{do.call}, allowing the output of e.g. \code{mstep}, \code{em}, \code{me}, \code{Mclust} to be passed directly without the need to specify individual parameters as arguments. 
} \seealso{ \code{\link{simE}}, \dots, \code{\link{simVVV}}, \code{\link{Mclust}}, \code{\link{mstep}}, \code{\link{do.call}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) irisModel <- mclustModel(iris[,-5], irisBIC) names(irisModel) irisSim <- sim(modelName = irisModel$modelName, parameters = irisModel$parameters, n = nrow(iris)) \donttest{ do.call("sim", irisModel) # alternative call } par(pty = "s", mfrow = c(1,2)) dimnames(irisSim) <- list(NULL, c("dummy", (dimnames(iris)[[2]])[-5])) dimens <- c(1,2) lim1 <- apply(iris[,dimens],2,range) lim2 <- apply(irisSim[,dimens+1],2,range) lims <- apply(rbind(lim1,lim2),2,range) xlim <- lims[,1] ylim <- lims[,2] coordProj(iris[,-5], parameters=irisModel$parameters, classification=map(irisModel$z), dimens=dimens, xlim=xlim, ylim=ylim) coordProj(iris[,-5], parameters=irisModel$parameters, classification=map(irisModel$z), truth = irisSim[,-1], dimens=dimens, xlim=xlim, ylim=ylim) irisModel3 <- mclustModel(iris[,-5], irisBIC, G=3) irisSim3 <- sim(modelName = irisModel3$modelName, parameters = irisModel3$parameters, n = 500, seed = 1) \donttest{ irisModel3$n <- NULL irisSim3 <- do.call("sim",c(list(n=500,seed=1),irisModel3)) # alternative call } clPairs(irisSim3[,-1], cl = irisSim3[,1]) } \keyword{cluster} mclust/man/unmap.Rd0000644000176200001440000000321613752165063013755 0ustar liggesusers\name{unmap} \alias{unmap} \title{ Indicator Variables given Classification } \description{ Converts a classification into a matrix of indicator variables. } \usage{ unmap(classification, groups=NULL, noise=NULL, \dots) } \arguments{ \item{classification}{ A numeric or character vector. Typically the distinct entries of this vector would represent a classification of observations in a data set. } \item{groups}{ A numeric or character vector indicating the groups from which \code{classification} is drawn. If not supplied, the default is assumed to be the unique entries of classification.
} \item{noise}{ A single numeric or character value used to indicate the value of \code{groups} corresponding to noise. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ An \emph{n} by \emph{m} matrix of \emph{(0,1)} indicator variables, where \emph{n} is the length of \code{classification} and \emph{m} is the number of unique values or symbols in \code{classification}. Columns are labeled by the unique values in \code{classification}, and the \code{[i,j]}th entry is \emph{1} if \code{classification[i]} is the \emph{j}th unique value or symbol in sorted order of \code{classification}. If a \code{noise} value or symbol is designated, the corresponding indicator variables are relocated to the last column of the matrix. } \seealso{ \code{\link{map}}, \code{\link{estep}}, \code{\link{me}} } \examples{ z <- unmap(iris[,5]) z[1:5, ] emEst <- me(modelName = "VVV", data = iris[,-5], z = z) emEst$z[1:5,] map(emEst$z) } \keyword{cluster} mclust/man/randomOrthogonalMatrix.Rd0000644000176200001440000000260013751305610017324 0ustar liggesusers\name{randomOrthogonalMatrix} \alias{randomOrthogonalMatrix} \title{Random orthogonal matrix} \description{ Generate a random orthogonal basis matrix of dimension \eqn{(nrow x ncol)} using the method in Heiberger (1978). } \usage{ randomOrthogonalMatrix(nrow, ncol, n = nrow, d = ncol, seed = NULL) } \arguments{ \item{nrow}{the number of rows of the resulting orthogonal matrix.} \item{ncol}{the number of columns of the resulting orthogonal matrix.} \item{n}{deprecated. See \code{nrow} above.} \item{d}{deprecated. See \code{ncol} above.} \item{seed}{an optional integer argument to use in \code{set.seed()} for reproducibility. By default the current seed will be used. Reproducibility can also be achieved by calling \code{set.seed()} before calling this function.} } \details{ The use of arguments \code{n} and \code{d} is deprecated and they will be removed in the future.
} \value{ An orthogonal matrix of dimension \eqn{nrow x ncol} such that each column is orthogonal to the other and has unit length. Because of the latter, it is also called orthonormal. } \seealso{\code{\link{coordProj}}} \references{ Heiberger R. (1978) Generation of random orthogonal matrices. \emph{Journal of the Royal Statistical Society. Series C (Applied Statistics)}, 27(2), 199-206. } \examples{ B <- randomOrthogonalMatrix(10,3) zapsmall(crossprod(B)) } mclust/man/partuniq.Rd0000644000176200001440000000134613752165060014477 0ustar liggesusers\name{partuniq} \alias{partuniq} \title{ Classifies Data According to Unique Observations } \description{ Gives a one-to-one mapping from unique observations to rows of a data matrix. } \usage{ partuniq(x) } \arguments{ \item{x}{Matrix of observations.} } \value{ A vector of length \code{nrow(x)} with integer entries. An observation \code{k} is assigned an integer \code{i} whenever observation \code{i} is the first row of \code{x} that is identical to observation \code{k} (note that \code{i <= k}).
} \seealso{ \code{\link{partconv}} } \examples{ set.seed(0) mat <- data.frame(lets = sample(LETTERS[1:2],9,TRUE), nums = sample(1:2,9,TRUE)) mat ans <- partuniq(mat) ans partconv(ans,consec=TRUE) } \keyword{cluster} mclust/man/cdfMclust.Rd0000644000176200001440000000402114241634746014557 0ustar liggesusers\name{cdfMclust} \alias{cdfMclust} \alias{quantileMclust} \title{ Cumulative Distribution and Quantiles for a univariate Gaussian mixture distribution } \description{ Compute the cumulative density function (cdf) or quantiles from an estimated one-dimensional Gaussian mixture fitted using \code{\link{densityMclust}}.} \usage{ cdfMclust(object, data, ngrid = 100, \dots) quantileMclust(object, p, \dots) } \arguments{ \item{object}{a \code{densityMclust} model object.} \item{data}{a numeric vector of evaluation points.} \item{ngrid}{the number of points in a regular grid to be used as evaluation points if no \code{data} are provided.} \item{p}{a numeric vector of probabilities.} \item{\dots}{further arguments passed to or from other methods.} } \details{The cdf is evaluated at points given by the optional argument \code{data}. If not provided, a regular grid of length \code{ngrid} for the evaluation points is used. The quantiles are computed using bisection linear search algorithm. } \value{ \code{cdfMclust} returns a list of \code{x} and \code{y} values providing, respectively, the evaluation points and the estimated cdf. \code{quantileMclust} returns a vector of quantiles. } \author{Luca Scrucca} \seealso{ \code{\link{densityMclust}}, \code{\link{plot.densityMclust}}. 
} \examples{ \donttest{ x <- c(rnorm(100), rnorm(100, 3, 2)) dens <- densityMclust(x, plot = FALSE) summary(dens, parameters = TRUE) cdf <- cdfMclust(dens) str(cdf) q <- quantileMclust(dens, p = c(0.01, 0.1, 0.5, 0.9, 0.99)) cbind(quantile = q, cdf = cdfMclust(dens, q)$y) plot(cdf, type = "l", xlab = "x", ylab = "CDF") points(q, cdfMclust(dens, q)$y, pch = 20, col = "red3") par(mfrow = c(2,2)) dens.waiting <- densityMclust(faithful$waiting) plot(cdfMclust(dens.waiting), type = "l", xlab = dens.waiting$varname, ylab = "CDF") dens.eruptions <- densityMclust(faithful$eruptions) plot(cdfMclust(dens.eruptions), type = "l", xlab = dens.eruptions$varname, ylab = "CDF") par(mfrow = c(1,1)) } } \keyword{cluster} \keyword{dplot} mclust/man/thyroid.Rd0000644000176200001440000000357214163460525014322 0ustar liggesusers\name{thyroid} \alias{thyroid} \docType{data} \title{UCI Thyroid Gland Data} \description{ Data on five laboratory tests administered to a sample of 215 patients. The tests are used to predict whether a patient's thyroid can be classified as euthyroidism (normal thyroid gland function), hypothyroidism (underactive thyroid not producing enough thyroid hormone) or hyperthyroidism (overactive thyroid producing and secreting excessive amounts of the free thyroid hormones T3 and/or thyroxine T4). 
Diagnosis of thyroid operation was based on a complete medical record, including anamnesis, scan, etc.} \usage{data(thyroid)} \format{A data frame with the following variables: \describe{ \item{Diagnosis}{Diagnosis of thyroid operation: \code{Hypo}, \code{Normal}, and \code{Hyper}.} \item{RT3U}{T3-resin uptake test (percentage).} \item{T4}{Total Serum thyroxin as measured by the isotopic displacement method.} \item{T3}{Total serum triiodothyronine as measured by radioimmuno assay.} \item{TSH}{Basal thyroid-stimulating hormone (TSH) as measured by radioimmuno assay.} \item{DTSH}{Maximal absolute difference of TSH value after injection of 200 micro grams of thyrotropin-releasing hormone as compared to the basal value.} } } \source{One of several databases in the Thyroid Disease Data Set (\code{new-thyroid.data}, \code{new-thyroid.names}) of the UCI Machine Learning Repository \url{https://archive.ics.uci.edu/ml/datasets/thyroid+disease}. Please note the UCI conditions of use.} \references{ Coomans, D., Broeckaert, M. Jonckheer M. and Massart D.L. (1983) Comparison of Multivariate Discriminant Techniques for Clinical Data - Application to the Thyroid Functional State, \emph{Meth. Inform. Med.} 22, pp. 93-101. Coomans, D. and I. Broeckaert (1986) \emph{Potential Pattern Recognition in Chemical and Medical Decision Making}, Research Studies Press, Letchworth, England. } \keyword{datasets} mclust/man/BrierScore.Rd0000644000176200001440000001152414241357241014670 0ustar liggesusers\name{BrierScore} \alias{BrierScore} % R CMD Rd2pdf BrierScore.Rd \title{Brier score to assess the accuracy of probabilistic predictions} \description{ The Brier score is a proper score function that measures the accuracy of probabilistic predictions.} \usage{ BrierScore(z, class) } \arguments{ \item{z}{ a matrix containing the predicted probabilities of each observation to be classified in one of the classes.
Thus, the number of rows must match the length of \code{class}, and the number of columns the number of known classes. } \item{class}{ a numeric, character vector or factor containing the known class labels for each observation. If \code{class} is a factor, the number of classes is \code{nlevels(class)} with classes \code{levels(class)}. If \code{class} is a numeric or character vector, the number of classes is equal to the number of classes obtained via \code{unique(class)}. } } \details{ The Brier Score is the mean square difference between the true classes and the predicted probabilities. This function implements the original multi-class definition by Brier (1950), normalized to \eqn{[0,1]} as in Kruppa et al (2014). The formula is the following: \deqn{ BS = \frac{1}{2n} \sum_{i=1}^n \sum_{k=1}^K (C_{ik} - p_{ik})^2 } where \eqn{n} is the number of observations, \eqn{K} the number of classes, \eqn{C_{ik} = \{0,1\}} the indicator of class \eqn{k} for observation \eqn{i}, and \eqn{p_{ik}} is the predicted probability of observation \eqn{i} to belong to class \eqn{k}. The above formulation is applicable to multi-class predictions, including the binary case. A small value of the Brier Score indicates high prediction accuracy. The Brier Score is a strictly proper score (Gneiting and Raftery, 2007), which means that it takes its minimal value only when the predicted probabilities match the empirical probabilities. } \references{ Brier, G.W. (1950) Verification of forecasts expressed in terms of probability. \emph{Monthly Weather Review}, 78 (1): 1-3. Gneiting, G. and Raftery, A. E. (2007) Strictly proper scoring rules, prediction, and estimation. \emph{Journal of the American Statistical Association} 102 (477): 359-378. Kruppa, J., Liu, Y., Diener, H.-C., Holste, T., Weimar, C., Koonig, I. R., and Ziegler, A. (2014) Probability estimation with machine learning methods for dichotomous and multicategory outcome: Applications. \emph{Biometrical Journal}, 56 (4): 564-583. 
} \seealso{\code{\link{cvMclustDA}}} \examples{ # multi-class case class <- factor(c(5,5,5,2,5,3,1,2,1,1), levels = 1:5) probs <- matrix(c(0.15, 0.01, 0.08, 0.23, 0.01, 0.23, 0.59, 0.02, 0.38, 0.45, 0.36, 0.05, 0.30, 0.46, 0.15, 0.13, 0.06, 0.19, 0.27, 0.17, 0.40, 0.34, 0.18, 0.04, 0.47, 0.34, 0.32, 0.01, 0.03, 0.11, 0.04, 0.04, 0.09, 0.05, 0.28, 0.27, 0.02, 0.03, 0.12, 0.25, 0.05, 0.56, 0.35, 0.22, 0.09, 0.03, 0.01, 0.75, 0.20, 0.02), nrow = 10, ncol = 5) cbind(class, probs, map = map(probs)) BrierScore(probs, class) # two-class case class <- factor(c(1,1,1,2,2,1,1,2,1,1), levels = 1:2) probs <- matrix(c(0.91, 0.4, 0.56, 0.27, 0.37, 0.7, 0.97, 0.22, 0.68, 0.43, 0.09, 0.6, 0.44, 0.73, 0.63, 0.3, 0.03, 0.78, 0.32, 0.57), nrow = 10, ncol = 2) cbind(class, probs, map = map(probs)) BrierScore(probs, class) # two-class case when predicted probabilities are constrained to be equal to # 0 or 1, then the (normalized) Brier Score is equal to the classification # error rate probs <- ifelse(probs > 0.5, 1, 0) cbind(class, probs, map = map(probs)) BrierScore(probs, class) classError(map(probs), class)$errorRate # plot Brier score for predicted probabilities in range [0,1] class <- factor(rep(1, each = 100), levels = 0:1) prob <- seq(0, 1, by = 0.01) brier <- sapply(prob, function(p) { z <- matrix(c(1-p,p), nrow = length(class), ncol = 2, byrow = TRUE) BrierScore(z, class) }) plot(prob, brier, type = "l", main = "Scoring all one class", xlab = "Predicted probability", ylab = "Brier score") # brier score for predicting balanced data with constant prob class <- factor(rep(c(1,0), each = 50), levels = 0:1) prob <- seq(0, 1, by = 0.01) brier <- sapply(prob, function(p) { z <- matrix(c(1-p,p), nrow = length(class), ncol = 2, byrow = TRUE) BrierScore(z, class) }) plot(prob, brier, type = "l", main = "Scoring balanced classes", xlab = "Predicted probability", ylab = "Brier score") # brier score for predicting unbalanced data with constant prob class <- factor(rep(c(0,1), times = 
c(90,10)), levels = 0:1) prob <- seq(0, 1, by = 0.01) brier <- sapply(prob, function(p) { z <- matrix(c(1-p,p), nrow = length(class), ncol = 2, byrow = TRUE) BrierScore(z, class) }) plot(prob, brier, type = "l", main = "Scoring unbalanced classes", xlab = "Predicted probability", ylab = "Brier score") } \keyword{classif} mclust/man/nMclustParams.Rd0000644000176200001440000000275613175053603015431 0ustar liggesusers\name{nMclustParams} \alias{nMclustParams} \title{Number of Estimated Parameters in Gaussian Mixture Models} \description{ Gives the number of estimated parameters for parameterizations of the Gaussian mixture model that are used in MCLUST. } \usage{ nMclustParams(modelName, d, G, noise = FALSE, equalPro = FALSE, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{d}{ The dimension of the data. Not used for models in which neither the shape nor the orientation varies. } \item{G}{ The number of components in the Gaussian mixture model used to compute \code{loglik}. } \item{noise}{ A logical variable indicating whether or not the model includes an optional Poisson noise component. } \item{equalPro}{ A logical variable indicating whether or not the components in the model are assumed to be present in equal proportion. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ The number of variance parameters in the corresponding Gaussian mixture model. } \details{ To get the total number of parameters in model, add \code{G*d} for the means and \code{G-1} for the mixing proportions if they are unequal. } \seealso{ \code{\link{bic}}, \code{\link{nVarParams}}. 
} \examples{ mapply(nMclustParams, mclust.options("emModelNames"), d = 2, G = 3) } \keyword{cluster} mclust/man/plot.mclustICL.Rd0000644000176200001440000000126214124774626015455 0ustar liggesusers\name{plot.mclustICL} \alias{plot.mclustICL} \title{ICL Plot for Model-Based Clustering} \description{ Plots the ICL values returned by the \code{\link{mclustICL}} function. } \usage{ \method{plot}{mclustICL}(x, ylab = "ICL", \dots) } \arguments{ \item{x}{ Output from \code{\link{mclustICL}}. } \item{ylab}{ Label for the vertical axis of the plot. } \item{\dots}{ Further arguments passed to the \code{\link{plot.mclustBIC}} function. } } \value{ A plot of the ICL values. } \seealso{ \code{\link{mclustICL}} } \examples{ \donttest{ data(faithful) faithful.ICL = mclustICL(faithful) plot(faithful.ICL) } } \keyword{cluster} % docclass is function mclust/man/bic.Rd0000644000176200001440000000355414124774626013404 0ustar liggesusers\name{bic} \alias{bic} \title{ BIC for Parameterized Gaussian Mixture Models } \description{ Computes the BIC (Bayesian Information Criterion) for parameterized mixture models given the loglikelihood, the dimension of the data, and number of mixture components in the model. } \usage{ bic(modelName, loglik, n, d, G, noise=FALSE, equalPro=FALSE, \dots) } \arguments{ \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{loglik}{ The log-likelihood for a data set with respect to the Gaussian mixture model specified in the \code{modelName} argument. } \item{n}{ The number of observations in the data used to compute \code{loglik}. } \item{d}{ The dimension of the data used to compute \code{loglik}. } \item{G}{ The number of components in the Gaussian mixture model used to compute \code{loglik}. } \item{noise}{ A logical variable indicating whether or not the model includes an optional Poisson noise component. The default is to assume no noise component. 
} \item{equalPro}{ A logical variable indicating whether or not the components in the model are assumed to be present in equal proportion. The default is to assume unequal mixing proportions. } \item{\dots}{ Catches unused arguments in an indirect or list call via \code{do.call}. } } \value{ The BIC or Bayesian Information Criterion for the given input arguments. } \seealso{ \code{\link{mclustBIC}}, \code{\link{nVarParams}}, \code{\link{mclustModelNames}}. } \examples{ \donttest{ n <- nrow(iris) d <- ncol(iris)-1 G <- 3 emEst <- me(modelName="VVI", data=iris[,-5], unmap(iris[,5])) names(emEst) args(bic) bic(modelName="VVI", loglik=emEst$loglik, n=n, d=d, G=G) # do.call("bic", emEst) ## alternative call } } \keyword{cluster} mclust/man/mvn.Rd0000644000176200001440000000577313752165054013447 0ustar liggesusers\name{mvn} \alias{mvn} \title{ Univariate or Multivariate Normal Fit } \description{ Computes the mean, covariance, and log-likelihood from fitting a single Gaussian to given data (univariate or multivariate normal). } \usage{ mvn( modelName, data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{modelName}{ A character string representing a model name. This can be either \code{"Spherical"}, \code{"Diagonal"}, or \code{"Ellipsoidal"} or else \cr \code{"X"} for one-dimensional data,\cr \code{"XII"} for a spherical Gaussian, \cr \code{"XXI"} for a diagonal Gaussian \cr \code{"XXX"} for a general ellipsoidal Gaussian } \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{prior}{ Specification of a conjugate prior on the means and variances. The default assumes no prior. } \item{warn}{ A logical value indicating whether or not a warning should be issued whenever a singularity is encountered. The default is given by \code{mclust.options("warn")}. 
} \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{parameters}{ \describe{ \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{mvnX}}, \code{\link{mvnXII}}, \code{\link{mvnXXI}}, \code{\link{mvnXXX}}, \code{\link{mclustModelNames}} } \examples{ n <- 1000 set.seed(0) x <- rnorm(n, mean = -1, sd = 2) mvn(modelName = "X", x) mu <- c(-1, 0, 1) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% (2*diag(3)), MARGIN = 2, STATS = mu, FUN = "+") mvn(modelName = "XII", x) mvn(modelName = "Spherical", x) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% diag(1:3), MARGIN = 2, STATS = mu, FUN = "+") mvn(modelName = "XXI", x) mvn(modelName = "Diagonal", x) Sigma <- matrix(c(9,-4,1,-4,9,4,1,4,9), 3, 3) set.seed(0) x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% chol(Sigma), MARGIN = 2, STATS = mu, FUN = "+") mvn(modelName = "XXX", x) mvn(modelName = "Ellipsoidal", x) } \keyword{cluster} mclust/man/predict.MclustDR.Rd0000644000176200001440000000363413175055104015760 0ustar liggesusers\name{predict.MclustDR} \alias{predict.MclustDR} \alias{predict2D.MclustDR} \title{Classify multivariate observations on a dimension reduced subspace by Gaussian finite mixture modeling} \description{Classify multivariate observations on a dimension reduced subspace estimated from a 
Gaussian finite mixture model.} \usage{ \method{predict}{MclustDR}(object, dim = 1:object$numdir, newdata, eval.points, \dots) } \arguments{ \item{object}{an object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}.}} \item{dim}{the dimensions of the reduced subspace used for prediction.} \item{newdata}{a data frame or matrix giving the data. If missing the data obtained from the call to \code{\link{MclustDR}} are used.} \item{eval.points}{a data frame or matrix giving the data projected on the reduced subspace. If provided \code{newdata} is not used.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list with the following components: \item{dir}{a matrix containing the data projected onto the \code{dim} dimensions of the reduced subspace.} \item{density}{densities from mixture model for each data point.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} \item{uncertainty}{The uncertainty associated with the classification.} \item{classification}{A vector of values giving the MAP classification.} } \references{ Scrucca, L. (2010) Dimension reduction for model-based clustering. \emph{Statistics and Computing}, 20(4), pp. 471-484. } \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustDR}}.} \examples{ mod = Mclust(iris[,1:4]) dr = MclustDR(mod) pred = predict(dr) str(pred) data(banknote) mod = MclustDA(banknote[,2:7], banknote$Status) dr = MclustDR(mod) pred = predict(dr) str(pred) } \keyword{multivariate} mclust/man/dens.Rd0000644000176200001440000000451314124774626013572 0ustar liggesusers\name{dens} \alias{dens} \title{ Density for Parameterized MVN Mixtures } \description{ Computes densities of observations in parameterized MVN mixtures.
} \usage{ dens(data, modelName, parameters, logarithm = FALSE, warn=NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{pro}}{ The vector of mixing proportions for the components of the mixture. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{logarithm}{ A logical value indicating whether or not the logarithm of the component densities should be returned. The default is to return the component densities, obtained from the log component densities by exponentiation. } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric vector whose \emph{i}th component is the density of the \emph{ith} observation in \code{data} in the MVN mixture specified by \code{parameters}. 
} \seealso{ \code{\link{cdens}}, \code{\link{mclust.options}}, \code{\link{do.call}} } \examples{ \donttest{ faithfulModel <- Mclust(faithful) Dens <- dens(modelName = faithfulModel$modelName, data = faithful, parameters = faithfulModel$parameters) Dens ## alternative call do.call("dens", faithfulModel)} } \keyword{cluster} mclust/man/imputePairs.Rd0000644000176200001440000000517514124774626015150 0ustar liggesusers\name{imputePairs} \alias{imputePairs} \title{ Pairwise Scatter Plots showing Missing Data Imputations } \description{ Creates a scatter plot for each pair of variables in given data, allowing display of imputations for missing values in different colors and symbols than non missing values. } \usage{ imputePairs(data, dataImp, symbols = c(1,16), colors = c("black", "red"), labels, panel = points, \dots, lower.panel = panel, upper.panel = panel, diag.panel = NULL, text.panel = textPanel, label.pos = 0.5 + has.diag/3, cex.labels = NULL, font.labels = 1, row1attop = TRUE, gap = 0.2) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations containing missing values. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{dataImp}{ The dataset \code{data} with missing values imputed. } \item{symbols}{ Either an integer or character vector assigning plotting symbols to the nonmissing data and imputed values, respectively. The default is a closed circle for the nonmissing data and an open circle for the imputed values. } \item{colors}{ Either an integer or character vector assigning colors to the nonmissing data and imputed values, respectively. The default is black for the nonmissing data and red for the imputed values. } \item{labels}{ As in function \code{pairs}. } \item{panel}{ As in function \code{pairs}. } \item{\dots}{ As in function \code{pairs}. } \item{lower.panel}{ As in function \code{pairs}.
} \item{upper.panel}{ As in function \code{pairs}. } \item{diag.panel}{ As in function \code{pairs}. } \item{text.panel}{ As in function \code{pairs}. } \item{label.pos}{ As in function \code{pairs}. } \item{cex.labels}{ As in function \code{pairs}. } \item{font.labels}{ As in function \code{pairs}. } \item{row1attop}{ As in function \code{pairs}. } \item{gap}{ As in function \code{pairs}. } } \value{ A pairs plot displaying the location of missing and nonmissing values. } \references{ Schafer J. L. (1997). Analysis of Incomplete Multivariate Data, Chapman and Hall. } \seealso{ \code{\link{pairs}}, \code{\link{imputeData}} } \examples{ \donttest{ # Note that package 'mix' must be installed data(stlouis, package = "mix") # impute the continuous variables in the stlouis data stlimp <- imputeData(stlouis[,-(1:3)]) # plot imputed values imputePairs(stlouis[,-(1:3)], stlimp) } } \keyword{cluster} mclust/man/emE.Rd0000644000176200001440000001365114124774626013352 0ustar liggesusers\name{emE} \alias{emE} \alias{emV} \alias{emX} \alias{emEII} \alias{emVII} \alias{emEEI} \alias{emVEI} \alias{emEVI} \alias{emVVI} \alias{emEEE} \alias{emEEV} \alias{emVEV} \alias{emVVV} \alias{emEVV} \alias{emEVE} \alias{emVEE} \alias{emVVE} \alias{emXII} \alias{emXXI} \alias{emXXX} \title{EM algorithm starting with E-step for a parameterized Gaussian mixture model} \description{ Implements the EM algorithm for a parameterized Gaussian mixture model, starting with the expectation step.
} \usage{ emE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emX(data, prior = NULL, warn = NULL, \dots) emEII(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVII(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEEI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVEI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEVI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVVI(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEEE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVEE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEVE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVVE(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEEV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVEV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emEVV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emVVV(data, parameters, prior = NULL, control = emControl(), warn = NULL, \dots) emXII(data, prior = NULL, warn = NULL, \dots) emXXI(data, prior = NULL, warn = NULL, \dots) emXXX(data, prior = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. There should be one more mixing proportion than the number of Gaussian components if the mixture model includes a Poisson noise term.
} \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ An estimate of the reciprocal hypervolume of the data region. The default is determined by applying function \code{hypvol} to the data. Used only when \code{pro} includes an additional mixing proportion for a noise component. } } } \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{priorControl}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{warn}{ A logical value indicating whether or not a warning should be issued whenever a singularity is encountered. The default is given in \code{mclust.options("warn")}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. 
} \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. } \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \seealso{ \code{\link{me}}, \code{\link{mstep}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}}. } \examples{ \donttest{ msEst <- mstepEEE(data = iris[,-5], z = unmap(iris[,5])) names(msEst) emEEE(data = iris[,-5], parameters = msEst$parameters)} } \keyword{cluster} mclust/man/dmvnorm.Rd0000644000176200001440000000257313562733761014331 0ustar liggesusers\name{dmvnorm} \alias{dmvnorm} \title{Density of multivariate Gaussian distribution} \description{ Efficiently computes the density of observations for a generic multivariate Gaussian distribution. } \usage{ dmvnorm(data, mean, sigma, log = FALSE) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{mean}{ A vector of means for each variable. } \item{sigma}{ A positive definite covariance matrix. } \item{log}{ A logical value indicating whether or not the logarithm of the densities should be returned. } } \value{ A numeric vector whose \emph{i}th element gives the density of the \emph{ith} observation in \code{data} for the multivariate Gaussian distribution with parameters \code{mean} and \code{sigma}. 
} \seealso{ \code{\link{dnorm}}, \code{\link{dens}} } \examples{ # univariate ngrid <- 101 x <- seq(-5, 5, length = ngrid) dens <- dmvnorm(x, mean = 1, sigma = 5) plot(x, dens, type = "l") # bivariate ngrid <- 101 x1 <- x2 <- seq(-5, 5, length = ngrid) mu <- c(1,0) sigma <- matrix(c(1,0.5,0.5,2), 2, 2) dens <- dmvnorm(as.matrix(expand.grid(x1, x2)), mu, sigma) dens <- matrix(dens, ngrid, ngrid) image(x1, x2, dens) contour(x1, x2, dens, add = TRUE) } mclust/man/sigma2decomp.Rd0000644000176200001440000000546413752165052015214 0ustar liggesusers\name{sigma2decomp} \alias{sigma2decomp} \title{ Convert mixture component covariances to decomposition form. } \description{ Converts a set of covariance matrices from representation as a 3-D array to a parameterization by eigenvalue decomposition. } \usage{ sigma2decomp(sigma, G = NULL, tol = sqrt(.Machine$double.eps), \dots) } \arguments{ \item{sigma}{ Either a 3-D array whose [,,k]th component is the covariance matrix for the kth component in an MVN mixture model, or a single covariance matrix in the case that all components have the same covariance. } \item{G}{ The number of components in the mixture. When \code{sigma} is a 3-D array, the number of components can be inferred from its dimensions. } \item{tol}{ Tolerance for determining whether or not the covariances have equal volume, shape, and or orientation. The default is the square root of the relative machine precision, \code{sqrt(.Machine$double.eps)}, which is about \code{1.e-8}. } \item{\dots}{ Catches unused arguments from an indirect or list call via \code{do.call}. } } \value{ The covariance matrices for the mixture components in decomposition form, including the following components: \item{modelName}{ A character string indicating the infered model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{d}{ The dimension of the data. } \item{G}{ The number of components in the mixture model. 
} \item{scale}{ Either a \emph{G}-vector giving the scale of the covariance (the \emph{d}th root of its determinant) for each component in the mixture model, or a single numeric value if the scale is the same for each component. } \item{shape}{ Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column is the shape of the covariance matrix (normalized to have determinant 1) for the \emph{k}th component, or a \emph{d}-vector giving a common shape for all components. } \item{orientation}{ Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th entry is the orthonormal matrix whose columns are the eigenvectors of the covariance matrix of the \emph{k}th component, or a \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common orientation. The \code{orientation} component of \code{decomp} can be omitted in spherical and diagonal models, for which the principal components are parallel to the coordinate axes so that the orientation matrix is the identity. } } \seealso{ \code{\link{decomp2sigma}} } \examples{ meEst <- meEEE(iris[,-5], unmap(iris[,5])) names(meEst$parameters$variance) meEst$parameters$variance$Sigma sigma2decomp(meEst$parameters$variance$Sigma, G = length(unique(iris[,5]))) } \keyword{cluster} mclust/man/mclustModel.Rd0000644000176200001440000000667513475242100015127 0ustar liggesusers\name{mclustModel} \alias{mclustModel} \title{ Best model based on BIC } \description{ Determines the best model from clustering via \code{mclustBIC} for a given set of model parameterizations and numbers of components. } \usage{ mclustModel(data, BICvalues, G, modelNames, \dots) } \arguments{ \item{data}{ The matrix or vector of observations used to generate `object'. } \item{BICvalues}{ An \code{'mclustBIC'} object, which is the result of applying \code{mclustBIC} to \code{data}. 
} \item{G}{ A vector of integers giving the numbers of mixture components (clusters) from which the best model according to BIC will be selected (\code{as.character(G)} must be a subset of the row names of \code{BICvalues}). The default is to select the best model for all numbers of mixture components used to obtain \code{BICvalues}. } \item{modelNames}{ A vector of integers giving the model parameterizations from which the best model according to BIC will be selected (\code{as.character(model)} must be a subset of the column names of \code{BICvalues}). The default is to select the best model for parameterizations used to obtain \code{BICvalues}. } \item{\dots}{ Not used. For generic/method consistency. } } \value{ A list giving the optimal (according to BIC) parameters, conditional probabilities \code{z}, and log-likelihood, together with the associated classification and its uncertainty. The details of the output components are as follows: \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{n}{ The number of observations in the data. } \item{d}{ The dimension of the data. } \item{G}{ The number of components in the Gaussian mixture model corresponding to the optimal BIC. } \item{bic}{ The optimal BIC value. } \item{loglik}{ The log-likelihood corresponding to the optimal BIC. } \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. 
See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{z}{ A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the test data belongs to the \emph{k}th class. } } \seealso{ \code{\link{mclustBIC}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) mclustModel(iris[,-5], irisBIC) mclustModel(iris[,-5], irisBIC, G = 1:6, modelNames = c("VII", "VVI", "VVV")) } \keyword{cluster} % docclass is function mclust/man/errorBars.Rd0000644000176200001440000000317212542725574014604 0ustar liggesusers\name{errorBars} \alias{errorBars} \title{Draw error bars on a plot} \description{ Draw error bars at x from upper to lower. If \code{horizontal = FALSE} (default) bars are drawn vertically, otherwise horizontally. } \usage{ errorBars(x, upper, lower, width = 0.1, code = 3, angle = 90, horizontal = FALSE, \dots) } \arguments{ \item{x}{A vector of values where the bars must be drawn.} \item{upper}{A vector of upper values where the bars must end.} \item{lower}{A vector of lower values where the bars must start.} \item{width}{A value specifying the width of the end-point segment.} \item{code}{An integer code specifying the kind of arrows to be drawn. For details see \code{\link[graphics]{arrows}}.} \item{angle}{A value specifying the angle at the arrow edge. 
For details see \code{\link[graphics]{arrows}}.} \item{horizontal}{A logical specifying if bars should be drawn vertically (default) or horizontally.} \item{\dots}{Further arguments are passed to \code{\link[graphics]{arrows}}.} } %\value{} \examples{ par(mfrow=c(2,2)) # Create a simple example dataset x <- 1:5 n <- c(10, 15, 12, 6, 3) se <- c(1, 1.2, 2, 1, .5) # upper and lower bars b <- barplot(n, ylim = c(0, max(n)*1.5)) errorBars(b, lower = n-se, upper = n+se, lwd = 2, col = "red3") # one side bars b <- barplot(n, ylim = c(0, max(n)*1.5)) errorBars(b, lower = n, upper = n+se, lwd = 2, col = "red3", code = 1) # plot(x, n, ylim = c(0, max(n)*1.5), pch = 0) errorBars(x, lower = n-se, upper = n+se, lwd = 2, col = "red3") # dotchart(n, labels = x, pch = 19, xlim = c(0, max(n)*1.5)) errorBars(x, lower = n-se, upper = n+se, col = "red3", horizontal = TRUE) } mclust/man/simE.Rd0000644000176200001440000000715314124774626013543 0ustar liggesusers\name{simE} \alias{simE} \alias{simV} \alias{simEII} \alias{simVII} \alias{simEEI} \alias{simVEI} \alias{simEVI} \alias{simVVI} \alias{simEEV} \alias{simEEE} \alias{simVEV} \alias{simVVV} \alias{simEVE} \alias{simEVV} \alias{simVEE} \alias{simVVE} \title{ Simulate from a Parameterized MVN Mixture Model } \description{ Simulate data from a parameterized MVN mixture model. 
} \usage{ simE(parameters, n, seed = NULL, \dots) simV(parameters, n, seed = NULL, \dots) simEII(parameters, n, seed = NULL, \dots) simVII(parameters, n, seed = NULL, \dots) simEEI(parameters, n, seed = NULL, \dots) simVEI(parameters, n, seed = NULL, \dots) simEVI(parameters, n, seed = NULL, \dots) simVVI(parameters, n, seed = NULL, \dots) simEEE(parameters, n, seed = NULL, \dots) simVEE(parameters, n, seed = NULL, \dots) simEVE(parameters, n, seed = NULL, \dots) simVVE(parameters, n, seed = NULL, \dots) simEEV(parameters, n, seed = NULL, \dots) simVEV(parameters, n, seed = NULL, \dots) simEVV(parameters, n, seed = NULL, \dots) simVVV(parameters, n, seed = NULL, \dots) } \arguments{ \item{parameters}{ A list with the following components: \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If missing, equal proportions are assumed. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{n}{ An integer specifying the number of data points to be simulated. } \item{seed}{ An optional integer argument to \code{set.seed()} for reproducible random class assignment. By default the current seed will be used. Reproducibility can also be achieved by calling \code{set.seed} before calling \code{sim}. } \item{\dots }{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A matrix in which the first column is the classification and the remaining columns are the \code{n} observations simulated from the specified MVN mixture model. 
\item{Attributes:}{ \code{"modelName"} A character string indicating the variance model used for the simulation. } } \details{ This function can be used with an indirect or list call using \code{do.call}, allowing the output of e.g. \code{mstep}, \code{em} \code{me}, \code{Mclust}, to be passed directly without the need to specify individual parameters as arguments. } \seealso{ \code{\link{sim}}, \code{\link{Mclust}}, \code{\link{mstepE}}, \code{\link{mclustVariance}}. } \examples{ \donttest{ d <- 2 G <- 2 scale <- 1 shape <- c(1, 9) O1 <- diag(2) O2 <- diag(2)[,c(2,1)] O <- array(cbind(O1,O2), c(2, 2, 2)) O variance <- list(d= d, G = G, scale = scale, shape = shape, orientation = O) mu <- matrix(0, d, G) ## center at the origin simdat <- simEEV( n = 200, parameters = list(pro=c(1,1),mean=mu,variance=variance), seed = NULL) cl <- simdat[,1] sigma <- array(apply(O, 3, function(x,y) crossprod(x*y), y = sqrt(scale*shape)), c(2,2,2)) paramList <- list(mu = mu, sigma = sigma) coordProj( simdat, paramList = paramList, classification = cl) } } \keyword{cluster} mclust/man/clustCombi.Rd0000644000176200001440000001217614124774626014753 0ustar liggesusers\name{clustCombi} \alias{clustCombi} \alias{print.clustCombi} \alias{summary.clustCombi} \alias{print.summary.clustCombi} \title{ Combining Gaussian Mixture Components for Clustering } \description{ Provides a hierarchy of combined clusterings from the EM/BIC Gaussian mixture solution to one class, following the methodology proposed in the article cited in the references. } \usage{ clustCombi(object = NULL, data = NULL, \dots) } \arguments{ \item{object}{ An object returned by \code{\link{Mclust}} giving the optimal (according to BIC) parameters, conditional probabilities, and log-likelihood, together with the associated classification and its uncertainty. If not provided, the \code{data} argument must be specified. } \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. 
If a matrix or data frame, rows correspond to observations and columns correspond to variables. If the \code{object} argument is not provided, the function \code{\link{Mclust}} is applied to the given \code{data} to fit a mixture model.} \item{\dots}{ Optional arguments to be passed to called functions. Notably, any argument (such as the numbers of components for which the BIC is computed; the models to be fitted by EM; initialization parameters for the EM algorithm, etc.) to be passed to \code{\link{Mclust}} in case \code{object = NULL}. Please see the \code{\link{Mclust}} documentation for more details. } } \details{ Mclust provides a Gaussian mixture fitted to the data by maximum likelihood through the EM algorithm, for the model and number of components selected according to BIC. The corresponding components are hierarchically combined according to an entropy criterion, following the methodology described in the article cited in the references section. The solutions with numbers of classes between the one selected by BIC and one are returned as a \code{clustCombi} class object. } \value{ A list of class \code{clustCombi} giving the hierarchy of combined solutions from the number of components selected by BIC to one. The details of the output components are as follows: \item{classification}{A list of the data classifications obtained for each combined solution of the hierarchy through a MAP assignment} \item{combiM}{A list of matrices. \code{combiM[[K]]} is the matrix used to combine the components of the (K+1)-classes solution to get the K-classes solution. Please see the examples.} \item{combiz}{A list of matrices. \code{combiz[[K]]} is a matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class according to the K-classes combined solution.} \item{MclustOutput}{A list of class \code{Mclust}. 
Output of a call to the Mclust function (as provided by the user or the result of a call to the Mclust function) used to initiate the combined solutions hierarchy: please see the \code{\link{Mclust}} function documentation for details.} } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. Scrucca } \seealso{ \code{\link{plot.clustCombi}} } \examples{ data(Baudry_etal_2010_JCGS_examples) # run Mclust using provided data output <- clustCombi(data = ex4.1) \donttest{ # or run Mclust and then clustcombi on the returned object mod <- Mclust(ex4.1) output <- clustCombi(mod) } output summary(output) \donttest{ # run Mclust using provided data and any further optional argument provided output <- clustCombi(data = ex4.1, modelName = "EEV", G = 1:15) } # plot the hierarchy of combined solutions plot(output, what = "classification") # plot some "entropy plots" which may help one to select the number of classes plot(output, what = "entropy") # plot the tree structure obtained from combining mixture components plot(output, what = "tree") # the selected model and number of components obtained from Mclust using BIC output$MclustOutput # the matrix whose [i,k]th entry is the probability that i-th observation in # the data belongs to the k-th class according to the BIC solution head( output$combiz[[output$MclustOutput$G]] ) # the matrix whose [i,k]th entry is the probability that i-th observation in # the data belongs to the k-th class according to the first combined solution head( output$combiz[[output$MclustOutput$G-1]] ) # the matrix describing how to merge the 6-classes solution to get the # 5-classes solution output$combiM[[5]] # for example the following code returns the label of the class (in the # 5-classes combined solution) to which the 4th class (in the 6-classes # solution) is 
assigned. Only two classes in the (K+1)-classes solution # are assigned the same class in the K-classes solution: the two which # are merged at this step output$combiM[[5]] \%*\% c(0,0,0,1,0,0) # recover the 5-classes soft clustering from the 6-classes soft clustering # and the 6 -> 5 combining matrix all( output$combiz[[5]] == t( output$combiM[[5]] \%*\% t(output$combiz[[6]]) ) ) # the hard clustering under the 5-classes solution head( output$classification[[5]] ) } \keyword{ cluster } mclust/man/cdensE.Rd0000644000176200001440000001053414124774626014044 0ustar liggesusers\name{cdensE} \alias{cdensE} \alias{cdensV} \alias{cdensX} \alias{cdensEII} \alias{cdensVII} \alias{cdensEEI} \alias{cdensVEI} \alias{cdensEVI} \alias{cdensVVI} \alias{cdensEEE} \alias{cdensEEV} \alias{cdensVEV} \alias{cdensVVV} \alias{cdensEVE} \alias{cdensEVV} \alias{cdensVEE} \alias{cdensVVE} \alias{cdensXII} \alias{cdensXXI} \alias{cdensXXX} \title{ Component Density for a Parameterized MVN Mixture Model } \description{ Computes component densities for points in a parameterized MVN mixture model. 
} \usage{ cdensE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensX(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEII(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVII(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEEI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVEI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEVI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVVI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEEE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEEV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVEV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVVV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEVE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensEVV(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVEE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensVVE(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensXII(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensXXI(data, logarithm = FALSE, parameters, warn = NULL, \dots) cdensXXX(data, logarithm = FALSE, parameters, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{logarithm}{ A logical value indicating whether or not the logarithm of the component densities should be returned. The default is to return the component densities, obtained from the log component densities by exponentiation. } \item{parameters}{ The parameters of the model: \describe{ \item{\code{mean}}{ The mean for each component. 
If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{pro}}{ Mixing proportions for the components of the mixture. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } } } \item{warn}{ A logical value indicating whether or not a warning should be issued when computations fail. The default is \code{warn=FALSE}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A numeric matrix whose \code{[i,j]}th entry is the density of observation \emph{i} in component \emph{j}. The densities are not scaled by mixing proportions. } \note{ When one or more component densities are very large in magnitude, then it may be possible to compute the logarithm of the component densities but not the component densities themselves due to overflow. } \seealso{ \code{\link{cdens}}, \code{\link{dens}}, \code{\link{mclustVariance}}, \code{\link{mstep}}, \code{\link{mclust.options}}, \code{\link{do.call}}. 
} \examples{ \donttest{ z2 <- unmap(hclass(hcVVV(faithful),2)) # initial value for 2 class case model <- meVVV(data=faithful, z=z2) cdensVVV(data=faithful, logarithm = TRUE, parameters = model$parameters) data(cross) z2 <- unmap(cross[,1]) model <- meEEV(data = cross[,-1], z = z2) EEVdensities <- cdensEEV( data = cross[,-1], parameters = model$parameters) cbind(cross[,-1],map(EEVdensities))} } \keyword{cluster} mclust/man/mclust-package.Rd0000644000176200001440000000345414516406605015540 0ustar liggesusers\name{mclust-package} \alias{mclust-package} \alias{mclust} \docType{package} \title{Gaussian Mixture Modelling for Model-Based Clustering, Classification, and Density Estimation} \description{ Gaussian finite mixture models estimated via EM algorithm for model-based clustering, classification, and density estimation, including Bayesian regularization and dimension reduction. } \details{ For a quick introduction to \pkg{mclust} see the vignette \href{../doc/mclust.html}{A quick tour of mclust}. See also: \itemize{ \item \code{\link{Mclust}} for clustering; \item \code{\link{MclustDA}} for supervised classification; \item \code{\link{MclustSSC}} for semi-supervised classification; \item \code{\link{densityMclust}} for density estimation. } } \author{ Chris Fraley, Adrian Raftery and Luca Scrucca. Maintainer: Luca Scrucca \email{luca.scrucca@unipg.it} } \references{ Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. 
} \examples{ \donttest{ # Clustering mod1 <- Mclust(iris[,1:4]) summary(mod1) plot(mod1, what = c("BIC", "classification")) # Classification data(banknote) mod2 <- MclustDA(banknote[,2:7], banknote$Status) summary(mod2) plot(mod2) # Density estimation mod3 <- densityMclust(faithful$waiting) summary(mod3) } } \keyword{package} mclust/man/defaultPrior.Rd0000644000176200001440000000773613752165011015301 0ustar liggesusers\name{defaultPrior} \alias{defaultPrior} \title{ Default conjugate prior for Gaussian mixtures } \description{ Default conjugate prior specification for Gaussian mixtures. } \usage{ defaultPrior(data, G, modelName, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{G}{ The number of mixture components. } \item{modelName}{ A character string indicating the model: \cr\cr \code{"E"}: equal variance (univariate) \cr \code{"V"}: variable variance (univariate)\cr \code{"EII"}: spherical, equal volume \cr \code{"VII"}: spherical, unequal volume \cr \code{"EEI"}: diagonal, equal volume and shape\cr \code{"VEI"}: diagonal, varying volume, equal shape\cr \code{"EVI"}: diagonal, equal volume, varying shape \cr \code{"VVI"}: diagonal, varying volume and shape \cr \code{"EEE"}: ellipsoidal, equal volume, shape, and orientation \cr \code{"EEV"}: ellipsoidal, equal volume and equal shape\cr \code{"VEV"}: ellipsoidal, equal shape \cr \code{"VVV"}: ellipsoidal, varying volume, shape, and orientation. \cr\cr A description of the models above is provided in the help of \code{\link{mclustModelNames}}. Note that in the multivariate case only 10 out of 14 models may be used in conjunction with a prior, i.e. those available in \emph{MCLUST} up to version 4.4. } \item{\dots}{ One or more of the following: \describe{ \item{\code{dof}}{ The degrees of freedom for the prior on the variance. 
The default is \code{d + 2}, where \code{d} is the dimension of the data. } \item{\code{scale}}{ The scale parameter for the prior on the variance. The default is \code{var(data)/G^(2/d)}, where \code{d} is the dimension of the data. } \item{\code{shrinkage}}{ The shrinkage parameter for the prior on the mean. The default value is 0.01. If 0 or NA, no prior is assumed for the mean. } \item{\code{mean}}{ The mean parameter for the prior. The default value is \code{colMeans(data)}. } } } } \value{ A list giving the prior degrees of freedom, scale, shrinkage, and mean. } \details{ \code{defaultPrior} is a function whose default is to output the default prior specification for EM within \emph{MCLUST}.\cr Furthermore, \code{defaultPrior} can be used as a template to specify alternative parameters for a conjugate prior. } \references{ C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association} 97:611-631. C. Fraley and A. E. Raftery (2005, revised 2009). Bayesian regularization for normal mixture estimation and model-based clustering. Technical Report, Department of Statistics, University of Washington. C. Fraley and A. E. Raftery (2007). Bayesian regularization for normal mixture estimation and model-based clustering. \emph{Journal of Classification} 24:155-181. 
} \seealso{ \code{\link{mclustBIC}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{priorControl}} } \examples{ # default prior irisBIC <- mclustBIC(iris[,-5], prior = priorControl()) summary(irisBIC, iris[,-5]) # equivalent to previous example irisBIC <- mclustBIC(iris[,-5], prior = priorControl(functionName = "defaultPrior")) summary(irisBIC, iris[,-5]) # no prior on the mean; default prior on variance irisBIC <- mclustBIC(iris[,-5], prior = priorControl(shrinkage = 0)) summary(irisBIC, iris[,-5]) # equivalent to previous example irisBIC <- mclustBIC(iris[,-5], prior = priorControl(functionName="defaultPrior", shrinkage=0)) summary(irisBIC, iris[,-5]) defaultPrior( iris[-5], G = 3, modelName = "VVV") } \keyword{cluster} mclust/man/clPairs.Rd0000644000176200001440000000761613656731011014235 0ustar liggesusers\name{clPairs} \alias{clPairs} \alias{clPairsLegend} \title{Pairwise Scatter Plots showing Classification} \description{ Creates a scatter plot for each pair of variables in given data. Observations in different classes are represented by different colors and symbols. } \usage{ clPairs(data, classification, symbols = NULL, colors = NULL, cex = NULL, labels = dimnames(data)[[2]], cex.labels = 1.5, gap = 0.2, grid = FALSE, \dots) clPairsLegend(x, y, class, col, pch, cex, box = TRUE, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). 
The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{cex}{ A vector of numerical values specifying the size of the plotting symbol for each unique class in \code{classification}. Values in \code{cex} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). By default \code{cex = 1} for all classes is used. } \item{labels}{ A vector of character strings for labelling the variables. The default is to use the column dimension names of \code{data}. } \item{cex.labels}{ A numerical value specifying the size of the text labels. } \item{gap}{ An argument specifying the distance between subplots (see \code{\link{pairs}}). } \item{grid}{ A logical specifying if grid lines should be added to panels (see \code{\link{grid}}). } \item{x,y}{ The x and y co-ordinates with respect to a graphic device having plotting region coordinates \code{par("usr" = c(0,1,0,1))}. } \item{class}{ The class labels. } \item{box}{ A logical, if \code{TRUE} then a box is drawn around the current plot figure. } \item{col, pch}{ The colors and plotting symbols appearing in the legend. } \item{\dots}{ For a \code{clPairs} call may be additional arguments to be passed to \code{\link{pairs}}. For a \code{clPairsLegend} call may be additional arguments to be passed to \code{\link{legend}}. } } \details{ The function \code{clPairs()} draws scatter plots on the current graphics device for each combination of variables in \code{data}. Observations of different classifications are labeled with different symbols. The function \code{clPairsLegend()} can be used to add a legend. 
See examples below. } \value{ The function \code{clPairs()} invisibly returns a list with the following components: \item{class}{A character vector of class labels.} \item{col}{A vector of colors used for each class.} \item{pch}{A vector of plotting symbols used for each class.} } \seealso{ \code{\link{pairs}}, \code{\link{coordProj}}, \code{\link{mclust.options}} } \examples{ clPairs(iris[,1:4], cl = iris$Species) clp <- clPairs(iris[,1:4], cl = iris$Species, lower.panel = NULL) clPairsLegend(0.1, 0.4, class = clp$class, col = clp$col, pch = clp$pch, title = "Iris data") } \keyword{cluster} mclust/man/mclustLoglik.Rd0000644000176200001440000000160114124774626015307 0ustar liggesusers\name{mclustLoglik} \alias{mclustLoglik} \alias{print.mclustLoglik} \title{Log-likelihood from a table of BIC values for parameterized Gaussian mixture models} \description{ Compute the maximal log-likelihood from a table of BIC values contained in a \code{'mclustBIC'} object as returned by function \code{\link{mclustBIC}}. } \usage{ mclustLoglik(object, \dots) } \arguments{ \item{object}{An object of class \code{'mclustBIC'} containing the BIC values as returned by a call to \code{\link{mclustBIC}}. } \item{\dots}{ Catches unused arguments in an indirect or list call via \code{do.call}. } } \value{ An object of class \code{'mclustLoglik'} containing the maximal log-likelihood values for the Gaussian mixture models provided as input. } \seealso{ \code{\link{mclustBIC}}. 
} \examples{ \donttest{ BIC <- mclustBIC(iris[,1:4]) mclustLoglik(BIC) } } \keyword{cluster} mclust/man/predict.MclustSSC.Rd0000644000176200001440000000343714124774626016110 0ustar liggesusers\name{predict.MclustSSC} \alias{predict.MclustSSC} \title{Classification of multivariate observations by semi-supervised Gaussian finite mixtures} \description{Classify multivariate observations based on Gaussian finite mixture models estimated by \code{\link{MclustSSC}}.} \usage{ \method{predict}{MclustSSC}(object, newdata, \dots) } \arguments{ \item{object}{an object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}.} \item{newdata}{a data frame or matrix giving the data. If missing the train data obtained from the call to \code{\link{MclustSSC}} are classified.} \item{\dots}{further arguments passed to or from other methods.} } % \details{} \value{ Returns a list with the following components: \item{classification}{a factor of predicted class labels for \code{newdata}.} \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} } \author{Luca Scrucca} % \note{} \seealso{\code{\link{MclustSSC}}.} \examples{ \donttest{ X <- iris[,1:4] class <- iris$Species # randomly remove class labels set.seed(123) class[sample(1:length(class), size = 120)] <- NA table(class, useNA = "ifany") clPairs(X, ifelse(is.na(class), 0, class), symbols = c(0, 16, 17, 18), colors = c("grey", 4, 2, 3), main = "Partially classified data") # Fit semi-supervised classification model mod_SSC <- MclustSSC(X, class) pred_SSC <- predict(mod_SSC) table(Predicted = pred_SSC$classification, Actual = class, useNA = "ifany") X_new = data.frame(Sepal.Length = c(5, 8), Sepal.Width = c(3.1, 4), Petal.Length = c(2, 5), Petal.Width = c(0.5, 2)) predict(mod_SSC, newdata = X_new) } } \keyword{classification} mclust/man/Baudry_etal_2010_JCGS_examples.Rd0000644000176200001440000000371214234504163020231 0ustar 
liggesusers\name{Baudry_etal_2010_JCGS_examples} \alias{Baudry_etal_2010_JCGS_examples} \alias{ex4.1} \alias{ex4.2} \alias{ex4.3} \alias{ex4.4.1} \alias{ex4.4.2} \alias{Test1D} \docType{data} \title{Simulated Example Datasets From Baudry et al. (2010)} \description{ Simulated datasets used in Baudry et al. (2010) to illustrate the proposed mixture components combining method for clustering. Please see the cited article for a detailed presentation of these datasets. The data frame with name exN.M is presented in Section N.M in the paper. Test1D (not in the article) has been simulated from a Gaussian mixture distribution in R. ex4.1 and ex4.2 have been simulated from a Gaussian mixture distribution in R^2. ex4.3 has been simulated from a mixture of a uniform distribution on a square and a spherical Gaussian distribution in R^2. ex4.4.1 has been simulated from a Gaussian mixture model in R^2 ex4.4.2 has been simulated from a mixture of two uniform distributions in R^3. } \usage{data(Baudry_etal_2010_JCGS_examples)} \format{ \code{ex4.1} is a data frame with 600 observations on 2 real variables. \code{ex4.2} is a data frame with 600 observations on 2 real variables. \code{ex4.3} is a data frame with 200 observations on 2 real variables. \code{ex4.4.1} is a data frame with 800 observations on 2 real variables. \code{ex4.4.2} is a data frame with 300 observations on 3 real variables. \code{Test1D} is a data frame with 200 observations on 1 real variable. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics}, 19(2):332-353. 
} \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) output <- clustCombi(data = ex4.4.1) output # is of class clustCombi # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes plot(output) } } \keyword{datasets} mclust/man/cross.Rd0000644000176200001440000000102514124774626013767 0ustar liggesusers\name{cross} \alias{cross} \title{Simulated Cross Data} \usage{data(cross)} \description{ A 500 by 3 matrix in which the first column is the classification and the remaining columns are two data from a simulation of two crossed elliptical Gaussians. } \examples{ # This dataset was created as follows \donttest{ n <- 250 set.seed(0) cross <- rbind(matrix(rnorm(n*2), n, 2) \%*\% diag(c(1,9)), matrix(rnorm(n*2), n, 2) \%*\% diag(c(1,9))[,2:1]) cross <- cbind(c(rep(1,n),rep(2,n)), cross) } } \keyword{datasets} mclust/man/combMat.Rd0000644000176200001440000000173713475242100014213 0ustar liggesusers\name{combMat} \alias{combMat} \title{ Combining Matrix } \description{ Create a combining matrix } \usage{ combMat(K, l1, l2) } \arguments{ \item{K}{ The original number of classes: the matrix will define a combining from K to (K-1) classes. } \item{l1}{ Label of one of the two classes to be combined. } \item{l2}{ Label of the other class to be combined. } } \value{ If \code{z} is a vector (length \emph{K}) whose \emph{k}th entry is the probability that an observation belongs to the \emph{k}th class in a \emph{K}-classes classification, then \code{combiM \%*\% z} is the vector (length \emph{K-1}) whose \emph{k}th entry is the probability that the observation belongs to the \emph{k}th class in the \emph{K-1}-classes classification obtained by merging classes \code{l1} and \code{l2} in the initial classification. } \author{ J.-P. Baudry, A. E. Raftery, L. 
Scrucca } \seealso{ \code{\link{clustCombi}}, \code{\link{combiPlot}} } %\examples{} \keyword{ cluster } mclust/man/hdrlevels.Rd0000644000176200001440000000431313314423527014620 0ustar liggesusers\name{hdrlevels} \alias{hdrlevels} \title{Highest Density Region (HDR) Levels} \description{ Compute the levels of Highest Density Regions (HDRs) for any density and probability levels. } \usage{ hdrlevels(density, prob) } \arguments{ \item{density}{A vector of density values computed on a set of (observed) evaluation points.} \item{prob}{A vector of probability levels in the range \eqn{[0,1]}.} } \value{ The function returns a vector of density values corresponding to HDRs at given probability levels. } \details{ From Hyndman (1996), let \eqn{f(x)} be the density function of a random variable \eqn{X}. Then the \eqn{100(1-\alpha)\%} HDR is the subset \eqn{R(f_\alpha)} of the sample space of \eqn{X} such that \deqn{ R(f_\alpha) = {x : f(x) \ge f_\alpha } } where \eqn{f_\alpha} is the largest constant such that \eqn{ Pr( X \in R(f_\alpha)) \ge 1-\alpha } } \seealso{ \code{\link{plot.densityMclust}} } \references{ Rob J. Hyndman (1996) Computing and Graphing Highest Density Regions. \emph{The American Statistician}, 50(2):120-126. } \author{L. 
Scrucca} \examples{ # Example: univariate Gaussian x <- rnorm(1000) f <- dnorm(x) a <- c(0.5, 0.25, 0.1) (f_a <- hdrlevels(f, prob = 1-a)) plot(x, f) abline(h = f_a, lty = 2) text(max(x), f_a, labels = paste0("f_", a), pos = 3) mean(f > f_a[1]) range(x[which(f > f_a[1])]) qnorm(1-a[1]/2) mean(f > f_a[2]) range(x[which(f > f_a[2])]) qnorm(1-a[2]/2) mean(f > f_a[3]) range(x[which(f > f_a[3])]) qnorm(1-a[3]/2) # Example 2: univariate Gaussian mixture set.seed(1) cl <- sample(1:2, size = 1000, prob = c(0.7, 0.3), replace = TRUE) x <- ifelse(cl == 1, rnorm(1000, mean = 0, sd = 1), rnorm(1000, mean = 4, sd = 1)) f <- 0.7*dnorm(x, mean = 0, sd = 1) + 0.3*dnorm(x, mean = 4, sd = 1) a <- 0.25 (f_a <- hdrlevels(f, prob = 1-a)) plot(x, f) abline(h = f_a, lty = 2) text(max(x), f_a, labels = paste0("f_", a), pos = 3) mean(f > f_a) # find the regions of HDR ord <- order(x) f <- f[ord] x <- x[ord] x_a <- x[f > f_a] j <- which.max(diff(x_a)) region1 <- x_a[c(1,j)] region2 <- x_a[c(j+1,length(x_a))] plot(x, f, type = "l") abline(h = f_a, lty = 2) abline(v = region1, lty = 3, col = 2) abline(v = region2, lty = 3, col = 3) } \keyword{density} mclust/man/hypvol.Rd0000644000176200001440000000301313175052576014154 0ustar liggesusers\name{hypvol} \alias{hypvol} \title{ Approximate Hypervolume for Multivariate Data } \description{ Computes a simple approximation to the hypervolume of a multivariate data set. } \usage{ hypvol(data, reciprocal=FALSE) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{reciprocal}{ A logical variable indicating whether or not the reciprocal hypervolume is desired rather than the hypervolume itself. The default is to return the hypervolume. 
} } \value{ Returns the minimum of the hypervolume computed from simple variable bounds and that computed from variable bounds of the principal component scores. Used for the default hypervolume parameter for the noise component when observations are designated as noise in \code{Mclust} and \code{mclustBIC}. } \references{ A. Dasgupta and A. E. Raftery (1998). Detecting features in spatial point processes with clutter via model-based clustering. \emph{Journal of the American Statistical Association 93:294-302}. C. Fraley and A.E. Raftery (1998). \emph{Computer Journal 41:578-588}. C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. } \seealso{ \code{\link{mclustBIC}} } \examples{ hypvol(iris[,-5]) } \keyword{cluster} mclust/man/mapClass.Rd0000644000176200001440000000225413752165016014377 0ustar liggesusers\name{mapClass} \alias{mapClass} \title{Correspondence between classifications} \description{ Best correspondence between classes given two vectors viewed as alternative classifications of the same object. } \usage{ mapClass(a, b) } \arguments{ \item{a}{ A numeric or character vector of class labels. } \item{b}{ A numeric or character vector of class labels. Must have the same length as \code{a}. } } \value{ A list with two named elements, \code{aTOb} and \code{bTOa} which are themselves lists. The \code{aTOb} list has a component corresponding to each unique element of \code{a}, which gives the element or elements of \code{b} that result in the closest class correspondence. The \code{bTOa} list has a component corresponding to each unique element of \code{b}, which gives the element or elements of \code{a} that result in the closest class correspondence. 
} \seealso{ \code{\link{classError}}, \code{\link{table}} } \examples{ a <- rep(1:3, 3) a b <- rep(c("A", "B", "C"), 3) b mapClass(a, b) a <- sample(1:3, 9, replace = TRUE) a b <- sample(c("A", "B", "C"), 9, replace = TRUE) b mapClass(a, b) } \keyword{cluster} mclust/man/mclustBIC.Rd0000644000176200001440000001523514515765104014466 0ustar liggesusers\name{mclustBIC} \alias{mclustBIC} \alias{EMclust} \alias{print.mclustBIC} \title{BIC for Model-Based Clustering} \description{ BIC for parameterized Gaussian mixture models fitted by EM algorithm initialized by model-based hierarchical clustering.} \usage{ mclustBIC(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = list(hcPairs = NULL, subset = NULL, noise = NULL), Vinv = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive(), \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{G}{ An integer vector specifying the numbers of mixture components (clusters) for which the BIC is to be calculated. The default is \code{G=1:9}, unless the argument \code{x} is specified, in which case the default is taken from the values associated with \code{x}. } \item{modelNames}{ A vector of character strings indicating the models to be fitted in the EM phase of clustering. The help file for \code{\link{mclustModelNames}} describes the available models. The default is: \describe{ \item{\code{c("E", "V")}}{for univariate data} \item{\code{mclust.options("emModelNames")}}{for multivariate data (n > d)} \item{\code{c("EII", "VII", "EEI", "EVI", "VEI", "VVI")}}{the spherical and diagonal models for multivariate data (n <= d)} } unless the argument \code{x} is specified, in which case the default is taken from the values associated with \code{x}. 
} \item{prior}{ The default assumes no prior, but this argument allows specification of a conjugate prior on the means and variances through the function \code{priorControl}. } \item{control}{ A list of control parameters for EM. The defaults are set by the call \code{emControl()}. } \item{initialization}{ A list containing zero or more of the following components: \describe{ \item{\code{hcPairs}}{ A matrix of merge pairs for hierarchical clustering such as produced by function \code{\link{hc}}. \cr For multivariate data, the default is to compute a hierarchical agglomerative clustering tree by applying function \code{\link{hc}} with model specified by \code{mclust.options("hcModelName")}, and data transformation set by \code{mclust.options("hcUse")}.\cr All the input or a subset as indicated by the \code{subset} argument is used for initial clustering.\cr The hierarchical clustering results are then used to start the EM algorithm from a given partition.\cr For univariate data, the default is to use quantiles to start the EM algorithm. However, hierarchical clustering could also be used by calling \code{\link{hc}} with model specified as \code{"V"} or \code{"E"}. } \item{\code{subset}}{ A logical or numeric vector specifying a subset of the data to be used in the initial hierarchical clustering phase. By default no subset is used unless the number of observations exceeds the value specified by \code{mclust.options("subset")}. The \code{subset} argument is ignored if \code{hcPairs} are provided. Note that to guarantee exact reproducibility of results a seed must be specified (see \code{\link{set.seed}}). } \item{\code{noise}}{ A logical or numeric vector indicating an initial guess as to which observations are noise in the data. If numeric the entries should correspond to row indexes of the data. If supplied, a noise term will be added to the model in the estimation. } } } \item{Vinv}{ An estimate of the reciprocal hypervolume of the data region. 
The default is determined by applying function \code{hypvol} to the data. Used only if an initial guess as to which observations are noise is supplied. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when estimation fails. The default is controlled by \code{\link{mclust.options}}. } \item{x}{ An object of class \code{'mclustBIC'}. If supplied, \code{mclustBIC} will use the settings in \code{x} to produce another object of class \code{'mclustBIC'}, but with \code{G} and \code{modelNames} as specified in the arguments. Models that have already been computed in \code{x} are not recomputed. All arguments to \code{mclustBIC} except \code{data}, \code{G} and \code{modelName} are ignored and their values are set as specified in the attributes of \code{x}. Defaults for \code{G} and \code{modelNames} are taken from \code{x}. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the fitting procedure. By default is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ Return an object of class \code{'mclustBIC'} containing the Bayesian Information Criterion for the specified mixture models numbers of clusters. Auxiliary information returned as attributes. The corresponding \code{print} method shows the matrix of values and the top models according to the BIC criterion. 
} \seealso{ \code{\link{summary.mclustBIC}}, \code{\link{priorControl}}, \code{\link{emControl}}, \code{\link{mclustModel}}, \code{\link{hc}}, \code{\link{me}}, \code{\link{mclustModelNames}}, \code{\link{mclust.options}} } \examples{ irisBIC <- mclustBIC(iris[,-5]) irisBIC plot(irisBIC) \donttest{ subset <- sample(1:nrow(iris), 100) irisBIC <- mclustBIC(iris[,-5], initialization=list(subset = subset)) irisBIC plot(irisBIC) irisBIC1 <- mclustBIC(iris[,-5], G=seq(from=1,to=9,by=2), modelNames=c("EII", "EEI", "EEE")) irisBIC1 plot(irisBIC1) irisBIC2 <- mclustBIC(iris[,-5], G=seq(from=2,to=8,by=2), modelNames=c("VII", "VVI", "VVV"), x= irisBIC1) irisBIC2 plot(irisBIC2) } nNoise <- 450 set.seed(0) poissonNoise <- apply(apply( iris[,-5], 2, range), 2, function(x, n) runif(n, min = x[1]-.1, max = x[2]+.1), n = nNoise) set.seed(0) noiseInit <- sample(c(TRUE,FALSE),size=nrow(iris)+nNoise,replace=TRUE, prob=c(3,1)) irisNdata <- rbind(iris[,-5], poissonNoise) irisNbic <- mclustBIC(data = irisNdata, G = 1:5, initialization = list(noise = noiseInit)) irisNbic plot(irisNbic) } \keyword{cluster} mclust/man/mclust1Dplot.Rd0000644000176200001440000001205514124774626015236 0ustar liggesusers\name{mclust1Dplot} \alias{mclust1Dplot} \title{ Plot one-dimensional data modeled by an MVN mixture. } \description{ Plot one-dimensional data given parameters of an MVN mixture model for the data. } \usage{ mclust1Dplot(data, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "density", "error", "uncertainty"), symbols = NULL, colors = NULL, ngrid = length(data), xlab = NULL, ylab = NULL, xlim = NULL, ylim = NULL, cex = 1, main = FALSE, \dots) } \arguments{ \item{data}{ A numeric vector of observations. Categorical variables are not allowed. } \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. 
The relevant components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. There should be one more mixing proportion than the number of Gaussian components if the mixture model includes a Poisson noise term. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present argument \code{z} will be ignored. } \item{what}{ Choose from one of the following options: \code{"classification"} (default), \code{"density"}, \code{"error"}, \code{"uncertainty"}. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in \code{classification} in order of appearance in the observations (the order used by the function \code{unique}). The default is to use a single plotting symbol \emph{|}. Classes are delineated by showing them in separate lines above the whole of the data. 
} \item{colors}{ Either an integer or character vector assigning a color to each unique class \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{ngrid}{ Number of grid points to use for density computation over the interval spanned by the data. The default is the length of the data set. } \item{xlab, ylab}{ An argument specifying a label for the axes. } \item{xlim, ylim}{ An argument specifying bounds of the plot. This may be useful for when comparing plots. } \item{cex}{ An argument specifying the size of the plotting symbols. The default value is 1. } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing location of the mixture components, classification, uncertainty, density and/or classification errors. Points in the different classes are shown in separated levels above the whole of the data. 
} \seealso{ \code{\link{mclust2Dplot}}, \code{\link{clPairs}}, \code{\link{coordProj}} } \examples{ \donttest{ n <- 250 ## create artificial data set.seed(1) y <- c(rnorm(n,-5), rnorm(n,0), rnorm(n,5)) yclass <- c(rep(1,n), rep(2,n), rep(3,n)) yModel <- Mclust(y) mclust1Dplot(y, parameters = yModel$parameters, z = yModel$z, what = "classification") mclust1Dplot(y, parameters = yModel$parameters, z = yModel$z, what = "error", truth = yclass) mclust1Dplot(y, parameters = yModel$parameters, z = yModel$z, what = "density") mclust1Dplot(y, z = yModel$z, parameters = yModel$parameters, what = "uncertainty") } } \keyword{cluster} mclust/man/combiTree.Rd0000644000176200001440000000306214124774626014552 0ustar liggesusers\name{combiTree} \alias{combiTree} \title{Tree structure obtained from combining mixture components} \description{The method implemented in \code{\link{clustCombi}} can be used for combining Gaussian mixture components for clustering. This provides a hierarchical structure which can be graphically represented as a tree.} \usage{ combiTree(object, type = c("triangle", "rectangle"), yaxis = c("entropy", "step"), edgePar = list(col = "darkgray", lwd = 2), \dots) } \arguments{ \item{object}{ An object of class \code{'clustCombi'} resulting from a call to \code{\link{clustCombi}}. } \item{type}{ A string specifying the dendrogram's type. Possible values are \code{"triangle"} (default), and \code{"rectangle"}. } \item{yaxis}{ A string specifying the quantity used to draw the vertical axis. Possible values are \code{"entropy"} (default), and \code{"step"}. } \item{edgePar}{ A list of plotting parameters. See \code{\link[stats]{dendrogram}}. } \item{\dots}{Further arguments passed to or from other methods.} } %\details{} \value{ The function always draw a tree and invisibly returns an object of class \code{'dendrogram'} for fine tuning. } %\references{} \author{L. 
Scrucca} %\note{} \seealso{\code{\link{clustCombi}}} \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) output <- clustCombi(data = ex4.1) combiTree(output) combiTree(output, type = "rectangle") combiTree(output, yaxis = "step") combiTree(output, type = "rectangle", yaxis = "step") } } \keyword{cluster} \keyword{hplot} mclust/man/plot.Mclust.Rd0000644000176200001440000000520714124774626015070 0ustar liggesusers\name{plot.Mclust} \alias{plot.Mclust} \title{Plotting method for Mclust model-based clustering} \description{ Plots for model-based clustering results, such as BIC, classification, uncertainty and density. } \usage{ \method{plot}{Mclust}(x, what = c("BIC", "classification", "uncertainty", "density"), dimens = NULL, xlab = NULL, ylab = NULL, addEllipses = TRUE, main = FALSE, \dots) } \arguments{ \item{x}{ Output from \code{Mclust}. } \item{what}{ A string specifying the type of graph requested. Available choices are: \describe{ \item{\code{"BIC"}}{plot of BIC values used for choosing the number of clusters.} \item{\code{"classification"} =}{a plot showing the clustering. For data in more than two dimensions a pairs plot is produced, followed by a coordinate projection plot using specified \code{dimens}. Ellipses corresponding to covariances of mixture components are also drawn if \code{addEllipses = TRUE}.} \item{\code{"uncertainty"}}{a plot of classification uncertainty. For data in more than two dimensions a coordinate projection plot is drawn using specified \code{dimens}.} \item{\code{"density"}}{a plot of estimated density. For data in more than two dimensions a matrix of contours for coordinate projection plot is drawn using specified \code{dimens}.} } If not specified, in interactive sessions a menu of choices is proposed. } \item{dimens}{ A vector of integers specifying the dimensions of the coordinate projections in case of \code{"classification"}, \code{"uncertainty"}, or \code{"density"} plots. 
} \item{xlab, ylab}{ Optional labels for the x-axis and the y-axis. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances in case of \code{"classification"} or \code{"uncertainty"} plots. } \item{main}{ A logical or \code{NULL} indicating whether or not to add a title to the plot identifying the type of plot drawn. } \item{\dots}{ Other graphics parameters. } } \details{ For more flexibility in plotting, use \code{mclust1Dplot}, \code{mclust2Dplot}, \code{surfacePlot}, \code{coordProj}, or \code{randProj}. } \seealso{ \code{\link{Mclust}}, \code{\link{plot.mclustBIC}}, \code{\link{plot.mclustICL}}, \code{\link{mclust1Dplot}}, \code{\link{mclust2Dplot}}, \code{\link{surfacePlot}}, \code{\link{coordProj}}, \code{\link{randProj}}. } \examples{ \donttest{ precipMclust <- Mclust(precip) plot(precipMclust) faithfulMclust <- Mclust(faithful) plot(faithfulMclust) irisMclust <- Mclust(iris[,-5]) plot(irisMclust) } } \keyword{cluster} mclust/man/GvHD.Rd0000644000176200001440000000434514124774626013436 0ustar liggesusers\name{GvHD} \alias{GvHD} \alias{GvHD.pos} \alias{GvHD.control} \docType{data} \title{GvHD Dataset} \description{ GvHD (Graft-versus-Host Disease) data of Brinkman et al. (2007). Two samples of this flow cytometry data, one from a patient with the GvHD, and the other from a control patient. The GvHD positive and control samples consist of 9083 and 6809 observations, respectively. Both samples include four biomarker variables, namely, CD4, CD8b, CD3, and CD8. The objective of the analysis is to identify CD3+ CD4+ CD8b+ cell sub-populations present in the GvHD positive sample. A treatment of this data by combining mixtures is proposed in Baudry et al. (2010). } \usage{data(GvHD)} \format{ GvHD.pos (positive patient) is a data frame with 9083 observations on the following 4 variables, which are biomarker measurements. 
\describe{ \item{CD4}{} \item{CD8b}{} \item{CD3}{} \item{CD8}{} } GvHD.control (control patient) is a data frame with 6809 observations on the following 4 variables, which are biomarker measurements. \describe{ \item{CD4}{} \item{CD8b}{} \item{CD3}{} \item{CD8}{} } } \references{ R. R. Brinkman, M. Gasparetto, S.-J. J. Lee, A. J. Ribickas, J. Perkins, W. Janssen, R. Smiley and C. Smith (2007). High-content flow cytometry and temporal data analysis for defining a cellular signature of Graft-versus-Host Disease. \emph{Biology of Blood and Marrow Transplantation, 13: 691-700.} K. Lo, R. R. Brinkman, R. Gottardo (2008). Automated gating of flow cytometry data via robust model-based clustering. \emph{Cytometry A, 73: 321-332.} J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \examples{ \donttest{ data(GvHD) dat <- GvHD.pos[1:500,] # only a few lines for a quick example output <- clustCombi(data = dat) output # is of class clustCombi # plot the hierarchy of combined solutions plot(output, what = "classification") # plot some "entropy plots" which may help one to select the number of classes plot(output, what = "entropy") # plot the tree structure obtained from combining mixture components plot(output, what = "tree") } } \keyword{datasets} mclust/man/plot.hc.Rd0000644000176200001440000000705713731057511014206 0ustar liggesusers\name{plot.hc} \alias{plot.hc} \title{Dendrograms for Model-based Agglomerative Hierarchical Clustering} \description{ Display two types for dendrograms for model-based hierarchical clustering objects. } \usage{ \method{plot}{hc}(x, what=c("loglik","merge"), maxG=NULL, labels=FALSE, hang=0, \dots) } \arguments{ \item{x}{ An object of class \code{'hc'}. 
} \item{what}{ A character string indicating the type of dendrogram to be displayed.\cr Possible options are: \describe{ \item{\code{"loglik"}}{Distances between dendrogram levels are based on the classification likelihood.} \item{\code{"merge"}}{Distances between dendrogram levels are uniform, so that levels correspond to the number of clusters.} } } \item{maxG}{ The maximum number of clusters for the dendrogram. For \code{what = "merge"}, the default is the number of clusters in the initial partition. For \code{what = "loglik"}, the default is the minimum of the maximum number of clusters for which the classification loglikelihood can be computed in most cases, and the maximum number of clusters for which the classification likelihood increases with increasing numbers of clusters. } \item{labels}{ A logical variable indicating whether or not to display leaf (observation) labels for the dendrogram (row names of the data). These are likely to be useful only if the number of observations is fairly small, since otherwise the labels will be too crowded to read. The default is not to display the leaf labels. } \item{hang}{ For \code{hclust} objects, this argument is the fraction of the plot height by which labels should hang below the rest of the plot. A negative value will cause the labels to hang down from 0. Because model-based hierarchical clustering does not share all of the properties of \code{hclust}, the \code{hang} argument won't work in many instances. } \item{\dots}{ Additional plotting arguments. } } \value{ A dendrogram is drawn, with distances based on either the classification likelihood or the merge level (number of clusters). } \details{ The plotting input does not share all of the properties of \code{hclust} objects, hence not all plotting arguments associated with \code{hclust} can be expected to work here. 
} \note{ If \code{modelName = "E"} (univariate with equal variances) or \code{modelName = "EII"} (multivariate with equal spherical covariances), then the underlying model is the same as for Ward's method for hierarchical clustering. } \references{ J. D. Banfield and A. E. Raftery (1993). Model-based Gaussian and non-Gaussian Clustering. \emph{Biometrics 49:803-821}. C. Fraley (1998). Algorithms for model-based Gaussian hierarchical clustering. \emph{SIAM Journal on Scientific Computing 20:270-281}. C. Fraley and A. E. Raftery (2002). Model-based clustering, discriminant analysis, and density estimation. \emph{Journal of the American Statistical Association 97:611-631}. } \seealso{ \code{\link{hc}} } \examples{ data(EuroUnemployment) hcTree <- hc(modelName = "VVV", data = EuroUnemployment) plot(hcTree, what = "loglik") plot(hcTree, what = "loglik", labels = TRUE) plot(hcTree, what = "loglik", maxG = 5, labels = TRUE) plot(hcTree, what = "merge") plot(hcTree, what = "merge", labels = TRUE) plot(hcTree, what = "merge", labels = TRUE, hang = 0.1) plot(hcTree, what = "merge", labels = TRUE, hang = -1) plot(hcTree, what = "merge", labels = TRUE, maxG = 5) } \keyword{cluster} mclust/man/plot.mclustBIC.Rd0000644000176200001440000000406514124774626015447 0ustar liggesusers\name{plot.mclustBIC} \alias{plot.mclustBIC} \title{BIC Plot for Model-Based Clustering} \description{ Plots the BIC values returned by the \code{\link{mclustBIC}} function. } \usage{ \method{plot}{mclustBIC}(x, G = NULL, modelNames = NULL, symbols = NULL, colors = NULL, xlab = NULL, ylab = "BIC", legendArgs = list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01), \dots) } \arguments{ \item{x}{ Output from \code{mclustBIC}. } \item{G}{ One or more numbers of components corresponding to models fit in \code{x}. The default is to plot the BIC for all of the numbers of components fit. } \item{modelNames}{ One or more model names corresponding to models fit in \code{x}. 
The default is to plot the BIC for all of the models fit. } \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{xlab}{ Optional label for the horizontal axis of the BIC plot. } \item{ylab}{ Label for the vertical axis of the BIC plot. } \item{legendArgs}{ Arguments to pass to the \code{legend} function. Set to \code{NULL} for no legend. } \item{\dots}{ Other graphics parameters. } } \value{ A plot of the BIC values. } \seealso{ \code{\link{mclustBIC}} } \examples{ \donttest{ plot(mclustBIC(precip), legendArgs = list(x = "bottomleft")) plot(mclustBIC(faithful)) plot(mclustBIC(iris[,-5])) } } \keyword{cluster} % docclass is function mclust/man/uncerPlot.Rd0000644000176200001440000000275413175055331014611 0ustar liggesusers\name{uncerPlot} \alias{uncerPlot} \title{ Uncertainty Plot for Model-Based Clustering } \description{ Displays the uncertainty in converting a conditional probability from EM to a classification in model-based clustering. } \usage{ uncerPlot(z, truth, \dots) } \arguments{ \item{z}{ A matrix whose \emph{[i,k]}th entry is the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{truth}{ A numeric or character vector giving the true classification of the data. 
} \item{\dots }{ Provided to allow lists with elements other than the arguments can be passed in indirect or list calls with \code{do.call}. } } \value{ A plot of the uncertainty profile of the data, with uncertainties in increasing order of magnitude. If \code{truth} is supplied and the number of classes is the same as the number of columns of \code{z}, the uncertainty of the misclassified data is marked by vertical lines on the plot. } \details{ When \code{truth} is provided and the number of classes is compatible with \code{z}, the function \code{compareClass} is used to find the best correspondence between classes in \code{truth} and \code{z}. } \seealso{ \code{\link{mclustBIC}}, \code{\link{em}}, \code{\link{me}}, \code{\link{mapClass}} } \examples{ irisModel3 <- Mclust(iris[,-5], G = 3) uncerPlot(z = irisModel3$z) uncerPlot(z = irisModel3$z, truth = iris[,5]) } \keyword{cluster} % docclass is function mclust/man/mclust2Dplot.Rd0000644000176200001440000001316614124774626015233 0ustar liggesusers\name{mclust2Dplot} \alias{mclust2Dplot} \title{Plot two-dimensional data modelled by an MVN mixture} \description{ Plot two-dimensional data given parameters of an MVN mixture model for the data. } \usage{ mclust2Dplot(data, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "uncertainty", "error"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, scale = FALSE, cex = 1, PCH = ".", main = FALSE, swapAxes = FALSE, \dots) } \arguments{ \item{data}{ A numeric matrix or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. In this case the data are two dimensional, so there are two columns. 
} \item{parameters}{ A named list giving the parameters of an \emph{MCLUST} model, used to produce superimposing ellipses on the plot. The relevant components are as follows: \describe{ \item{\code{pro}}{ Mixing proportions for the components of the mixture. There should be one more mixing proportion than the number of Gaussian components if the mixture model includes a Poisson noise term. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } } } \item{z}{ A matrix in which the \code{[i,k]}th entry gives the probability of observation \emph{i} belonging to the \emph{k}th class. Used to compute \code{classification} and \code{uncertainty} if those arguments aren't available. } \item{classification}{ A numeric or character vector representing a classification of observations (rows) of \code{data}. If present, argument \code{z} will be ignored. } \item{truth}{ A numeric or character vector giving a known classification of each data point. If \code{classification} or \code{z} is also present, this is used for displaying classification errors. } \item{uncertainty}{ A numeric vector of values in \emph{(0,1)} giving the uncertainty of each data point. If present, argument \code{z} will be ignored. } \item{what}{ Choose from one of the following three options: \code{"classification"} (default), \code{"error"}, \code{"uncertainty"}. } \item{addEllipses}{ A logical indicating whether or not to add ellipses with axes corresponding to the within-cluster covariances. } \item{fillEllipses}{ A logical specifying whether or not to fill ellipses with transparent colors when \code{addEllipses = TRUE}. 
} \item{symbols}{ Either an integer or character vector assigning a plotting symbol to each unique class in \code{classification}. Elements in \code{symbols} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotSymbols")}. } \item{colors}{ Either an integer or character vector assigning a color to each unique class in \code{classification}. Elements in \code{colors} correspond to classes in order of appearance in the sequence of observations (the order used by the function \code{unique}). The default is given by \code{mclust.options("classPlotColors")}. } \item{xlim, ylim}{ Optional argument specifying bounds for the ordinate, abscissa of the plot. This may be useful when comparing plots. } \item{xlab, ylab}{ Optional argument specifying labels for the x-axis and y-axis. } \item{scale}{ A logical variable indicating whether or not the two chosen dimensions should be plotted on the same scale, and thus preserve the shape of the distribution. Default: \code{scale=FALSE} } \item{cex}{ An argument specifying the size of the plotting symbols. The default value is 1. } \item{PCH}{ An argument specifying the symbol to be used when a classification has not been specified for the data. The default value is a small dot ".". } \item{main}{ A logical variable or \code{NULL} indicating whether or not to add a title to the plot identifying the dimensions used. } \item{swapAxes}{ A logical variable indicating whether or not the axes should be swapped for the plot. } \item{\dots}{ Other graphics parameters. } } \value{ A plot showing the data, together with the location of the mixture components, classification, uncertainty, and/or classification errors. 
} \seealso{ \code{\link{surfacePlot}}, \code{\link{clPairs}}, \code{\link{coordProj}}, \code{\link{mclust.options}} } \examples{ \donttest{ faithfulModel <- Mclust(faithful) mclust2Dplot(faithful, parameters=faithfulModel$parameters, z=faithfulModel$z, what = "classification", main = TRUE) mclust2Dplot(faithful, parameters=faithfulModel$parameters, z=faithfulModel$z, what = "uncertainty", main = TRUE) } } \keyword{cluster} mclust/man/me.weighted.Rd0000644000176200001440000001106214125666512015033 0ustar liggesusers\name{me.weighted} \alias{me.weighted} \title{EM algorithm with weights starting with M-step for parameterized MVN mixture models} \description{ Implements the EM algorithm for fitting MVN mixture models parameterized by eigenvalue decomposition, when observations have weights, starting with the maximization step. } \usage{ me.weighted(data, modelName, z, weights = NULL, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, \dots) } \arguments{ \item{data}{ A numeric vector, matrix, or data frame of observations. Categorical variables are not allowed. If a matrix or data frame, rows correspond to observations and columns correspond to variables. } \item{modelName}{ A character string indicating the model. The help file for \code{\link{mclustModelNames}} describes the available models. } \item{z}{ A matrix whose \code{[i,k]}th entry is an initial estimate of the conditional probability of the ith observation belonging to the \emph{k}th component of the mixture. } \item{weights}{ A vector of positive weights, where the \code{[i]}th entry is the weight for the ith observation. If any of the weights are greater than one, then they are scaled so that the maximum weight is one. } \item{prior}{ Specification of a conjugate prior on the means and variances. See the help file for \code{priorControl} for further information. The default assumes no prior. } \item{control}{ A list of control parameters for EM. 
The defaults are set by the call \code{\link{emControl}}. } \item{Vinv}{ If the model is to include a noise term, \code{Vinv} is an estimate of the reciprocal hypervolume of the data region. If set to a negative value or 0, the model will include a noise term with the reciprocal hypervolume estimated by the function \code{hypvol}. The default is not to assume a noise term in the model through the setting \code{Vinv=NULL}. } \item{warn}{ A logical value indicating whether or not certain warnings (usually related to singularity) should be issued when the estimation fails. The default is set by \code{warn} using \code{\link{mclust.options}}. } \item{\dots}{ Catches unused arguments in indirect or list calls via \code{do.call}. } } \value{ A list including the following components: \item{modelName}{ A character string identifying the model (same as the input argument). } \item{z}{ A matrix whose \code{[i,k]}th entry is the conditional probability of the \emph{i}th observation belonging to the \emph{k}th component of the mixture. } \item{parameters}{ \describe{ \item{\code{pro}}{ A vector whose \emph{k}th component is the mixing proportion for the \emph{k}th component of the mixture model. If the model includes a Poisson term for noise, there should be one more mixing proportion than the number of Gaussian components. } \item{\code{mean}}{ The mean for each component. If there is more than one component, this is a matrix whose kth column is the mean of the \emph{k}th component of the mixture model. } \item{\code{variance}}{ A list of variance parameters for the model. The components of this list depend on the model specification. See the help file for \code{\link{mclustVariance}} for details. } \item{\code{Vinv}}{ The estimate of the reciprocal hypervolume of the data region used in the computation when the input indicates the addition of a noise component to the model. } } } \item{loglik}{ The log likelihood for the data in the mixture model. 
} \item{Attributes:}{ \code{"info"} Information on the iteration.\cr \code{"WARNING"} An appropriate warning if problems are encountered in the computations. } } \author{Thomas Brendan Murphy} \seealso{ \code{\link{me}}, \code{\link{meE}}, \dots, \code{\link{meVVV}}, \code{\link{em}}, \code{\link{mstep}}, \code{\link{estep}}, \code{\link{priorControl}}, \code{\link{mclustModelNames}}, \code{\link{mclustVariance}}, \code{\link{mclust.options}} } \examples{ \donttest{ w <- rep(1,150) w[1] <- 0 me.weighted(data = iris[,-5], modelName = "VVV", z = unmap(iris[,5]), weights = w) }} mclust/man/plot.clustCombi.Rd0000644000176200001440000000443614124774626015730 0ustar liggesusers\name{plot.clustCombi} \alias{plot.clustCombi} \title{ Plot Combined Clusterings Results } \description{ Plot combined clusterings results: classifications corresponding to \code{Mclust}/BIC and to the hierarchically combined classes, "entropy plots" to help to select a number of classes, and the tree structure obtained from combining mixture components. } \usage{ \method{plot}{clustCombi}(x, what = c("classification", "entropy", "tree"), \dots) } \arguments{ \item{x}{ Object returned by \code{\link{clustCombi}} function. } \item{what}{ Type of plot. } \item{\dots}{ Other arguments to be passed to other functions: \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{combiTree}}. Please see the corresponding documentations. } } \value{ Classifications are plotted with \code{\link{combiPlot}}, which relies on the \code{Mclust} plot functions. Entropy plots are plotted with \code{\link{entPlot}} and may help to select a number of classes: please see the article cited in the references. Tree plots are produced by \code{\link{combiTree}} and graph the tree structure implied by the clusters combining process. } \references{ J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. 
\emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} } \author{ J.-P. Baudry, A. E. Raftery, L. Scrucca } \seealso{ \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{combiTree}}, \code{\link{clustCombi}}. } \examples{ \donttest{ data(Baudry_etal_2010_JCGS_examples) ## 1D Example output <- clustCombi(data = Test1D, G=1:15) # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes (please see the article cited # in the references) plot(output) ## 2D Example output <- clustCombi(data = ex4.1) # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes (please see the article cited # in the references) plot(output) ## 3D Example output <- clustCombi(data = ex4.4.2) # plots the hierarchy of combined solutions, then some "entropy plots" which # may help one to select the number of classes (please see the article cited # in the references) plot(output) } } \keyword{ cluster } mclust/man/mclustVariance.Rd0000644000176200001440000000745014516420127015613 0ustar liggesusers\name{mclustVariance} \alias{mclustVariance} \title{ Template for variance specification for parameterized Gaussian mixture models } \description{ Specification of variance parameters for the various types of Gaussian mixture models. } \usage{ mclustVariance(modelName, d = NULL, G = 2) } \arguments{ \item{modelName}{A character string specifying the model.} \item{d}{An integer specifying the dimension of the data.} \item{G}{An integer specifying the number of components in the mixture model.} } \details{The \code{variance} component in the \code{parameters} list from the output to e.g. \code{me} or \code{mstep} or input to e.g. \code{estep} may contain one or more of the following arguments, depending on the model: \describe{ \item{\code{modelName}}{ A character string indicating the model. } \item{\code{d}}{ The dimension of the data. 
} \item{\code{G}}{ The number of components in the mixture model. } \item{\code{sigmasq}}{ for the one-dimensional models (\code{"E"}, \code{"V"}) and spherical models (\code{"EII"}, \code{"VII"}). This is either a vector whose \emph{k}th component is the variance for the \emph{k}th component in the mixture model (\code{"V"} and \code{"VII"}), or a scalar giving the common variance for all components in the mixture model (\code{"E"} and \code{"EII"}). } \item{\code{Sigma}}{ For the equal variance models \code{"EII"}, \code{"EEI"}, and \code{"EEE"}. A \emph{d} by \emph{d} matrix giving the common covariance for all components of the mixture model. } \item{\code{cholSigma}}{ For the equal variance model \code{"EEE"}. A \emph{d} by \emph{d} upper triangular matrix giving the Cholesky factor of the common covariance for all components of the mixture model. } \item{\code{sigma}}{ For all multidimensional mixture models. A \emph{d} by \emph{d} by \emph{G} matrix array whose \code{[,,k]}th entry is the covariance matrix for the \emph{k}th component of the mixture model. } \item{\code{cholsigma}}{ For the unconstrained covariance mixture model \code{"VVV"}. A \emph{d} by \emph{d} by \emph{G} matrix array whose \code{[,,k]}th entry is the upper triangular Cholesky factor of the covariance matrix for the \emph{k}th component of the mixture model. } \item{\code{scale}}{ For diagonal models \code{"EEI"}, \code{"EVI"}, \code{"VEI"}, \code{"VVI"} and constant-shape models \code{"EEV"} and \code{"VEV"}. Either a \emph{G}-vector giving the scale of the covariance (the \emph{d}th root of its determinant) for each component in the mixture model, or a single numeric value if the scale is the same for each component. } \item{\code{shape}}{ For diagonal models \code{"EEI"}, \code{"EVI"}, \code{"VEI"}, \code{"VVI"} and constant-shape models \code{"EEV"} and \code{"VEV"}. 
Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column is the shape of the covariance matrix (normalized to have determinant 1) for the \emph{k}th component, or a \emph{d}-vector giving a common shape for all components. } \item{\code{orientation}}{ For the constant-shape models \code{"EEV"} and \code{"VEV"}. Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th entry is the orthonormal matrix whose columns are the eigenvectors of the covariance matrix of the \emph{k}th component, or a \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common orientation. The \code{orientation} component is not needed in spherical and diagonal models, since the principal components are parallel to the coordinate axes so that the orientation matrix is the identity. } } In all cases, the value \code{-1} is used as a placeholder for unknown nonzero entries. } \keyword{cluster} mclust/man/emControl.Rd0000644000176200001440000000442213752164214014574 0ustar liggesusers\name{emControl} \alias{emControl} \title{Set control values for use with the EM algorithm} \description{ Supplies a list of values including tolerances for singularity and convergence assessment, for use with functions involving EM within \emph{MCLUST}. } \usage{ emControl(eps, tol, itmax, equalPro) } \arguments{ \item{eps}{ A scalar tolerance associated with deciding when to terminate computations due to computational singularity in covariances. Smaller values of \code{eps} allow computations to proceed nearer to singularity. The default is the relative machine precision \code{.Machine$double.eps}, which is approximately \eqn{2e-16} on IEEE-compliant machines. } \item{tol}{ A vector of length two giving relative convergence tolerances for the log-likelihood and for parameter convergence in the inner loop for models with iterative M-step ("VEI", "VEE", "EVE", "VVE", "VEV"), respectively. The default is \code{c(1.e-5, sqrt(.Machine$double.eps))}. 
If only one number is supplied, it is used as the tolerance for the outer iterations and the tolerance for the inner iterations is as in the default. } \item{itmax}{ A vector of length two giving integer limits on the number of EM iterations and on the number of iterations in the inner loop for models with iterative M-step ("VEI", "VEE", "EVE", "VVE", "VEV"), respectively. The default is \code{c(.Machine$integer.max, .Machine$integer.max)} allowing termination to be completely governed by \code{tol}. If only one number is supplied, it is used as the iteration limit for the outer iteration only. } \item{equalPro}{ Logical variable indicating whether or not the mixing proportions are equal in the model. Default: \code{equalPro = FALSE}. } } \value{ A named list in which the names are the names of the arguments and the values are the values supplied to the arguments. } \details{ \code{emControl} is provided for assigning values and defaults for EM within \emph{MCLUST}. } \seealso{ \code{\link{em}}, \code{\link{estep}}, \code{\link{me}}, \code{\link{mstep}}, \code{\link{mclustBIC}} } \examples{ irisBIC <- mclustBIC(iris[,-5], control = emControl(tol = 1.e-6)) summary(irisBIC, iris[,-5]) } \keyword{cluster} mclust/man/icl.Rd0000644000176200001440000000161613205036712013375 0ustar liggesusers\name{icl} \alias{icl} \title{ ICL for an estimated Gaussian Mixture Model } \description{ Computes the ICL (Integrated Complete-data Likelihood) criterion for a Gaussian Mixture Model fitted by \code{\link{Mclust}}. } \usage{ icl(object, \dots) } \arguments{ \item{object}{ An object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}. } \item{\dots}{Further arguments passed to or from other methods.} } \value{ The ICL for the given input MCLUST model. } \references{ Biernacki, C., Celeux, G., Govaert, G. (2000). Assessing a mixture model for clustering with the integrated completed likelihood. \emph{IEEE Trans. 
Pattern Analysis and Machine Intelligence}, 22 (7), 719-725. } \seealso{ \code{\link{Mclust}}, \code{\link{mclustBIC}}, \code{\link{mclustICL}}, \code{\link{bic}}. } \examples{ mod <- Mclust(iris[,1:4]) icl(mod) } \keyword{cluster} mclust/man/diabetes.Rd0000644000176200001440000000267314164301253014412 0ustar liggesusers\name{diabetes} \alias{diabetes} \docType{data} \title{Diabetes Data (flawed)} \description{The data set contains three measurements made on 145 non-obese adult patients classified into three groups.} \usage{data(diabetes)} \format{A data frame with the following variables: \describe{ \item{class}{The type of diabetes: \code{Normal}, \code{Overt}, and \code{Chemical}.} \item{glucose}{Area under plasma glucose curve after a three hour oral glucose tolerance test (OGTT).} \item{insulin}{Area under plasma insulin curve after a three hour oral glucose tolerance test (OGTT).} \item{sspg}{Steady state plasma glucose.} } } \details{This dataset is \emph{flawed} (compare with the reference) and it is provided here only for backward compatibility. A 5-variable version of the Reaven and Miller data is available in package \pkg{rrcov}. The \emph{glucose} and \emph{sspg} columns in this dataset are identical to the \emph{fpg} and \emph{insulin} columns, respectively in the \pkg{rrcov} version. The \emph{insulin} column in this dataset differs from the \emph{glucose} column in the \pkg{rrcov} version in one entry: observation 104 has the value 45 in the \emph{insulin} column in this data, and 455 in the corresponding \emph{glucose} column of the \pkg{rrcov} version.} \source{Reaven, G. M. and Miller, R. G. (1979). An attempt to define the nature of chemical diabetes using a multidimensional analysis. 
\emph{Diabetologia} 16:17-24.} \keyword{datasets} mclust/man/decomp2sigma.Rd0000644000176200001440000000413214124774626015212 0ustar liggesusers\name{decomp2sigma} \alias{decomp2sigma} \title{ Convert mixture component covariances to matrix form } \description{ Converts covariances from a parameterization by eigenvalue decomposition or Cholesky factorization to representation as a 3-D array. } \usage{ decomp2sigma(d, G, scale, shape, orientation, \dots) } \arguments{ \item{d}{ The dimension of the data. } \item{G}{ The number of components in the mixture model. } \item{scale}{ Either a \emph{G}-vector giving the scale of the covariance (the \emph{d}th root of its determinant) for each component in the mixture model, or a single numeric value if the scale is the same for each component. } \item{shape}{ Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column is the shape of the covariance matrix (normalized to have determinant 1) for the \emph{k}th component, or a \emph{d}-vector giving a common shape for all components. } \item{orientation}{ Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th entry is the orthonormal matrix whose columns are the eigenvectors of the covariance matrix of the \emph{k}th component, or a \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common orientation. The \code{orientation} component of \code{decomp} can be omitted in spherical and diagonal models, for which the principal components are parallel to the coordinate axes so that the orientation matrix is the identity. } \item{\dots}{ Catches unused arguments from an indirect or list call via \code{do.call}. } } \value{ A 3-D array whose \code{[,,k]}th component is the covariance matrix of the \emph{k}th component in an MVN mixture model. 
} \seealso{ \code{\link{sigma2decomp}} } \examples{ meEst <- meVEV(iris[,-5], unmap(iris[,5])) names(meEst) meEst$parameters$variance dec <- meEst$parameters$variance decomp2sigma(d=dec$d, G=dec$G, shape=dec$shape, scale=dec$scale, orientation = dec$orientation) \donttest{ do.call("decomp2sigma", dec) ## alternative call } } \keyword{cluster} mclust/man/cvMclustDA.Rd0000644000176200001440000000563014156711614014642 0ustar liggesusers\name{cvMclustDA} \alias{cvMclustDA} \title{MclustDA cross-validation} \description{ V-fold cross-validation for classification models based on Gaussian finite mixture modelling. } \usage{ cvMclustDA(object, nfold = 10, prop = object$prop, verbose = interactive(), \dots) } \arguments{ \item{object}{ An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}. } \item{nfold}{ An integer specifying the number of folds (by default 10-fold CV is used). } \item{prop}{ A vector of class prior probabilities, which if not provided default to the class proportions in the training data. } \item{verbose}{ A logical controlling if a text progress bar is displayed during the cross-validation procedure. By default it is \code{TRUE} if the session is interactive, and \code{FALSE} otherwise. } \item{\dots }{Further arguments passed to or from other methods.} } \details{ The function implements V-fold cross-validation for classification models fitted by \code{\link{MclustDA}}. Classification error and Brier score are the metrics returned, but other metrics can be computed using the output returned by this function (see Examples section below). 
} \value{ The function returns a list with the following components: \item{classification}{a factor of cross-validated class labels.} \item{z}{a matrix containing the cross-validated probabilities for class assignment.} \item{ce}{the cross-validation classification error.} \item{se.ce}{the standard error of the cross-validated classification error.} \item{brier}{the cross-validation Brier score.} \item{se.brier}{the standard error of the cross-validated Brier score.} } \author{Luca Scrucca} \seealso{ \code{\link{MclustDA}}, \code{\link{predict.MclustDA}}, \code{\link{classError}}, \code{\link{BrierScore}} } \examples{ \donttest{ # Iris data Class <- iris$Species X <- iris[,1:4] ## EDDA model with common covariance (essentially equivalent to linear discriminant analysis) irisEDDA <- MclustDA(X, Class, modelType = "EDDA", modelNames = "EEE") cv <- cvMclustDA(irisEDDA) # 10-fold CV (default) str(cv) cv <- cvMclustDA(irisEDDA, nfold = length(Class)) # LOO-CV str(cv) ## MclustDA model selected by BIC irisMclustDA <- MclustDA(X, Class) cv <- cvMclustDA(irisMclustDA) # 10-fold CV (default) str(cv) # Banknote data data("banknote") Class <- banknote$Status X <- banknote[,2:7] ## EDDA model selected by BIC banknoteEDDA <- MclustDA(X, Class, modelType = "EDDA") cv <- cvMclustDA(banknoteEDDA) # 10-fold CV (default) str(cv) (ConfusionMatrix <- table(Pred = cv$classification, Class)) TP <- ConfusionMatrix[1,1] FP <- ConfusionMatrix[1,2] FN <- ConfusionMatrix[2,1] TN <- ConfusionMatrix[2,2] (Sensitivity <- TP/(TP+FN)) (Specificity <- TN/(FP+TN)) } } \keyword{multivariate} mclust/DESCRIPTION0000644000176200001440000000335114525113245013273 0ustar liggesusersPackage: mclust Version: 6.0.1 Date: 2023-11-15 Title: Gaussian Mixture Modelling for Model-Based Clustering, Classification, and Density Estimation Description: Gaussian finite mixture models fitted via EM algorithm for model-based clustering, classification, and density estimation, including Bayesian regularization, dimension 
reduction for visualisation, and resampling-based inference. Authors@R: c(person("Chris", "Fraley", role = "aut"), person("Adrian E.", "Raftery", role = "aut", comment = c(ORCID = "0000-0002-6589-301X")), person("Luca", "Scrucca", role = c("aut", "cre"), email = "luca.scrucca@unipg.it", comment = c(ORCID = "0000-0003-3826-0484")), person("Thomas Brendan", "Murphy", role = "ctb", comment = c(ORCID = "0000-0002-5668-7046")), person("Michael", "Fop", role = "ctb", comment = c(ORCID = "0000-0003-3936-2757"))) Depends: R (>= 3.0) Imports: stats, utils, graphics, grDevices Suggests: knitr (>= 1.4), rmarkdown (>= 2.10), mix (>= 1.0), geometry (>= 0.4), MASS License: GPL (>= 2) URL: https://mclust-org.github.io/mclust/ VignetteBuilder: knitr Repository: CRAN ByteCompile: true NeedsCompilation: yes LazyData: yes Encoding: UTF-8 Packaged: 2023-11-15 08:01:53 UTC; luca Author: Chris Fraley [aut], Adrian E. Raftery [aut] (), Luca Scrucca [aut, cre] (), Thomas Brendan Murphy [ctb] (), Michael Fop [ctb] () Maintainer: Luca Scrucca Date/Publication: 2023-11-15 10:00:05 UTC mclust/build/0000755000176200001440000000000014525075361012670 5ustar liggesusersmclust/build/vignette.rds0000644000176200001440000000032014525075361015222 0ustar liggesusersb```b`a@, $؀XX84WnrNiq^Pn BaifrBI~iB~D%*n4)@y `aBRǚZ% 5/$~yVaaqIY0AAn0Ez0?¼Ht&${+%$Q/nnmclust/src/0000755000176200001440000000000014525075361012360 5ustar liggesusersmclust/src/mclustaddson.f0000644000176200001440000021773013643634507015244 0ustar liggesusers* ===================================================================== subroutine transpose(X, p) * * Compute transpose of a matrix * * ===================================================================== implicit NONE integer :: p, i, j double precision :: X(p,p), temp do j = 2, p do i = 1, j-1 temp = X(i,j) X(i,j) = X(j,i) X(j,i) = temp end do end do return end * ===================================================================== subroutine crossprodf(X, Y, n, p, q, XTY) * * Given 
matrices X and Y of dimension (n x p) and (n x q) computes * the matrix of cross-product, i.e. X' Y * * ===================================================================== implicit NONE integer n, p, q double precision X(n,p), Y(n,q), XTY(p,q) * Compute X'Y using DGEMM blas subroutine call DGEMM('T', 'N', p, q, n, 1.d0, X, n, Y, n, 0.d0, XTY, p) end * ====================================================================== subroutine covwf ( X, Z, n, p, G, M, S, W ) * * Given data matrix X(n x p) and weight matrix Z(n x G) computes * weighted means M(p x G), weighted covariance matrices S(p x p x G) * and weighted scattering matrices W(p x p x G) * * ====================================================================== implicit none integer :: n, p, G double precision :: X(n,p), Z(n,G) double precision :: M(p,G), S(p,p,G), W(p,p,G) integer :: j, k double precision :: sumZ(G), temp(n,p) * compute X'Z using BLAS call dgemm('T', 'N', p, G, n, 1.d0, X, n, Z, n, 0.d0, M, p) * compute row sums of Z sumZ = sum(Z, DIM = 1) do k = 1,G * compute means call dscal(p, (1.d0/sumZ(k)), M(:,k), 1) do j = 1,p * compute sqrt(Z) * (X - M) temp(:,j) = sqrt(Z(:,k)) * (X(:,j) - M(j,k)) end do * compute scattering matrix call dgemm('T', 'N', p, p, n, 1.d0, temp, n, temp, n, * 0.d0, W(:,:,k), p) * compute covariance matrix S(:,:,k) = W(:,:,k)/sumZ(k) end do return end ************************************************************************ **** EVV model ************************************************************************ * ====================================================================== subroutine msevv (x,z, n,p,G, mu,O,U,scale,shape,pro, lwork,info, * eps) * Maximization step for model EEV * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), O(p,p,*), U(p,p,*), pro(G) double precision :: scale(G), shape(p,G) double precision :: sumz(G) integer :: i, j, k, 
info, lwork, l double precision :: temp(p), wrk(lwork), eps, dummy(1) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) * * double precision :: BIGLOG * parameter (BIGLOG = 709.d0) * * double precision :: SMALOG * parameter (SMALOG = -708.d0) *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) * U(:,:,k) = U(:,:,k) + * * spread(temp, dim = 2, ncopies = p)* * * spread(temp, dim = 1, ncopies = p) * outer product, Press et al. (1992), p. 970 call dger(p, p, 1.d0, temp, 1, temp, 1, U(:,:,k), p) * more efficient end do * U contains the weighted scattering matrix O(:,:,k) = U(:,:,k) * call dgesvd('O', 'N', p, p, O(:,:,k), p, shape(:,k), * * dummy, 1, dummy, 1, wrk, lwork, info) call dgesvd('N', 'O', p, p, O(:,:,k), p, shape(:,k), * dummy, 1, dummy, 1, wrk, lwork, info) * O now contains eigenvectors of the scattering matrix * ##### NOTE: O is transposed * shape contains the eigenvalues * check if dgesvd converged (info == 0) if (info .ne. 0) then l = info else scale(k) = exp( sum( log(shape(:,k)) ) )**(1.d0/p) call dscal(p*p, 1.d0/scale(k), U(:,:,k), 1) call dscal(p, 1.d0/scale(k), shape(:,k), 1) * now U is the matrix Ck (Celeux, Govaert 1995, p.787) * and shape is the proper scaled shape (matrix A) end if end do * check very small eigenvalues (singular covariance) if (minval(shape) .le. sqrt(eps) .or. * minval(scale) .le. 
sqrt(eps)) then shape = FLMAX scale = FLMAX return end if scale(1) = sum(scale) / sum(sumz) return end * ====================================================================== subroutine esevv (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model EVV * ====================================================================== implicit none integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p,G), scale, shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: temp1(p), temp2(p), temp3 integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (singular covariance) if (minval(shape) .le. sqrt(eps) .or. scale .le. sqrt(eps)) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O(:,:,k), p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale*shape(:,k)) temp3 = ddot(p, temp2, 1, temp2, 1) * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro(:) ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine meevv (x,z, n,p,G,Gnoise, mu,O,U,scale,shape,pro,Vinv, * loglik, eqpro,itmax,tol,eps, * niterout,errout,lwork,info) * Maximization-expectation algorithm for model EVV * ====================================================================== implicit none logical :: eqpro integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p,G),scale(G),shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: U(p,p,G), sumz(Gnoise) double precision :: temp1(p), temp2(p), temp3, scsh(p) * double precision :: temp(*) integer :: i, j, k, info, lwork, l, itmax, niterout double precision :: tol, eps, errout, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- l = 0 rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop using goto statement 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( Vinv .gt. 
0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * M step.......................................................... do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1, 1, temp1, 1, U(:,:,k), p) end do * U contains the weighted scattering matrix O(:,:,k) = U(:,:,k) call dgesvd('N', 'O', p, p, O(:,:,k), p, shape(:,k), * dummy, 1, dummy, 1, wrk, lwork, info) * O now contains eigenvectors of the scattering matrix * ##### NOTE: O is transposed * shape contains the eigenvalues * check if dgesvd converged (info == 0) if (info .ne. 0) then l = info return else scale(k) = exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) call dscal(p*p, 1.d0/scale(k), U(:,:,k), 1) call dscal(p, 1.d0/scale(k), shape(:,k), 1) * now U is the matrix Ck (Celeux, Govaert 1995, p.787) * and shape is the proper scaled shape (matrix A) end if end do if ( Vinv .gt. 0.d0 ) then scale(1) = sum(scale) / sum(sumz(1:G)) else scale(1) = sum(scale)/dble(n) end if * if noise lambda = num/sum_{k=1}^{G} n_k; pag. 787 Celeux, Govaert * ................................................................ * check very small eigenvalues (singular covariance) if (minval(shape) .le. rteps .or. minval(scale) .le. rteps) then loglik = FLMAX return end if * E step.......................................................... 
do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale(1)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed scsh = sqrt(scale(1)*shape(:,k)) do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O(:,:,k), p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/scsh temp3 = ddot(p, temp2, 1, temp2, 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * noise component if (Vinv .gt. 0.d0) then * call dcopy( n, log(Vinv) + log(pro(Gnoise)), 0, z(:,Gnoise), 1) z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) * with p_0 the proportion of noise loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) * errout = abs(loglik - lkprev) lkprev = loglik * temp(niterout) = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components * if ( minval(pro) .lt. rteps ) then if ( any(sumz .lt. rteps, 1) ) then loglik = -FLMAX return end if * WHILE condition if ( errout .gt. tol .and. niterout .lt. 
itmax ) goto 100 return end ************************************************************************ **** VEE model ************************************************************************ * ====================================================================== subroutine msvee (x,z, n,p,G, mu,U,C,scale,pro, lwork,info, * itmax,tol, niterin,errin) * Maximization step for model VEE * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), U(p,p,G), C(p,p), pro(G) * ### NOTE: shape and orientation parameters are computed in R double precision :: scale(G) double precision :: sumz(G) integer :: i, j, k, info, lwork, l * integer :: dummy double precision :: temp1(p), temp2(p,p), temp3 double precision :: wrk(lwork), tol, errin, trgt, trgtprev integer :: itmax, niterin double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1, 1, temp1, 1, U(:,:,k), p) end do * U contains the weighted scattering matrix * check if U is positive definite (see help of dpotrf) * (through Choleski is more efficient) temp2 = U(:,:,k) call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 
0 ) then info = 0 scale = FLMAX return end if end if end do * covariance matrix components estimation niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * WHILE loop using goto statement 100 continue niterin = niterin + 1 * initialise C * call dcopy(p*p, 0.d0, 0, C, 1) dummy(1) = 0.d0 call dcopy(p*p, dummy, 0, C, 1) * ### NOTE: scale is initialised in R do k = 1,G C = C + U(:,:,k)/scale(k) end do * C contains the numerator of matrix C in pag.785, Celeux, Govaert temp2 = C call dsyev('N', 'U', p, temp2, p, temp1, wrk, lwork, info) temp1 = temp1(p:1:-1) * temp1 contains the (decreasing) ordered eigenvalues of C * check if dsyev converged or illegal value if ( info .ne. 0 ) then l = info return end if temp3 = exp( sum(log(temp1)) )**(1/dble(p)) * temp3 is the denominator of C C = C/temp3 * C is now the actual matrix C of pag.785 * compute the inverse of C via Choleski temp2 = C call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 0 ) then info = 0 scale = FLMAX return end if end if call dpotri('U', p, temp2, p, info) if ( info .ne. 0 ) return do j = 2,p do k = 1,(j-1) temp2(j,k) = temp2(k,j) end do end do * temp2 is now the inverse of C scale = 0.d0 do k = 1,G do j = 1,p scale(k) = scale(k) + ddot(p, U(j,:,k), 1, temp2(:,j), 1) end do scale(k) = scale(k) / (dble(p)*sumz(k)) end do * scale contains now the lambdas (pag.784 of Celeux, Govaert) * evaluate target function * trgt = dble(n)*dble(p) + dble(p)*SUM(log(scale)*sumz) trgt = sum(sumz)*dble(p) + dble(p)*SUM(log(scale)*sumz) * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition if ( errin .gt. tol .and. niterin .lt. 
itmax ) goto 100 return end * ====================================================================== subroutine esvee (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model VEE * ====================================================================== implicit none integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), O(p,p), scale(G), shape(p) double precision :: temp1(p), temp2(p), temp3 integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (cannot compute E step) if ( minval(shape) .le. sqrt(eps) .or. * minval(scale) .le. sqrt(eps) ) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j)) + log(scale(k)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O, p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale(k)*shape) temp3 = ddot(p, temp2, 1, temp2, 1) * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro(:) ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine mevee ( x,z, n,p,G,Gnoise, mu,C,U,scale,shape,pro,Vinv, * loglik, eqpro,itmaxin,tolin,itmaxout,tolout,eps, * niterin,errin,niterout,errout,lwork,info ) * Maximization-expectation algorithm for model VEE * ====================================================================== implicit none logical :: eqpro integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), C(p,p), scale(G), shape(p) double precision :: U(p,p,G), sumz(Gnoise) double precision :: temp1(p), temp2(p,p), temp3, temp4(p) integer :: i, j, k, info, lwork, l * integer :: dummy integer :: itmaxin, itmaxout, niterin, niterout double precision :: tolin, tolout, errin, errout, eps, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: trgt, trgtprev double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- l = 0 rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop for EM algorithm 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( Vinv .gt. 
0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1, 1, temp1, 1, U(:,:,k), p) end do * U contains the weighted scattering matrix * check if U is positive definite (see help of dpotrf) * (through Choleski is more efficient) temp2 = U(:,:,k) call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 0 ) then info = 0 loglik = FLMAX return end if end if end do * M step.......................................................... * covariance matrix components estimation niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * initialise scale * call dcopy(G, 1.d0, 0, scale, 1) dummy(1) = 1.d0 call dcopy(G, dummy, 0, scale, 1) * WHILE loop for M step 110 continue niterin = niterin + 1 * initialise C * call dcopy(p*p, 0.d0, 0, C, 1) dummy(1) = 0.d0 call dcopy(p*p, dummy, 0, C, 1) do k = 1,G C = C + U(:,:,k)/scale(k) end do * C contains the numerator of matrix C in pag.785, Celeux, Govaert temp2 = C call dsyev('N', 'U', p, temp2, p, temp1, wrk, lwork, info) temp1 = temp1(p:1:-1) * temp1 contains the (decreasing) ordered eigenvalues of C * check if dsyev converged or illegal value if ( info .ne. 0 ) then l = info return end if temp3 = exp( sum(log(temp1)) )**(1/dble(p)) * temp3 is the denominator of C C = C/temp3 * C is now the actual matrix C of pag.785 * compute the inverse of C via Choleski temp2 = C call dpotrf('U', p, temp2, p, info) if ( info .ne. 0 ) then if ( info .lt. 0) then l = info return else if ( info .gt. 
0 ) then info = 0 loglik = FLMAX return end if end if call dpotri('U', p, temp2, p, info) if ( info .ne. 0 ) return do j = 2,p do k = 1,(j-1) temp2(j,k) = temp2(k,j) end do end do * temp2 is now the inverse of C scale = 0.d0 do k = 1,G do j = 1,p scale(k) = scale(k) + ddot(p, U(j,:,k), 1, temp2(:,j), 1) end do scale(k) = scale(k) / (dble(p)*sumz(k)) end do * scale contains now the lambdas (pag.784 of Celeux, Govaert) * evaluate target function * trgt = dble(n)*dble(p) + dble(p)*SUM(log(scale)*sumz) trgt = sum(sumz(1:G))*dble(p) + dble(p)*SUM(log(scale)*sumz(1:G)) * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition for M step if ( errin .gt. tolin .and. niterin .lt. itmaxin ) goto 110 * ................................................................ * eigenvalues of C shape = temp1 / temp3 * check very small eigenvalues (singular covariance) if (minval(shape) .le. rteps .or. minval(scale) .le. rteps) then loglik = FLMAX return end if * E step.......................................................... do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + log(shape(j)) + log(scale(k)) end do * compute mahalanobis distance for each observation do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp4, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp4, 1) call dgemv('N', p, p, 1.d0, * temp2, p, temp1, 1, 0.d0, temp4, 1) temp4 = temp4/scale(k) temp3 = ddot(p, temp4, 1, temp1, 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) * z(i,k) = const - logdet/2.d0 - temp3/2.d0 end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) * with p_0 the proportion of noise loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) * errout = abs(loglik - lkprev) lkprev = loglik * temp(niterout) = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components if ( minval(sumz) .lt. rteps ) then loglik = -FLMAX return end if * WHILE condition EM if ( errout .gt. tolout .and. niterout .lt. 
itmaxout ) goto 100 return end ************************************************************************ **** EVE model ************************************************************************ * ====================================================================== subroutine mseve (x,z, n,p,G, mu,U,O,scale,shape,pro, lwork,info, * itmax,tol, niterin,errin, eps) * Maximization step for model EVE * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), U(p,p,G), pro(G), O(p,p) double precision :: scale, shape(p,G) double precision :: sumz(G), omega(G) integer :: i, j, k, info, lwork double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) double precision :: wrk(lwork), tol, errin, trgt, trgtprev, eps integer :: itmax, niterin double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. 
sqrt(eps) ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * WHILE loop using goto statement 100 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = t( R %*% t(P) ) = P %*% t(R) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * Algorithm MM 2 ...................................... 
* call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = R %*% t(P) * call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) O = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * NOTE: we compute the TRANSPOSED of the matrix in the output in the paper call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do shape(:,k) = shape(:,k)/ * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) * now shape contains matrix A of Celeux, Govaert pag. 785 * check positive values if ( minval(shape(:,k)) .lt. sqrt(eps) ) then info = 0 shape = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition if ( errin .gt. tol .and. niterin .lt. 
itmax ) goto 100 scale = trgt / ( sum(sumz)*dble(p) ) return end * ====================================================================== subroutine eseve (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model EVE * ====================================================================== implicit none integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p), scale, shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: temp1(p), temp2(p), temp3, temp4(n) integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (singular covariance) if (minval(shape) .le. sqrt(eps) .or. scale .le. sqrt(eps)) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O, p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale*shape(:,k)) temp3 = ddot(p, temp2, 1, temp2, 1) temp4(i) = temp3 * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine meeve ( x,z, n,p,G,Gnoise, mu,O,U,scale,shape,pro,Vinv, * loglik, eqpro,itmaxin,tolin,itmaxout,tolout,eps, * niterin,errin,niterout,errout,lwork,info ) * Maximization-expectation algorithm for model EVE * ====================================================================== implicit none logical :: eqpro integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), O(p,p), scale, shape(p,G) double precision :: U(p,p,G), sumz(Gnoise), omega(G) double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) integer :: i, j, k, info, lwork integer :: itmaxin, itmaxout, niterin, niterout double precision :: tolin, tolout, errin, errout, eps, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: trgt, trgtprev double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop for EM algorithm 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( 
Vinv .gt. 0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. rteps ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix * M step.......................................................... niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * ### NOTE: we don't re-initialize shape and orientation at each * outer iteration of the EM algorithm * WHILE loop for M step 110 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... 
do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = t( R %*% t(P) ) = P %*% t(R) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * Algorithm MM 2 ...................................... * call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 
0 ) return * NOTE: t(O) = R %*% t(P) * call dgemm( 'T','T', p,p,p, 1.d0, temp2,p, temp1,p, * * 0.d0, O,p ) O = 0.d0 * NOTE: we compute the TRANSPOSED of the matrix in the output in the paper call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do shape(:,k) = shape(:,k)/ * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) * now shape contains matrix A of Celeux, Govaert pag. 785 * check positive values if ( minval(shape(:,k)) .lt. rteps ) then info = 0 loglik = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition M step if ( errin .gt. tolin .and. niterin .lt. itmaxin ) goto 110 scale = trgt / ( sum(sumz(1:G))*dble(p) ) * ................................................................ * E step.......................................................... 
const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2(:,1), 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2(:,1), 1) call dgemv('N', p, p, 1.d0, * O, p, temp1(:,1), 1, 0.d0, temp2(:,1), 1) temp2(:,1) = temp2(:,1)/sqrt(scale*shape(:,k)) temp3(1,1) = ddot(p, temp2(:,1), 1, temp2(:,1), 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 + log(pro(k)) * z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * noise component if (Vinv .gt. 0.d0) then z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3(1,1) = maxval(z(i,:)) temp1(1,1) = temp3(1,1) + log( sum(exp(z(i,:) - temp3(1,1))) ) loglik = loglik + temp1(1,1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1,1) ) * re-normalize probabilities temp3(1,1) = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3(1,1), z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) lkprev = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components if ( minval(sumz) .lt. rteps ) then loglik = -FLMAX return end if * WHILE condition EM if ( errout .gt. tolout .and. niterout .lt. 
itmaxout ) goto 100 return end ************************************************************************ **** VVE model ************************************************************************ * ====================================================================== subroutine msvve (x,z, n,p,G, mu,U,O,scale,shape,pro, lwork,info, * itmax,tol, niterin,errin, eps) * Maximization step for model VVE * ====================================================================== implicit none integer :: n, p, G double precision :: x(n,p), z(n,G) double precision :: mu(p,G), U(p,p,G), pro(G), O(p,p) double precision :: scale(G), shape(p,G) double precision :: sumz(G), omega(G) integer :: i, j, k, info, lwork double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) double precision :: wrk(lwork), tol, errin, trgt, trgtprev, eps integer :: itmax, niterin double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot *----------------------------------------------------------------------- * colsums of z sumz = sum(z, dim = 1) * a priori probabilities pro = sumz / dble(n) * pro = sumz / sum(sumz) * if there is noise sum(sumz) does not sum to n. See help(mstep) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. 
sqrt(eps) ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * WHILE loop using goto statement 100 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(P %*% t(R)) = R %*% t(P) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * Algorithm MM 2 ...................................... 
call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = R %*% t(P) O = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... * compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do * shape(:,k) = shape(:,k)/ * * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) shape(:,k) = shape(:,k)/sumz(k) * now shape contains matrix A (scale*A) of Celeux, Govaert pag. 785 * compute scale parameter and shape matrix A scale(k) = exp( sum( log(shape(:,k)) ) )**(1/dble(p)) shape(:,k) = shape(:,k)/scale(k) * check positive values if ( minval(shape(:,k)) .lt. sqrt(eps) ) then info = 0 shape = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition if ( errin .gt. tol .and. niterin .lt. 
itmax ) goto 100 return end * ====================================================================== subroutine esvve (x,z, n,p,G,Gnoise, mu,O,scale,shape,pro, Vinv, * loglik, eps) * Expectation step for model VVE * ====================================================================== implicit none integer :: n, p, G, Gnoise double precision :: x(n,p), z(n,Gnoise) double precision :: mu(p,G), O(p,p), scale(G), shape(p,G) double precision :: Vinv, pro(Gnoise) double precision :: temp1(p), temp2(p), temp3, temp4(n) integer :: i, k, j double precision :: const, logdet, loglik, eps double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- * check very small eigenvalues (singular covariance) if ( minval(shape) .le. sqrt(eps) .or. * minval(scale) .le. sqrt(eps) ) then loglik = FLMAX return end if const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale(k)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1 = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2, 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2, 1) call dgemv('N', p, p, 1.d0, * O, p, temp1, 1, 0.d0, temp2, 1) temp2 = temp2/sqrt(scale(k)*shape(:,k)) temp3 = ddot(p, temp2, 1, temp2, 1) temp4(i) = temp3 * temp3 contains the mahalanobis distance * z(i,k) = const - logdet/2.d0 - temp3/2.d0 + log(pro(k)) z(i,k) = const - logdet/2.d0 - temp3/2.d0 * help(cdens) --> The densities are not scaled by mixing proportions end do * z contains the log-density log(N(x|theta_k)) end do if ( pro(1) .lt. 0.d0 ) return * cdens function * noise component if (Vinv .gt. 
0.d0) then * call dcopy( n, log(Vinv), 0, z(:,Gnoise), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(:,Gnoise), 1) end if * now column Gnoise of z contains log(Vinv) do i = 1,n z(i,:) = z(i,:) + log( pro ) * Numerical Recipes pag.844 temp3 = maxval(z(i,:)) temp1(1) = temp3 + log( sum(exp(z(i,:) - temp3)) ) loglik = loglik + temp1(1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1) ) * re-normalize probabilities temp3 = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3, z(i,:), 1 ) end do return end * ====================================================================== subroutine mevve ( x,z, n,p,G,Gnoise, mu,O,U,scale,shape,pro,Vinv, * loglik, eqpro,itmaxin,tolin,itmaxout,tolout,eps, * niterin,errin,niterout,errout,lwork,info) * Maximization-expectation algorithm for model VVE * ====================================================================== implicit none logical :: eqpro integer :: n,p,G,Gnoise double precision :: x(n,p), z(n,Gnoise), pro(Gnoise), Vinv double precision :: mu(p,G), O(p,p), scale(G), shape(p,G) double precision :: U(p,p,G), sumz(Gnoise), omega(G) double precision :: temp1(p,p), temp2(p,p), temp3(p,p), temp4(p) integer :: i, j, k, info, lwork integer :: itmaxin, itmaxout, niterin, niterout double precision :: tolin, tolout, errin, errout, eps, rteps double precision :: const, logdet, loglik, lkprev, wrk(lwork) double precision :: trgt, trgtprev double precision :: log2pi parameter (log2pi = 1.837877066409345d0) double precision :: FLMAX parameter (FLMAX = 1.7976931348623157d308) external :: ddot double precision :: ddot double precision :: dummy(1) * double precision :: smalog * parameter (smalog = -708.d0) *----------------------------------------------------------------------- rteps = sqrt(eps) niterout = 0 errout = FLMAX lkprev = FLMAX/2 loglik = FLMAX const = (-dble(p)/2.d0)*log2pi * WHILE loop for EM algorithm 100 continue niterout = niterout + 1 sumz = sum(z, dim = 1) if ( eqpro ) then if ( 
Vinv .gt. 0 ) then pro(Gnoise) = sumz(Gnoise) / dble(n) pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) sumz = pro * dble(n) else pro = 1 / dble(G) sumz = pro * dble(n) end if else pro = sumz / dble(n) end if * re-initialise U * call dcopy(p*p*G, 0.d0, 0, U, 1) dummy(1) = 0.d0 call dcopy(p*p*G, dummy, 0, U, 1) * compute weighted scattering matrix and means do k = 1,G do j = 1,p mu(j,k) = sum(x(:,j)*z(:,k))/sumz(k) end do do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * sqrt(z(i,k)) call dger(p, p, 1.d0, temp1(:,1), 1, temp1(:,1), 1, * U(:,:,k), p) end do * U contains the weighted scattering matrix * compute the eigenvalues of U to be stored in omega temp2 = U(:,:,k) call dsyev('N', 'U', p, temp2, p, temp1(:,1), wrk, lwork, info) * now temp1 contains all the eigenvalues of U * check if dsyev converged and positive definite if ( info .ne. 0 ) then return else if ( minval(temp1(:,1)) .lt. rteps ) then info = 0 scale = FLMAX return end if end if omega(k) = temp1(p,1) end do * omega contains the largest eigenvalue of each scattering matrix * M step.......................................................... niterin = 0 errin = FLMAX trgt = FLMAX trgtprev = FLMAX/2 * covariance matrix components estimation * we consider algorithm MM 1 and MM 2 of Browne, McNicholas 2013 * with a modification in computing the orientation matrix in the MM 2 step * shape (matrix A) and orientation (matrix D) initialised in R * shape = matrix(1, p,G) * O = diag(p) * ### NOTE: we don't re-initialize shape and orientation at each * outer iteration of the EM algorithm * WHILE loop for M step 110 continue * ### NOTE: O is transposed niterin = niterin + 1 temp2 = 0.d0 temp3 = 0.d0 * temp3 will contain matrix F * Algorithm MM 1 ...................................... 
do k = 1,G do j = 1,p * temp1(j,:) = O(:,j) / shape(j,k) temp1(j,:) = O(j,:) / shape(j,k) end do * temp1 contains inv(A)t(D) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, U(:,:,k),p, * 0.d0, temp2,p ) * temp2 contains inv(A) %*% t(D) %*% W temp1 = temp2 - omega(k)*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(P %*% t(R)) = R %*% t(P) call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) * O contains TRANSPOSED orientation (matrix D of Browne, McNicholas) * ..................................................... * Algorithm MM 2 ...................................... call transpose(O, p) * O contains matrix D of Browne, McNicholas * Algorithm MM 2 temp1 = 0.d0 temp3 = 0.d0 do k = 1,G call dgemm( 'N','N', p,p,p, 1.d0, U(:,:,k),p, O,p, * 0.d0, temp1,p ) * temp1 contains W %*% D do j = 1,p temp2(:,j) = temp1(:,j) / shape(j,k) end do * temp2 contains W %*% D %*% inv(A) temp1 = temp2 - maxval( 1/shape(:,k) )*temp1 temp3 = temp3 + temp1 * temp3 contains the matrix F end do * compute matrices P and R where svd(F) = P %*% B %*% t(R) call dgesvd('A','A', p,p, temp3,p, temp4, temp1,p, temp2,p, * wrk, lwork, info) * now temp1 contains matrix P, temp2 contains matrix t(R) * temp4 contains the singular values * check if dgesvd converged if ( info .ne. 0 ) return * NOTE: t(O) = R %*% t(P) O = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, temp1,p, temp2,p, * 0.d0, O,p ) call transpose(O, p) * O contains TRANSPOSED matrix D of Browne, McNicholas * ..................................................... 
* compute shape (matrix A) and target function trgt = 0.d0 do k = 1,G temp1 = 0.d0 call dgemm( 'N','N', p,p,p, 1.d0, O,p, U(:,:,k),p, * 0.d0, temp1,p ) * temp1 contains t(D) %*% W do j = 1,p shape(j,k) = ddot(p, temp1(j,:), 1, O(j,:), 1) end do * shape(:,k) = shape(:,k)/ * * exp( sum( log(shape(:,k)) ) )**(1.d0/dble(p)) shape(:,k) = shape(:,k)/sumz(k) * now shape contains matrix A (scale*A) of Celeux, Govaert pag. 785 * compute scale parameter and shape matrix A scale(k) = exp( sum( log(shape(:,k)) ) )**(1/dble(p)) shape(:,k) = shape(:,k)/scale(k) * check positive values if (minval(shape(:,k)) .lt. rteps .or. * scale(k) .lt. rteps) then info = 0 loglik = FLMAX return end if temp4(1) = 0.d0 do j = 1,p * temp2(:,j) = O(:,j) * 1.d0/shape(j,k) temp2(:,j) = O(j,:) * 1.d0/shape(j,k) temp4(1) = temp4(1) + ddot(p, temp1(j,:), 1, temp2(:,j), 1) end do trgt = trgt + temp4(1) end do * error errin = abs(trgt - trgtprev)/(1.d0 + abs(trgt)) trgtprev = trgt * WHILE condition M step if ( errin .gt. tolin .and. niterin .lt. itmaxin ) goto 110 * do k = 1,G * scale(k) = exp( sum( log(shape(:,k)) ) )**(1/dble(p)) * shape(:,k) = shape(:,k)/scale(k) * end do * ................................................................ * E step.......................................................... 
const = (-dble(p)/2.d0)*log2pi do k = 1,G logdet = 0.d0 do j = 1,p logdet = logdet + ( log(shape(j,k)) + log(scale(k)) ) end do * compute mahalanobis distance for each observation * ##### NOTE: O is transposed do i = 1,n temp1(:,1) = ( x(i,:) - mu(:,k) ) * call dcopy(p, 0.d0, 0, temp2(:,1), 1) dummy(1) = 0.d0 call dcopy(p, dummy, 0, temp2(:,1), 1) call dgemv('N', p, p, 1.d0, * O, p, temp1(:,1), 1, 0.d0, temp2(:,1), 1) temp2(:,1) = temp2(:,1)/sqrt(scale(k)*shape(:,k)) temp3(1,1) = ddot(p, temp2(:,1), 1, temp2(:,1), 1) * temp3 contains the mahalanobis distance z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 + log(pro(k)) * z(i,k) = const - logdet/2.d0 - temp3(1,1)/2.d0 end do * z contains the log-density log(N(x|theta_k)) + log(p_k) end do * noise component if (Vinv .gt. 0.d0) then z(:,Gnoise) = log(Vinv) + log( pro(Gnoise) ) end if * now column Gnoise of z contains log(Vinv) + log(p_0) loglik = 0.d0 do i = 1,n * Numerical Recipes pag.844 temp3(1,1) = maxval(z(i,:)) temp1(1,1) = temp3(1,1) + log( sum(exp(z(i,:) - temp3(1,1))) ) loglik = loglik + temp1(1,1) * ##### NOTE: do we need to check if (z - zmax) is too small? z(i,:) = exp( z(i,:) - temp1(1,1) ) * re-normalize probabilities temp3(1,1) = sum( z(i,:) ) call dscal( Gnoise, 1.d0/temp3(1,1), z(i,:), 1 ) end do * ................................................................ errout = abs(loglik - lkprev)/(1.d0 + abs(loglik)) lkprev = loglik * Chris F (June 2015): pro should not be computed in the E-step * sumz = sum(z, dim = 1) * if ( eqpro ) then * if ( Vinv .gt. 0 ) then * pro(Gnoise) = sumz(Gnoise) / dble(n) * pro(1:G) = ( 1 - pro(Gnoise) ) / dble(G) * sumz = pro * dble(n) * else * pro = 1 / dble(G) * sumz = pro * dble(n) * end if * else * pro = sumz / dble(n) * end if * check if empty components if ( minval(sumz) .lt. rteps ) then loglik = -FLMAX return end if * WHILE condition EM if ( errout .gt. tolout .and. niterout .lt. 
itmaxout ) goto 100 return end mclust/src/init.c0000644000176200001440000004331613507677506013506 0ustar liggesusers#include #include // for NULL #include /* Routines registration obtained with tools::package_native_routine_registration_skeleton("~/R/mclust") FIXME: Check these declarations against the C/Fortran source code. */ /* .Fortran calls */ extern void F77_NAME(dmvnorm)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(covwf)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(crossprodf)(void *, void *, void *, void *, void *, void *); extern void F77_NAME(d2norm)(void *, void *, void *, void *); extern void F77_NAME(es1e)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(es1v)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(eseve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esevi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esevv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvei)(void *, void *, void *, void *, void *, void *, void *, 
void *, void *, void *, void *); extern void F77_NAME(esvev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvvi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(esvvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hc1e)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hc1v)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hceee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hceii)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hcvii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(hcvvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mcltrw)(void *, void *, void *, void *, void *); extern void F77_NAME(me1e)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(me1ep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(me1v)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(me1vp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeee)(void *, void *, void 
*, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeeep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeeip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeiip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meeve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meevi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meevip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meevv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, 
void *, void *); extern void F77_NAME(mevee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meveip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevii)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(meviip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevvi)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevvip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mevvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 
extern void F77_NAME(mevvvp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mnxiip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mnxxip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mnxxxp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1e)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1ep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1v)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(ms1vp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseee)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseeep)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseei)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseeip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseii)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseiip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mseve)(void *, void *, void *, void *, void *, void *, void *, void *, void 
*, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msevi)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msevip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msevv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvee)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvei)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msveip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvev)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvevp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvii)(void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msviip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvve)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvvi)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvvip)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(msvvv)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void 
F77_NAME(msvvvp)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvn1d)(void *, void *, void *, void *, void *); extern void F77_NAME(mvn1p)(void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvnxii)(void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvnxxi)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(mvnxxx)(void *, void *, void *, void *, void *, void *); extern void F77_NAME(shapeo)(void *, void *, void *, void *, void *, void *, void *); extern void F77_NAME(uncholf)(void *, void *, void *, void *, void *); static const R_FortranMethodDef FortranEntries[] = { {"dmvnorm", (DL_FUNC) &F77_NAME(dmvnorm), 8}, {"covwf", (DL_FUNC) &F77_NAME(covwf), 8}, {"crossprodf", (DL_FUNC) &F77_NAME(crossprodf), 6}, {"d2norm", (DL_FUNC) &F77_NAME(d2norm), 4}, {"es1e", (DL_FUNC) &F77_NAME(es1e), 9}, {"es1v", (DL_FUNC) &F77_NAME(es1v), 9}, {"eseee", (DL_FUNC) &F77_NAME(eseee), 12}, {"eseei", (DL_FUNC) &F77_NAME(eseei), 11}, {"eseev", (DL_FUNC) &F77_NAME(eseev), 14}, {"eseii", (DL_FUNC) &F77_NAME(eseii), 10}, {"eseve", (DL_FUNC) &F77_NAME(eseve), 14}, {"esevi", (DL_FUNC) &F77_NAME(esevi), 11}, {"esevv", (DL_FUNC) &F77_NAME(esevv), 14}, {"esvee", (DL_FUNC) &F77_NAME(esvee), 14}, {"esvei", (DL_FUNC) &F77_NAME(esvei), 11}, {"esvev", (DL_FUNC) &F77_NAME(esvev), 14}, {"esvii", (DL_FUNC) &F77_NAME(esvii), 10}, {"esvve", (DL_FUNC) &F77_NAME(esvve), 14}, {"esvvi", (DL_FUNC) &F77_NAME(esvvi), 11}, {"esvvv", (DL_FUNC) &F77_NAME(esvvv), 12}, {"hc1e", (DL_FUNC) &F77_NAME(hc1e), 7}, {"hc1v", (DL_FUNC) &F77_NAME(hc1v), 8}, {"hceee", (DL_FUNC) &F77_NAME(hceee), 12}, {"hceii", (DL_FUNC) &F77_NAME(hceii), 9}, {"hcvii", (DL_FUNC) &F77_NAME(hcvii), 10}, {"hcvvv", (DL_FUNC) &F77_NAME(hcvvv), 14}, {"mcltrw", (DL_FUNC) &F77_NAME(mcltrw), 5}, {"me1e", (DL_FUNC) &F77_NAME(me1e), 12}, {"me1ep", (DL_FUNC) &F77_NAME(me1ep), 16}, 
{"me1v", (DL_FUNC) &F77_NAME(me1v), 12}, {"me1vp", (DL_FUNC) &F77_NAME(me1vp), 16}, {"meeee", (DL_FUNC) &F77_NAME(meeee), 14}, {"meeeep", (DL_FUNC) &F77_NAME(meeeep), 18}, {"meeei", (DL_FUNC) &F77_NAME(meeei), 14}, {"meeeip", (DL_FUNC) &F77_NAME(meeeip), 18}, {"meeev", (DL_FUNC) &F77_NAME(meeev), 18}, {"meeevp", (DL_FUNC) &F77_NAME(meeevp), 22}, {"meeii", (DL_FUNC) &F77_NAME(meeii), 13}, {"meeiip", (DL_FUNC) &F77_NAME(meeiip), 17}, {"meeve", (DL_FUNC) &F77_NAME(meeve), 26}, {"meevi", (DL_FUNC) &F77_NAME(meevi), 14}, {"meevip", (DL_FUNC) &F77_NAME(meevip), 18}, {"meevv", (DL_FUNC) &F77_NAME(meevv), 22}, {"mevee", (DL_FUNC) &F77_NAME(mevee), 26}, {"mevei", (DL_FUNC) &F77_NAME(mevei), 17}, {"meveip", (DL_FUNC) &F77_NAME(meveip), 21}, {"mevev", (DL_FUNC) &F77_NAME(mevev), 18}, {"mevevp", (DL_FUNC) &F77_NAME(mevevp), 22}, {"mevii", (DL_FUNC) &F77_NAME(mevii), 13}, {"meviip", (DL_FUNC) &F77_NAME(meviip), 17}, {"mevve", (DL_FUNC) &F77_NAME(mevve), 26}, {"mevvi", (DL_FUNC) &F77_NAME(mevvi), 14}, {"mevvip", (DL_FUNC) &F77_NAME(mevvip), 18}, {"mevvv", (DL_FUNC) &F77_NAME(mevvv), 15}, {"mevvvp", (DL_FUNC) &F77_NAME(mevvvp), 19}, {"mnxiip", (DL_FUNC) &F77_NAME(mnxiip), 10}, {"mnxxip", (DL_FUNC) &F77_NAME(mnxxip), 11}, {"mnxxxp", (DL_FUNC) &F77_NAME(mnxxxp), 11}, {"ms1e", (DL_FUNC) &F77_NAME(ms1e), 7}, {"ms1ep", (DL_FUNC) &F77_NAME(ms1ep), 11}, {"ms1v", (DL_FUNC) &F77_NAME(ms1v), 7}, {"ms1vp", (DL_FUNC) &F77_NAME(ms1vp), 11}, {"mseee", (DL_FUNC) &F77_NAME(mseee), 9}, {"mseeep", (DL_FUNC) &F77_NAME(mseeep), 13}, {"mseei", (DL_FUNC) &F77_NAME(mseei), 9}, {"mseeip", (DL_FUNC) &F77_NAME(mseeip), 13}, {"mseev", (DL_FUNC) &F77_NAME(mseev), 12}, {"mseevp", (DL_FUNC) &F77_NAME(mseevp), 16}, {"mseii", (DL_FUNC) &F77_NAME(mseii), 8}, {"mseiip", (DL_FUNC) &F77_NAME(mseiip), 12}, {"mseve", (DL_FUNC) &F77_NAME(mseve), 18}, {"msevi", (DL_FUNC) &F77_NAME(msevi), 9}, {"msevip", (DL_FUNC) &F77_NAME(msevip), 13}, {"msevv", (DL_FUNC) &F77_NAME(msevv), 14}, {"msvee", (DL_FUNC) &F77_NAME(msvee), 
16}, {"msvei", (DL_FUNC) &F77_NAME(msvei), 14}, {"msveip", (DL_FUNC) &F77_NAME(msveip), 18}, {"msvev", (DL_FUNC) &F77_NAME(msvev), 14}, {"msvevp", (DL_FUNC) &F77_NAME(msvevp), 18}, {"msvii", (DL_FUNC) &F77_NAME(msvii), 8}, {"msviip", (DL_FUNC) &F77_NAME(msviip), 12}, {"msvve", (DL_FUNC) &F77_NAME(msvve), 18}, {"msvvi", (DL_FUNC) &F77_NAME(msvvi), 9}, {"msvvip", (DL_FUNC) &F77_NAME(msvvip), 13}, {"msvvv", (DL_FUNC) &F77_NAME(msvvv), 10}, {"msvvvp", (DL_FUNC) &F77_NAME(msvvvp), 14}, {"mvn1d", (DL_FUNC) &F77_NAME(mvn1d), 5}, {"mvn1p", (DL_FUNC) &F77_NAME(mvn1p), 9}, {"mvnxii", (DL_FUNC) &F77_NAME(mvnxii), 6}, {"mvnxxi", (DL_FUNC) &F77_NAME(mvnxxi), 7}, {"mvnxxx", (DL_FUNC) &F77_NAME(mvnxxx), 6}, {"shapeo", (DL_FUNC) &F77_NAME(shapeo), 7}, {"uncholf", (DL_FUNC) &F77_NAME(uncholf), 5}, {NULL, NULL, 0} }; void R_init_mclust(DllInfo *dll) { R_registerRoutines(dll, NULL, NULL, FortranEntries, NULL); R_useDynamicSymbols(dll, FALSE); } mclust/src/Makevars0000644000176200001440000000006013475427014014047 0ustar liggesusersPKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) mclust/src/dmvnorm.f0000644000176200001440000000377113463252677014230 0ustar liggesusers* ===================================================================== subroutine dmvnorm ( x, mu, Sigma, n, p, w, hood, logdens) * * Compute log-density of multivariate Gaussian * * ===================================================================== implicit NONE integer n, p double precision hood double precision x(n,p), w(*), logdens(n) double precision mu(p), Sigma(p,p) integer info, i, j double precision detlog, umin, umax, const, temp double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision ddot external ddot * 
--------------------------------------------------------------------------- * Cholesky factorization call dpotrf('U', p, Sigma, p, info) if (info .ne. 0) then w(1) = dble(info) hood = FLMAX return end if call absrng( p, Sigma, (p+1), umin, umax) if (umax .le. one .and. umax .ge. umin*RTMAX) then w(1) = zero hood = FLMAX return end if if (umax .ge. one .and. umin .le. umax*RTMIN) then w(1) = zero hood = FLMAX return end if detlog = zero do j = 1, p detlog = detlog + log(abs(Sigma(j,j))) end do const = dble(p)*pi2log/two + detlog do i = 1, n call dcopy(p, x(i,1), n, w, 1) call daxpy(p, (-one), mu(1), 1, w, 1) call dtrsv('U', 'T', 'N', p, Sigma, p, w, 1) temp = ddot(p, w, 1, w, 1)/two logdens(i) = -(const+temp) end do w(1) = zero return end mclust/src/mclust.f0000644000176200001440000151702513726112623014044 0ustar liggesusersc modified to avoid printing for calls from Fortran within R double precision function dgam (x) c jan 1984 edition. w. fullerton, c3, los alamos scientific lab. c jan 1994 wpp@ips.id.ethz.ch, ehg@research.att.com declare xsml c jun 2019 renamed function from dgamma to avoid warning with intrinsic c function already named dgamma double precision x, gamcs(42), dxrel, pi, sinpiy, sq2pil, xmax, 1 xmin, y, d9lgmc, dcsevl, d1mach, xsml C external d1mach, d9lgmc, dcsevl, dexp, dint, dlog, dsin, dsqrt, C 1 initds external d1mach, d9lgmc, dcsevl c c series for gam on the interval 0. 
to 1.00000e+00 c with weighted error 5.79e-32 c log weighted error 31.24 c significant figures required 30.00 c decimal places required 32.05 c data gam cs( 1) / +.8571195590 9893314219 2006239994 2 d-2 / data gam cs( 2) / +.4415381324 8410067571 9131577165 2 d-2 / data gam cs( 3) / +.5685043681 5993633786 3266458878 9 d-1 / data gam cs( 4) / -.4219835396 4185605010 1250018662 4 d-2 / data gam cs( 5) / +.1326808181 2124602205 8400679635 2 d-2 / data gam cs( 6) / -.1893024529 7988804325 2394702388 6 d-3 / data gam cs( 7) / +.3606925327 4412452565 7808221722 5 d-4 / data gam cs( 8) / -.6056761904 4608642184 8554829036 5 d-5 / data gam cs( 9) / +.1055829546 3022833447 3182350909 3 d-5 / data gam cs( 10) / -.1811967365 5423840482 9185589116 6 d-6 / data gam cs( 11) / +.3117724964 7153222777 9025459316 9 d-7 / data gam cs( 12) / -.5354219639 0196871408 7408102434 7 d-8 / data gam cs( 13) / +.9193275519 8595889468 8778682594 0 d-9 / data gam cs( 14) / -.1577941280 2883397617 6742327395 3 d-9 / data gam cs( 15) / +.2707980622 9349545432 6654043308 9 d-10 / data gam cs( 16) / -.4646818653 8257301440 8166105893 3 d-11 / data gam cs( 17) / +.7973350192 0074196564 6076717535 9 d-12 / data gam cs( 18) / -.1368078209 8309160257 9949917230 9 d-12 / data gam cs( 19) / +.2347319486 5638006572 3347177168 8 d-13 / data gam cs( 20) / -.4027432614 9490669327 6657053469 9 d-14 / data gam cs( 21) / +.6910051747 3721009121 3833697525 7 d-15 / data gam cs( 22) / -.1185584500 2219929070 5238712619 2 d-15 / data gam cs( 23) / +.2034148542 4963739552 0102605193 2 d-16 / data gam cs( 24) / -.3490054341 7174058492 7401294910 8 d-17 / data gam cs( 25) / +.5987993856 4853055671 3505106602 6 d-18 / data gam cs( 26) / -.1027378057 8722280744 9006977843 1 d-18 / data gam cs( 27) / +.1762702816 0605298249 4275966074 8 d-19 / data gam cs( 28) / -.3024320653 7353062609 5877211204 2 d-20 / data gam cs( 29) / +.5188914660 2183978397 1783355050 6 d-21 / data gam cs( 30) / -.8902770842 4565766924 
4925160106 6 d-22 / data gam cs( 31) / +.1527474068 4933426022 7459689130 6 d-22 / data gam cs( 32) / -.2620731256 1873629002 5732833279 9 d-23 / data gam cs( 33) / +.4496464047 8305386703 3104657066 6 d-24 / data gam cs( 34) / -.7714712731 3368779117 0390152533 3 d-25 / data gam cs( 35) / +.1323635453 1260440364 8657271466 6 d-25 / data gam cs( 36) / -.2270999412 9429288167 0231381333 3 d-26 / data gam cs( 37) / +.3896418998 0039914493 2081663999 9 d-27 / data gam cs( 38) / -.6685198115 1259533277 9212799999 9 d-28 / data gam cs( 39) / +.1146998663 1400243843 4761386666 6 d-28 / data gam cs( 40) / -.1967938586 3451346772 9510399999 9 d-29 / data gam cs( 41) / +.3376448816 5853380903 3489066666 6 d-30 / data gam cs( 42) / -.5793070335 7821357846 2549333333 3 d-31 / c data pi / 3.1415926535 8979323846 2643383279 50 d0 / c sq2pil is 0.5*alog(2*pi) = alog(sqrt(2*pi)) data sq2pil / 0.9189385332 0467274178 0329736405 62 d0 / data ngam, xmin, xmax, xsml, dxrel / 0, 4*0.d0 / c if (ngam.ne.0) go to 10 ngam = initds (gamcs, 42, 0.1*sngl(d1mach(3)) ) c call d9gaml (xmin, xmax) xsml = exp (max (log(d1mach(1)), -log(d1mach(2)))+0.01d0) dxrel = sqrt (d1mach(4)) c 10 y = abs(x) if (y.gt.10.d0) go to 50 c c compute gamma(x) for -xbnd .le. x .le. xbnd. reduce interval and find c gamma(1+y) for 0.0 .le. y .lt. 1.0 first of all. c n = int(x) if (x.lt.0.d0) n = n - 1 y = x - dble(float(n)) n = n - 1 dgam = 0.9375d0 + dcsevl (2.d0*y-1.d0, gamcs, ngam) if (n.eq.0) return c if (n.gt.0) go to 30 c c compute gamma(x) for x .lt. 1.0 c n = -n if (x.eq.0.d0) then dgam = d1mach(2) return endif if (x.lt.0.0d0 .and. x+dble(float(n-2)).eq.0.d0) then dgam = -d1mach(2) return endif if (y.lt.xsml) then dgam = d1mach(2) return endif c do 20 i=1,n dgam = dgam/(x+dble(float(i-1)) ) 20 continue return c c gamma(x) for x .ge. 2.0 and x .le. 10.0 c 30 do 40 i=1,n dgam = (y+dble(float(i))) * dgam 40 continue return c c gamma(x) for dabs(x) .gt. 10.0. recall y = dabs(x). 
c 50 if (x.gt.xmax) then dgam = d1mach(2) return endif c dgam = 0.d0 if (x.lt.xmin) return c dgam = exp ((y-0.5d0)*log(y) - y + sq2pil + d9lgmc(y) ) if (x.gt.0.d0) return c sinpiy = sin (pi*y) c if (sinpiy.eq.0.d0) then dgam = -d1mach(2) return endif c dgam = -pi/(y*sinpiy*dgam) c return end C modified to omit priniting for calls from Fortran within R subroutine d9gaml (xmin, xmax) c june 1977 edition. w. fullerton, c3, los alamos scientific lab. c c calculate the minimum and maximum legal bounds for x in gamma(x). c xmin and xmax are not the only bounds, but they are the only non- c trivial ones to calculate. c c output arguments -- c xmin dble prec minimum legal value of x in gamma(x). any smaller c value of x might result in underflow. c xmax dble prec maximum legal value of x in gamma(x). any larger c value of x might cause overflow. c double precision xmin, xmax, alnbig, alnsml, xln, xold, d1mach C external d1mach, dlog external d1mach c alnsml = log(d1mach(1)) xmin = -alnsml do 10 i=1,10 xold = xmin xln = log(xmin) xmin = xmin - xmin*((xmin+0.5d0)*xln - xmin - 0.2258d0 + alnsml) 1 / (xmin*xln+0.5d0) if (abs(xmin-xold).lt.0.005d0) go to 20 10 continue C call seteru (27hd9gaml unable to find xmin, 27, 1, 2) xmin = d1mach(2) xmax = -d1mach(2) return c 20 xmin = -xmin + 0.01d0 c alnbig = log (d1mach(2)) xmax = alnbig do 30 i=1,10 xold = xmax xln = log(xmax) xmax = xmax - xmax*((xmax-0.5d0)*xln - xmax + 0.9189d0 - alnbig) 1 / (xmax*xln-0.5d0) if (abs(xmax-xold).lt.0.005d0) go to 40 30 continue C call seteru (27hd9gaml unable to find xmax, 27, 2, 2) xmin = d1mach(2) xmax = -d1mach(2) return c 40 xmax = xmax - 0.01d0 xmin = dmax1 (xmin, -xmax+1.d0) c return end double precision function dcsevl (x, a, n) double precision a(n), x, twox, b0, b1, b2 double precision d1mach external d1mach c if (n.lt.1) then dcsevl = -d1mach(2) return endif if (n.gt.1000) then dcsevl = d1mach(2) return endif if (x.lt.(-1.1d0) .or. 
x.gt.1.1d0) then dcsevl = d1mach(2) return endif C added by CF to avoid uninitialized warnings b2 = 0 c twox = 2.0d0*x b1 = 0.d0 b0 = 0.d0 do 10 i=1,n b2 = b1 b1 = b0 ni = n - i + 1 b0 = twox*b1 - b2 + a(ni) 10 continue c dcsevl = 0.5d0 * (b0-b2) c return end double precision function d9lgmc (x) double precision x, algmcs(15), xbig, xmax, dcsevl, d1mach external d1mach, dcsevl, initds c data algmcs( 1) / +.1666389480 4518632472 0572965082 2 d+0 / data algmcs( 2) / -.1384948176 0675638407 3298605913 5 d-4 / data algmcs( 3) / +.9810825646 9247294261 5717154748 7 d-8 / data algmcs( 4) / -.1809129475 5724941942 6330626671 9 d-10 / data algmcs( 5) / +.6221098041 8926052271 2601554341 6 d-13 / data algmcs( 6) / -.3399615005 4177219443 0333059966 6 d-15 / data algmcs( 7) / +.2683181998 4826987489 5753884666 6 d-17 / data algmcs( 8) / -.2868042435 3346432841 4462239999 9 d-19 / data algmcs( 9) / +.3962837061 0464348036 7930666666 6 d-21 / data algmcs( 10) / -.6831888753 9857668701 1199999999 9 d-23 / data algmcs( 11) / +.1429227355 9424981475 7333333333 3 d-24 / data algmcs( 12) / -.3547598158 1010705471 9999999999 9 d-26 / data algmcs( 13) / +.1025680058 0104709120 0000000000 0 d-27 / data algmcs( 14) / -.3401102254 3167487999 9999999999 9 d-29 / data algmcs( 15) / +.1276642195 6300629333 3333333333 3 d-30 / c data nalgm, xbig, xmax / 0, 2*0.d0 / c if (nalgm.ne.0) go to 10 nalgm = initds (algmcs, 15, sngl(d1mach(3)) ) xbig = 1.0d0/sqrt(d1mach(3)) xmax = exp (dmin1(log(d1mach(2)/12.d0), -log(12.d0*d1mach(1)))) c 10 if (x.lt.10.d0) then d9lgmc = d1mach(2) return endif c if (x.ge.xmax) go to 20 c d9lgmc = 1.d0/(12.d0*x) if (x.lt.xbig) d9lgmc = dcsevl (2.0d0*(10.d0/x)**2-1.d0, algmcs, 1 nalgm) / x return c 20 d9lgmc = 0.d0 C call seteru (34hd9lgmc x so big d9lgmc underflows, 34, 2, 0) return c end double precision function dlngam (x) double precision x, y, xmax, dxrel, pi double precision sinpiy, sqpi2l, sq2pil double precision d1mach, d9lgmc external d1mach, d9lgmc double 
precision dgam c external dgamma c data sq2pil / 0.9189385332 0467274178 0329736405 62 d0 / c sq2pil = alog (sqrt(2*pi)), sqpi2l = alog(sqrt(pi/2)) data sqpi2l / +.2257913526 4472743236 3097614947 441 d+0 / data pi / 3.1415926535 8979323846 2643383279 50 d0 / c data xmax, dxrel / 2*0.d0 / c C added by CF to avoid uninitialized warnings dlngam = 0.d0 if (xmax.ne.0.d0) go to 10 xmax = d1mach(2)/dlog(d1mach(2)) dxrel = dsqrt (d1mach(4)) c 10 y = abs (x) if (y.gt.10.d0) go to 20 c c dlog (dabs (dgam(x)) ) for dabs(x) .le. 10.0 c dlngam = log (abs (dgam(x)) ) return c c dlog ( dabs (dgam(x)) ) for dabs(x) .gt. 10.0 c C20 if (y.gt.xmax) call seteru ( C 1 39hdlngam dabs(x) so big dlngam overflows, 39, 2, 2) 20 if (y.gt.xmax) dlngam = d1mach(2) if (y.gt.xmax) return c if (x.gt.0.d0) dlngam = sq2pil + (x-0.5d0)*log(x) - x + d9lgmc(y) if (x.gt.0.d0) return c sinpiy = abs (sin(pi*y)) C if (sinpiy.eq.0.d0) call seteru ( C 1 31hdlngam x is a negative integer, 31, 3, 2) if (sinpiy.eq.0.d0) dlngam = -d1mach(2) if (sinpiy.eq.0.d0) return c dlngam = sqpi2l + (x-0.5d0)*log(y) - x - log(sinpiy) - d9lgmc(y) c C if (dabs((x-dint(x-0.5d0))*dlngam/x).lt.dxrel) call seteru ( C 1 68hdlngam answer lt half precision because x too near negative C 2integer, 68, 1, 1) return c end function initds (dos, nos, eta) double precision dos(nos) integer i1mach external i1mach c C if (nos.lt.1) call seteru ( C 1 35hinitds number of coefficients lt 1, 35, 2, 2) if (nos.lt.1) initds = i1mach(9) c C added by CF to avoid uninitialized warnings i = 0 err = 0. 
do 10 ii=1,nos i = nos + 1 - ii err = err + abs(sngl(dos(i))) if (err.gt.eta) go to 20 10 continue c C20 if (i.eq.nos) call seteru (28hinitds eta may be too small, 28, C 1 1, 2) 20 continue initds = i c return end subroutine absrng( l, v, i, vmin, vmax) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE double precision v(*) integer i, j, k, l double precision temp, vmin, vmax c---------------------------------------------------------------------------- temp = abs(v(1)) vmin = temp vmax = temp if (l .eq. 1) return if (i .eq. 1) then do j = 2, l temp = abs(v(j)) vmin = min(vmin,temp) vmax = max(vmax,temp) end do else k = 1 + i do j = 2, l temp = abs(v(k)) vmin = min(vmin,temp) vmax = max(vmax,temp) k = k + i end do end if return end SUBROUTINE D2NORM ( N, X, INCX, VALUE ) * .. Scalar Arguments .. INTEGER INCX, N * .. Array Arguments .. DOUBLE PRECISION X( * ), VALUE * .. * * DNRM2 returns the euclidean norm of a vector via the function * name, so that * * DNRM2 := sqrt( x'*x ) * * THIS FUNCTION MODELLED AFTER DNRM2 BUT WRITTEN AS A SUBROUTINE * * -- This version written on 25-October-1982. * Modified on 14-October-1993 to inline the call to DLASSQ. * Sven Hammarling, Nag Ltd. * * * .. Parameters .. DOUBLE PRECISION ONE , ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. Local Scalars .. INTEGER IX DOUBLE PRECISION ABSXI, NORM, SCALE, SSQ * .. Intrinsic Functions .. INTRINSIC ABS, SQRT * .. * .. Executable Statements .. IF( N.LT.1 .OR. 
INCX.LT.1 )THEN NORM = ZERO ELSE IF( N.EQ.1 )THEN NORM = ABS( X( 1 ) ) ELSE SCALE = ZERO SSQ = ONE * The following loop is equivalent to this call to the LAPACK * auxiliary routine: * CALL DLASSQ( N, X, INCX, SCALE, SSQ ) * DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX IF( X( IX ).NE.ZERO )THEN ABSXI = ABS( X( IX ) ) IF( SCALE.LT.ABSXI )THEN SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SCALE = ABSXI ELSE SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF 10 CONTINUE NORM = SCALE * SQRT( SSQ ) END IF * VALUE = NORM RETURN * * End of D2NORM. * END subroutine mclrup( l, n, v, r, lr) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer l, n, lr double precision cs, sn c double precision v(n), r(lr,n) double precision v(*), r(lr,*) integer i, j, k, m if (l .eq. 1) return k = l - 1 if (k .le. n) then call dcopy( n, v, 1, r(k,1), lr) if (k .eq. 1) return if (n .gt. 1) then i = 1 m = n do j = 2, k call drotg( r(i,i), r(k,i), cs, sn) m = m - 1 call drot( m, r(i,j), lr, r(k,j), lr, cs, sn) i = j end do else call drotg( r(1,1), r(k,1), cs, sn) end if else if (n .gt. 
1) then i = 1 m = n do j = 2, n call drotg( r(i,i), v(i), cs, sn) m = m - 1 call drot( m, r(i,j), lr, v(j), 1, cs, sn) i = j end do end if call drotg( r(n,n), v(n), cs, sn) end if return end subroutine mcltrw( x, n, p, u, ss) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision ss c double precision x(n,p), u(p) double precision x(n,*), u(*) double precision ddot external ddot integer i, j double precision fac double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision dummy(1) c------------------------------------------------------------------------------ c form mean fac = one / sqrt(dble(n)) c call dcopy( p, zero, 0, u, 1) dummy(1) = zero call dcopy( p, dummy, 0, u, 1) do i = 1, n call daxpy( p, fac, x(i,1), n, u, 1) end do c subtract mean and form sum of squares ss = zero do j = 1, p call daxpy( n, (-fac), u(j), 0, x(1,j), 1) ss = ss + ddot(n, x(1,j), 1, x(1,j), 1) end do return end subroutine mclvol( x, n, p, u, v, w, * work, lwork, iwork, liwork, * info) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, lwork, liwork, info c integer iwork(liwork) integer iwork(*) c double precision x(n,p), u(p), v(p,p), w(p,p), work(lwork), double precision x(n,*), u(*), v(p,*), w(p,p), work(*) integer i, j double precision temp, cmin, cmax double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision FLMAX parameter (FLMAX = 1.7976931348623157D+308) double precision dummy(1) 
c------------------------------------------------------------------------------ c form mean temp = one / dble(n) c call dcopy( p, zero, 0, u, 1) dummy(1) = zero call dcopy( p, dummy, 0, u, 1) do i = 1, n call daxpy( p, temp, x(i,1), n, u, 1) end do c subtract mean do j = 1, p call daxpy( n, (-one), u(j), 0, x(1,j), 1) end do c if (.false.) then c this gets the eigenvectors but x is overwritten c get right singular vectors c call dgesvd( 'N', 'A', n, p, x, n, u, c * dummy, 1, w, p, work, lwork, info) c if (info .lt. 0) return c if (info .eq. 0) then c lwork = int(work(1)) c do i = 1, p c v(i,i) = w(i,i) c if (i .gt. 1) then c do j = 1, (i-1) c v(i,j) = w(j,i) c v(j,i) = w(i,j) c end do c end if c end do c goto 100 c end if c end if c form crossproduct call dsyrk( 'U', 'T', p, n, one, x, n, zero, w, p) c get eigenvectors do j = 1, p do i = 1, j v(i,j) = w(i,j) end do end do call dsyevd( 'V', 'U', p, v, p, u, * work, lwork, iwork, liwork, info) if (info .lt. 0) return if (info .eq. 0) then lwork = int(work(1)) liwork = iwork(1) goto 100 end if c EPSMAX = d1mach(4) call dsyevx( 'V', 'A', 'U', p, w, p, dummy, dummy, i, i, * sqrt(EPSMAX), j, u, v, p, * work, lwork, iwork(p+1), iwork, info) if (info .ne. 0) return lwork = int(work(1)) liwork = -1 100 continue c FLMAX = d1mach(2) c form xv c vol = one do j = 1, p call dgemv( 'N', n, p, one, x, n, v(1,j), 1, zero, work, 1) cmax = -FLMAX cmin = FLMAX do i = 1, n temp = work(i) if (temp .gt. cmax) cmax = temp if (temp .lt. 
cmin) cmin = temp end do u(j) = cmax - cmin c vol = vol * (cmax - cmin) end do return end subroutine sgnrng( l, v, i, vmin, vmax) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE double precision v(*) integer i, j, k, l double precision temp, vmin, vmax c---------------------------------------------------------------------------- temp = v(1) vmin = temp vmax = temp if (l .eq. 1) return if (i .eq. 1) then do j = 2, l temp = v(j) vmin = min(vmin,temp) vmax = max(vmax,temp) end do else k = 1 + i do j = 2, l temp = v(k) vmin = min(vmin,temp) vmax = max(vmax,temp) k = k + i end do end if return end subroutine shapeo( TRANSP, s, O, l, m, w, info) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical TRANSP integer l, m, info c double precision s(l), O(l,l,m), w(l,l) double precision s(*), O(l,l,*), w(l,*) integer j, k double precision temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) c------------------------------------------------------------------------------ if (TRANSP) then do j = 1, l temp = sqrt(s(j)) do k = 1, m call dscal( l, temp, O(j,1,k), l) end do end do do k = 1, m call dsyrk( 'U', 'T', l, l, one, O(1,1,k), l, zero, w, l) do j = 1, l call dcopy( j, w(1,j), 1, O(1,j,k), 1) end do do j = 2, l call dcopy( j-1, w(1,j), 1, O(j,1,k), l) end do end do info = 0 return end if if (.not. 
TRANSP) then do j = 1, l temp = sqrt(s(j)) do k = 1, m call dscal( l, temp, O(1,j,k), 1) end do end do do k = 1, m call dsyrk( 'U', 'N', l, l, one, O(1,1,k), l, zero, w, l) do j = 1, l call dcopy( j, w(1,j), 1, O(1,j,k), 1) end do do j = 2, l call dcopy( j-1, w(1,j), 1, O(j,1,k), l) end do end do info = 0 return end if info = -1 return end subroutine uncholf ( UPPER, T, l, n, info) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical UPPER integer l, n, info c double precision T(abs(n), abs(n)) double precision T( l , * ) integer i, j, k double precision ddot external ddot c------------------------------------------------------------------------------ if (UPPER) then do i = 2, n do j = 1, (i-1) T(i,j) = ddot( j, T(1,i), 1, T(1,j), 1) end do end do do k = 1, n T(k,k) = ddot( k, T(1,k), 1, T(1,k), 1) end do do k = 1, n-1 call dcopy( n-k, T(k+1,k), 1, T(k,k+1), l) end do info = 0 return end if if (.not. UPPER) then do i = 2, n do j = 1, (i-1) T(j,i) = ddot( j, T(i,1), l, T(j,1), l) end do end do do k = 1, n T(k,k) = ddot( k, T(k,1), l, T(k,1), l) end do do k = 2, n call dcopy( k-1, T(1,k), 1, T(k,1), l) end do return end if info = -1 return end subroutine wardsw( i, n, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer i, n double precision d(*) integer i1, n1, ii, nn, k double precision temp double precision FLMAX parameter (FLMAX = 1.7976931348623157D+308) *----------------------------------------------------------------------------- i1 = i - 1 ii = (i1*(i1-1))/2 + 1 n1 = n - 1 nn = (n1*(n1-1))/2 + 1 c if (i .gt. 
1) then call dswap( i1, d(nn), 1, d(ii), 1) c call dcopy( i1, FLMAX, 0, d(nn), 1) ii = ii + i1 + i1 nn = nn + i c end if if (n1 .eq. i) return k = i 100 continue temp = d(ii) d(ii) = d(nn) d(nn) = temp c d(nn) = FLMAX ii = ii + k nn = nn + 1 k = k + 1 if (k .lt. n1) goto 100 c d(nn) = FLMAX return end subroutine es1e ( x, mu, sigsq, pro, n, G, Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision sigsq, hood, Vinv c double precision x(n), mu(G), pro(G[+1]), z(n,G[+1]) double precision x(*), mu(*), pro( * ), z(n, * ) integer i, k, nz double precision temp, const, muk, prok, tmin, tmax, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (sigsq .le. zero) then hood = FLMAX return end if const = pi2log + log(sigsq) do k = 1, G muk = mu(k) c prok = pro(k) do i = 1, n temp = x(i) - muk c z(i,k) = prok*exp(-(const+(temp*temp)/sigsq)/two) if (sigsq .lt. one .and. * abs(temp) .ge. sqrt(sigsq)*RTMAX) then hood = FLMAX return end if z(i,k) = -(const+(temp*temp)/sigsq)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. 
zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c temp = zero c do k = 1, nz c temp = temp + z(i,k) c end do c hood = hood + log(temp) c call dscal( nz, (one/temp), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine hc1e ( x, n, ic, ng, ns, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, ic(n), ng, ns, nd c double precision x(n), d(ng*(ng-1)/2) double precision x(*), d(*) integer lg, ld, ll, lo, ls integer i, j, k, m integer ni, nj, nij, iopt, jopt, iold, jold integer ij, ici, icj, ii, ik, jk double precision ri, rj, rij, si, sj, sij double precision temp, dij, dopt, dold external wardsw double precision one parameter (one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) c------------------------------------------------------------------------------ iopt = 0 jopt = 0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd c call intpr( 'ic', -1, ic, n) c call intpr( 'no. of groups', -1, lg, 1) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. lg) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. 
k) then k = k + 1 c call dswap( p, x(k,1), n, x(j,1), n) temp = x(k) x(k) = x(j) x(j) = temp ic(j) = ic(k) ic(k) = icj end if end do 3 continue c call intpr( 'ic', -1, ic, n) do j = 1, n i = ic(j) if (i .ne. j) then ic(j) = 0 ni = ic(i) nij = ni + 1 ic(i) = nij ri = dble(ni) rij = dble(nij) sj = sqrt(one/rij) si = sqrt(ri)*sj c update column sum in kth row c call dscal( p, si, x(i,1), n) c call daxpy( p, sj, x(j,1), n, x(i,1), n) x(i) = si*x(i) + sj*x(j) else ic(j) = 1 end if end do c call intpr( 'ic', -1, ic, n) dopt = FLMAX ij = 0 do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) c call dcopy( p, x(i,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) c dij = ddot(p, v, 1, v, 1) temp = sj*x(i) - si*x(j) dij = temp*temp ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij iopt = i jopt = j end if end do end do c if (.false.) then c i = 1 c ij = 1 c do j = 2, ng c call dblepr( 'dij', -1, d(ij), i) c ij = ij + i c i = j c end do c end if if (ns .eq. 1) then if (iopt .lt. jopt) then x(1) = dble(iopt) ic(1) = jopt else x(1) = dble(jopt) ic(1) = iopt end if d(1) = dopt return end if ls = 1 100 continue ni = ic(iopt) nj = ic(jopt) nij = ni + nj ic(iopt) = nij ic(jopt) = -iopt if (jopt .ne. lg) then call wardsw( jopt, lg, d) m = ic(jopt) ic(jopt) = ic(lg) ic(lg) = m end if si = dble(ni) sj = dble(nj) sij = dble(nij) dold = dopt iold = iopt jold = jopt iopt = -1 jopt = -1 dopt = FLMAX lg = lg - 1 ld = ld - lg ii = (iold*(iold-1))/2 if (iold .gt. 1) then ik = ii - iold + 1 do j = 1, (iold - 1) nj = ic(j) rj = dble(nj) ik = ik + 1 jk = ld + j dij = (rj+si)*d(ik)+(rj+sj)*d(jk) dij = (dij-rj*dold)/(rj+sij) d(ik) = dij end do end if if (iold .lt. 
lg) then ik = ii + iold i = iold do j = (iold + 1), lg nj = ic(j) rj = dble(nj) jk = ld + j dij = (rj+si)*d(ik)+(rj+sj)*d(jk) dij = (dij-rj*dold)/(rj+sij) d(ik) = dij ik = ik + i i = j end do end if d(lo) = dold lo = lo - 1 d(lo) = dble(iold) lo = lo - 1 d(lo) = dble(jold) lo = lo - 1 c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 do i = 2, ld si = d(i) if (si .le. dopt) then ij = i dopt = si end if end do if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 100 900 continue d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 1 ld = lo si = d(lo) lo = lo - 1 sj = d(lo) ic(int(sj)) = ng if (si .lt. sj) then x(1) = si d(ld) = sj else x(1) = sj d(ld) = si end if ld = ld - 1 lg = ng + 1 do k = 2, ns lo = lo - 1 d(ld) = d(lo) ld = ld - 1 lo = lo - 1 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. 
icj) then x(k) = dble(ici) d(ld) = dble(icj) else x(k) = dble(icj) d(ld) = dble(ici) end if ld = ld - 1 end do ld = nd lo = nd - 1 do k = 1, ns ic(k) = int(d(lo)) lo = lo - 1 ld = ld - 1 d(ld) = d(lo) lo = lo - 1 end do ld = nd lo = 1 do k = 1, ns si = d(lo) d(lo) = d(ld) d(ld) = si ld = ld - 1 lo = lo + 1 end do return end subroutine me1e ( EQPRO, x, n, G, Vinv, z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, G, maxi double precision Vinv, eps, tol c double precision x(n), z(n,G[+1]), mu(G), sigsq, pro(G[+1]) double precision x(*), z(n, * ), mu(*), sigsq, pro( * ) integer nz, iter, k, i double precision hold, hood, err, prok, tmin, tmax, rteps double precision const, sum, sumz, smu, temp, term, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( nz, one/dble(nz), 0, pro, 1) dummy(1) = one/dble(nz) call dcopy( nz, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 sumz = zero sigsq = zero zsum = one do k = 1, G sum = zero smu = zero do i = 1, n temp = z(i,k) sum = sum + temp smu = smu + temp*x(i) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(sum,zsum) if (sum .gt. 
rteps) then smu = smu / sum mu(k) = smu do i = 1, n temp = x(i) - smu temp = temp*temp sigsq = sigsq + z(i,k)*temp z(i,k) = temp end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if if (Vinv .le. zero) then sigsq = sigsq / dble(n) else sigsq = sigsq / sumz end if if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (sigsq .le. eps) then tol = err eps = FLMAX maxi = iter return end if const = pi2log + log(sigsq) do k = 1, G c temp = pro(k) do i = 1, n c z(i,k) = temp*exp(-(const+(z(i,k)/sigsq))/two) z(i,k) = -(const+(z(i,k)/sigsq))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine me1ep ( EQPRO, x, n, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, G, maxi double precision pshrnk, pmu, pscale, pdof double precision Vinv, eps, tol c double precision x(n), z(n,G[+1]), mu(G), sigsq, pro(G[+1]) double precision x(*), z(n, * ), mu(*), sigsq, pro( * ) integer nz, iter, k, i double precision hold, hood, err, prok, tmin, tmax double precision const, sum, sumz, smu, temp, term, zsum double precision pmupmu, cgam, cmu, rmu, rgam, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision dlngam external dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( nz, one/dble(nz), 0, pro, 1) dummy(1) = one/dble(nz) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX pmupmu = pmu*pmu iter = 0 100 continue iter = iter + 1 sigsq = zero zsum = one do k = 1, G sumz = zero smu = zero do i = 1, n temp = z(i,k) sumz = sumz + temp smu = smu + temp*x(i) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(zsum,sumz) if (sumz .gt. 
rteps) then smu = smu/sumz sum = zero do i = 1, n term = x(i) - smu term = term*term sum = sum + z(i,k)*term end do term = (pshrnk*sumz)/(pshrnk+sumz) temp = (pmupmu + smu*smu) - two*pmu*smu sigsq = sigsq + sum + term*temp term = sumz/(pshrnk+sumz) temp = pshrnk/(pshrnk+sumz) mu(k) = term*smu + temp*pmu end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if sigsq = (pscale + sigsq)/(pdof + dble(n+G) + two) c if (Vinv .le. zero) then c sigsq = sigsq / dble(n) c else c sigsq = sigsq / sumz c end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (sigsq .le. eps) then tol = err eps = FLMAX maxi = iter return end if const = pi2log + log(sigsq) do k = 1, G c temp = pro(k) do i = 1, n term = x(i) - mu(k) z(i,k) = -(const+((term*term)/sigsq))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter cmu = dble(G)*(pi2log-log(pshrnk))/two sum = zero do k = 1, G temp = pmu - mu(k) temp = temp*temp sum = sum - (pshrnk/sigsq)*temp end do term = log(sigsq) rmu = (sum - dble(G)*term)/two temp = pdof/two cgam = temp*log(pscale/two) - dlngam(temp) rgam = -(temp+one)*term - (pscale/sigsq)/two pdof = (cmu+cgam) + (rmu+rgam) return end subroutine ms1e ( x, z, n, G, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G c double precision x(n), z(n,G), mu(G), sigsq, pro(G) double precision x(*), z(n,*), mu(*), sigsq, pro(*) integer i, k double precision sum, smu, sumz, temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) c------------------------------------------------------------------------------ sumz = zero sigsq = zero do k = 1, G sum = zero smu = zero do i = 1, n temp = z(i,k) sum = sum + temp smu = smu + temp*x(i) end do sumz = sumz + sum pro(k) = sum / dble(n) if (sigsq .gt. one .or. smu .le. sum*FLMAX) then smu = smu / sum mu(k) = smu if (sigsq .ne. FLMAX) then do i = 1, n temp = abs(x(i) - smu) sigsq = sigsq + z(i,k)*(temp*temp) end do end if else mu(k) = FLMAX sigsq = FLMAX end if end do c sumz .eq. n when no noise if (sigsq .ne. 
FLMAX) sigsq = sigsq / sumz return end subroutine ms1ep ( x, z, n, G, * pshrnk, pmu, pscale, pdof, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision pshrnk, pmu, pscale, pdof c double precision x(n), z(n,G), mu(G), sigsq, pro(G) double precision x(*), z(n,*), mu(*), sigsq, pro(*) integer k, i double precision pmupmu double precision sum, sumz, smu, temp, term double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero pmupmu = pmu*pmu sigsq = zero do k = 1, G sumz = zero smu = zero do i = 1, n temp = z(i,k) sumz = sumz + temp smu = smu + temp*x(i) end do pro(k) = sumz / dble(n) if (sumz .gt. one .or. smu .lt. sumz*FLMAX) then smu = smu/sumz sum = zero term = sumz/(pshrnk+sumz) temp = pshrnk/(pshrnk+sumz) mu(k) = term*smu + temp*pmu if (sigsq .ne. FLMAX) then do i = 1, n term = abs(x(i) - smu) sum = sum + z(i,k)*(term*term) end do term = (pshrnk*sumz)/(pshrnk+sumz) temp = (pmupmu + smu*smu) - two*pmu*smu sigsq = sigsq + sum + term*temp end if else mu(k) = FLMAX sigsq = FLMAX end if end do if (sigsq .ne. FLMAX) then temp = pdof + dble(n) + two if (pshrnk .gt. 
zero) temp = temp + dble(G) sigsq = (pscale + sigsq)/temp end if return end subroutine eseee ( CHOL, x, mu, Sigma, pro, n, p, G, Vinv, * w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c character CHOL logical CHOL c integer n, p, G integer n, p, G double precision hood, Vinv c double precision x(n,p), w(p), z(n,G[+1]) double precision x(n,*), w(*), z(n, * ) c double precision mu(p,G), Sigma(p,p), pro(G[+1]) double precision mu(p,*), Sigma(p,*), pro( * ) integer info, i, j, k, nz double precision detlog, prok, tmin, tmax double precision umin, umax, const, temp, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ c if (CHOL .eq. 'N') then if (.not. CHOL) then c Cholesky factorization call dpotrf( 'U', p, Sigma, p, info) if (info .ne. 0) then w(1) = dble(info) hood = FLMAX return end if end if call absrng( p, Sigma, (p+1), umin, umax) c rc = umin/(one+umax) if (umax .le. one .and. umax .ge. umin*RTMAX) then w(1) = zero hood = FLMAX return end if if (umax .ge. one .and. umin .le. 
umax*RTMIN) then w(1) = zero hood = FLMAX return end if detlog = zero do j = 1, p detlog = detlog + log(abs(Sigma(j,j))) end do const = dble(p)*pi2log/two + detlog do k = 1, G c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, Sigma, p, w, 1) temp = ddot( p, w, 1, w, 1)/two c z(i,k) = prok*exp(-(const+temp)) z(i,k) = -(const+temp) end do end do w(1) = zero if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then w(1) = zero hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do w(1) = zero return end double precision function detmc2( n, u) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer k, n double precision q double precision u(n,*) double precision zero, two parameter (zero = 0.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) detmc2 = zero do k = 1, n q = u(k,k) if (q .eq. 
zero) then detmc2 = -FLMAX return end if detmc2 = detmc2 + log(abs(q)) end do detmc2 = two*detmc2 return end subroutine meeee ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, U, pro, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) integer nz, p1, iter, i, j, k, j1 double precision piterm, sclfac, sumz, sum, zsum double precision cs, sn, umin, umax, rc, detlog, rteps double precision const, hold, hood, err, temp, term double precision prok, tmin, tmax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX c zero out the lower triangle i = 1 dummy(1) = zero do j = 2, p c call dcopy( p-i, zero, 0, U(j,i), 1) call dcopy( p-i, dummy, 0, U(j,i), 1) i = j end do iter = 0 100 continue iter = iter + 1 dummy(1) = zero do j = 1, p c call dcopy( j, zero, 0, U(1,j), 1) call dcopy( j, dummy, 0, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX hood = eps maxi = iter return end if if (Vinv .le. zero) then sclfac = one/sqrt(dble(n)) else sclfac = one/sqrt(sumz) end if do j = 1, p call dscal( j, sclfac, U(1,j), 1) end do c condition number call absrng( p, U, p1, umin, umax) rc = umin/(one+umax) if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (rc .le. 
rteps) then tol = err eps = FLMAX hood = eps maxi = iter return end if detlog = zero do j = 1, p detlog = detlog + log(abs(U(j,j))) end do const = piterm + detlog do k = 1, G c temp = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp * exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meeeep( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, U, pro, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) integer nz, p1, iter, i, j, k, j1 double precision piterm, sclfac, sumz, sum, zsum double precision cs, sn, umin, umax, rc, detlog, rteps double precision const, hold, hood, err, temp, term double precision prok, tmin, tmax double precision cmu, cgam, rmu, rgam double precision zero, 
one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision twolog parameter (twolog = 0.6931471805599453d0) double precision pilog parameter (pilog = 1.144729885849400d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot, dlngam external ddot, dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 sclfac = one/sqrt(dble(n)) eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c copy pscale to U do j = 1, p call dcopy( p, pscale(1,j), 1, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(zsum,sum) if (sum .gt. 
rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sum + pshrnk temp = (sum*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) call dscal( p, sum/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = pdof + dble(p) + one if (pshrnk .gt. zero) term = term + dble(G) if (Vinv .le. zero) then sclfac = one/sqrt(term+dble(n)) else sclfac = one/sqrt(term+dble(sumz)) end if do j = 1, p call dscal( j, sclfac, U(1,j), 1) end do if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if c condition number call absrng( p, U, p1, umin, umax) rc = umin/(one+umax) if (rc .le. rteps) then tol = err eps = FLMAX maxi = iter return end if detlog = zero do j = 1, p detlog = detlog + log(abs(U(j,j))) end do const = piterm + detlog do k = 1, G c temp = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp * exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter if (pshrnk .gt. zero) then cmu = dble(p)*(log(pshrnk) - pi2log)/two rmu = zero do k = 1, G call daxpy( p, (-one), mu(1,k), 1, pmu, 1) call dtrsv('U','T','N',p,U,p,pmu,1) rmu = rmu + ddot( p, pmu, 1, pmu, 1) end do sum = zero term = zero temp = zero do j = 1, p call dcopy( p, pscale(j,1), p, pmu, 1) c call dtrsv('U','T','N', p, U, p, pmu, 1) i = p-j+1 c call dtrsv('U','T','N', i, U(j,j),i,pmu(j),1) call dtrsv('U','T','N', i, U(j,j),p,pmu(j),1) sum = sum + ddot(i, pmu(j), 1, pmu(j), 1) temp = temp + log(abs(pscale(j,j))) term = term + dlngam((pdof+one-dble(j))/two) end do rmu = -(detlog+pshrnk*rmu/two) const = -dble(p)*(pdof*twolog+(dble(p)-one)*pilog/two) cgam = (const/two-pdof*temp) - term rgam = -((pdof+dble(p)+one)*detlog + sum/two) pdof = (dble(G)*cmu+rmu) + (cgam+rgam) else pdof = FLMAX end if return end subroutine mseee ( x, z, n, p, G, w, mu, U, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) c------------------------------------------------------------------------------ c c x double (input) (n,p) matrix of observations. c z double (input) (n,G) conditional probabilities. 
c n integer (input) number of observations. c p integer (input) dimension of the data. c G integer (input) number of Gaussian clusters in the mixture. c w double (scratch) (p) c mu double (output) (p,G) mean for each group. c U double (output) (p,p) upper triangular Cholesky factor of the c common covariance matrix for the groups: transpose(U) * U = Sigma. c pro double (output) (G) mixing proportions (ignore result if equal). integer i, j, k, j1 double precision sum, sumz, zsum, temp, cs, sn double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c----------------------------------------------------------------------------- dummy(1) = zero do j = 1, p c call dcopy p, zero, 0, U(1,j), 1) call dcopy( p, dummy, 0, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum pro(k) = sum / dble(n) if (sum .gt. one .or. one .gt. sum*FLMAX) then zsum = min(zsum,sum) call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do else zsum = zero c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .eq. zero) return c sumz .eq. 
n when no noise do j = 1, p call dscal( j, one/sqrt(sumz), U(1,j), 1) end do return end subroutine mseeep( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * w, mu, U, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof c double precision mu(p,G), U(p,p), pro(G) double precision mu(p,*), U(p,*), pro(*) integer i, j, k, j1 double precision sclfac, const, temp double precision sum, sumz, zsum, cs, sn double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .le. zero) pshrnk = zero do j = 1, p call dcopy( p, pscale(1,j), 1, U(1,j), 1) end do sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum pro(k) = sum / dble(n) if (sum .ge. one .or. one .gt. 
sum*FLMAX) then zsum = min(zsum,sum) call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sum + pshrnk temp = (sum*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) call dscal( p, sum/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else zsum = zero c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .eq. zero) return temp = pdof+dble(n+p+1) if (pshrnk .gt. zero) temp = temp + dble(G) sclfac = one/sqrt(temp) do j = 1, p call dscal( j, sclfac, U(1,j), 1) end do return end subroutine eseei ( x, mu, scale, shape, pro, n, p, G, * Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision scale, hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p), pro(G[+1]) double precision mu(p,*), shape(*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision smin, smax, prok double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double 
precision dummy(1) c------------------------------------------------------------------------------ if (scale .le. zero) then hood = FLMAX return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if temp = sqrt(scale) do j = 1, p shape(j) = temp*sqrt(shape(j)) end do const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (abs(temp) .ge. shape(j)*FLMAX) then hood = FLMAX return end if temp = temp/shape(j) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine meeei ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p), pro(G[+1]) double precision mu(p,*), shape(*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, zsum double precision const, hold, hood, err, smin, smax double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) sumz = zero zsum = one do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum/dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j) = shape(j) + sum end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then scale = zero tol = err eps = FLMAX maxi = iter return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (Vinv .le. zero) then scale = temp/dble(n) else scale = temp/sumz end if if (temp .le. eps) then tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape, 1) call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps .or. scale .le. eps) then tol = err eps = FLMAX maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j) end do c z(i,k) = prok*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meeeip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p), pro(G[+1]) double precision mu(p,*), shape(*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, zsum double precision const, hold, hood, err, smin, smax double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c call dcopy( p, pscale, 0, shape, 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape, 1) sumz = zero zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum/dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + (temp*temp) end do shape(j) = shape(j) + sum temp = pmu(j) - mu(j,k) shape(j) = shape(j) + const*(temp*temp) end do end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then scale = zero tol = err eps = FLMAX maxi = iter return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if term = pdof + one if (pshrnk .gt. zero) term = term + one if (Vinv .le. zero) then scale = temp/(term + dble(n)) else scale = temp/(term + sumz) end if if (temp .le. eps) then tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape, 1) call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps .or. scale .le. eps) then tol = err eps = FLMAX maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j) end do c z(i,k) = prok*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine mseei ( x, z, n, p, G, mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p), pro(G) double precision mu(p,*), scale, shape(*), pro(*) integer i, j, k double precision sum, sumz, temp, smin, smax double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ sumz = zero do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum pro(k) = sum/dble(n) if (sum .gt. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do j = 1, p sum = zero do i = 1, n do k = 1, G if (mu(1,k) .eq. FLMAX) then scale = FLMAX return end if temp = z(i,k)*(x(i,j) - mu(j,k)) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do end do shape(j) = shape(j) + sum end do call sgnrng(p, shape, 1, smin, smax) if (smin .eq. zero) then scale = zero return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. 
BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (sumz .lt. one .and. temp .ge. sumz*FLMAX) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if scale = temp/sumz if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if call dscal( p, one/temp, shape, 1) return end subroutine mseeip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p), pro(G[+1]) double precision mu(p,*), scale, shape(*), pro( * ) integer i, j, k double precision sum, sumz, temp, term double precision const, smin, smax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. 
zero) pshrnk = zero c call dcopy( p, pscale, 0, shape, 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape, 1) sumz = zero do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum pro(k) = sum/dble(n) if (sum .gt. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do shape(j) = shape(j) + sum temp = pmu(j) - mu(j,k) shape(j) = shape(j) + const*(temp*temp) end do else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do call sgnrng(p, shape, 1, smin, smax) if (smin .eq. zero) then scale = zero return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if if (temp .gt. SMALOG) then smin = exp(temp) else smin = zero end if term = pdof + sumz + two if (pshrnk .gt. zero) term = term + dble(G) scale = smin/term if (smin .lt. one .and. one .ge. 
smin*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if call dscal( p, one/smin, shape, 1) return end subroutine eseev ( x, mu, scale, shape, O, pro, n, p, G, * Vinv, v, w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p, G integer n, p, G double precision scale, Vinv, hood c double precision x(n,p), v(p), w(p), z(n,G[+1]) double precision x(n,*), v(*), w(*), z(n, * ) c double precision mu(p,G), shape(p), O(p,p,G), pro(G[+1]) double precision mu(p,*), shape(*), O(p,p,*), pro( * ) integer i, j, k, nz double precision const, temp, tmin, tmax double precision smin, smax, prok, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ if (scale .le. zero) then hood = FLMAX return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if temp = sqrt(scale) do j = 1, p shape(j) = temp*sqrt(shape(j)) end do const = dble(p)*(pi2log + log(scale)) do k = 1, G c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dgemv( 'N', p, p, one, O(1,1,k), p, * w, 1, zero, v, 1) do j = 1, p if (shape(j) .lt. one .and. * abs(v(j)) .ge. shape(j)*FLMAX) then hood = FLMAX return end if v(j) = v(j)/shape(j) end do temp = ddot( p, v, 1, v, 1) c z(i,k) = prok*exp(-(const+temp)/two) z(i,k) = -(const+temp)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. 
zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine meeev ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * lwork, mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi, lwork double precision Vinv, eps, tol, scale double precision x(n,*), z(n, * ), w( * ), s(*) double precision mu(p,*), shape(*), O(p,p,*), pro( * ) integer nz, p1, iter, i, j, k, l, j1, info double precision dnp, temp, term, rteps double precision sumz, sum, smin, smax, cs, sn double precision const, rc, hood, hold, err double precision prok, tmin, tmax, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) 
c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if p1 = p + 1 dnp = dble(n*p) eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) sumz = zero zsum = one l = 0 do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p c call dcopy( p, zero, 0, O(1,j,k), 1) call dcopy( p, dummy, 0, O(1,j,k), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum zsum = min(zsum,sum) if (.not. EQPRO) pro(k) = sum / dble(n) if (sum .ge. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dgesvd( 'N', 'O', p, p, O(1,1,k), p, s, * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = s(j) shape(j) = shape(j) + temp*temp end do end if end if end do if (l .ne. 0 .or. zsum .lt. rteps) then lwork = l c w(1) = FLMAX tol = err if (l .ne. 0) then eps = FLMAX else eps = -FLMAX end if maxi = iter return end if term = zero if (Vinv .gt. 
zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then lwork = 0 c w(1) = smin tol = err eps = FLMAX maxi = iter return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (Vinv .le. zero) then scale = temp/dble(n) else scale = temp/sumz end if if (temp .le. eps) then lwork = 0 c w(1) = temp tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape, 1) call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then lwork = 0 c w(1) = -smin tol = err eps = FLMAX maxi = iter return end if if (scale .le. eps) then c w(1) = -scale lwork = 0 tol = err eps = FLMAX maxi = iter return end if temp = sqrt(scale) do j = 1, p w(j) = temp*sqrt(shape(j)) end do call absrng( p, w, 1, smin, smax) rc = smin / (one + smax) if (smin .le. rteps) then c w(1) = -smin lwork = 0 tol = err eps = FLMAX maxi = iter return end if const = dble(p)*(pi2log + log(scale))/two do k = 1, G c temp = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w(p1), 1) call daxpy( p, (-one), mu(1,k), 1, w(p1), 1) call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, s, 1) do j = 1, p s(j) = s(j) / w(j) end do sum = ddot( p, s, 1, s, 1)/two c z(i,k) = temp*exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. 
zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 lwork = 0 c w(1) = rc tol = err eps = hood maxi = iter return end subroutine meeevp( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * lwork, mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi, lwork c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]), w(lwork), s(p) double precision x(n,*), z(n, * ), w( * ), s(*) c double precision mu(p,G), shape(p), O(p,p,G), pro(G[+1]) double precision mu(p,*), shape(*), O(p,p,*), pro( * ) integer nz, p1, iter, i, j, k, l, j1, info double precision dnp, temp, term, rteps double precision sumz, sum, smin, smax, cs, sn double precision const, rc, hood, hold, err double precision prok, tmin, tmax, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if p1 = p + 1 dnp = dble(n*p) eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) zsum = one sumz = zero l = 0 do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p call dcopy( p, pscale(1,j), 1, O(1,j,k), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) term = sum+pshrnk const = (sum*pshrnk)/term call dscal( p, sqrt(const), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) call dscal( p, sum/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) call dgesvd( 'N', 'O', p, p, O(1,1,k), p, s, * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = s(j) shape(j) = shape(j) + temp*temp end do end if end if end do if (l .ne. 0 .or. zsum .le. rteps) then lwork = l c w(1) = FLMAX tol = err if (l .ne. 0) then eps = FLMAX else eps = -FLMAX end if maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( p, shape, 1, smin, smax) if (smin .eq. zero) then c w(1) = smin tol = err eps = FLMAX maxi = iter return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if term = pdof + dble(p) + one if (pshrnk .gt. zero) term = term + one if (Vinv .le. zero) then scale = temp/(term + dble(n)) else scale = temp/(term + sumz) end if if (temp .le. eps) then c w(1) = temp tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape, 1) call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then c w(1) = -smin tol = err eps = FLMAX maxi = iter return end if if (scale .le. eps) then c w(1) = -scale tol = err eps = FLMAX maxi = iter return end if temp = sqrt(scale) do j = 1, p w(j) = temp*sqrt(shape(j)) end do call sgnrng( p, w, 1, smin, smax) rc = smin / (one + smax) if (smin .le. rteps) then c w(1) = -smin tol = err eps = FLMAX maxi = iter return end if const = dble(p)*(pi2log + log(scale))/two do k = 1, G c temp = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w(p1), 1) call daxpy( p, (-one), mu(1,k), 1, w(p1), 1) call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, s, 1) do j = 1, p s(j) = s(j) / w(j) end do sum = ddot( p, s, 1, s, 1)/two c z(i,k) = temp*exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then
c     --- tail of the preceding EM routine (cut mid-line in this view):
c         log-sum-exp normalization of z, log-likelihood update, and
c         the relative-change convergence test; on exit tol/eps/maxi
c         return err/loglik/iteration count ---
               z(i,k) = zero
            else
               temp = log(prok) + z(i,k)
               tmin = min(tmin,temp)
               tmax = max(tmax,temp)
               z(i,k) = temp
            end if
         end do
         sum = zero
         do k = 1, nz
            if (pro(k) .ne. zero) then
               temp = z(i,k) - tmax
               if (temp .ge. SMALOG) then
                  z(i,k) = exp(temp)
                  sum = sum + z(i,k)
               else
                  z(i,k) = zero
               end if
            end if
         end do
         hood = hood + (log(sum)+tmax)
         call dscal( nz, (one/sum), z(i,1), n)
      end do
      err  = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt. maxi) goto 100
      lwork = 0
c     w(1) = rc
      tol  = err
      eps  = hood
      maxi = iter
      return
      end
      subroutine mseev ( x, z, n, p, G, w, lwork,
     *                   mu, scale, shape, O, pro)
c
c  M-step for the EEV parameterization (naming follows the mclust
c  ms<MODEL> convention): given data x (n by p) and conditional
c  probabilities z (n by G), computes mixing proportions pro, group
c  means mu, a common volume (scale) and shape vector, and per-group
c  orientations O(.,.,k).  Failure is signalled in-band by setting
c  scale to +/-FLMAX and lwork to the LAPACK info code.
c
c This function is part of the MCLUST software described at
c       http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c        http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G, lwork
      double precision   scale
c     double precision   x(n,p), z(n,G), w(lwork)
      double precision   x(n,*), z(n,*), w( * )
c     double precision   shape(p), O(p,p,G), mu(p,G), pro(G)
      double precision   shape(*), O(p,p,*), mu(p,*), pro(*)
      integer            i, j, k, j1, l, info
      double precision   sum, sumz, temp
      double precision   cs, sn, smin, smax
      double precision   zero, one
      parameter         (zero = 0.d0, one = 1.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   BIGLOG
      parameter         (BIGLOG = 709.d0)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      l    = 0
      sumz = zero
      scale = zero
      do k = 1, G
         dummy(1) = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do j = 1, p
c           call dcopy( p, zero, 0, O(1,j,k), 1)
            call dcopy( p, dummy, 0, O(1,j,k), 1)
         end do
c        weighted column sums -> unnormalized mean; sum is the total
c        responsibility of group k, used for pro(k)
         sum = zero
         do i = 1, n
            temp = z(i,k)
            sum  = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz   = sumz + sum
         pro(k) = sum / dble(n)
c        guard against division by a vanishing group weight
         if (sum .ge. one .or. one .lt. sum*FLMAX) then
            call dscal( p, (one/sum), mu(1,k), 1)
c           accumulate the sqrt(z)-weighted, centered rows into a
c           triangular factor of O(.,.,k) via Givens rotations
c           (drotg/drot)
            do i = 1, n
               call dcopy( p, x(i,1), n, w, 1)
               call daxpy( p, (-one), mu(1,k), 1, w, 1)
               call dscal( p, sqrt(z(i,k)), w, 1)
               j = 1
               do j1 = 2, p
                  call drotg( O(j,j,k), w(j), cs, sn)
                  call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
                  j = j1
               end do
               call drotg( O(p,p,k), w(p), cs, sn)
            end do
c           SVD of the factor: singular values land in z(1:p,k) and
c           feed shape; right factor overwrites O(.,.,k) ('O' job)
            call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k),
     *                   dummy, 1, dummy, 1, w, lwork, info)
            if (info .ne. 0) then
               l = info
            else
               if (scale .ne. FLMAX) then
                  do j = 1, p
                     temp     = z(j,k)
                     shape(j) = shape(j) + temp*temp
                  end do
               end if
            end if
         else
            scale = FLMAX
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
c     propagate failure: lwork returns the offending info code
      if (scale .eq. FLMAX .or. l .ne. 0) then
         lwork = l
         if (l .ne. 0) then
            scale = FLMAX
         else
            scale = -FLMAX
         end if
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         return
      end if
      lwork = 0
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .eq. zero) then
         scale = FLMAX
         return
      end if
c     geometric mean of shape, computed in log space and guarded
c     against exp overflow (BIGLOG) and underflow (SMALOG)
      sum = zero
      do j = 1, p
         sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
         scale = FLMAX
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         return
      end if
      if (temp .ge. SMALOG) then
         temp = exp(temp)
      else
         temp = zero
      end if
      if (temp .ge. sumz*FLMAX) then
         scale = FLMAX
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         return
      end if
      scale = temp/sumz
      if (temp .lt. one .and. one .ge.
temp*FLMAX) then
c        --- tail of mseev: 1/temp would overflow ---
c        NOTE(review): the commented-out dcopy filled shape with FLMAX
c        but the replacement fills it with zero; sibling error paths
c        use FLMAX -- confirm which is intended.
c     call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, shape, 1)
         return
      end if
      call dscal( p, one/temp, shape, 1)
      return
      end
      subroutine mseevp( x, z, n, p, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   w, lwork, mu, scale, shape, O, pro)
c
c  M-step for the EEV parameterization with a conjugate prior
c  (pshrnk = shrinkage, pmu = prior mean, pscale = prior scale
c  matrix, pdof = prior degrees of freedom).  Outputs and failure
c  signalling mirror mseev above.
c
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G, lwork
c     double precision   pshrnk, pmu(p), pscale(p,p), pdof
      double precision   pshrnk, pmu(*), pscale(p,*), pdof
      double precision   scale
c     double precision   x(n,p), z(n,G), w(lwork)
      double precision   x(n,*), z(n,*), w( * )
c     double precision   mu(p,G), shape(p), O(p,p,G), pro(G)
      double precision   mu(p,*), shape(*), O(p,p,*), pro(*)
      integer            p1, i, j, k, l, j1, info
      double precision   temp, term, const
      double precision   sumz, sum, smin, smax, cs, sn
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   BIGLOG
      parameter         (BIGLOG = 709.d0)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
c     NOTE(review): this zeroes any POSITIVE shrinkage; sibling
c     routines (e.g. mseiip) use (pshrnk .lt. zero) -- confirm the
c     .gt. here is intended and not an inverted guard.
      if (pshrnk .gt. zero) pshrnk = zero
      p1 = p + 1
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      l    = 0
      sumz = zero
      scale = zero
      do k = 1, G
c        call dcopy( p, zero, 0, mu(1,k), 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, mu(1,k), 1)
c        seed the triangular factor with the prior scale matrix
         do j = 1, p
            call dcopy( p, pscale(1,j), 1, O(1,j,k), 1)
         end do
         sum = zero
         do i = 1, n
            temp = z(i,k)
            sum  = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz   = sumz + sum
         pro(k) = sum / dble(n)
         if (sum .ge. one .or. one .lt. sum*FLMAX) then
            call dscal( p, (one/sum), mu(1,k), 1)
c           fold the weighted, centered rows into the factor via
c           Givens rotations
            do i = 1, n
               call dcopy( p, x(i,1), n, w, 1)
               call daxpy( p, (-one), mu(1,k), 1, w, 1)
               call dscal( p, sqrt(z(i,k)), w, 1)
               j = 1
               do j1 = 2, p
                  call drotg( O(j,j,k), w(j), cs, sn)
                  call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
                  j = j1
               end do
               call drotg( O(p,p,k), w(p), cs, sn)
            end do
c           fold in the prior-mean deviation with weight
c           (sum*pshrnk)/(sum+pshrnk), then shrink the mean
            call dcopy( p, pmu, 1, w, 1)
            call daxpy( p, (-one), mu(1,k), 1, w, 1)
            term  = sum+pshrnk
            const = (sum*pshrnk)/term
            call dscal( p, sqrt(const), w, 1)
            j = 1
            do j1 = 2, p
               call drotg( O(j,j,k), w(j), cs, sn)
               call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
               j = j1
            end do
            call drotg( O(p,p,k), w(p), cs, sn)
            call dscal( p, sum/term, mu(1,k), 1)
            call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1)
c           SVD: singular values -> z(1:p,k), orientation -> O(.,.,k)
            call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k),
     *                   dummy, 1, dummy, 1, w, lwork, info)
            if (info .ne. 0) then
               l = info
            else
               if (scale .ne. FLMAX) then
                  do j = 1, p
                     temp     = z(j,k)
                     shape(j) = shape(j) + temp*temp
                  end do
               end if
            end if
         else
            scale = FLMAX
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
      if (scale .eq. FLMAX .or. l .ne. 0) then
         lwork = l
         if (l .ne. 0) then
            scale = FLMAX
         else
            scale = -FLMAX
         end if
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         return
      end if
      lwork = 0
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .eq. zero) then
         scale = FLMAX
         return
      end if
c     geometric mean of shape in log space, overflow-guarded
      sum = zero
      do j = 1, p
         sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
         scale = FLMAX
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         return
      end if
      if (temp .ge. SMALOG) then
         temp = exp(temp)
      else
         temp = zero
      end if
c     posterior denominator includes the prior degrees of freedom
      term = pdof + dble(p) + one
      if (pshrnk .gt. zero) term = term + one
      scale = temp/(term + sumz)
      if (temp .lt. one .and. one .ge.
temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) return end if call dscal( p, one/temp, shape, 1) return end subroutine eseii ( x, mu, sigsq, pro, n, p, G, Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision sigsq, hood, Vinv c double precision x(n,p), mu(p,G), pro(G[+1]), z(n,G[+1]) double precision x(n,*), mu(p,*), pro( * ), z(n, * ) integer i, j, k, nz double precision sum, temp, const, prok, tmin, tmax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- if (sigsq .le. zero) then hood = FLMAX return end if const = dble(p)*(pi2log+log(sigsq)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum/sigsq)/two) if (sigsq .lt. one .and. sum .ge. sigsq*FLMAX) then hood = FLMAX return end if z(i,k) = -(const+sum/sigsq)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then
c     --- tail of eseii: log-sum-exp row normalization of z and the
c         log-likelihood accumulation ---
               z(i,k) = zero
            else
               temp = log(prok) + z(i,k)
               tmin = min(tmin,temp)
               tmax = max(tmax,temp)
               z(i,k) = temp
            end if
         end do
         sum = zero
         do k = 1, nz
            if (pro(k) .ne. zero) then
               temp = z(i,k) - tmax
               if (temp .ge. SMALOG) then
                  z(i,k) = exp(temp)
                  sum = sum + z(i,k)
               else
                  z(i,k) = zero
               end if
            end if
         end do
         hood = hood + (log(sum)+tmax)
         if (sum .lt. one .and. one .ge. sum*FLMAX) then
            hood = FLMAX
            return
         end if
         call dscal( nz, (one/sum), z(i,1), n)
      end do
      return
      end
      subroutine hceii ( x, n, p, ic, ng, ns, v, nd, d)
c
c  Agglomerative hierarchical clustering stage for the EII (spherical)
c  criterion.  x holds the data (overwritten with scaled group sums
c  and, on exit, the merge pairs in columns 1:2); ic holds group
c  labels / sizes; ng initial groups; ns merge stages wanted; v is
c  p-vector scratch; d (length nd) holds the pairwise merge costs and
c  is also used as a stack for the merge history.
c  Merge-cost updates are Ward-style (see the wardsw helper).
c
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, ic(n), ng, ns, nd
c     double precision   x(n,p), v(p), d(ng*(ng-1)/2)
      double precision   x(n,*), v(*), d(*)
      integer            lg, ld, ll, lo, ls
      integer            i, j, k, m
      integer            ni, nj, nij, iopt, jopt, iold, jold
      integer            ij, ici, icj, ii, ik, jk
      double precision   ri, rj, rij, si, sj, sij
      double precision   dij, dopt, dold
      external           wardsw
      double precision   one
      parameter         (one = 1.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   ddot
      external           ddot
c------------------------------------------------------------------------------
      iopt = 0
      lg = ng
      ld = (ng*(ng-1))/2
      ll = nd-ng
      lo = nd
c     call intpr( 'ic', -1, ic, n)
c     call intpr( 'no. of groups', -1, lg, 1)
c group heads should be first among rows of x
      i = 1
      j = 2
    1 continue
      icj = ic(j)
      if (icj .ne. j) goto 2
      if (j .eq. lg) goto 3
      i = j
      j = j + 1
      goto 1
    2 continue
c     swap group-head rows of x to the front, keeping ic in step
      k = i
      m = j + 1
      do j = m, n
         icj = ic(j)
         if (icj .gt. k) then
            k = k + 1
            call dswap( p, x(k,1), n, x(j,1), n)
            ic(j) = ic(k)
            ic(k) = icj
         end if
      end do
    3 continue
c     call intpr( 'ic', -1, ic, n)
c     fold each non-head row into its group-head row as a scaled sum;
c     afterwards ic(i) holds the size of group i
      do j = 1, n
         i = ic(j)
         if (i .ne. j) then
            ic(j) = 0
            ni  = ic(i)
            nij = ni + 1
            ic(i) = nij
            ri  = dble(ni)
            rij = dble(nij)
            sj  = sqrt(one/rij)
            si  = sqrt(ri)*sj
c update column sum in kth row
            call dscal( p, si, x(i,1), n)
            call daxpy( p, sj, x(j,1), n, x(i,1), n)
         else
            ic(j) = 1
         end if
      end do
c     call intpr( 'ic', -1, ic, n)
c     initial pairwise merge costs, stored in packed lower-triangle
c     order in d; track the minimum (iopt,jopt)
      dopt = FLMAX
      ij = 0
      do j = 2, lg
         nj = ic(j)
         rj = dble(nj)
         do i = 1, (j-1)
            ni  = ic(i)
            ri  = dble(ni)
            nij = ni + nj
            rij = dble(nij)
            si  = sqrt(ri/rij)
            sj  = sqrt(rj/rij)
            call dcopy( p, x(i,1), n, v, 1)
            call dscal( p, sj, v, 1)
            call daxpy( p, (-si), x(j,1), n, v, 1)
            dij = ddot(p, v, 1, v, 1)
            ij = ij + 1
            d(ij) = dij
            if (dij .le. dopt) then
               dopt = dij
               iopt = i
               jopt = j
            end if
         end do
      end do
c     if (.false.) then
c       i  = 1
c       ij = 1
c       do j = 2, ng
c         call dblepr( 'dij', -1, d(ij), i)
c         ij = ij + i
c         i  = j
c       end do
c     end if
      if (ns .eq. 1) then
         if (iopt .lt. jopt) then
            x(1,1) = iopt
            x(1,2) = jopt
         else
            x(1,1) = jopt
            x(1,2) = iopt
         end if
         d(1) = dopt
         return
      end if
      ls = 1
c     --- main merge loop: merge (iopt,jopt), swap the last active
c         group into jopt's slot, update affected costs, record the
c         merge at the top of d ---
  100 continue
      ni  = ic(iopt)
      nj  = ic(jopt)
      nij = ni + nj
      ic(iopt) = nij
      ic(jopt) = -iopt
      if (jopt .ne. lg) then
         call wardsw( jopt, lg, d)
         m = ic(jopt)
         ic(jopt) = ic(lg)
         ic(lg) = m
      end if
      si  = dble(ni)
      sj  = dble(nj)
      sij = dble(nij)
      dold = dopt
      iold = iopt
      jold = jopt
      iopt = -1
      jopt = -1
      dopt = FLMAX
      lg = lg - 1
      ld = ld - lg
      ii = (iold*(iold-1))/2
c     update costs of pairs involving the merged group (Ward-style
c     size-weighted combination of the two old costs)
      if (iold .gt. 1) then
         ik = ii - iold + 1
         do j = 1, (iold - 1)
            nj = ic(j)
            rj = dble(nj)
            ik = ik + 1
            jk = ld + j
            dij = (rj+si)*d(ik)+(rj+sj)*d(jk)
            dij = (dij-rj*dold)/(rj+sij)
            d(ik) = dij
         end do
      end if
      if (iold .lt. lg) then
         ik = ii + iold
         i = iold
         do j = (iold + 1), lg
            nj = ic(j)
            rj = dble(nj)
            jk = ld + j
            dij = (rj+si)*d(ik)+(rj+sj)*d(jk)
            dij = (dij-rj*dold)/(rj+sij)
            d(ik) = dij
            ik = ik + i
            i = j
         end do
      end if
c     push (cost, i, j) of this merge onto the stack at the end of d
      d(lo) = dold
      lo = lo - 1
      d(lo) = dble(iold)
      lo = lo - 1
      d(lo) = dble(jold)
      lo = lo - 1
c update d and find max
      jopt = 2
      iopt = 1
      dopt = d(1)
      if (lg .eq. 2) goto 900
      ij = 1
      do i = 2, ld
         si = d(i)
         if (si .le. dopt) then
            ij = i
            dopt = si
         end if
      end do
c     convert the packed index ij of the minimum back to (iopt,jopt)
      if (ij .gt.
1) then
c        --- tail of hceii: recover (iopt,jopt) from the packed index,
c            then unwind the merge stack into x(.,1:2) / d ---
         do i = 2, ij
            iopt = iopt + 1
            if (iopt .ge. jopt) then
               jopt = jopt + 1
               iopt = 1
            end if
         end do
      end if
      ls = ls + 1
      if (ls .eq. ns) goto 900
      goto 100
  900 continue
      d(lo) = dopt
      lo = lo - 1
      d(lo) = dble(iopt)
      lo = lo - 1
      d(lo) = dble(jopt)
      do i = 1, ng
         ic(i) = i
      end do
c     replay the recorded merges, renumbering group labels and
c     writing each merge pair (smaller label first) into x(k,1:2)
      lo = nd - 1
      ld = lo
      si = d(lo)
      lo = lo - 1
      sj = d(lo)
      ic(int(sj)) = ng
      if (si .lt. sj) then
         x(1,1) = si
         x(1,2) = sj
      else
         x(1,1) = sj
         x(1,2) = si
      end if
      lg = ng + 1
      do k = 2, ns
         lo = lo - 1
         d(ld) = d(lo)
         ld = ld - 1
         lo = lo - 1
         i = int(d(lo))
         ici = ic(i)
         lo = lo - 1
         j = int(d(lo))
         icj = ic(j)
         if (ici .gt. icj) ic(i) = icj
         ic(j) = ic(lg-k)
         if (ici .lt. icj) then
            x(k,1) = dble(ici)
            x(k,2) = dble(icj)
         else
            x(k,1) = dble(icj)
            x(k,2) = dble(ici)
         end if
      end do
c     reverse the merge costs so d(1:ns) is in merge order
      ld = nd
      lo = 1
      do k = 1, ns
         si = d(lo)
         d(lo) = d(ld)
         d(ld) = si
         ld = ld - 1
         lo = lo + 1
      end do
      return
      end
      subroutine meeii ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps,
     *                   mu, sigsq, pro)
c
c  EM iteration for the EII parameterization (common spherical
c  variance sigsq).  EQPRO forces equal mixing proportions; Vinv > 0
c  adds a uniform noise component as column G+1 of z/pro.  On entry
c  maxi/tol/eps are the iteration limit, convergence tolerance and
c  variance floor; on exit they return the final error, log-likelihood
c  (eps = +/-FLMAX on failure) and iteration count.
c
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      logical            EQPRO
      integer            n, p, G, maxi
      double precision   Vinv, eps, tol, sigsq
c     double precision   x(n,p), z(n,G[+1]), mu(p,G), pro(G[+1])
      double precision   x(n,*), z(n, * ), mu(p,*), pro( * )
      integer            nz, iter, i, j, k
      double precision   sum, sumz, temp, term, prok, tmax, tmin, rteps
      double precision   const, hold, hood, err, dnp, zsum
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   SMALOG
      parameter         (SMALOG = -708.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (maxi .le. 0) return
      dnp = dble(n*p)
      if (Vinv .gt. zero) then
         nz = G + 1
c        ViLog = log(Vinv)
      else
         nz = G
         if (EQPRO) then
c           call dcopy( G, one/dble(G), 0, pro, 1)
            dummy(1) = one/dble(G)
            call dcopy( G, dummy, 0, pro, 1)
         end if
      end if
      eps = max(eps,zero)
      tol = max(tol,zero)
      rteps = sqrt(eps)
c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err  = FLMAX
      iter = 0
c     --- EM loop: M-step (means, sigsq, proportions) then E-step ---
  100 continue
      iter = iter + 1
      sigsq = zero
      sumz  = zero
      zsum  = one
      do k = 1, G
         sum = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do i = 1, n
            temp = z(i,k)
            sum  = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz = sumz + sum
         if (.not. EQPRO) pro(k) = sum/dble(n)
         zsum = min(sum,zsum)
         if (sum .gt. rteps) then
            call dscal( p, (one/sum), mu(1,k), 1)
c           z(i,k) is reused to hold the squared distance to mu(.,k);
c           RTMIN guards ward off underflow in the products
            do i = 1, n
               sum = zero
               do j = 1, p
                  temp = abs(x(i,j) - mu(j,k))
                  if (temp .gt. RTMIN) sum = sum + temp*temp
               end do
               if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN)
     *            sigsq = sigsq + z(i,k)*sum
               z(i,k) = sum
            end do
         else
            sigsq = FLMAX
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
c     a group lost (nearly) all its mass: report failure via eps
      if (zsum .le. rteps) then
         tol  = zsum
         eps  = -FLMAX
         maxi = iter
         return
      end if
      if (Vinv .le. zero) then
         sigsq = sigsq / dnp
      else
         sigsq = sigsq / (dble(p)*sumz)
      end if
      term = zero
      if (Vinv .gt. zero) then
c        noise-component proportion and constant log-density column
         do i = 1, n
            term = term + z(i,nz)
         end do
         temp = term / dble(n)
         pro(nz) = temp
c        call dcopy( n, ViLog, 0, z(1,nz), 1)
         dummy(1) = log(Vinv)
         call dcopy( n, dummy, 0, z(1,nz), 1)
         if (EQPRO) then
c           temp = (one - pro(nz))/dble(G)
c           call dcopy( G, temp, 0, pro, 1)
            dummy(1) = (one - pro(nz))/dble(G)
            call dcopy( G, dummy, 0, pro, 1)
         end if
      end if
      if (sigsq .le. eps) then
         tol  = err
         eps  = FLMAX
         maxi = iter
         return
      end if
c     E-step: z holds log densities, normalized below by log-sum-exp
      const = dble(p)*(pi2log+log(sigsq))
      do k = 1, G
c        temp = pro(k)
         do i = 1, n
c           z(i,k) = temp*exp(-(const+(z(i,k)/sigsq))/two)
            z(i,k) = -(const+(z(i,k)/sigsq))/two
         end do
      end do
      hood = zero
      do i = 1, n
         tmin =  FLMAX
         tmax = -FLMAX
         do k = 1, nz
            prok = pro(k)
            if (prok .eq.
zero) then
c     --- tail of meeii: log-sum-exp normalization, log-likelihood and
c         convergence test; exit values via tol/eps/maxi ---
               z(i,k) = zero
            else
               temp = log(prok) + z(i,k)
               tmin = min(tmin,temp)
               tmax = max(tmax,temp)
               z(i,k) = temp
            end if
         end do
         sum = zero
         do k = 1, nz
            if (pro(k) .ne. zero) then
               temp = z(i,k) - tmax
               if (temp .ge. SMALOG) then
                  z(i,k) = exp(temp)
                  sum = sum + z(i,k)
               else
                  z(i,k) = zero
               end if
            end if
         end do
         hood = hood + (log(sum)+tmax)
         call dscal( nz, (one/sum), z(i,1), n)
      end do
      err  = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt. maxi) goto 100
      tol  = err
      eps  = hood
      maxi = iter
      return
      end
      subroutine meeiip( EQPRO, x, n, p, G, Vinv,
     *                   pshrnk, pmu, pscale, pdof,
     *                   z, maxi, tol, eps,
     *                   mu, sigsq, pro)
c
c  EM iteration for the EII parameterization with a conjugate prior
c  (pshrnk = shrinkage, pmu = prior mean, pscale/pdof = prior scale
c  and degrees of freedom for sigsq).  Other arguments as in meeii.
c  On normal exit pdof is overwritten (see the note at the end).
c
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      logical            EQPRO
      integer            n, p, G, maxi
c     double precision   pshrnk, pmu(p), pscale, pdof
      double precision   pshrnk, pmu(*), pscale, pdof
      double precision   Vinv, eps, tol, sigsq
c     double precision   x(n,p), z(n,G[+1]), mu(p,G), pro(G[+1])
      double precision   x(n,*), z(n, * ), mu(p,*), pro( * )
      integer            nz, iter, i, j, k
      double precision   sum, sumk, sumz, temp, term, tmax, tmin
      double precision   const, hold, hood, err, dnp, prok
      double precision   pmupmu, cmu, cgam, rmu, rgam, zsum, rteps
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   SMLOG
      parameter         (SMLOG = -708.d0)
      double precision   ddot, dlngam
      external           ddot, dlngam
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (maxi .le. 0) return
      dnp = dble(n*p)
      if (Vinv .gt. zero) then
         nz = G + 1
c        ViLog = log(Vinv)
      else
         nz = G
         if (EQPRO) then
c           call dcopy( G, one/dble(G), 0, pro, 1)
            dummy(1) = one/dble(G)
            call dcopy( G, dummy, 0, pro, 1)
         end if
      end if
      eps = max(eps,zero)
      tol = max(tol,zero)
      rteps = sqrt(eps)
c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err  = FLMAX
      iter = 0
      pmupmu = ddot( p, pmu, 1, pmu, 1)
c     --- EM loop: prior-shrunk M-step then E-step ---
  100 continue
      iter = iter + 1
      sigsq = zero
      sumz  = zero
      zsum  = one
      do k = 1, G
         sumk = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do i = 1, n
            temp = z(i,k)
            sumk = sumk + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz = sumz + sumk
         if (.not. EQPRO) pro(k) = sumk/dble(n)
         zsum = min(zsum,sumk)
         if (sumk .gt. rteps) then
            call dscal( p, (one/sumk), mu(1,k), 1)
            do i = 1, n
               sum = zero
               do j = 1, p
                  temp = x(i,j) - mu(j,k)
                  sum  = sum + temp*temp
               end do
               sigsq = sigsq + z(i,k)*sum
            end do
c           ||mu(.,k) - pmu||^2, expanded via dot products; add its
c           shrinkage-weighted contribution, then shrink the mean
            temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1)
            temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1)
            const = sumk+pshrnk
            sigsq = sigsq + ((pshrnk*sumk)/const)*temp
            call dscal( p, (sumk/const), mu(1,k), 1)
            call daxpy(p, (pshrnk/const), pmu, 1, mu(1,k), 1)
         end if
      end do
      if (zsum .le. rteps) then
         tol  = zsum
         eps  = -FLMAX
         maxi = iter
         return
      end if
      term = zero
c     posterior denominator for sigsq includes pdof
      if (Vinv .le. zero) then
         sigsq = sigsq / (pdof + dble((n+G)*p) + two)
      else
         sigsq = sigsq / (pdof + (sumz+dble(G))*dble(p) + two)
         do i = 1, n
            term = term + z(i,nz)
         end do
         temp = term / dble(n)
         pro(nz) = temp
c        call dcopy( n, ViLog, 0, z(1,nz), 1)
         dummy(1) = log(Vinv)
         call dcopy( n, dummy, 0, z(1,nz), 1)
         if (EQPRO) then
c           temp = (one - pro(nz))/dble(G)
c           call dcopy( G, temp, 0, pro, 1)
            dummy(1) = (one - pro(nz))/dble(G)
            call dcopy( G, dummy, 0, pro, 1)
         end if
      end if
      if (sigsq .le.
eps) then tol = err eps = FLMAX maxi = iter return end if const = dble(p)*(pi2log+log(sigsq)) do i = 1, n do k = 1, G sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + temp*temp end do z(i,k) = -(const+(sum/sigsq))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMLOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter cmu = dble(p)*(log(pshrnk)-pi2log)/two const = pdof/two cgam = const*log(pscale/two)-dlngam(const) rmu = zero do k = 1, G temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1) rmu = rmu + (pshrnk*temp)/sigsq end do term = log(sigsq) rmu = -(rmu + dble(p)*term)/two rgam = -(const+one)*term - (pscale/sigsq)/two pdof = (dble(G)*cmu+cgam) + (rmu+rgam) return end subroutine mseii ( x, z, n, p, G, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), mu(p,G), sigsq, pro(G) double precision x(n,*), z(n,*), mu(p,*), sigsq, pro(*) integer i, j, k double precision sum, sumz, temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision dummy(1) 
c------------------------------------------------------------------------------
c     --- body of mseii ---
      sumz  = zero
      sigsq = zero
      do k = 1, G
         sum = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do i = 1, n
            temp = z(i,k)
            sum  = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz = sumz + sum
         pro(k) = sum/dble(n)
         if (sum .ge. one .or. one .lt. sum*FLMAX) then
            call dscal( p, (one/sum), mu(1,k), 1)
            if (sigsq .ne. FLMAX) then
c              accumulate weighted squared distances; RTMIN guards
c              ward off underflow in the products
               do i = 1, n
                  sum = zero
                  do j = 1, p
                     temp = abs(x(i,j) - mu(j,k))
                     if (temp .gt. RTMIN) sum = sum + temp*temp
                  end do
                  if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN)
     *               sigsq = sigsq + z(i,k)*sum
               end do
            end if
         else
            sigsq = FLMAX
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
c sumz .eq. n when no noise
      if (sigsq .ne. FLMAX) sigsq = sigsq / (sumz*dble(p))
      return
      end
      subroutine mseiip( x, z, n, p, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   mu, sigsq, pro)
c
c  M-step for the EII parameterization with a conjugate prior
c  (pshrnk = shrinkage, pmu = prior mean, pscale/pdof = prior scale
c  and degrees of freedom for sigsq).  Outputs as in mseii; a
c  negative pshrnk is clamped to zero.
c
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      integer            n, p, G
c     double precision   pshrnk, pmu(p), pscale, pdof
      double precision   pshrnk, pmu(*), pscale, pdof
c     double precision   x(n,p), z(n,G), mu(p,G), sigsq, pro(G)
      double precision   x(n,*), z(n,*), mu(p,*), sigsq, pro(*)
      integer            i, j, k
      double precision   sum, sumz, pmupmu
      double precision   const, temp, dnp
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   ddot
      external           ddot
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (pshrnk .lt. zero) pshrnk = zero
c     no-op touch of pscale (presumably silences an unused-argument
c     diagnostic -- pscale is only read again at the R level)
      pscale = pscale*1.d0
      dnp = dble(n*p)
      pmupmu = ddot( p, pmu, 1, pmu, 1)
      sumz  = zero
      sigsq = zero
      do k = 1, G
         sum = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do i = 1, n
            temp = z(i,k)
            sum  = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz = sumz + sum
         pro(k) = sum/dble(n)
c        NOTE(review): this guard uses (.gt. one .or. one .le.) where
c        the sibling routines use (.ge. one .or. one .lt.) -- confirm
c        the strictness difference is intended.
         if (sum .gt. one .or. one .le. sum*FLMAX) then
            call dscal( p, (one/sum), mu(1,k), 1)
c           ||mu(.,k) - pmu||^2 via dot products, then shrink the mean
            temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1)
            temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1)
            const = sum+pshrnk
            call dscal( p, (sum/const), mu(1,k), 1)
            call daxpy(p, (pshrnk/const), pmu, 1, mu(1,k), 1)
            if (sigsq .ne. FLMAX) then
               sigsq = sigsq + ((pshrnk*sum)/const)*temp
               do i = 1, n
                  sum = zero
                  do j = 1, p
                     temp = abs(x(i,j) - mu(j,k))
                     if (temp .gt. RTMIN) sum = sum + temp*temp
                  end do
                  if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN)
     *               sigsq = sigsq + z(i,k)*sum
               end do
            end if
         else
            sigsq = FLMAX
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
      if (sigsq .eq. FLMAX) return
c     posterior denominator includes the prior degrees of freedom
      temp = pdof + sumz*dble(p) + two
      if (pshrnk .gt.
zero) temp = temp + dble(G*p) sigsq = sigsq / temp return end subroutine esevi ( x, mu, scale, shape, pro, n, p, G, * Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision scale, hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p,G), pro(G[+1]) double precision mu(p,*), shape(p,*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision smin, smax, prok double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (scale .le. zero) then hood = FLMAX return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .eq. zero) then hood = FLMAX return end if end do temp = sqrt(scale) do k = 1, G do j = 1, p shape(j,k) = temp*sqrt(shape(j,k)) end do end do const = dble(p)*(pi2log+log(scale)) do k = 1, G c prok = pro(k) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (shape(j,k) .lt. one .and. * abs(temp) .ge. shape(j,k)*FLMAX) then hood = FLMAX return end if temp = temp/shape(j,k) if (abs(temp) .ge. RTMAX) then hood = FLMAX return end if if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. 
zero) then
c        --- tail of esevi: optional noise column, then log-sum-exp
c            row normalization and log-likelihood accumulation ---
         nz = nz + 1
c        call dcopy( n, log(Vinv), 0, z(1,nz), 1)
         dummy(1) = log(Vinv)
         call dcopy( n, dummy, 0, z(1,nz), 1)
      end if
c     hood = zero
c     do i = 1, n
c        sum = zero
c        do k = 1, nz
c           sum = sum + z(i,k)
c        end do
c        hood = hood + log(sum)
c        call dscal( nz, (one/sum), z(i,1), n)
c     end do
      hood = zero
      do i = 1, n
         tmin =  FLMAX
         tmax = -FLMAX
         do k = 1, nz
            prok = pro(k)
            if (prok .eq. zero) then
               z(i,k) = zero
            else
               temp = log(prok) + z(i,k)
               tmin = min(tmin,temp)
               tmax = max(tmax,temp)
               z(i,k) = temp
            end if
         end do
         sum = zero
         do k = 1, nz
            if (pro(k) .ne. zero) then
               temp = z(i,k) - tmax
               if (temp .ge. SMALOG) then
                  z(i,k) = exp(temp)
                  sum = sum + z(i,k)
               else
                  z(i,k) = zero
               end if
            end if
         end do
         hood = hood + (log(sum)+tmax)
         if (sum .lt. one .and. one .ge. sum*FLMAX) then
            hood = FLMAX
            return
         end if
         call dscal( nz, (one/sum), z(i,1), n)
      end do
      return
      end
      subroutine meevi ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps,
     *                   mu, scale, shape, pro)
c
c  EM iteration for the EVI parameterization (common scale, per-group
c  diagonal shape).  EQPRO forces equal mixing proportions; Vinv > 0
c  adds a uniform noise component as column G+1.  On exit tol/eps/maxi
c  return the final error, log-likelihood (eps = +/-FLMAX on failure)
c  and iteration count.
c
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
      implicit NONE
      logical            EQPRO
      integer            n, p, G, maxi
      double precision   Vinv, eps, tol, scale
c     double precision   x(n,p), z(n,G[+1])
      double precision   x(n,*), z(n, * )
c     double precision   mu(p,G), shape(p,G), pro(G[+1])
      double precision   mu(p,*), shape(p,*), pro( * )
      integer            nz, iter, i, j, k
      double precision   sum, sumz, temp, term, epsmin
      double precision   hold, hood, err, smin, smax, const
      double precision   prok, tmin, tmax, zsum, rteps
      double precision   zero, one, two
      parameter         (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision   pi2log
      parameter         (pi2log = 1.837877066409345d0)
      double precision   FLMAX
      parameter         (FLMAX = 1.7976931348623157d308)
      double precision   RTMIN
      parameter         (RTMIN = 1.49166814624d-154)
      double precision   SMALOG, BIGLOG
      parameter         (SMALOG = -708.d0, BIGLOG = 709.d0)
      double precision   dummy(1)
c------------------------------------------------------------------------------
      if (maxi .le. 0) return
      if (Vinv .gt. zero) then
         nz = G + 1
c        ViLog = log(Vinv)
      else
         nz = G
c        if (EQPRO) call dscal( G, one/dble(G), pro, 1)   wrong?
         if (EQPRO) then
c           call dcopy( G, one/dble(G), 0, pro, 1)
            dummy(1) = one/dble(G)
            call dcopy( G, dummy, 0, pro, 1)
         end if
      end if
      eps = max(eps,zero)
      tol = max(tol,zero)
      rteps = sqrt(eps)
c     FLMAX = d1mach(2)
      hold = FLMAX/two
      hood = FLMAX
      err  = FLMAX
      iter = 0
c     --- EM loop: M-step (means, shape, scale, proportions) then
c         E-step ---
  100 continue
      iter = iter + 1
      sumz = zero
      zsum = one
      do k = 1, G
         dummy(1) = zero
c        call dcopy( p, zero, 0, shape(1,k), 1)
         call dcopy( p, dummy, 0, shape(1,k), 1)
         sum = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do i = 1, n
            temp = z(i,k)
            sum  = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         sumz = sumz + sum
         if (.not. EQPRO) pro(k) = sum /dble(n)
         zsum = min(sum,zsum)
         if (sum .gt. rteps) then
            call dscal( p, (one/sum), mu(1,k), 1)
c           per-coordinate weighted sums of squares -> shape(.,k)
            do j = 1, p
               sum = zero
               do i = 1, n
                  temp = x(i,j) - mu(j,k)
                  if (sqrt(z(i,k))*abs(temp) .gt. RTMIN)
     *               sum = sum + z(i,k)*(temp*temp)
               end do
               shape(j,k) = shape(j,k) + sum
            end do
         else
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
c           call dcopy( p, FLMAX, 0, shape(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
            call dcopy( p, dummy, 0, shape(1,k), 1)
         end if
      end do
      if (zsum .le. rteps) then
         tol  = zsum
         eps  = -FLMAX
         maxi = iter
         return
      end if
c     normalize each shape(.,k) to unit geometric mean; the geometric
c     means accumulate into scale (log-space, overflow-guarded)
      scale = zero
      epsmin = FLMAX
      do k = 1, G
         call sgnrng(p, shape(1,k), 1, smin, smax)
         epsmin = min(smin,epsmin)
         if (smin .gt. zero) then
            sum = zero
            do j = 1, p
               sum = sum + log(shape(j,k))
            end do
            temp = sum/dble(p)
            if (temp .gt. BIGLOG) then
               scale = FLMAX
c              call dcopy( p, FLMAX, 0, shape(1,k), 1)
               dummy(1) = FLMAX
               call dcopy( p, dummy, 0, shape(1,k), 1)
               tol  = err
               eps  = FLMAX
               maxi = iter
               return
            end if
            if (temp .gt. SMALOG) then
               temp = exp(temp)
            else
               temp = zero
            end if
            scale = scale + temp
            epsmin = min(temp,epsmin)
            if (temp .lt. eps) then
               scale = FLMAX
c              call dcopy( p, FLMAX, 0, shape(1,k), 1)
               dummy(1) = FLMAX
               call dcopy( p, dummy, 0, shape(1,k), 1)
               tol  = err
               eps  = FLMAX
               maxi = iter
               return
            end if
            call dscal( p, one/temp, shape(1,k), 1)
         end if
      end do
      term = zero
      if (Vinv .gt. zero) then
         scale = scale /sumz
c        noise-component proportion and constant log-density column
         do i = 1, n
            term = term + z(i,nz)
         end do
         temp = term / dble(n)
         pro(nz) = temp
c        call dcopy( n, ViLog, 0, z(1,nz), 1)
         dummy(1) = log(Vinv)
         call dcopy( n, dummy, 0, z(1,nz), 1)
         if (EQPRO) then
c           temp = (one - pro(nz))/dble(G)
c           call dcopy( G, temp, 0, pro, 1)
            dummy(1) = (one - pro(nz))/dble(G)
            call dcopy( G, dummy, 0, pro, 1)
         end if
      else
         scale = scale /dble(n)
      end if
      if (scale .le. eps) then
         tol  = epsmin
         eps  = FLMAX
         maxi = iter
         return
      end if
      do k = 1, G
         call sgnrng( p, shape(1,k), 1, smin, smax)
         if (smin .le. eps) then
            tol  = smin
            eps  = FLMAX
            maxi = iter
            return
         end if
      end do
c     E-step: z holds log densities; normalized by log-sum-exp below
      const = dble(p)*(pi2log + log(scale))
      do k = 1, G
         do i = 1, n
            sum = zero
            do j = 1, p
               temp = x(i,j) - mu(j,k)
               sum  = sum + (temp*temp)/shape(j,k)
            end do
c           z(i,k) = pro(k)*exp(-(const+(sum/scale))/two)
            z(i,k) = -(const+(sum/scale))/two
         end do
      end do
      hood = zero
      do i = 1, n
         tmin =  FLMAX
         tmax = -FLMAX
         do k = 1, nz
            prok = pro(k)
            if (prok .eq. zero) then
               z(i,k) = zero
            else
               temp = log(prok) + z(i,k)
               tmin = min(tmin,temp)
               tmax = max(tmax,temp)
               z(i,k) = temp
            end if
         end do
         sum = zero
         do k = 1, nz
            if (pro(k) .ne. zero) then
               temp = z(i,k) - tmax
               if (temp .ge. SMALOG) then
                  z(i,k) = exp(temp)
                  sum = sum + z(i,k)
               else
                  z(i,k) = zero
               end if
            end if
         end do
         hood = hood + (log(sum)+tmax)
         call dscal( nz, (one/sum), z(i,1), n)
      end do
      err  = abs(hold-hood)/(one+abs(hood))
      hold = hood
      if (err .gt. tol .and. iter .lt.
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meevip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol, scale c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), shape(p,G), pro(G[+1]) double precision mu(p,*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sum, sumz, temp, term, epsmin, zsum double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong? 
if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 sumz = zero zsum = one do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum if (.not. EQPRO) pro(k) = sum /dble(n) zsum = min(sum,zsum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p do i = 1, n temp = x(i,j) - mu(j,k) if (abs(temp)*sqrt(z(i,k)) .gt. RTMIN) * shape(j,k) = shape(j,k) + z(i,k)*(temp*temp) end do temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sum/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if scale = zero epsmin = FLMAX do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .gt. zero) then sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) tol = err eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if scale = scale + temp epsmin = min(temp,epsmin) if (temp .lt. 
eps) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) tol = err eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do term = pdof + one if (Vinv .le. zero) then term = term + dble(n) else term = term + sumz end if if (pshrnk .gt. zero) term = term + one scale = scale/term if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (scale .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if end do const = dble(p)*(pi2log + log(scale)) do k = 1, G do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scale))/two) z(i,k) = -(const+(sum/scale))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine msevi ( x, z, n, p, G, mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p,G), pro(G) double precision mu(p,*), scale, shape(p,*), pro(*) integer i, j, k double precision smin, smax double precision sum, sumz, temp double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ scale = zero sumz = zero do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, shape(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do sumz = sumz + sum pro(k) = sum/dble(n) if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) else scale = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (scale .eq. FLMAX) return c pro(k) now contains n_k do j = 1, p do k = 1, G sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum end do end do scale = zero do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. 
zero) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if if (scale .ne. FLMAX) scale = scale + temp if (temp .lt. one .and. one .ge. temp*FLMAX) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if call dscal( p, one/temp , shape(1,k), 1) 100 continue end do if (sumz .lt. one .and. one .ge. sumz*FLMAX) then scale = FLMAX return end if scale = scale/sumz return end subroutine msevip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale, shape(p,G), pro(G) double precision mu(p,*), scale, shape(p,*), pro(*) integer i, j, k double precision sum, sumz, temp, term double precision smin, smax, const double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. 
zero) pshrnk = zero sumz = zero scale = zero do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum pro(k) = sum /dble(n) if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) term = pshrnk + sum const = (pshrnk*sum)/term do j = 1, p do i = 1, n temp = x(i,j) - mu(j,k) if (abs(temp)*sqrt(z(i,k)) .gt. RTMIN) * shape(j,k) = shape(j,k) + z(i,k)*(temp*temp) end do temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sum/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else scale = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do if (scale .eq. FLMAX) return scale = zero do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then scale = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero endif if (scale .ne. FLMAX) scale = scale + temp if (temp .le. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if call dscal( p, one/temp, shape(1,k), 1) 100 continue end do term = pdof + sumz + two if (pshrnk .gt. 
zero) term = term + dble(G) scale = scale/term return end subroutine es1v ( x, mu, sigsq, pro, n, G, Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision hood, Vinv c double precision x(n), mu(G), sigsq(G), pro(G[+1]), z(n,G[+1]) double precision x(*), mu(*), sigsq(*), pro( * ), z(n, * ) integer i, k, nz double precision temp, const, tmin, tmax, sum double precision muk, sigsqk, prok, sigmin double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin .le. zero) then hood = FLMAX return end if do k = 1, G c prok = pro(k) muk = mu(k) sigsqk = sigsq(k) const = pi2log + log(sigsqk) do i = 1, n temp = x(i) - muk c z(i,k) = prok*exp(-(const+(temp*temp)/sigsqk)/two) if (sigsqk .lt. one .and. * abs(temp) .ge. sqrt(sigsqk)*RTMAX) then hood = FLMAX return end if z(i,k) = -(const+(temp*temp)/sigsqk)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c temp = zero c do k = 1, nz c temp = temp + z(i,k) c end do c hood = hood + log(temp) c call dscal( nz, (one/temp), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine hc1v ( x, n, ic, ng, ns, ALPHA, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, ic(n), ng, ns, nd integer n, ic(*), ng, ns, nd c double precision x(n), ALPHA, d(ng*(ng-1)/2) double precision x(*), ALPHA, d(*) integer lg, ld, ll, lo, ls, i, j, k, m integer ni, nj, nij, nopt, niop, njop integer ij, ici, icj, iopt, jopt, iold double precision ALFLOG double precision qi, qj, qij, ri, rj, rij, si, sj double precision tracei, tracej, trcij, trop double precision termi, termj, trmij, tmop double precision temp, dij, dopt, siop, sjop double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision sqrthf parameter (sqrthf = .70710678118654757274d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision ddot external ddot c------------------------------------------------------------------------------ c call dblepr( 'x', -1, x, n) c call intpr( 'n', -1, n, 1) c call intpr( 'ic', -1, ic, n) c call intpr( 'ng', -1, ng, 1) c call intpr( 'ns', -1, ns, 1) c call dblepr( 'alpha', -1, alpha, 1) c call intpr( 'nd', -1, nd, 1) iopt = 0 jopt = 0 niop = 0 njop = 0 nopt = 0 siop = 0 sjop = 0 tmop = 0.d0 trop = 0.d0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd if (ng .eq. 
1) return ALPHA = max(ALPHA,EPSMAX) ALFLOG = log(ALPHA) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. lg) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. k) then k = k + 1 c call dswap( p, x(k,1), n, x(j,1), n) temp = x(k) x(k) = x(j) x(j) = temp ic(j) = ic(k) ic(k) = icj end if end do 3 continue c set up pointers do j = 1, n i = ic(j) if (i .ne. j) then c update sum of squares k = ic(i) if (k .eq. 1) then ic(i) = j ic(j) = 2 c call dscal( p, sqrthf, x(i,1), n) c call dscal( p, sqrthf, x(j,1), n) c call dcopy( p, x(j,1), n, v, 1) c call daxpy( p, (-one), x(i,1), n, v, 1) c call daxpy( p, one, x(j,1), n, x(i,1), n) c x(j,1) = ddot( p, v, 1, v, 1) temp = sqrthf*(x(j) - x(i)) x(i) = sqrthf*(x(j) + x(i)) x(j) = temp*temp else ic(j) = 0 ni = ic(k) ic(k) = ni + 1 ri = dble(ni) rij = dble(ni+1) qj = one/rij qi = ri*qj si = sqrt(qi) sj = sqrt(qj) c call dcopy( p, x(j,1), n, v, 1) c call dscal( p, si, v, 1) c call daxpy( p, (-sj), x(i,1), n, v, 1) c x(k,1) = x(k,1) + ddot(p, v, 1, v, 1) c call dscal( p, si, x(i,1), n) c call daxpy( p, sj, x(j,1), n, x(i,1), n) temp = si*x(j) - sj*x(i) x(k) = x(k) + temp*temp x(i) = si*x(i) + sj*x(j) end if else ic(j) = 1 end if end do c store terms also so as not to recompute them do k = 1, ng i = ic(k) if (i .ne. 1) then ni = ic(i) ri = dble(ni) d(nd-k+1) = ri*log((x(i)+ALPHA)/ri) end if end do c call intpr( 'ic', -1, ic, n) c compute change in likelihood and determine minimum dopt = FLMAX ij = 0 do j = 2, ng nj = ic(j) if (nj .eq. 1) then tracej = zero termj = ALFLOG rj = one else tracej = x(nj) nj = ic(nj) rj = dble(nj) termj = d(nd-j+1) end if do i = 1, (j-1) ni = ic(i) if (ni .eq. 
1) then tracei = zero termi = ALFLOG ri = one else tracei = x(ni) ni = ic(ni) ri = dble(ni) termi = d(nd-i+1) end if nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) c call dcopy(p, x(i,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) temp = sj*x(i) - si*x(j) c trcij = (tracei + tracej) + ddot(p,v,1,v,1) trcij = (tracei + tracej) + temp*temp trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j end if end do end do c call dblepr( 'dij', -1, d, (ng*(ng-1))/2) if (ns .eq. 1) then if (iopt .lt. jopt) then x(1) = dble(iopt) ic(1) = jopt else x(1) = dble(jopt) ic(1) = iopt end if d(1) = dopt return end if if (niop .ne. 1) ic(ic(iopt)) = 0 if (njop .ne. 1) ic(ic(jopt)) = 0 ls = 1 100 continue c if (.false.) then c ij = 1 c jj = 1 c do j = 2, n c nj = ic(j) c if (nj .ne. 0 .and. abs(nj) .le. n) then c call dblepr( 'dij', -1, d(ij), jj) c ij = ij + jj c jj = jj + 1 c end if c end do c end if c call dscal( p, siop, x(iopt,1), n) c call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) x(iopt) = siop*x(iopt)+sjop*x(jopt) if (jopt .ne. lg) then call wardsw( jopt, lg, d) c call dcopy( p, x(lg,1), n, x(jopt,1), n) x(jopt) = x(lg) m = ic(jopt) ic(jopt) = ic(lg) ic(lg) = m end if ic(iopt) = lg c ic(lg) = nopt c x(lg,1) = trop x(lg) = trop c x(lg,2) = tmop d(lo) = dopt lo = lo - 1 ic(lg) = lo d(lo) = tmop lo = lo - 1 d(lo) = dble(nopt) lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) lo = lo - 1 lg = lg - 1 ld = ld - lg iold = iopt iopt = -1 jopt = -1 dopt = FLMAX ni = nopt ri = dble(ni) tracei = trop termi = tmop ij = ((iold-1)*(iold-2))/2 if (iold .gt. 1) then do j = 1, (iold - 1) nj = ic(j) if (nj .ne. 
1) then c tracej = x(nj,1) tracej = x(nj) k = ic(nj) termj = d(k) nj = int(d(k-1)) rj = dble(nj) else tracej = zero termj = ALFLOG rj = one end if nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) c call dcopy( p, x(iold,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) temp = sj*x(iold)-si*x(j) c trcij = (tracei + tracej) + ddot(p,v,1,v,1) trcij = (tracei + tracej) + temp*temp trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij iopt = j jopt = iold nopt = nij niop = ni njop = nj sjop = si siop = sj end if end do end if if (iold .lt. lg) then i = iold ij = ij + i do j = (iold + 1), lg nj = ic(j) if (nj .ne. 1) then c tracej = x(nj,1) tracej = x(nj) k = ic(nj) termj = d(k) nj = int(d(k-1)) rj = dble(nj) else tracej = zero termj = ALFLOG rj = one end if nij = ni + nj rij = dble(nij) qij = one /rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) c call dcopy( p, x(iold,1), n, v, 1) c call dscal( p, sj, v, 1) c call daxpy( p, (-si), x(j,1), n, v, 1) temp = sj*x(iold) - si*x(j) c trcij = (tracei + tracej) + ddot(p,v,1,v,1) trcij = (tracei + tracej) + temp*temp trmij = rij*log((trcij+ALPHA)/rij) dij = trmij - (termi + termj) d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij iopt = iold jopt = j nopt = nij niop = ni njop = nj siop = si sjop = sj end if ij = ij + i i = j end do end if c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 dopt = d(1) do i = 2, ld qi = d(i) if (qi .le. dopt) then ij = i dopt = qi end if end do if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if i = ic(iopt) j = ic(jopt) if (iopt .ne. iold .and. jopt .ne. iold) then if (i .ne. 
1) then tracei = x(i) ici = ic(i) termi = d(ici) niop = int(d(ici-1)) ri = dble(niop) else tracei = zero termi = ALFLOG niop = 1 ri = one end if if (j .ne. 1) then c tracej = x(j,1) tracej = x(j) icj = ic(j) termj = d(icj) njop = int(d(icj-1)) rj = dble(njop) else tracej = zero termj = ALFLOG njop = 1 rj = one end if nopt = niop + njop rij = dble(nopt) qij = one/rij qi = ri*qij qj = rj*qij siop = sqrt(qi) sjop = sqrt(qj) c call dcopy( p, x(iopt,1), n, v, 1) c call dscal( p, sjop, v, 1) c call daxpy( p, (-siop), x(jopt,1), n, v, 1) temp = sjop*x(iopt)-siop*x(jopt) c trop = (tracei + tracej) + ddot(p,v,1,v,1) trop = (tracei + tracej) + temp*temp tmop = rij*log((trop+ALPHA)/rij) end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 100 900 continue d(lo) = dopt lo = lo - 1 d(lo) = tmop lo = lo - 1 d(lo) = dble(nopt) lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 3 ld = nd - 1 si = d(lo) lo = lo - 1 sj = d(lo) lo = lo - 1 ic(int(sj)) = ng if (si .lt. sj) then x(1) = si d(ld) = sj else x(1) = sj d(ld) = si end if ld = ld - 1 lg = ng + 1 do k = 2, ns d(ld) = d(lo) ld = ld - 1 lo = lo - 3 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) lo = lo - 1 icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. 
icj) then x(k) = dble(ici) d(ld) = dble(icj) else x(k) = dble(icj) d(ld) = dble(ici) end if ld = ld - 1 end do ld = nd lo = nd - 1 do k = 1, ns ic(k) = int(d(lo)) lo = lo - 1 ld = ld - 1 d(ld) = d(lo) lo = lo - 1 end do ld = nd lo = 1 do k = 1, ns si = d(lo) d(lo) = d(ld) d(ld) = si ld = ld - 1 lo = lo + 1 end do return end subroutine me1v ( EQPRO, x, n, G, Vinv, z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, G, maxi double precision Vinv, eps, tol c double precision x(n), z(n,G[+1]), mu(G), sigsq(G), pro(G[+1]) double precision x(*), z(n, * ), mu(*), sigsq(*), pro( * ) integer nz, iter, k, i double precision hold, hood, err, sum, smu, zsum double precision const, temp, term, sigmin, sigsqk double precision prok, tmin, tmax, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G sum = zero smu = zero do i = 1, n temp = z(i,k) sum = sum + temp smu = smu + temp*x(i) end do if (.not. EQPRO) pro(k) = sum / dble(n) zsum = min(sum,zsum) if (sum .gt. 
rteps) then smu = smu / sum mu(k) = smu sigsqk = zero do i = 1, n temp = x(i) - smu temp = temp*temp sigsqk = sigsqk + z(i,k)*temp z(i,k) = temp end do sigsq(k) = sigsqk / sum end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if sigmin = FLMAX do k = 1, G sigmin = min(sigmin,sigsq(k)) end do if (sigmin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G sigsqk = sigsq(k) const = pi2log + log(sigsqk) do i = 1, n c z(i,k) = temp*exp(-(const+(z(i,k)/sigsqk))/two) z(i,k) = -(const+(z(i,k)/sigsqk))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine me1vp ( EQPRO, x, n, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, G, maxi double precision pshrnk, pmu, pscale, pdof double precision Vinv, eps, tol c double precision x(n), z(n,G[+1]), mu(G), sigsq(G), pro(G[+1]) double precision x(*), z(n, * ), mu(*), sigsq(*), pro( * ) integer nz, iter, k, i double precision hold, hood, err, pmupmu double precision sumz, sum, smu, zsum, rteps double precision const, temp, term, sigmin, sigsqk double precision prok, tmin, tmax double precision cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision three parameter (three = 3.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dlngam external dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX pmupmu = pmu*pmu iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G sumz = zero smu = zero do i = 1, n temp = z(i,k) sumz = sumz + temp smu = smu + temp*x(i) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(zsum,sumz) if (sumz .gt. 
rteps) then smu = smu/sumz sum = zero do i = 1, n term = abs(x(i) - smu) if (term .ge. eps .or. sqrt(z(i,k))*term .gt. RTMIN) * sum = sum + z(i,k)*(term*term) end do term = (pshrnk*sumz)/(pshrnk+sumz) temp = (pmupmu + smu*smu) - two*pmu*smu sigsq(k) = (pscale + sum + term*temp)/(pdof+sumz+three) term = sumz/(pshrnk+sumz) temp = pshrnk/(pshrnk+sumz) mu(k) = term*smu + temp*pmu end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if sigmin = FLMAX do k = 1, G sigmin = min(sigmin,sigsq(k)) end do if (sigmin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G sigsqk = sigsq(k) const = pi2log + log(sigsqk) do i = 1, n term = abs(x(i) - mu(k)) if (term .gt. RTMIN) then z(i,k) = -(const+((term*term)/sigsqk))/two else z(i,k) = -const/two end if end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 tol = err eps = hood maxi = iter cmu = dble(G)*(pi2log-log(pshrnk))/two const = pdof/two cgam = dble(G)*(const*log(pscale/two) - dlngam(const)) rmu = zero rgam = zero do k = 1, G temp = pmu - mu(k) temp = temp*temp term = log(sigsq(k)) rmu = rmu + (term + (pshrnk/sigsq(k))*temp) rgam = rgam + ((pdof+3.d0)*term + pscale/sigsq(k)) end do rmu = -rmu /two rgam = -rgam/two pdof = (cmu+cgam) + (rmu+rgam) return end subroutine ms1v ( x, z, n, G, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G c double precision x(n), z(n,G), mu(G), sigsq(G), pro(G) double precision x(*), z(n,*), mu(*), sigsq(*), pro(*) integer i, k double precision sum, smu, temp, sigsqk double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) c------------------------------------------------------------------------------ do k = 1, G sum = zero smu = zero do i = 1, n temp = z(i,k) sum = sum + temp smu = smu + temp*x(i) end do pro(k) = sum / dble(n) if (sum .gt. one .or. smu .le. 
sum*FLMAX) then smu = smu / sum mu(k) = smu sigsqk = zero do i = 1, n temp = abs(x(i) - smu) sigsqk = sigsqk + z(i,k)*(temp*temp) end do sigsq(k) = sigsqk / sum else mu(k) = FLMAX sigsq(k) = FLMAX end if end do return end subroutine ms1vp ( x, z, n, G, * pshrnk, pmu, pscale, pdof, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, G double precision pshrnk, pmu, pscale, pdof c double precision x(n), z(n,G), mu(G), sigsq(G), pro(G) double precision x(*), z(n,*), mu(*), sigsq(*), pro(*) integer k, i double precision pmupmu double precision sumz, sum, smu double precision temp, term double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero pmupmu = pmu*pmu do k = 1, G sumz = zero smu = zero do i = 1, n temp = z(i,k) sumz = sumz + temp smu = smu + temp*x(i) end do pro(k) = sumz / dble(n) if (sumz .gt. one .or. smu .le. sumz*FLMAX) then smu = smu/sumz term = sumz/(pshrnk+sumz) temp = pshrnk/(pshrnk+sumz) mu(k) = term*smu + temp*pmu sum = zero do i = 1, n term = abs(x(i) - smu) sum = sum + z(i,k)*(term*term) end do term = (pshrnk*sumz)/(pshrnk+sumz) temp = (pmupmu + smu*smu) - two*pmu*smu if (pshrnk .gt. 
zero) then sigsq(k) = (pscale + sum + term*temp)/(pdof+sumz+3.d0) else sigsq(k) = (pscale + sum + term*temp)/(pdof+sumz+two) end if else mu(k) = FLMAX sigsq(k) = FLMAX end if end do return end subroutine esvei ( x, mu, scale, shape, pro, n, p, G, * Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), scale(G), shape(p), pro(G[+1]) double precision mu(p,*), scale(*), shape(*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision smin, smax, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- call sgnrng( G, scale, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if do j = 1, p shape(j) = sqrt(shape(j)) end do do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log+log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (shape(j) .lt. one .and. * abs(temp) .ge. shape(j)*FLMAX) then hood = FLMAX return end if temp = temp/shape(j) if (abs(temp) .ge. RTMAX) then hood = FLMAX return end if if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum/scalek)/two) if (scalek .lt. one .and. 
* sum .ge. scalek*FLMAX) then hood = FLMAX return end if z(i,k) = -(const+sum/scalek)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. * one .le. sum*FLMAX) then hood = FLMAX return end if if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine mevei ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, scale, shape, pro, scl, shp, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi(2) double precision Vinv, eps, tol(2) c double precision x(n,p), z(n,G[+1]), scl(G), shp(p), w(p,G) double precision x(n,*), z(n, * ), scl(*), shp(*), w(p,*) c double precision mu(p,G), scale(G), shape(p), pro(G[+1]) double precision mu(p,*), scale(*), shape(*), pro( * ) integer nz, i, j, k integer iter, maxi1, maxi2, inner, inmax double precision tol1, tol2, sum, temp, term, tmin, tmax double precision prok, scalek, smin, smax, const, zsum double precision hold, hood, err, errin, dnp, rteps double precision zero, one, two parameter (zero = 
0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ maxi1 = maxi(1) maxi2 = max(maxi(2),0) if (maxi1 .le. 0) return dnp = dble(n*p) inmax = 0 if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) tol1 = max(tol(1),zero) tol2 = max(tol(2),zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX errin = FLMAX c start with shape and scale equal to 1 c call dcopy(p, one, 0, shape, 1) c call dcopy(G, one, 0, scale, 1) dummy(1) = one call dcopy(p, dummy, 0, shape, 1) call dcopy(G, dummy, 0, scale, 1) iter = 0 100 continue inner = 0 zsum = one do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sum zsum = min(zsum,sum) if (sum .gt. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j,k) sum = sum + z(i,k)*(temp*temp) end do w(j,k) = sum end do end if end do call dscal( G, dble(p), pro, 1) if (zsum .le. rteps) then eps = -FLMAX tol(1) = zsum tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if if (maxi2 .le. 0) goto 120 110 continue call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if inner = inner + 1 c scale estimate call dcopy( G, scale, 1, scl, 1) do k = 1, G sum = zero do j = 1, p sum = sum + w(j,k)/shape(j) end do scale(k) = sum/pro(k) end do call sgnrng(G, scale, 1, smin, smax) if (smin .le. 
eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if c shape estimate call dcopy( p, shape, 1, shp, 1) do j = 1, p sum = zero do k = 1, G sum = sum + w(j,k)/scale(k) end do shape(j) = sum end do call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .le. eps) then eps = temp tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if call dscal( p, one/temp, shape, 1) errin = zero do k = 1, G errin = max(errin, abs(scl(k)-scale(k))/(one + scale(k))) end do do j = 1, p errin = max(errin, abs(shp(j)-shape(j))/(one + shape(j))) end do if (errin .gt. tol2 .and. inner .le. maxi2) goto 110 120 continue iter = iter + 1 inmax = max(inner, inmax) if (.not. EQPRO) call dscal( G, one/dnp, pro, 1) term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong? if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = inmax return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. 
eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = inmax return end if do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log+log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j) end do c z(i,k) = prok*exp(-(const+sum/scalek)/two) z(i,k) = -(const+sum/scalek)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol1 .and. iter .lt. maxi1) goto 100 tol(1) = err tol(2) = errin eps = hood maxi(1) = iter maxi(2) = inmax return end subroutine meveip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro, scl, shp, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi(2) double precision Vinv, eps, tol(2) c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G[+1]), scl(G), shp(p), w(p,G) double precision x(n,*), z(n, * ), scl(*), shp(*), w(p,*) c double precision mu(p,G), scale(G), shape(p), pro(G[+1]) double precision mu(p,*), scale(*), shape(*), pro( * ) integer nz, i, j, k integer iter, maxi1, maxi2, inner, inmax double precision tol1, tol2, sum, temp, term, tmin, tmax double precision prok, scalek, smin, smax, const, sumz double precision hold, hood, err, 
errin, dnp, zsum, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero maxi1 = maxi(1) maxi2 = max(maxi(2),0) if (maxi1 .le. 0) return dnp = dble(n*p) inmax = 0 if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) tol1 = max(tol(1),zero) tol2 = max(tol(2),zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX errin = FLMAX c start with shape and scale equal to 1 c call dcopy(p, one, 0, shape, 1) c call dcopy(G, one, 0, scale, 1) dummy(1) = one call dcopy(p, dummy, 0, shape, 1) call dcopy(G, dummy, 0, scale, 1) iter = 0 100 continue inner = 0 zsum = one do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz zsum = min(zsum,sumz) if (sumz .gt. rteps) then term = pshrnk + sumz const = (pshrnk*sumz)/term call dscal( p, (one/sumz), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j,k) sum = sum + z(i,k)*(temp*temp) end do temp = pmu(j) - mu(j,k) w(j,k) = pscale + sum + const*(temp*temp) end do call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) end if end do call dscal( G, dble(p), pro, 1) if (zsum .le. rteps) then eps = -FLMAX tol(1) = zsum tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if if (maxi2 .le. 0) goto 120 110 continue call sgnrng(p, shape, 1, smin, smax) if (smin .le. 
eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if inner = inner + 1 c scale estimate call dcopy( G, scale, 1, scl, 1) temp = pdof + two if (pshrnk .gt. zero) temp = temp + one do k = 1, G sum = zero do j = 1, p sum = sum + w(j,k)/shape(j) end do scale(k) = sum/(pro(k)+temp) end do call sgnrng(G, scale, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if c shape estimate call dcopy( p, shape, 1, shp, 1) do j = 1, p sum = zero do k = 1, G sum = sum + w(j,k)/scale(k) end do shape(j) = sum end do call sgnrng(p, shape, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .le. eps) then eps = temp tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = max(inner,inmax) return end if call dscal( p, one/temp, shape, 1) errin = zero do k = 1, G errin = max(errin, abs(scl(k)-scale(k))/(one + scale(k))) end do do j = 1, p errin = max(errin, abs(shp(j)-shape(j))/(one + shape(j))) end do if (errin .gt. tol2 .and. inner .le. maxi2) goto 110 120 continue iter = iter + 1 inmax = max(inner, inmax) if (.not. EQPRO) call dscal( G, one/dnp, pro, 1) term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dscal( G, one/dble(G), pro, 1) wrong? 
if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 1, pro, 1) end if end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = inmax return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then eps = FLMAX tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = inmax return end if do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log+log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j) end do c z(i,k) = prok*exp(-(const+sum/scalek)/two) z(i,k) = -(const+sum/scalek)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol1 .and. iter .lt. 
maxi1) goto 100 tol(1) = err tol(2) = errin eps = hood maxi(1) = iter maxi(2) = inmax return end subroutine msvei ( x, z, n, p, G, maxi, tol, * mu, scale, shape, pro, scl, shp, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G, maxi double precision tol c double precision x(n,p), z(n,G), scl(G), shp(p), w(p,G) double precision x(n,*), z(n,*), scl(*), shp(*), w(p,*) c double precision mu(p,G), scale(G), shape(p), pro(G) double precision mu(p,*), scale(*), shape(*), pro(*) integer i, j, k, inner double precision sum, temp, smin, smax, err double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ tol = max(tol,zero) err = FLMAX c start with the equal volume and shape estimate do k = 1, G sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sum if (sum .gt. one .or. one .lt. sum*FLMAX) then err = min(err,sum) call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j,k) temp = temp*temp temp = z(i,k)*temp sum = sum + temp end do w(j,k) = sum end do else err = -FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (err .lt. 
zero) then call dscal( G, one/dble(n), pro, 1) c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) tol = FLMAX maxi = 0 return end if c call dcopy( p, one, 0, shape, 1) c call dcopy( G, one, 0, scale, 1) dummy(1) = one call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) call dscal( G, dble(p), pro, 1) inner = 0 err = FLMAX 100 continue call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) goto 200 inner = inner + 1 c scale estimate call dcopy( G, scale, 1, scl, 1) do k = 1, G sum = zero do j = 1, p if (shape(j) .gt. one .or. * w(j,k) .lt. shape(j)*FLMAX) then sum = sum + w(j,k)/shape(j) else scale(k) = FLMAX goto 110 end if end do scale(k) = sum/pro(k) 110 continue end do call sgnrng(G, scale, 1, smin, smax) if (smin .le. zero .or. smax .eq. FLMAX) goto 200 c shape estimate call dcopy( p, shape, 1, shp, 1) do j = 1, p sum = zero do k = 1, G if (scale(k) .gt. one .or. w(j,k) .lt. scale(k)*FLMAX) then sum = sum + w(j,k)/scale(k) else shape(j) = FLMAX goto 120 end if end do shape(j) = sum 120 continue end do call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero .or. smax .eq. FLMAX) goto 200 sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then c call dcopy( G, FLMAX, 0, scale, 1) c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( G, dummy, 0, scale, 1) call dcopy( p, dummy, 0, shape, 1) goto 200 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if call dscal( p, one/temp, shape, 1) err = zero do k = 1, G err = max(err, abs(scl(k) - scale(k))/(one + scale(k))) end do do j = 1, p err = max(err, abs(shp(j) - shape(j))/(one + shape(j))) end do if (err .gt. tol .and. inner .le. 
maxi) goto 100 200 continue call dscal( G, one/dble(n*p), pro, 1) tol = err maxi = inner return end subroutine msveip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * maxi, tol, * mu, scale, shape, pro, scl, shp, w) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G, maxi double precision tol c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G), scl(G), shp(p), w(p,G) double precision x(n,*), z(n,*), scl(*), shp(*), w(p,*) c double precision mu(p,G), scale(G), shape(p), pro(G) double precision mu(p,*), scale(*), shape(*), pro(*) integer i, j, k, inner double precision sum, temp, term, err double precision smin, smax, const, sumz double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c------------------------------------------------------------------------------ tol = max(tol,zero) err = FLMAX c start with shape and scale equal to 1 do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz if (sumz .gt. one .or. one .lt. 
sumz*FLMAX) then err = min(err,sumz) term = pshrnk + sumz const = (pshrnk*sumz)/term call dscal( p, (one/sumz), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j,k) sum = sum + z(i,k)*(temp*temp) end do temp = pmu(j) - mu(j,k) w(j,k) = pscale + sum + const*(temp*temp) end do call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else err = -FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (err .lt. zero) then call dscal( G, one/dble(n), pro, 1) c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) tol = FLMAX maxi = 0 return end if c call dcopy(p, one, 0, shape, 1) c call dcopy(G, one, 0, scale, 1) dummy(1) = one call dcopy(p, dummy, 0, shape, 1) call dcopy(G, dummy, 0, scale, 1) call dscal( G, dble(p), pro, 1) if (maxi .le. 0) return inner = 0 err = FLMAX 100 continue call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) goto 200 inner = inner + 1 c scale estimate call dcopy( G, scale, 1, scl, 1) do k = 1, G sum = zero do j = 1, p if (shape(j) .ge. one .or. * w(j,k) .le. shape(j)*FLMAX) then sum = sum + w(j,k)/shape(j) else scale(k) = FLMAX goto 110 end if end do temp = pdof + pro(k) + two if (pshrnk .gt. zero) temp = temp + one scale(k) = sum/temp 110 continue end do call sgnrng(G, scale, 1, smin, smax) if (smin .le. zero .or. smax .ge. FLMAX) then c call dcopy( G, FLMAX, 0, scale, 1) c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if c shape estimate call dcopy( p, shape, 1, shp, 1) do j = 1, p sum = zero do k = 1, G if (scale(k) .gt. w(j,k) .or. * w(j,k) .lt. scale(k)*FLMAX) then sum = sum + w(j,k)/scale(k) else shape(j) = FLMAX goto 120 end if end do shape(j) = sum 120 continue end do call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero .or. 
smax .ge. FLMAX) then c call dcopy( G, FLMAX, 0, scale, 1) c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. BIGLOG) then c call dcopy( G, FLMAX, 0, scale, 1) c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) goto 200 end if call dscal( p, one/temp, shape, 1) err = zero do k = 1, G err = max(err, abs(scl(k)-scale(k))/(one + scale(k))) end do do j = 1, p err = max(err, abs(shp(j)-shape(j))/(one + shape(j))) end do if (err .gt. tol .and. inner .le. maxi) goto 100 200 continue call dscal( G, one/dble(n*p), pro, 1) tol = err maxi = inner return end subroutine esvev ( x, mu, scale, shape, O, pro, n, p, G, * Vinv, v, w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p, G integer n, p, G double precision Vinv, hood c double precision x(n,p), z(n,G[+1]), mu(p,G), pro(G[+1]) double precision x(n,*), z(n, * ), mu(p,*), pro( * ) c double precision v(p), w(p) double precision v(*), w(*) c double precision scale(G), shape(p), O(p,p,G) double precision scale(*), shape(*), O(p,p,*) integer i, j, k, nz double precision const, temp, tmin, tmax double precision smin, smax, scalek, prok, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double 
precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ call sgnrng( G, scale, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if do j = 1, p shape(j) = sqrt(shape(j)) end do do k = 1, G scalek = scale(k) const = dble(p)*(pi2log+log(scalek)) c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dgemv( 'N', p, p, one, O(1,1,k), p, * w, 1, zero, v, 1) do j = 1, p if (shape(j) .lt. one .and. * abs(v(j)) .ge. shape(j)*FLMAX) then hood = FLMAX return end if v(j) = v(j)/shape(j) end do temp = ddot( p, v, 1, v, 1) if (scalek .lt. one .and. temp .ge. scalek*FLMAX) then hood = FLMAX return end if temp = temp/scalek c z(i,k) = prok*exp(-(const+temp)/two) z(i,k) = -(const+temp)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine mevev ( EQPRO, x, n, p, G, Vinv, z, * maxi, tol, eps, lwork, * mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi(2), lwork double precision Vinv, eps, tol(2) double precision x(n,*), z(n, * ), w( * ), s(*) double precision mu(p,*), pro( * ) double precision scale(*), shape(*), O(p,p,*) integer maxi1, maxi2, p1, inmax, iter integer nz, i, j, k, l, j1, info, inner double precision tol1, tol2, dnp, term, rteps double precision errin, smin, smax, sumz, tmin, tmax double precision cs, sn, hold, hood, err, zsum double precision const, temp, sum, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ maxi1 = maxi(1) maxi2 = maxi(2) if (maxi1 .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) rteps = sqrt(eps) tol1 = max(tol(1),zero) tol2 = max(tol(2),zero) p1 = p + 1 dnp = dble(n*p) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX errin = FLMAX inmax = 0 iter = 0 100 continue sumz = zero zsum = one l = 0 do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p call dcopy( p, dummy, 0, O(1,j,k), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do sumz = sumz + sum zsum = min(zsum,sum) pro(k) = sum if (sum .ge. rteps) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k), * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = z(j,k) z(j,k) = temp*temp end do end if end if end do iter = iter + 1 if (l .ne. 0 .or. zsum .lt. rteps) then if (Vinv .ge. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = l c w(1) = FLMAX tol(1) = err tol(2) = errin if (l .ne. 0) then eps = FLMAX else eps = -FLMAX end if maxi(1) = -1 maxi(2) = -1 return end if if (iter .eq. 
1) then c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do j = 1, p sum = zero do k = 1, G sum = sum + z(j,k) end do shape(j) = sum end do call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .ge. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if lwork = 0 c w(1) = smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (Vinv .le. zero) then c call dcopy (G, temp/dble(n), 0, scale, 1) dummy(1) = temp/dble(n) call dcopy (G, dummy, 0, scale, 1) else c call dcopy (G, temp/sumz, 0, scale, 1) dummy(1) = temp/sumz call dcopy (G, dummy, 0, scale, 1) end if if (temp .le. eps) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = 0 c w(1) = temp c w(2) = zero tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if call dscal( p, one/temp, shape, 1) end if c inner iteration to estimate scale and shape c pro now contains n*pro inner = 0 errin = zero if (maxi2 .le. 0) goto 120 110 continue call dcopy( p, shape, 1, w , 1) call dcopy( G, scale, 1, w(p1), 1) c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G sum = zero do j = 1, p sum = sum + z(j,k)/w(j) end do temp = sum/(pro(k)*dble(p)) scale(k) = temp if (temp .le. eps) then lwork = 0 c w(1) = temp tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = max(inner,inmax) return end if do j = 1, p shape(j) = shape(j) + z(j,k)/temp end do end do inner = inner + 1 call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = 0 c w(1) = smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = max(inner,inmax) return end if c normalize the shape matrix sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = -1 maxi(2) = -1 return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .le. eps) then if (.not. EQPRO) call dscal( G, one/dble(n), pro, 1) if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if lwork = 0 c w(1) = temp tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = max(inner,inmax) end if call dscal( p, one/temp, shape, 1) errin = zero do j = 1, p errin = max(abs(w(j)-shape(j))/(one+shape(j)), errin) end do do k = 1, G errin = max(abs(scale(k)-w(p+k))/(one+scale(k)), errin) end do if (errin .ge. tol2 .and. inner .lt. maxi2) goto 110 120 continue inmax = max(inner,inmax) smin = smin/temp smax = smax/temp if (.not. 
EQPRO) call dscal( G, one/dble(n), pro, 1) term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then lwork = 0 c w(1) = -smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = inmax return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then lwork = 0 c w(1) = -smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = inmax return end if do j = 1, p s(j) = sqrt(shape(j)) end do call sgnrng( p, s, 1, smin, smax) if (smin .le. rteps) then lwork = 0 c w(1) = -smin tol(1) = err tol(2) = errin eps = FLMAX maxi(1) = iter maxi(2) = inmax return end if do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n call dcopy( p, x(i,1), n, w(p1), 1) call daxpy( p, (-one), mu(1,k), 1, w(p1), 1) call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, w, 1) do j = 1, p w(j) = w(j) / s(j) end do sum = ddot(p,w,1,w,1)/scalek c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. 
SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol1 .and. iter .lt. maxi1) goto 100 c smin = sqrt(smin) c smax = sqrt(smax) c rcmin = FLMAX c do k = 1, G c temp = sqrt(scale(k)) c rcmin = min(rcmin,(temp*smin)/(one+temp*smax)) c end do lwork = 0 c w(1) = rcmin tol(1) = err tol(2) = errin eps = hood maxi(1) = iter maxi(2) = inmax return end subroutine mevevp( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, lwork, * mu, scale, shape, O, pro, w, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi(2), lwork c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision Vinv, eps, tol(2) c double precision x(n,p), z(n,G[+1]), w(lwork), s(p) double precision x(n,*), z(n, * ), w( * ), s(*) c double precision mu(p,G), pro(G[+1]) double precision mu(p,*), pro( * ) c double precision scale(G), shape(p), O(p,p,G) double precision scale(*), shape(*), O(p,p,*) integer maxi1, maxi2, p1, inmax, iter integer nz, i, j, k, l, j1, info, inner double precision tol1, tol2, dnp, term, rteps double precision errin, smin, smax, sumz, tmin, tmax double precision cs, sn, hold, hood, err, zsum double precision const, temp, sum, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) 
c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero pdof = pdof*1.d0 maxi1 = maxi(1) maxi2 = maxi(2) if (maxi1 .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) rteps = sqrt(eps) tol1 = max(tol(1),zero) tol2 = max(tol(2),zero) p1 = p + 1 dnp = dble(n*p) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX errin = FLMAX inmax = 0 inner = 0 iter = 0 100 continue zsum = one l = 0 do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p call dcopy( p, pscale(1,j), 1, O(1,j,k), 1) end do sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) zsum = min(zsum,sumz) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) term = sumz+pshrnk const = (sumz*pshrnk)/term call dscal( p, sqrt(const), w, 1) j = 1 do j1 = 2, p call drotg( O(j,j,k), w(j), cs, sn) call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn) j = j1 end do call drotg( O(p,p,k), w(p), cs, sn) call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k), * dummy, 1, dummy, 1, w, lwork, info) if (info .ne. 0) then l = info else do j = 1, p temp = z(j,k) z(j,k) = temp*temp end do end if end if end do iter = iter + 1 if (l .ne. 0 .or. zsum .le. 
rteps) then lwork = l c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) if (l .ne. 0) then eps = FLMAX else eps = -FLMAX end if goto 200 end if if (iter .eq. 1) then c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do j = 1, p sum = zero do k = 1, G sum = sum + z(j,k) end do shape(j) = sum end do call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then eps = FLMAX goto 200 return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then eps = FLMAX goto 200 return end if if (temp .ge. SMALOG) then temp = exp(temp) else temp = zero end if do k = 1, G scale(k) = temp / (pro(k)*dble(n)) end do if (temp .le. eps) then eps = FLMAX goto 200 return end if call dscal( p, one/temp, shape, 1) end if inner = 0 errin = zero if (maxi2 .le. 0) goto 120 110 continue call dcopy( p, shape, 1, w , 1) call dcopy( G, scale, 1, w(p1), 1) call sgnrng( p+G, w, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if c call dcopy( p, zero, 0, shape, 1) dummy(1) = zero call dcopy( p, dummy, 0, shape, 1) do k = 1, G sum = zero do j = 1, p if (w(j) .le. z(j,k) .and. z(j,k) .lt. w(j)*rteps) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if sum = sum + z(j,k)/w(j) end do temp = sum/(pro(k)*dble(n*p)) scale(k) = temp do j = 1, p if (temp .le. z(j,k) .and. z(j,k) .lt. 
temp*rteps) then c call dcopy( p, FLMAX, 0, shape, 1) c call dcopy( G, FLMAX, 0, scale, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) call dcopy( G, dummy, 0, scale, 1) goto 200 end if shape(j) = shape(j) + z(j,k)/temp end do end do inner = inner + 1 call sgnrng( p, shape, 1, smin, smax) if (smin .le. zero) then eps = FLMAX goto 200 return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then eps = FLMAX goto 200 return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if if (temp .le. eps) then eps = FLMAX goto 200 end if call dscal( p, one/temp, shape, 1) errin = zero do j = 1, p errin = max(abs(w(j)-shape(j))/(one+shape(j)), errin) end do do k = 1, G errin = max(abs(scale(k)-w(p+k))/(one+scale(k)), errin) end do if (errin .ge. tol2 .and. inner .lt. maxi2) goto 110 120 continue inmax = max(inner,inmax) smin = smin/temp smax = smax/temp term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if else c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( p, shape, 1, smin, smax) if (smin .le. eps) then eps = FLMAX goto 200 return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then eps = FLMAX goto 200 return end if do j = 1, p s(j) = sqrt(shape(j)) end do call sgnrng( p, s, 1, smin, smax) if (smin .le. 
rteps) then eps = FLMAX goto 200 return end if do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n call dcopy( p, x(i,1), n, w(p1), 1) call daxpy( p, (-one), mu(1,k), 1, w(p1), 1) call dgemv( 'N', p, p, one, O(1,1,k), p, w(p1), 1, zero, w, 1) do j = 1, p w(j) = w(j) / s(j) end do sum = ddot(p,w,1,w,1)/scalek c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol1 .and. iter .lt. maxi1) goto 100 c smin = sqrt(smin) c smax = sqrt(smax) c rcmin = FLMAX c do k = 1, G c temp = sqrt(scale(k)) c rcmin = min(rcmin,(temp*smin)/(one+temp*smax)) c end do c w(1) = rcmin lwork = 0 eps = hood 200 continue tol(1) = err tol(2) = errin maxi(1) = iter maxi(2) = inmax return end subroutine msvev ( x, z, n, p, G, w, lwork, maxi, tol, * mu, scale, shape, O, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G, maxi, lwork double precision tol c double precision x(n,p), z(n,G), w(max(4*p,5*p-4,p+G)) double precision x(n,*), z(n,*), w(*) c double precision scale(G), shape(p), O(p,p,G), mu(p,G), pro(G) double precision scale(*), shape(*), O(p,p,*), mu(p,*), pro(*) integer p1, i, j, k, j1, inner, info double precision temp double precision err, sum, smin, smax, cs, sn double precision zero, one 
c --- continuation of the msvev declarations begun on the previous
c     chunk line ---
      parameter (zero = 0.d0, one = 1.d0)
      double precision FLMAX
      parameter (FLMAX = 1.7976931348623157d308)
      double precision BIGLOG
      parameter (BIGLOG = 709.d0)
      double precision SMALOG
      parameter (SMALOG = -708.d0)
      double precision dummy(1)
c-----------------------------------------------------------------------------
      tol = max(tol,zero)
      p1 = p + 1
      err = FLMAX
      inner = 0
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      do k = 1, G
         dummy(1) = zero
c        call dcopy( p, zero, 0, mu(1,k), 1)
         call dcopy( p, dummy, 0, mu(1,k), 1)
         do j = 1, p
c           call dcopy( p, zero, 0, O(1,j,k), 1)
            call dcopy( p, dummy, 0, O(1,j,k), 1)
         end do
c        weighted group totals: sum of z(,k) and weighted mean
         sum = zero
         do i = 1, n
            temp = z(i,k)
            sum = sum + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         pro(k) = sum
         if (sum .ge. one .or. one .lt. sum*FLMAX) then
            call dscal( p, (one/sum), mu(1,k), 1)
            if (lwork .gt. 0) then
c              build a triangular factor of the weighted scatter via
c              Givens rotations, then SVD it; right singular vectors
c              overwrite O(,,k), singular values land in z(1:p,k)
               do i = 1, n
                  call dcopy( p, x(i,1), n, w, 1)
                  call daxpy( p, (-one), mu(1,k), 1, w, 1)
                  call dscal( p, sqrt(z(i,k)), w, 1)
                  j = 1
                  do j1 = 2, p
                     call drotg( O(j,j,k), w(j), cs, sn)
                     call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
                     j = j1
                  end do
                  call drotg( O(p,p,k), w(p), cs, sn)
               end do
               call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k),
     *                      dummy, 1, dummy, 1, w, lwork, info)
               if (info .ne. 0) then
c                 inner doubles as an error flag here (LAPACK info)
                  inner = info
               else
                  do j = 1, p
                     temp = z(j,k)
                     temp = temp*temp
                     shape(j) = shape(j) + temp
                     z(j,k) = temp
                  end do
               end if
            end if
         else
c           (nearly) empty group: flag via err = 0 and poison the mean
            err = zero
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
c inner iteration estimates scale and shape
c pro now contains n*pro
      if (inner .ne. 0 .or. err .eq. zero) then
         lwork = inner
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
      lwork = 0
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .eq. zero) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
c --- initialize: geometric mean of shape, per-group scale --------------
      sum = zero
      do j = 1, p
         sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
      if (temp .ge. SMALOG) then
         temp = exp(temp)
      else
         temp = zero
      end if
      do k = 1, G
         scale(k) = temp / (pro(k)*dble(n))
      end do
      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
      call dscal( p, one/temp, shape, 1)
c iteration to estimate scale and shape
c pro now contains n*pro
      if (maxi .le. 0) goto 200
 100  continue
      call dcopy( p, shape, 1, w , 1)
      call dcopy( G, scale, 1, w(p1), 1)
      call absrng( p, w, 1, smin, smax)
      if (smin .le. one .and. one .ge. smin*FLMAX) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      do k = 1, G
         sum = zero
         do j = 1, p
            sum = sum + z(j,k)/w(j)
         end do
         temp = (sum/pro(k))/dble(p)
         scale(k) = temp
         if (temp .lt. one .and. one .ge. temp*FLMAX) then
c           call dcopy( p, FLMAX, 0, shape, 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, shape, 1)
            goto 200
         end if
         do j = 1, p
            shape(j) = shape(j) + z(j,k)/temp
         end do
      end do
      inner = inner + 1
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .le. zero) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
c normalize the shape matrix
      sum = zero
      do j = 1, p
         sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .ge. BIGLOG) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
      if (temp .ge. SMALOG) then
         temp = exp(temp)
      else
         temp = zero
      end if
      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
      call dscal( p, one/temp, shape, 1)
      err = zero
      do j = 1, p
         err = max(abs(w(j)-shape(j))/(one+shape(j)), err)
      end do
      do k = 1, G
         err = max(abs(scale(k)-w(p+k))/(one+scale(k)), err)
      end do
      if (err .ge. tol .and. inner .lt. maxi) goto 100
 200  continue
c     restore pro to proportions (it held n*pro above)
      call dscal( G, one/dble(n), pro, 1)
      tol = err
      maxi = inner
      return
      end
      subroutine msvevp( x, z, n, p, G,
     *                   pshrnk, pmu, pscale, pdof,
     *                   w, lwork, maxi, tol,
     *                   mu, scale, shape, O, pro)
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
c
c M-step with prior shrinkage (pshrnk, pmu) and prior scatter pscale;
c body continues on the following chunk lines.
      implicit NONE
      integer n, p, G, maxi, lwork
c     double precision pshrnk, pmu(p), pscale(p,p), pdof
      double precision pshrnk, pmu(*), pscale(p,*), pdof
      double precision tol
c     double precision x(n,p), z(n,G), w(lwork)
      double precision x(n,*), z(n,*), w( * )
c     double precision mu(p,G), pro(G)
      double precision mu(p,*), pro(*)
c     double precision scale(G), shape(p), O(p,p,G)
      double precision scale(*), shape(*), O(p,p,*)
      integer p1, i, j, k, l, j1, inner, info
      double precision sum, term, temp, err, smin, smax
      double precision sumz, cs, sn, const
      double precision zero, one, two
      parameter (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision FLMAX
      parameter (FLMAX = 1.7976931348623157d308)
      double precision SMALOG, BIGLOG
      parameter (SMALOG = -708.d0, BIGLOG = 709.d0)
      double precision dummy(1)
c-----------------------------------------------------------------------------
      if (maxi .le. 0) return
      if (pshrnk .le.
c --- continuation of msvevp (header and declarations on the previous
c     chunk lines) ---
     zero) pshrnk = zero
c     no-op; pdof is otherwise unused in this routine
      pdof = pdof*1.d0
      tol = max(tol,zero)
      p1 = p + 1
      err = FLMAX
      inner = 0
      l = 0
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      do k = 1, G
c        call dcopy( p, zero, 0, mu(1,k), 1)
         dummy(1) = zero
         call dcopy( p, dummy, 0, mu(1,k), 1)
c        seed the scatter with the prior scale matrix
         do j = 1, p
            call dcopy( p, pscale(1,j), 1, O(1,j,k), 1)
         end do
         sumz = zero
         do i = 1, n
            temp = z(i,k)
            sumz = sumz + temp
            call daxpy( p, temp, x(i,1), n, mu(1,k), 1)
         end do
         pro(k) = sumz / dble(n)
         if (sumz .ge. one .or. one .lt. sumz*FLMAX) then
            call dscal( p, (one/sumz), mu(1,k), 1)
c           accumulate weighted scatter via Givens rotations
            do i = 1, n
               call dcopy( p, x(i,1), n, w, 1)
               call daxpy( p, (-one), mu(1,k), 1, w, 1)
               call dscal( p, sqrt(z(i,k)), w, 1)
               j = 1
               do j1 = 2, p
                  call drotg( O(j,j,k), w(j), cs, sn)
                  call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
                  j = j1
               end do
               call drotg( O(p,p,k), w(p), cs, sn)
            end do
c           prior mean-shrinkage rank-one update
            call dcopy( p, pmu, 1, w, 1)
            call daxpy( p, (-one), mu(1,k), 1, w, 1)
            term = sumz+pshrnk
            const = (sumz*pshrnk)/term
            call dscal( p, sqrt(const), w, 1)
            j = 1
            do j1 = 2, p
               call drotg( O(j,j,k), w(j), cs, sn)
               call drot( p-j, O(j,j1,k), p, w(j1), 1, cs, sn)
               j = j1
            end do
            call drotg( O(p,p,k), w(p), cs, sn)
c           posterior mean: convex combination of sample mean and pmu
            call dscal( p, sumz/term, mu(1,k), 1)
            call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1)
c           SVD: right singular vectors overwrite O(,,k); singular
c           values land in z(1:p,k) and are squared to eigenvalues
            call dgesvd( 'N', 'O', p, p, O(1,1,k), p, z(1,k),
     *                   dummy, 1, dummy, 1, w, lwork, info)
            if (info .ne. 0) then
               l = info
            else
               do j = 1, p
                  temp = z(j,k)
                  temp = temp*temp
                  shape(j) = shape(j) + temp
                  z(j,k) = temp
               end do
            end if
         else
c           (nearly) empty group: flag via err = 0, poison the mean
            err = zero
c           call dcopy( p, FLMAX, 0, mu(1,k), 1)
            dummy(1) = FLMAX
            call dcopy( p, dummy, 0, mu(1,k), 1)
         end if
      end do
      if (l .ne. 0 .or. err .eq. zero) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .le. zero) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
c --- initialize scale/shape from the pooled eigenvalues ----------------
      sum = zero
      do j = 1, p
         sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .gt. BIGLOG) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
      if (temp .ge. SMALOG) then
         temp = exp(temp)
      else
         temp = zero
      end if
      do k = 1, G
         scale(k) = temp / (pro(k)*dble(n))
      end do
      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
      call dscal( p, one/temp, shape, 1)
      if (maxi .le. 0) goto 200
c --- fixed-point iteration for scale and shape -------------------------
 100  continue
      call dcopy( p, shape, 1, w , 1)
      call dcopy( G, scale, 1, w(p1), 1)
      call sgnrng( p+G, w, 1, smin, smax)
      if (smin .le. zero) then
c        call dcopy( p, FLMAX, 0, shape, 1)
c        call dcopy( G, FLMAX, 0, scale, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         call dcopy( G, dummy, 0, scale, 1)
         goto 200
      end if
c     call dcopy( p, zero, 0, shape, 1)
      dummy(1) = zero
      call dcopy( p, dummy, 0, shape, 1)
      do k = 1, G
         sum = zero
         do j = 1, p
c           overflow guard on z(j,k)/w(j)
            if (w(j) .le. z(j,k) .and. z(j,k) .ge. w(j)*FLMAX) then
c              call dcopy( p, FLMAX, 0, shape, 1)
c              call dcopy( G, FLMAX, 0, scale, 1)
               dummy(1) = FLMAX
               call dcopy( p, dummy, 0, shape, 1)
               call dcopy( G, dummy, 0, scale, 1)
               goto 200
            end if
            sum = sum + z(j,k)/w(j)
         end do
         temp = sum/(pro(k)*dble(n*p))
         scale(k) = temp
         do j = 1, p
            if (temp .le. z(j,k) .and. z(j,k) .ge. temp*FLMAX) then
c              call dcopy( p, FLMAX, 0, shape, 1)
c              call dcopy( G, FLMAX, 0, scale, 1)
               dummy(1) = FLMAX
               call dcopy( p, dummy, 0, shape, 1)
               call dcopy( G, dummy, 0, scale, 1)
               goto 200
            end if
            shape(j) = shape(j) + z(j,k)/temp
         end do
      end do
      inner = inner + 1
      call sgnrng( p, shape, 1, smin, smax)
      if (smin .le. zero) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
c --- normalize shape to unit geometric mean ----------------------------
      sum = zero
      do j = 1, p
         sum = sum + log(shape(j))
      end do
      temp = sum/dble(p)
      if (temp .ge. BIGLOG) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
      if (temp .ge. SMALOG) then
         temp = exp(temp)
      else
         temp = zero
      end if
      if (temp .lt. one .and. one .ge. temp*FLMAX) then
c        call dcopy( p, FLMAX, 0, shape, 1)
         dummy(1) = FLMAX
         call dcopy( p, dummy, 0, shape, 1)
         goto 200
      end if
      call dscal( p, one/temp, shape, 1)
      err = zero
      do j = 1, p
         err = max(abs(w(j)-shape(j))/(one+shape(j)), err)
      end do
      do k = 1, G
         err = max(abs(scale(k)-w(p+k))/(one+scale(k)), err)
      end do
      if (err .ge. tol .and. inner .lt. maxi) goto 100
 200  continue
c     lwork returns 0 on success, the dgesvd info code on failure
      lwork = l
      tol = err
      maxi = inner
      return
      end
      subroutine esvii ( x, mu, sigsq, pro, n, p, G, Vinv, hood, z)
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
c
c E-step for the spherical, varying-volume model (per-group scalar
c variance sigsq(k)): fills z with conditional probabilities and
c returns the loglikelihood in hood (FLMAX flags degeneracy);
c body continues on the next chunk line.
      implicit NONE
      integer n, p, G
      double precision hood, Vinv
c     double precision x(n,p), z(n,G[+1])
      double precision x(n,*), z(n, * )
c     double precision mu(p,G), sigsq(G), pro(G[+1])
      double precision mu(p,*), sigsq(*), pro( * )
      integer i, j, k, nz
      double precision sum, temp, const, tmin, tmax
      double precision prok, sigsqk, sigmin
      double precision zero, one, two
      parameter (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision pi2log
      parameter (pi2log = 1.837877066409345d0)
      double precision FLMAX
      parameter (FLMAX = 1.7976931348623157d308)
      double precision RTMAX
      parameter (RTMAX = 1.340780792994260d154)
      double precision RTMIN
      parameter (RTMIN = 1.49166814624d-154)
      double precision SMALOG
      parameter (SMALOG = -708.d0)
      double precision dummy(1)
c-----------------------------------------------------------------------------
      call sgnrng( G, sigsq, 1, sigmin, temp)
      if (sigmin
.le. zero) then hood = FLMAX return end if do k = 1, G c prok = pro(k) sigsqk = sigsq(k) const = dble(p)*(pi2log+log(sigsq(k))) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (abs(temp) .ge. RTMAX) then hood = FLMAX return end if if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum/sigsqk)/two) if (sigsqk .lt. one .and. sum .ge. sigsqk*FLMAX) then hood = FLMAX return end if z(i,k) = -(const+sum/sigsqk)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine hcvii ( x, n, p, ic, ng, ns, ALPHA, v, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, ic(n), ng, ns, nd c double precision x(n,p), v(p). 
c    d(*), ALPHA   (continuation of the commented declaration above)
      double precision x(n,*), v(*), d(*), ALPHA
      integer lg, ld, ll, lo, ls, i, j, k, m
      integer ni, nj, nij, nopt, niop, njop
      integer ij, ici, icj, iopt, jopt, iold
      double precision ALFLOG
      double precision qi, qj, qij, ri, rj, rij, si, sj
      double precision tracei, tracej, trcij, trop
      double precision termi, termj, trmij, tmop
      double precision dij, dopt, siop, sjop
      double precision zero, one
      parameter (zero = 0.d0, one = 1.d0)
      double precision sqrthf
      parameter (sqrthf = .70710678118654757274d0)
      double precision FLMAX
      parameter (FLMAX = 1.7976931348623157d308)
      double precision EPSMAX
      parameter (EPSMAX = 2.2204460492503131d-16)
      double precision ddot
      external ddot
c------------------------------------------------------------------------------
c on entry: ic holds initial group labels; ng the number of groups;
c ns the number of merge stages wanted; d is workspace/output of
c length nd; v is a length-p scratch vector.  x is overwritten: row
c storage is reused for group sums, traces and criterion terms.
c on return: x(1:ns, 1:2) lists the merged pair at each stage and d
c the corresponding criterion changes.
      iopt = 0
      niop = 0
      njop = 0
      nopt = 0
      tmop = 0.d0
      trop = 0.d0
      lg = ng
      ld = (ng*(ng-1))/2
c     ll appears unused below - retained from an earlier version
      ll = nd-ng
      lo = nd
      if (ng .eq. 1) return
c     ALPHA regularizes log(trace): keeps singleton merges finite
      ALPHA = max(ALPHA,EPSMAX)
      ALFLOG = log(ALPHA)
c     call intpr( 'ic', -1, ic, n)
c group heads should be first among rows of x
      i = 1
      j = 2
 1    continue
      icj = ic(j)
      if (icj .ne. j) goto 2
      if (j .eq. lg) goto 3
      i = j
      j = j + 1
      goto 1
 2    continue
      k = i
      m = j + 1
      do j = m, n
         icj = ic(j)
         if (icj .gt. k) then
            k = k + 1
            call dswap( p, x(k,1), n, x(j,1), n)
            ic(j) = ic(k)
            ic(k) = icj
         end if
      end do
 3    continue
c set up pointers
c ic is reused as linked bookkeeping: for a group head, ic points at
c the row holding its trace/size; elsewhere it stores group sizes
      do j = 1, n
         i = ic(j)
         if (i .ne. j) then
c update sum of squares
            k = ic(i)
            if (k .eq. 1) then
               ic(i) = j
               ic(j) = 2
               call dscal( p, sqrthf, x(i,1), n)
               call dscal( p, sqrthf, x(j,1), n)
               call dcopy( p, x(j,1), n, v, 1)
               call daxpy( p, (-one), x(i,1), n, v, 1)
               call daxpy( p, one, x(j,1), n, x(i,1), n)
c              call dcopy( p, FLMAX, 0, x(j,1), n)
c              x(j,1) = ddot( p, v, 1, v, 1) / two
               x(j,1) = ddot( p, v, 1, v, 1)
            else
               ic(j) = 0
               ni = ic(k)
               ic(k) = ni + 1
               ri = dble(ni)
               rij = dble(ni+1)
               qj = one/rij
               qi = ri*qj
               si = sqrt(qi)
               sj = sqrt(qj)
               call dcopy( p, x(j,1), n, v, 1)
               call dscal( p, si, v, 1)
               call daxpy( p, (-sj), x(i,1), n, v, 1)
c              x(k,1) = qi*x(k,1) + qj*ddot(p, v, 1, v, 1)
               x(k,1) = x(k,1) + ddot(p, v, 1, v, 1)
               call dscal( p, si, x(i,1), n)
               call daxpy( p, sj, x(j,1), n, x(i,1), n)
c              call dcopy( p, FLMAX, 0, x(j,1), n)
            end if
         else
            ic(j) = 1
         end if
      end do
c store terms also so as not to recompute them
      do k = 1, ng
         i = ic(k)
         if (i .ne. 1) then
            ni = ic(i)
            ri = dble(ni)
c           x(i,2) = ri*log(x(i,1)+ALPHA)
            x(i,2) = ri*log((x(i,1)+ALPHA)/ri)
         end if
      end do
c     call intpr( 'ic', -1, ic, n)
c     call dblepr( 'trace', -1, x(1,1), n)
c     call dblepr( 'term', -1, x(1,2), n)
c compute change in likelihood and determine minimum
      dopt = FLMAX
      ij = 0
      do j = 2, ng
         nj = ic(j)
         if (nj .eq. 1) then
            tracej = zero
            termj = ALFLOG
            rj = one
         else
            tracej = x(nj,1)
            termj = x(nj,2)
            nj = ic(nj)
            rj = dble(nj)
         end if
         do i = 1, (j-1)
            ni = ic(i)
            if (ni .eq. 1) then
               tracei = zero
               termi = ALFLOG
               ri = one
            else
               tracei = x(ni,1)
               termi = x(ni,2)
               ni = ic(ni)
               ri = dble(ni)
            end if
            nij = ni + nj
            rij = dble(nij)
            qij = one/rij
            qi = ri*qij
            qj = rj*qij
            si = sqrt(qi)
            sj = sqrt(qj)
            call dcopy(p, x(i,1), n, v, 1)
            call dscal( p, sj, v, 1)
            call daxpy( p, (-si), x(j,1), n, v, 1)
c           trcij = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1)
            trcij = (tracei + tracej) + ddot(p,v,1,v,1)
c           trmij = rij*log(trcij+ALPHA)
            trmij = rij*log((trcij+ALPHA)/rij)
            dij = trmij - (termi + termj)
            ij = ij + 1
            d(ij) = dij
            if (dij .le. dopt) then
c              remember the best (lowest-cost) merge so far
               dopt = dij
               trop = trcij
               tmop = trmij
               nopt = nij
               niop = ni
               njop = nj
               siop = si
               sjop = sj
               iopt = i
               jopt = j
            end if
         end do
      end do
c     call dblepr( 'dij', -1, d, (l*(l-1))/2)
      if (ns .eq. 1) then
         if (iopt .lt. jopt) then
            x(1,1) = dble(iopt)
            x(1,2) = dble(jopt)
         else
            x(1,1) = dble(jopt)
            x(1,2) = dble(iopt)
         end if
         d(1) = dopt
         return
      end if
      if (niop .ne. 1) ic(ic(iopt)) = 0
      if (njop .ne. 1) ic(ic(jopt)) = 0
      ls = 1
c --- main agglomeration loop -------------------------------------------
 100  continue
c     if (.false.) then
c        ij = 1
c        jj = 1
c        do j = 2, n
c           nj = ic(j)
c           if (nj .ne. 0 .and. abs(nj) .le. n) then
c              call dblepr( 'dij', -1, d(ij), jj)
c              ij = ij + jj
c              jj = jj + 1
c           end if
c        end do
c     end if
c     merge group jopt into iopt; move last group into jopt's slot
      call dscal( p, siop, x(iopt,1), n)
      call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n)
      if (jopt .ne. lg) then
         call wardsw( jopt, lg, d)
         call dcopy( p, x(lg,1), n, x(jopt,1), n)
         m = ic(jopt)
         ic(jopt) = ic(lg)
         ic(lg) = m
      end if
      ic(iopt) = lg
      ic(lg) = nopt
      x(lg,1) = trop
      x(lg,2) = tmop
c     record the merge at the tail of d (unwound at the end)
      d(lo) = dopt
      lo = lo - 1
      d(lo) = dble(iopt)
      lo = lo - 1
      d(lo) = dble(jopt)
      lo = lo - 1
      lg = lg - 1
      ld = ld - lg
      iold = iopt
      iopt = -1
      jopt = -1
      dopt = FLMAX
      ni = nopt
      ri = dble(ni)
      tracei = trop
      termi = tmop
c     refresh criterion entries involving the newly merged group
      ij = ((iold-1)*(iold-2))/2
      if (iold .gt. 1) then
         do j = 1, (iold - 1)
            nj = ic(j)
            if (nj .ne. 1) then
               tracej = x(nj,1)
               termj = x(nj,2)
               nj = ic(nj)
               rj = dble(nj)
            else
               tracej = zero
               termj = ALFLOG
               rj = one
            end if
            nij = ni + nj
            rij = dble(nij)
            qij = one/rij
            qi = ri*qij
            qj = rj*qij
            si = sqrt(qi)
            sj = sqrt(qj)
            call dcopy( p, x(iold,1), n, v, 1)
            call dscal( p, sj, v, 1)
            call daxpy( p, (-si), x(j,1), n, v, 1)
c           trcij = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1)
            trcij = (tracei + tracej) + ddot(p,v,1,v,1)
c           trmij = rij*log(trcij+ALPHA)
            trmij = rij*log((trcij+ALPHA)/rij)
            dij = trmij - (termi + termj)
            ij = ij + 1
            d(ij) = dij
            if (dij .le. dopt) then
               dopt = dij
               trop = trcij
               tmop = trmij
               iopt = j
               jopt = iold
               nopt = nij
               niop = ni
               njop = nj
               sjop = si
               siop = sj
            end if
         end do
      end if
      if (iold .lt. lg) then
         i = iold
         ij = ij + i
         do j = (iold + 1), lg
            nj = ic(j)
            if (nj .ne. 1) then
               tracej = x(nj,1)
               termj = x(nj,2)
               nj = ic(nj)
               rj = dble(nj)
            else
               tracej = zero
               termj = ALFLOG
               rj = one
            end if
            nij = ni + nj
            rij = dble(nij)
            qij = one /rij
            qi = ri*qij
            qj = rj*qij
            si = sqrt(qi)
            sj = sqrt(qj)
            call dcopy( p, x(iold,1), n, v, 1)
            call dscal( p, sj, v, 1)
            call daxpy( p, (-si), x(j,1), n, v, 1)
c           trcij = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1)
            trcij = (tracei + tracej) + ddot(p,v,1,v,1)
c           trmij = rij*log(trcij+ALPHA)
            trmij = rij*log((trcij+ALPHA)/rij)
            dij = trmij - (termi + termj)
            d(ij) = dij
            if (dij .le. dopt) then
               dopt = dij
               trop = trcij
               tmop = trmij
               iopt = iold
               jopt = j
               nopt = nij
               niop = ni
               njop = nj
               siop = si
               sjop = sj
            end if
            ij = ij + i
            i = j
         end do
      end if
c update d and find max
      jopt = 2
      iopt = 1
      dopt = d(1)
      if (lg .eq. 2) goto 900
      ij = 1
      dopt = d(1)
      do i = 2, ld
         qi = d(i)
         if (qi .le. dopt) then
            ij = i
            dopt = qi
         end if
      end do
c     convert linear index ij back to pair (iopt, jopt)
      if (ij .gt. 1) then
         do i = 2, ij
            iopt = iopt + 1
            if (iopt .ge. jopt) then
               jopt = jopt + 1
               iopt = 1
            end if
         end do
      end if
      i = ic(iopt)
      j = ic(jopt)
      if (iopt .ne. iold .and. jopt .ne. iold) then
c        best pair does not involve the last merge: recompute its stats
         if (i .ne. 1) then
            tracei = x(i,1)
            termi = x(i,2)
            niop = ic(i)
            ri = dble(niop)
         else
            tracei = zero
            termi = ALFLOG
            niop = 1
            ri = one
         end if
         if (j .ne. 1) then
            tracej = x(j,1)
            termj = x(j,2)
            njop = ic(j)
            rj = dble(njop)
         else
            tracej = zero
            termj = ALFLOG
            njop = 1
            rj = one
         end if
         nopt = niop + njop
         rij = dble(nopt)
         qij = one/rij
         qi = ri*qij
         qj = rj*qij
         siop = sqrt(qi)
         sjop = sqrt(qj)
         call dcopy( p, x(iopt,1), n, v, 1)
         call dscal( p, sjop, v, 1)
         call daxpy( p, (-siop), x(jopt,1), n, v, 1)
c        trop = (qi*tracei + qj*tracej) + qij*ddot(p,v,1,v,1)
         trop = (tracei + tracej) + ddot(p,v,1,v,1)
c        tmop = rij*log(trop+ALPHA)
         tmop = rij*log((trop+ALPHA)/rij)
      end if
      ls = ls + 1
      if (ls .eq. ns) goto 900
      goto 100
c --- unwind the merge trail stored at the tail of d --------------------
 900  continue
      d(lo) = dopt
      lo = lo - 1
      d(lo) = dble(iopt)
      lo = lo - 1
      d(lo) = dble(jopt)
      do i = 1, ng
         ic(i) = i
      end do
      lo = nd - 1
      ld = lo
      si = d(lo)
      lo = lo - 1
      sj = d(lo)
      ic(int(sj)) = ng
      if (si .lt. sj) then
         x(1,1) = si
         x(1,2) = sj
      else
         x(1,1) = sj
         x(1,2) = si
      end if
      lg = ng + 1
      do k = 2, ns
         lo = lo - 1
         d(ld) = d(lo)
         ld = ld - 1
         lo = lo - 1
         i = int(d(lo))
         ici = ic(i)
         lo = lo - 1
         j = int(d(lo))
         icj = ic(j)
         if (ici .gt. icj) ic(i) = icj
         ic(j) = ic(lg-k)
         if (ici .lt. icj) then
            x(k,1) = dble(ici)
            x(k,2) = dble(icj)
         else
            x(k,1) = dble(icj)
            x(k,2) = dble(ici)
         end if
      end do
c     reverse d so stage order runs first to last
      ld = nd
      lo = 1
      do k = 1, ns
         si = d(lo)
         d(lo) = d(ld)
         d(ld) = si
         ld = ld - 1
         lo = lo + 1
      end do
      return
      end
      subroutine mevii ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps,
     *                   mu, sigsq, pro)
c This function is part of the MCLUST software described at
c http://www.stat.washington.edu/mclust
c Copyright information and conditions for use of MCLUST are given at
c http://www.stat.washington.edu/mclust/license.txt
c
c EM for the spherical, varying-volume model (per-group scalar
c variance sigsq(k)); body continues on the following chunk lines.
c on return: eps holds the loglikelihood on success, +/-FLMAX on
c degeneracy; tol the achieved error; maxi the iteration count.
      implicit NONE
      logical EQPRO
      integer n, p, G, maxi
      double precision Vinv, eps, tol
c     double precision x(n,p), z(n,G[+1])
      double precision x(n,*), z(n, * )
c     double precision mu(p,G), sigsq(G), pro(G[+1])
      double precision mu(p,*), sigsq(*), pro( * )
      integer nz, iter, i, j, k
      double precision sumz, sum, temp, const, term, zsum
      double precision sigmin, sigsqk, hold, hood, err
      double precision prok, tmin, tmax, rteps
      double precision zero, one, two
      parameter (zero = 0.d0, one = 1.d0, two = 2.d0)
      double precision pi2log
      parameter (pi2log = 1.837877066409345d0)
      double precision FLMAX
      parameter (FLMAX = 1.7976931348623157d308)
      double precision RTMIN
      parameter (RTMIN = 1.49166814624d-154)
      double precision SMALOG
      parameter (SMALOG = -708.d0)
      double precision dummy(1)
c-----------------------------------------------------------------------------
      if (maxi .le. 0) return
      if (Vinv .gt.
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G c if (EQPRO) call dcopy( G, one/dble(G), 0, pro, 1) if (EQPRO) then dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = zero do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum z(i,k) = sum end do sigsq(k) = (sigsqk/sumz)/dble(p) else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G c temp = pro(k) sigsqk = sigsq(k) const = dble(p)*(pi2log+log(sigsqk)) do i = 1, n c z(i,k) = temp*exp(-(const+z(i,k)/sigsqk)/two) z(i,k) = -(const+z(i,k)/sigsqk)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. 
zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do const = zero - tmax sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) + const if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)-const) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine meviip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), sigsq(G), pro(G[+1]) double precision mu(p,*), sigsq(*), pro( * ) integer nz, iter, i, j, k double precision sumz, sum, temp, const, term, zsum double precision sigmin, sigsqk, hold, hood, err double precision prok, tmin, tmax, rteps double precision pmupmu, cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot, dlngam external ddot, dlngam double precision dummy(1) c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 pmupmu = ddot(p,pmu,1,pmu,1) 100 continue iter = iter + 1 zsum = one do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = pscale do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum end do temp = pmupmu + ddot(p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot(p,mu(1,k),1,pmu,1) const = sumz+pshrnk sigsqk = sigsqk + ((sumz*pshrnk)/const) * temp c sigsq(k) = sigsqk/(pdof+(sumz+one)*dble(p)+two) temp = pdof+sumz*dble(p)+two if (pshrnk .gt. zero) temp = temp + dble(p) sigsq(k) = sigsqk/temp call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if call sgnrng( G, sigsq, 1, sigmin, temp) if (sigmin .le. 
eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G sigsqk = sigsq(k) const = dble(p)*(pi2log+log(sigsqk)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + temp*temp end do z(i,k) = -(const+sum/sigsqk)/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do const = zero - tmax sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) + const if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)-const) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter if (pshrnk .gt. zero) then cmu = dble(p)*(log(pshrnk)-pi2log)/two const = pdof/two cgam = const*log(pscale/two)-dlngam(const) rmu = zero rgam = zero do k = 1, G term = log(sigsq(k)) temp = pmupmu + ddot( p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot( p, mu(1,k), 1, pmu, 1) rmu = rmu + (pshrnk*temp)/sigsq(k) rgam = rgam + ((pdof+3.d0)*term - (pscale/sigsq(k))) end do rmu = -rmu /two rgam = -rgam/two pdof = (dble(G)*cmu+rmu) + (dble(G)*cgam+rgam) else pdof = FLMAX end if return end subroutine msvii ( x, z, n, p, G, mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), mu(p,G), sigsq(G), pro(G) double precision x(n,*), z(n,*), mu(p,*), sigsq(*), pro(*) integer i, j, k double precision sum, sumz, temp, sigsqk double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision 
RTMIN parameter (RTMIN = 1.49166814624d-154) double precision dummy(1) c----------------------------------------------------------------------------- do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .le. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = zero do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum end do temp = sumz*dble(p) if (temp .ge. one .or. sigsqk .le. temp*FLMAX) then sigsq(k) = sigsqk/temp else sigsq(k) = FLMAX end if else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do return end subroutine msviip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, sigsq, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), sigsq(G), pro(G) double precision mu(p,*), sigsq(*), pro(*) integer i, j, k double precision sumz, sum, temp double precision sigsqk, const, pmupmu double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision ddot external ddot double precision dummy(1) 
c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero pmupmu = ddot(p,pmu,1,pmu,1) do k = 1, G sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .lt. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) sigsqk = pscale do i = 1, n sum = zero do j = 1, p temp = abs(x(i,j) - mu(j,k)) if (temp .gt. RTMIN) sum = sum + temp*temp end do if (sqrt(z(i,k))*sqrt(sum) .gt. RTMIN) * sigsqk = sigsqk + z(i,k)*sum end do temp = pmupmu + ddot(p, mu(1,k), 1, mu(1,k), 1) temp = temp - two*ddot(p,mu(1,k),1,pmu,1) const = sumz+pshrnk sigsqk = sigsqk + ((sumz*pshrnk)/const) * temp temp = pdof+sumz*dble(p)+two if (pshrnk .gt. zero) temp = temp + dble(p) sigsq(k) = sigsqk/temp call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else sigsq(k) = FLMAX c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do return end subroutine esvvi ( x, mu, scale, shape, pro, n, p, G, * Vinv, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G double precision hood, Vinv c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), scale(G), shape(p,G), pro(G[+1]) double precision mu(p,*), scale(*), shape(p,*), pro( * ) integer i, j, k, nz double precision sum, temp, const, tmin, tmax double precision smin, smax, prok, scalek double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 
1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision dummy(1) c----------------------------------------------------------------------------- call sgnrng( G, scale, 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. zero) then hood = FLMAX return end if temp = sqrt(scale(k)) do j = 1, p shape(j,k) = temp*sqrt(shape(j,k)) end do end do do k = 1, G c prok = pro(k) scalek = scale(k) const = dble(p)*(pi2log+log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) if (shape(j,k) .lt. one .and. * abs(temp) .ge. shape(j,k)*FLMAX) then hood = FLMAX return end if temp = temp/shape(j,k) if (abs(temp) .gt. RTMIN) sum = sum + temp*temp end do c z(i,k) = prok*exp(-(const+sum)/two) z(i,k) = -(const+sum)/two end do end do if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do return end subroutine mevvi ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol double precision x(n,*), z(n, * ) double precision mu(p,*), scale(*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sum, temp, term, scalek, epsmin double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, zsum, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c---------------------------------------------------------------------------- if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if tol = max(tol,zero) eps = max(eps,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G c call dcopy( p, zero, 0, shape(1,k), 1) c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, shape(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sum c pro(k) now contains n_k zsum = min(zsum,sum) if (sum .gt. 
rteps) then call dscal( p, (one/sum), mu(1,k), 1) do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum end do end if end do if (zsum .le. rteps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = -FLMAX maxi = iter return end if epsmin = FLMAX do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .le. zero) then scale(k) = zero else temp = zero do j = 1, p temp = temp + log(shape(j,k)) end do temp = temp/dble(p) if (temp .gt. BIGLOG) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if if (temp .gt. SMALOG) then temp = exp(temp) else temp = zero end if scale(k) = temp/pro(k) epsmin = min(temp,epsmin) if (temp .le. eps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do if (.not. EQPRO) then call dscal( G, one/dble(n), pro, 1) else if (Vinv .le. zero) then call dscal( G, one/dble(G), pro, 1) end if if (Vinv .gt. zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (epsmin .le. eps) then tol = err eps = -FLMAX maxi = iter return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. 
eps) then tol = err eps = FLMAX maxi = iter return end if end do do k = 1, G scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scalek))/two) z(i,k) = -(const+(sum/scalek))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine mevvip( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision Vinv, eps, tol c double precision x(n,p), z(n,G[+1]) double precision x(n,*), z(n, * ) c double precision mu(p,G), scale(G), shape(p,G), pro(G[+1]) double precision mu(p,*), scale(*), shape(p,*), pro( * ) integer nz, iter, i, j, k double precision sumz, sum, temp, term, scalek, epsmin double precision hold, hood, err, smin, smax, const double precision prok, tmin, tmax, zsum, rteps double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision 
FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G end if eps = max(eps,zero) tol = max(tol,zero) rteps = sqrt(eps) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sumz zsum = min(zsum,sumz) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) term = pshrnk+sumz const = (pshrnk*sumz)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) end if end do if (zsum .le. rteps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = -FLMAX maxi = iter return end if c pro(k) now contains n_k epsmin = FLMAX term = pdof+two if (pshrnk .gt. zero) term = term + one do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) epsmin = min(smin,epsmin) if (smin .eq. zero) then scale(k) = zero else sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if if (temp .gt. 
SMALOG) then temp = exp(temp) else temp = zero end if c pro(k) contains n_k scale(k) = temp/(pro(k)+term) epsmin = min(temp,epsmin) if (temp .le. eps) then call dscal( G, one/dble(n), pro, 1) tol = zsum eps = FLMAX maxi = iter return end if call dscal( p, one/temp, shape(1,k), 1) end if end do if (.not. EQPRO) then call dscal( G, one/dble(n), pro, 1) else if (Vinv .le. zero) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if term = zero if (Vinv .gt. zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if if (epsmin .le. eps) then tol = err eps = FLMAX maxi = iter return end if call sgnrng( G, scale, 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if do k = 1, G call sgnrng( p, shape(1,k), 1, smin, smax) if (smin .le. eps) then tol = err eps = FLMAX maxi = iter return end if end do do k = 1, G scalek = scale(k) const = dble(p)*(pi2log + log(scalek)) do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j,k) sum = sum + (temp*temp)/shape(j,k) end do c z(i,k) = pro(k)*exp(-(const+(sum/scalek))/two) z(i,k) = -(const+(sum/scalek))/two end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. 
iter .lt. maxi) goto 100 tol = err eps = hood maxi = iter return end subroutine msvvi ( x, z, n, p, G, mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale(G), shape(p,G), pro(G) double precision mu(p,*), scale(*), shape(p,*), pro(*) integer i, j, k double precision sum, temp, smin, smax double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision dummy(1) c----------------------------------------------------------------------------- do k = 1, G dummy(1) = zero c call dcopy( p, zero, 0, shape(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) sum = zero c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sum if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if end do c pro(k) now contains n_k do k = 1, G if (mu(1,k) .ne. FLMAX) then do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum end do else c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) end if end do do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. zero) then scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (smax .eq. 
FLMAX) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if if (temp .lt. SMALOG) then temp = zero scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if temp = exp(temp) if (pro(k) .lt. one .and. temp .ge. pro(k)*FLMAX) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if scale(k) = temp/pro(k) if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) goto 100 end if call dscal( p, one/temp, shape(1,k), 1) 100 continue end do call dscal( G, one/dble(n), pro, 1) return end subroutine msvvip( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * mu, scale, shape, pro) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof c double precision x(n,p), z(n,G) double precision x(n,*), z(n,*) c double precision mu(p,G), scale(G), shape(p,G), pro(G) double precision mu(p,*), scale(*), shape(p,*), pro(*) integer i, j, k double precision sumz, sum, temp, term double precision smin, smax, const double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision 
dummy(1) c----------------------------------------------------------------------------- if (pshrnk .lt. zero) pshrnk = zero do k = 1, G c call dcopy( p, pscale, 0, shape(1,k), 1) dummy(1) = pscale call dcopy( p, dummy, 0, shape(1,k), 1) sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) z(i,k) = sqrt(temp) end do pro(k) = sumz if (sumz .ge. one .or. one .le. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) term = pshrnk+sumz const = (pshrnk*sumz)/term do j = 1, p sum = zero do i = 1, n temp = z(i,k)*(x(i,j) - mu(j,k)) sum = sum + temp*temp end do shape(j,k) = shape(j,k) + sum temp = pmu(j) - mu(j,k) shape(j,k) = shape(j,k) + const*(temp*temp) end do call dscal( p, sumz/term, mu(1,k), 1) call daxpy( p, pshrnk/term, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) call dcopy( p, dummy, 0, shape(1,k), 1) end if end do c pro(k) now contains n_k do k = 1, G call sgnrng(p, shape(1,k), 1, smin, smax) if (smin .le. zero) then scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) else if (smax .eq. FLMAX) then scale(k) = FLMAX else sum = zero do j = 1, p sum = sum + log(shape(j,k)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then scale(k) = FLMAX c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) else if (temp .lt. SMALOG) then temp = zero scale(k) = zero c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) else temp = exp(temp) c pro(k) contains n_k term = pro(k) + pdof + two if (pshrnk .gt. zero) term = term + one scale(k) = temp/term if (temp .ge. one .or. one .le. 
temp*FLMAX) then call dscal( p, one/temp, shape(1,k), 1) else c call dcopy( p, FLMAX, 0, shape(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape(1,k), 1) end if end if end if end do call dscal( G, one/dble(n), pro, 1) return end subroutine esvvv ( CHOL, x, mu, Sigma, pro, n, p, G, Vinv, * w, hood, z) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c character CHOL logical CHOL c integer n, p, G integer n, p, G double precision hood, Vinv c double precision x(n,p), w(p), z(n,G[+1]) double precision x(n,*), w(*), z(n, * ) c double precision mu(p,G), Sigma(p,p,G), pro(G[+1]) double precision mu(p,*), Sigma(p,p,*), pro( * ) integer nz, p1, info, i, j, k double precision const, detlog, temp, prok, tmin, tmax double precision umin, umax, sum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision RTMAX parameter (RTMAX = 1.340780792994260d154) double precision RTMIN parameter (RTMIN = 1.49166814624d-154) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) c----------------------------------------------------------------------------- p1 = p + 1 c if (CHOL .eq. 'N') then if (.not. CHOL) then do k = 1, G call dpotrf( 'U', p, Sigma(1,1,k), p, info) w(1) = dble(info) if (info .ne. 0) then hood = FLMAX return end if end do end if do k = 1, G call absrng( p, Sigma(1,1,k), p1, umin, umax) if (umax .le. one .and. umax .ge. umin*RTMAX) then w(1) = zero hood = FLMAX return end if if (umax .ge. one .and. umin .le. 
umax*RTMIN) then w(1) = zero hood = FLMAX return end if end do do k = 1, G detlog = zero do j = 1, p detlog = detlog + log(abs(Sigma(j,j,k))) end do const = dble(p)*pi2log/two + detlog c prok = pro(k) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, Sigma(1,1,k), p, w, 1) temp = ddot( p, w, 1, w, 1)/two c z(i,k) = prok*exp(-(const+temp)) z(i,k) = -(const+temp) end do end do w(1) = zero if (pro(1) .lt. zero) return nz = G if (Vinv .gt. zero) then nz = nz + 1 c call dcopy( n, log(Vinv), 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) end if c hood = zero c do i = 1, n c sum = zero c do k = 1, nz c sum = sum + z(i,k) c end do c hood = hood + log(sum) c call dscal( nz, (one/sum), z(i,1), n) c end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) if (sum .lt. one .and. one .ge. 
sum*FLMAX) then w(1) = zero hood = FLMAX return end if call dscal( nz, (one/sum), z(i,1), n) end do w(1) = zero return end subroutine hcvvv ( x, n, p, ic, ng, ns, ALPHA, BETA, * v, u, s, r, nd, d) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, ic(n), ng, ns, nd double precision ALPHA, BETA c double precision x(n,p+1), v(p), u(p,p), s(p,p) c double precision r(p,p), d(ng*(ng-1)/2) double precision x(n,*), v(*), u(p,*), s(p,*) double precision r(p,*), d(*) integer psq, pm1, pp1 integer i, j, k, l, m, ij, iold integer lg, ld, ll, lo, ls integer ici, icj, ni, nj, nij integer nopt, niop, njop, iopt, jopt double precision trcij, trmij, trop, tmop double precision traci, tracj, termi, termj double precision qi, qj, qij, si, sj, sij, ri, rj, rij double precision dij, dopt, siop, sjop double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision rthalf parameter (rthalf = .7071067811865476d0) double precision ddot, vvvtij external ddot, vvvtij double precision BETA0, ALPHA0, ABLOG common /VVVMCL/ BETA0, ALPHA0, ABLOG save /VVVMCL/ double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMAX parameter (EPSMAX = 2.2204460492503131d-16) double precision dummy(1) c----------------------------------------------------------------------------- iopt = 0 niop = 0 nopt = 0 tmop = 0.d0 trop = 0.d0 lg = ng ld = (ng*(ng-1))/2 ll = nd-ng lo = nd psq = p*p pm1 = p-1 pp1 = p+1 if (ng .eq. 1) return ALPHA = max(ALPHA,EPSMAX) BETA0 = BETA ALPHA0 = ALPHA ABLOG = log(BETA*ALPHA) c call intpr( 'ic', -1, ic, n) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. ng) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. 
k) then k = k + 1 call dswap( p, x(k,1), n, x(j,1), n) ic(j) = ic(k) ic(k) = icj end if end do 3 continue c set up pointers if (ng .eq. n) goto 4 do j = n, ng+1, -1 icj = ic(j) i = ic(icj) ic(icj) = j if (i .ne. icj) then ic(j) = i else ic(j) = j end if end do 4 continue c call intpr( 'ic', -1, ic, n) c initialize by simulating merges do k = 1, ng j = ic(k) if (j .ne. k) then c non-singleton c call dcopy( psq, zero, 0, r, 1) dummy(1) = zero call dcopy( psq, dummy, 0, r, 1) trcij = zero l = 1 10 continue m = l + 1 qj = one/dble(m) qi = dble(l)*qj si = sqrt(qi) sj = sqrt(qj) call dcopy( p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(k,1), n, v, 1) trcij = trcij + ddot( p, v, 1, v, 1) call dscal( p, si, x(k,1), n) call daxpy( p, sj, x(j,1), n, x(k,1), n) call mclrup( m, p, v, r, p) l = m i = ic(j) if (i .eq. j) goto 20 j = i goto 10 20 continue c d(ll+k) = trcij c copy triangular factor into the rows of x j = k m = p do i = 1, min(l-1,p) j = ic(j) call dcopy( m, r(i,i), p, x(j,i), n) m = m - 1 end do ij = j if (l .ge. p) then do m = p, l icj = ic(j) ic(j) = -k j = icj end do end if ic(ij) = n+l x(k, pp1) = zero if (l .ge. 2) then x( k, pp1) = trcij trmij = vvvtij( l, p, r, sj, trcij) x(ic(k),pp1) = trmij end if else ic(k) = 1 c d(ll+k) = zero end if end do c call intpr( 'ic', -1, ic, n) c call dblepr( '', -1, x(1,pp1), n) c call dblepr( 'trac', -1, d(ll+1), ng) c call dblepr( 'term', -1, term, n) c compute change in likelihood and determine minimum dopt = FLMAX ij = 0 do j = 2, ng icj = ic(j) nj = 1 if (icj .eq. 1) then tracj = zero termj = ABLOG do i = 1, (j-1) ni = 1 ici = ic(i) if (ici .eq. 
1) then nij = 2 rij = two si = rthalf sj = rthalf sij = rthalf call dcopy( p, x(i,1), n, v, 1) call daxpy( p, (-one), x(j,1), n, v, 1) call dscal( p, rthalf, v, 1) c trcij = half*ddot( p, v, 1, v, 1) trcij = ddot( p, v, 1, v, 1) call dcopy( p, v, 1, u, p) c trmij = rij*log(BETA*trcij+ALPHA) trmij = two*log(BETA*(trcij+ALPHA)/two) termi = ABLOG else m = p l = ici 110 continue call dcopy( m, x(l,ni), n, u(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 110 ni = l - n c traci = d(ll+i) c traci = trac(i) c termi = vvvtrm(i,ni,n,p,ic,x,traci) c termi = term(i) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) sij = sj call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = traci + ddot(p,v,1,v,1) call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) end if dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j m = p do k = 1, min(nij-1,p) call dcopy( m, u(k,k), p, r(k,k), p) m = m - 1 end do end if end do else m = p l = icj 120 continue call dcopy( m, x(l,nj), n, s(nj,nj), p) nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 120 nj = l - n c tracj = d(ll+j) c termj = vvvtrm(j,nj,n,p,ic,x,tracj) tracj = x( j , pp1) termj = x( ic(j), pp1) rj = dble(nj) do i = 1, (j-1) m = p do k = 1, min(nj-1,p) call dcopy( m, s(k,k), p, u(k,k), p) m = m - 1 end do ni = 1 ici = ic(i) if (ici .eq. 
1) then nij = nj + 1 rij = dble(nij) qij = one/rij qi = qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = tracj + ddot(p,v,1,v,1) termi = ABLOG else m = p l = ici k = nj + 1 130 continue call dcopy( m, x(l,ni), n, v, 1) call mclrup( k, m, v, u(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 130 ni = l - n c traci = d(ll+i) c termi = vvvtrm(i,ni,n,p,ic,x,traci) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = ( traci + tracj) + ddot(p,v,1,v,1) end if call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = i jopt = j m = p do k = 1, min(nij-1,p) call dcopy( m, u(k,k), p, r(k,k), p) m = m - 1 end do end if end do end if end do c if (.false.) then c i = 1 c ij = 1 c do j = 2, ng c call dblepr( 'dij', -1, d(ij), i) c ij = ij + i c i = j c end do c end if if (ns .eq. 1) then if (iopt .lt. jopt) then x(1,1) = iopt x(1,2) = jopt else x(1,1) = jopt x(1,2) = iopt end if d(1) = dopt return end if ls = 1 200 continue call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, siop, v, 1) call daxpy( p, sjop, x(jopt,1), n, v, 1) if (jopt .ne. lg) then call wardsw( jopt, lg, d) call dcopy( p, x(lg,1), n, x(jopt,1), n) m = ic(jopt) icj = ic(lg) if (icj .ne. 1) x( jopt, pp1) = x( lg, pp1) ic(jopt) = icj ic(lg) = m end if if (niop .eq. 1) then ic(iopt) = lg else l = ic(iopt) do k = 1, min(niop-1,p) m = l l = ic(l) end do if (l .lt. n) call intpr("l .lt. 
n", 8, l, 1) ic(m) = lg end if l = ic(iopt) do k = 1, min(nopt-1,p) call dcopy( p, r(1,1), p, x(l,1), n) m = l l = ic(l) end do ic(m) = nopt + n c call intpr('ic', 2, ic, n) c term(iopt) = tmop c trac(iopt) = trop x(iopt, pp1) = zero if (nopt .ge. 2) then x(iopt,pp1) = trop x(ic(iopt),pp1) = tmop endif call dcopy( p, v, 1, x(iopt,1), n) d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) lo = lo - 1 lg = lg - 1 ld = ld - lg iold = iopt dopt = FLMAX ni = nopt ri = dble(ni) termi = tmop traci = trop ij = ((iold-1)*(iold-2))/2 if (iold .gt. 1) then do j = 1, (iold-1) c call dcopy(psq, zero, 0, u, 1) dummy(1) = zero call dcopy(psq, dummy, 0, u, 1) m = p do k = 1, min(ni-1,p) call dcopy(m, r(k,k), p, u(k,k), p) m = m - 1 end do nj = 1 icj = ic(j) if (icj .eq. 1) then nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) sij = sj call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = traci + ddot(p,v,1,v,1) tracj = zero termj = ABLOG else m = p l = icj k = ni + 1 310 continue call dcopy( m, x(l,nj), n, v, 1) call mclrup( k, m, v, u(nj,nj), p) k = k + 1 nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 310 nj = l - n c call vvvget(j,nj,n,p,ic,x,tracj,termj) tracj = x( j ,pp1) termj = x(ic(j),pp1) rj = dble(nj) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = ( traci + tracj) + ddot(p,v,1,v,1) end if call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) dij = trmij - (termi + termj) ij = ij + 1 d(ij) = dij if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = nj njop = ni siop = sj sjop = si iopt = j jopt = iold m = p do k = 1, min(nij-1,p) call dcopy(m, u(k,k), p, s(k,k), p) m = m - 1 end do end if end do end if if (iold .lt. 
lg) then i = iold ij = ij + i do j = (iold+1), lg c call dcopy(psq, zero, 0, u, 1) dummy(1) = zero call dcopy(psq, dummy, 0, u, 1) m = p do k = 1, min(ni-1,p) call dcopy(m, r(k,k), p, u(k,k), p) m = m - 1 end do nj = 1 icj = ic(j) if (icj .eq. 1) then nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) sij = sj call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = traci + ddot(p,v,1,v,1) termj = ABLOG else m = p l = icj k = ni + 1 410 continue call dcopy( m, x(l,nj), n, v, 1) call mclrup( k, m, v, u(nj,nj), p) k = k + 1 nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 410 nj = l - n c call vvvget(j,nj,n,p,ic,x,tracj,termj) tracj = x( j ,pp1) termj = x(ic(j),pp1) rj = dble(nj) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) sij = sqrt(qij) call dcopy(p, x(j,1), n, v, 1) call dscal( p, si, v, 1) call daxpy( p, (-sj), x(iold,1), n, v, 1) trcij = ( traci + tracj) + ddot(p,v,1,v,1) end if call mclrup( nij, p, v, u, p) trmij = vvvtij( nij, p, u, sij, trcij) dij = trmij - (termi + termj) d(ij) = dij ij = ij + i i = j if (dij .le. dopt) then dopt = dij trop = trcij tmop = trmij nopt = nij niop = ni njop = nj siop = si sjop = sj iopt = iold jopt = j m = p do k = 1, min(nij-1,p) call dcopy(m, u(k,k), p, s(k,k), p) m = m - 1 end do end if end do end if c update d and find max jopt = 2 iopt = 1 dopt = d(1) if (lg .eq. 2) goto 900 ij = 1 do i = 2, ld qi = d(i) if (qi .le. dopt) then ij = i dopt = qi end if end do c call dblepr("d", 1, d, nd) c call dblepr("d", 1, d, ld) if (ij .gt. 1) then do i = 2, ij iopt = iopt + 1 if (iopt .ge. jopt) then jopt = jopt + 1 iopt = 1 end if end do end if dummy(1) = zero do k = 1, p c call dcopy( p, zero, 0, r(1,k), 1) call dcopy( p, dummy, 0, r(1,k), 1) end do if (iopt .ne. iold .and. jopt .ne. iold) then i = iopt j = jopt nj = 1 icj = ic(j) ni = 1 ici = ic(i) if (icj .eq. 1) then termj = ABLOG if (ici .eq. 
1) then nij = 2 rij = two si = rthalf sj = rthalf call dcopy(p, x(i,1), n, v, 1) call daxpy( p, (-one), x(j,1), n, v, 1) call dscal( p, rthalf, v, 1) trcij = ddot( p, v, 1, v, 1) call dcopy( p, v, 1, r, p) termi = ABLOG else m = p l = ici 610 continue call dcopy( m, x(l,ni), n, r(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 610 ni = l - n c call vvvget(i,ni,n,p,ic,x,traci,termi) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + 1 rij = dble(nij) qij = one/rij qi = ri*qij si = sqrt(qi) sj = sqrt(qij) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = traci + ddot( p, v, 1, v, 1) call mclrup( nij, p, v, r, p) end if else m = p l = icj 620 continue call dcopy( m, x(l,nj), n, r(nj,nj), p) nj = nj + 1 m = m - 1 l = ic(l) if (l .le. n) goto 620 nj = l - n c call vvvget(j,nj,n,p,ic,x,tracj,termj) tracj = x( j , pp1) termj = x(ic(j), pp1) rj = dble(nj) if (ici .eq. 1) then nij = nj + 1 rij = dble(nij) qij = one/rij qj = rj*qij si = sqrt(qij) sj = sqrt(qj) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = tracj + ddot( p, v, 1, v, 1) termi = ABLOG else m = p l = ici k = nj + 1 630 continue call dcopy( m, x(l,ni), n, v, 1) call mclrup( k, m, v, r(ni,ni), p) ni = ni + 1 m = m - 1 l = ic(l) if (l .le. n) goto 630 ni = l - n c call vvvget(i,ni,n,p,ic,x,traci,termi) traci = x( i , pp1) termi = x(ic(i), pp1) ri = dble(ni) nij = ni + nj rij = dble(nij) qij = one/rij qi = ri*qij qj = rj*qij si = sqrt(qi) sj = sqrt(qj) call dcopy(p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcij = ( traci + tracj) + ddot( p,v,1,v,1) end if call mclrup( nij, p, v, r, p) end if trop = trcij tmop = dopt + (termi + termj) nopt = nij niop = ni njop = nj siop = si sjop = sj else m = p do k = 1, min(nopt-1,p) call dcopy(m, s(k,k), p, r(k,k), p) m = m - 1 end do l = ic(iopt) if (l .ne. 1) then 710 continue if (l .le. 
n) then l = ic(l) goto 710 end if niop = l-n else niop = 1 end if l = ic(jopt) if (l .ne. 1) then 720 continue if (l .le. n) then l = ic(l) goto 720 end if njop = l-n else njop = 1 end if nopt = niop + njop end if ls = ls + 1 if (ls .eq. ns) goto 900 goto 200 900 continue d(lo) = dopt lo = lo - 1 d(lo) = dble(iopt) lo = lo - 1 d(lo) = dble(jopt) do i = 1, ng ic(i) = i end do lo = nd - 1 ld = lo si = d(lo) lo = lo - 1 sj = d(lo) ic(int(sj)) = ng if (si .lt. sj) then x(1,1) = si x(1,2) = sj else x(1,1) = sj x(1,2) = si end if lg = ng + 1 do k = 2, ns lo = lo - 1 d(ld) = d(lo) ld = ld - 1 lo = lo - 1 i = int(d(lo)) ici = ic(i) lo = lo - 1 j = int(d(lo)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(lg-k) if (ici .lt. icj) then x(k,1) = dble(ici) x(k,2) = dble(icj) else x(k,1) = dble(icj) x(k,2) = dble(ici) end if end do ld = nd lo = 1 do k = 1, ns si = d(lo) d(lo) = d(ld) d(ld) = si ld = ld - 1 lo = lo + 1 end do return end double precision function vvvtij( l, p, r, s, trac) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer l, p double precision r(p,*), s, trac double precision detlog double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision det2mc external det2mc double precision BETA, ALPHA, ABLOG common /VVVMCL/ BETA, ALPHA, ABLOG save /VVVMCL/ double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) if (l .le. p) then vvvtij = log(BETA*(trac+ALPHA)/dble(l)) else if (trac .eq. zero) then vvvtij = log((ALPHA*BETA)/dble(l)) else detlog = det2mc( p, r, s) if (detlog .eq. -FLMAX) then vvvtij = log(BETA*(trac+ALPHA)/dble(l)) else if (detlog .le. 
zero) then vvvtij = log(exp(detlog)+BETA*(trac+ALPHA)/dble(l)) else vvvtij = log(one+exp(-detlog)*(BETA*(trac+ALPHA)/dble(l))) * + detlog end if end if end if vvvtij = dble(l)*vvvtij return end double precision function det2mc( n, u, s) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer k, n double precision q, s double precision u(n,*) double precision zero, two parameter (zero = 0.d0, two = 2.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) det2mc = zero do k = 1, n q = u(k,k)*s if (abs(q) .le. zero) then det2mc = -FLMAX return end if det2mc = det2mc + log(abs(q)) end do det2mc = two*det2mc return end subroutine mevvv ( EQPRO, x, n, p, G, Vinv, z, maxi, tol, eps, * mu, U, pro, w, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi double precision Vinv, eps, tol c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) integer nz, p1, iter, i, j, k, l, j1 double precision piterm, hold, rcmin, rteps double precision temp, term, cs, sn, umin, umax double precision sumz, sum, detlog, const, hood, err double precision prok, tmin, tmax, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot external ddot double precision dummy(1) 
c------------------------------------------------------------------------------ if (maxi .le. 0) return if (Vinv .gt. zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX c zero out the lower triangle do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do i = 1 do j = 2, p c call dcopy( p-i, zero, 0, S(j,i), 1) dummy(1) = zero call dcopy( p-i, dummy, 0, S(j,i), 1) i = j end do do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do dummy(1) = zero do j = 1, p c call dcopy( j, zero, 0, S(1,j), 1) call dcopy( j, dummy, 0, S(1,j), 1) end do c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) sumz = zero do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do do j = 1, p call dscal( j, one/sqrt(sumz), S(1,j), 1) end do else c call dcopy( p, FLMAX, 0, z(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, z(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if if (Vinv .gt. 
zero) then term = zero do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if rcmin = FLMAX do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,K) end do end do call absrng( p, S, p1, umin, umax) rcmin = min(umin/(one+umax),rcmin) end do if (rcmin .le. rteps) then tol = rcmin eps = FLMAX maxi = iter return end if do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,K) end do end do c temp = pro(k) detlog = zero do j = 1, p detlog = detlog + log(abs(S(j,j))) end do const = piterm+detlog do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, S, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp*exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 c w(1) = rcmin tol = err eps = hood maxi = iter return end subroutine mevvvp( EQPRO, x, n, p, G, Vinv, * pshrnk, pmu, pscale, pdof, * z, maxi, tol, eps, mu, U, pro, w, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE logical EQPRO integer n, p, G, maxi c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision Vinv, eps, tol c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) integer nz, p1, iter, i, j, k, l, j1 double precision piterm, hold, rcmin, rteps double precision temp, term, cs, sn, umin, umax double precision sum, sumz, detlog, const, hood, err double precision prok, tmin, tmax double precision cmu, cgam, rmu, rgam, zsum double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision twolog parameter (twolog = 0.6931471805599453d0) double precision pilog parameter (pilog = 1.144729885849400d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG parameter (SMALOG = -708.d0) double precision ddot, dlngam external ddot, dlngam double precision dummy(1) c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero if (maxi .le. 0) return if (Vinv .gt. 
zero) then nz = G + 1 c ViLog = log(Vinv) else nz = G if (EQPRO) then c call dcopy( G, one/dble(G), 0, pro, 1) dummy(1) = one/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if piterm = dble(p)*pi2log/two p1 = p + 1 eps = max(eps,zero) rteps = sqrt(eps) tol = max(tol,zero) c FLMAX = d1mach(2) hold = FLMAX/two hood = FLMAX err = FLMAX iter = 0 100 continue iter = iter + 1 zsum = one do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do do j = 1, p call dcopy( p, pscale(1,j), 1, S(1,j), 1) end do sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do if (.not. EQPRO) pro(k) = sumz / dble(n) zsum = min(sumz,zsum) if (sumz .gt. rteps) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sumz+pshrnk temp = (sumz*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) do j = 1, p temp = pdof+sumz+dble(p)+two call dscal( j, one/sqrt(temp), S(1,j), 1) end do call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, z(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, z(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do if (zsum .le. rteps) then tol = zsum eps = -FLMAX maxi = iter return end if term = zero if (Vinv .gt. 
zero) then do i = 1, n term = term + z(i,nz) end do temp = term / dble(n) pro(nz) = temp c call dcopy( n, ViLog, 0, z(1,nz), 1) dummy(1) = log(Vinv) call dcopy( n, dummy, 0, z(1,nz), 1) if (EQPRO) then c temp = (one - pro(nz))/dble(G) c call dcopy( G, temp, 0, pro, 1) dummy(1) = (one - pro(nz))/dble(G) call dcopy( G, dummy, 0, pro, 1) end if end if rcmin = FLMAX do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,K) end do end do call absrng( p, S, p1, umin, umax) rcmin = min(umin/(one+umax),rcmin) end do if (rcmin .le. rteps) then tol = rcmin eps = FLMAX maxi = iter return end if rmu = zero rgam = zero do k = 1, G c temp = pro(k) do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do detlog = zero do j = 1, p detlog = detlog + log(abs(S(j,j))) end do rmu = rmu - detlog rgam = rgam - (pdof+dble(p)+one)*detlog const = piterm+detlog do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dtrsv( 'U', 'T', 'N', p, S, p, w, 1) sum = ddot( p, w, 1, w, 1)/two c z(i,k) = temp*exp(-(const+sum)) z(i,k) = -(const+sum) end do end do hood = zero do i = 1, n tmin = FLMAX tmax = -FLMAX do k = 1, nz prok = pro(k) if (prok .eq. zero) then z(i,k) = zero else temp = log(prok) + z(i,k) tmin = min(tmin,temp) tmax = max(tmax,temp) z(i,k) = temp end if end do sum = zero do k = 1, nz if (pro(k) .ne. zero) then temp = z(i,k) - tmax if (temp .ge. SMALOG) then z(i,k) = exp(temp) sum = sum + z(i,k) else z(i,k) = zero end if end if end do hood = hood + (log(sum)+tmax) call dscal( nz, (one/sum), z(i,1), n) end do err = abs(hold-hood)/(one+abs(hood)) hold = hood if (err .gt. tol .and. iter .lt. 
maxi) goto 100 c w(1) = rcmin tol = err eps = hood maxi = iter cmu = dble(p)*(log(pshrnk) - pi2log)/two sum = zero do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do call daxpy( p, (-one), mu(1,k), 1, pmu, 1) call dtrsv('U','T','N',p, S, p, pmu, 1) sum = sum + ddot( p, pmu, 1, pmu, 1) end do rmu = rmu - pshrnk*sum/two sum = zero term = zero temp = zero do j = 1, p call dcopy( p, pscale(j,1), p, pmu, 1) c call dtrsv('U','T','N', p, U, p, pmu, 1) i = p-j+1 c call dtrsv('U','T','N', i, U(j,j,k),i,pmu(j),1) call dtrsv('U','T','N', i, S(j,j), p, pmu(j), 1) sum = sum + ddot(i, pmu(j), 1, pmu(j), 1) temp = temp + log(abs(pscale(j,j))) term = term + dlngam((pdof+one-dble(j))/two) end do rgam = rgam - sum/two const = -dble(p)*(pdof*twolog+(dble(p)-one)*pilog/two) cgam = (const-pdof*temp)/two-term pdof = (dble(G)*cmu+rmu) + (dble(G)*cgam+rgam) return end subroutine msvvv ( x, z, n, p, G, w, mu, U, pro, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) integer i, j, k, l, j1 double precision sum, temp, cs, sn double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c----------------------------------------------------------------------------- dummy(1) = zero do k = 1, G c call dcopy( p, zero, 0, mu(1,k), 1) call dcopy( p, dummy, 0, mu(1,k), 1) do j = 1, p c call dcopy( j, zero, 0, U(1,j,k), 1) c call dcopy( j, zero, 0, S(1,j), 1) call dcopy( j, dummy, 0, S(1,j), 1) end do sum = zero do i = 1, n temp = z(i,k) sum = sum + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = 
sum / dble(n) if (sum .ge. one .or. one .lt. sum*FLMAX) then call dscal( p, (one/sum), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do temp = sqrt(sum) if (temp .ge. one .or. one .lt. temp*FLMAX) then do j = 1, p call dscal( j, one/temp, S(1,j), 1) end do else do j = 1, p c call dcopy( j, FLMAX, 0, S(1,j), 1) dummy(1) = FLMAX call dcopy( j, dummy, 0, S(1,j), 1) end do end if else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do return end subroutine msvvvp( x, z, n, p, G, * pshrnk, pmu, pscale, pdof, * w, mu, U, pro, S) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n, p, G c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof c double precision x(n,p), z(n,G), w(p) double precision x(n,*), z(n,*), w(*) c double precision mu(p,G), U(p,p,G), pro(G), S(p,p) double precision mu(p,*), U(p,p,*), pro(*), S(p,*) c------------------------------------------------------------------------------ c c x double (input) (n,p) matrix of observations. c z double (input/output) (n,G[+1]) conditional probabilities. c n integer (input) number of observations. c p integer (input) dimension of the data. c G integer (input) number of Gaussian clusters in the mixture. c mu double (output) (p,G) mean for each group. c U double (output) (p,p,G) c pro double (output) (G) mixing proportions (used even if equal). 
c w double (scratch) (max(p,G)) integer i, j, k, l, j1 double precision sumz, temp, cs, sn, const double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision dummy(1) c------------------------------------------------------------------------------ do k = 1, G do j = 1, p do l = 1, p S(l,j) = U(l,j,k) end do end do do j = 1, p call dcopy( p, pscale(1,j), 1, S(1,j), 1 ) end do sumz = zero c call dcopy( p, zero, 0, mu(1,k), 1) dummy(1) = zero call dcopy( p, dummy, 0, mu(1,k), 1) do i = 1, n temp = z(i,k) sumz = sumz + temp call daxpy( p, temp, x(i,1), n, mu(1,k), 1) end do pro(k) = sumz / dble(n) if (sumz .ge. one .or. one .lt. sumz*FLMAX) then call dscal( p, (one/sumz), mu(1,k), 1) do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) call dscal( p, sqrt(z(i,k)), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu(1,k), 1, w, 1) const = sumz+pshrnk temp = (sumz*pshrnk)/const call dscal( p, sqrt(temp), w, 1) j = 1 do j1 = 2, p call drotg( S(j,j), w(j), cs, sn) call drot( p-j, S(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( S(p,p), w(p), cs, sn) temp = pdof+sumz+dble(p)+one if (pshrnk .gt. 
zero) temp = temp + one do j = 1, p call dscal( j, one/sqrt(temp), S(1,j), 1) end do call dscal( p, sumz/const, mu(1,k), 1) call daxpy( p, pshrnk/const, pmu, 1, mu(1,k), 1) else c call dcopy( p, FLMAX, 0, mu(1,k), 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, mu(1,k), 1) end if do j = 1, p do l = 1, p U(l,j,k) = S(l,j) end do end do end do return end subroutine mvn1d ( x, n, mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n integer n double precision mu, sigsq, hood c double precision x(n) double precision x(*) c------------------------------------------------------------------------------ c c x double (input) (n) matrix of observations (destroyed). c n integer (input) number of observations. c mu double (output) mean. c sigsq double (output) variance. c hood double (output) loglikelihood. double precision dn double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot external ddot c------------------------------------------------------------------------------ dn = dble(n) mu = ddot( n, one/dn, 0, x, 1) sigsq = zero call daxpy( n, (-one), mu, 0, x, 1) sigsq = ddot( n, x, 1, x, 1)/dn if (sigsq .eq. 
zero) then hood = FLMAX else hood = -dn*(pi2log + (one + log(sigsq)))/two end if return end subroutine mvn1p ( x, n, pshrnk, pmu, pscale, pdof, * mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE integer n double precision pshrnk, pmu, pscale, pdof double precision mu, sigsq, hood c double precision x(n) double precision x(*) integer i double precision dn, const, term, temp, xbar double precision cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot, dlngam external ddot, dlngam c------------------------------------------------------------------------------ if (pshrnk .lt. zero) pshrnk = zero dn = dble(n) xbar = ddot( n, one/dn, 0, x, 1) const = pshrnk + dn mu = (dn/const)*xbar + (pshrnk/const)*pmu sigsq = zero do i = 1, n temp = xbar - x(i) sigsq = sigsq + temp*temp end do temp = xbar - pmu sigsq = sigsq + pscale + dn*(pshrnk/const)*(temp*temp) temp = pdof + dn + two if (pshrnk .gt. zero) temp = temp + one sigsq = sigsq / temp if (sigsq .eq. zero) then hood = FLMAX else call daxpy( n, (-one), mu, 0, x, 1) temp = ddot( n, x, 1, x, 1) if (sigsq .lt. one .and. temp .ge. sigsq*FLMAX) then hood = FLMAX return end if temp = temp/sigsq hood = -(dn*(pi2log + log(sigsq)) + temp)/two end if if (pshrnk .gt. 
zero) then cmu = (pi2log-log(pshrnk))/two term = pdof/two cgam = term*log(pscale/two) - dlngam(term) temp = pmu - mu const = log(sigsq) rmu = -(const - (pshrnk/sigsq)*(temp*temp))/two rgam = -(term+one)*const - (pscale/sigsq)/two pdof = (cmu+rmu) + (cgam+rgam) else pdof = FLMAX end if return end subroutine mnxiip( x, n, p, pshrnk, pmu, pscale, pdof, * mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision sigsq, hood c double precision x(n,p), mu(p) double precision x(n,*), mu(*) integer i, j double precision dnp, scl, temp, term, sum double precision dmudmu, pmupmu, cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot, dlngam external ddot, dlngam c------------------------------------------------------------------------------ dnp = dble(n*p) scl = one/dble(n) do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) end do sum = zero do i = 1, n do j = 1, p temp = x(i,j) - mu(j) sum = sum + temp*temp end do end do pmupmu = ddot(p,pmu,1,pmu,1) dmudmu = ddot(p,mu,1,mu,1) temp = dmudmu + pmupmu temp = temp - two*ddot(p,mu,1,pmu,1) term = pshrnk + dble(n) scl = (pshrnk*dble(n))/term sigsq = pscale + scl*temp + sum temp = pdof + dble(n*p) + two if (pshrnk .gt. zero) temp = temp + dble(p) sigsq = sigsq/temp call dscal( p, dble(n)/term, mu, 1) call daxpy( p, pshrnk/term, pmu, 1, mu, 1) if (sigsq .eq. 
zero) then hood = FLMAX else sum = zero do i = 1, n do j = 1, p temp = x(i,j) - mu(j) sum = sum + temp*temp end do end do hood = -(sum/sigsq + dnp*(pi2log + log(sigsq)))/two end if if (pshrnk .gt. zero) then dmudmu = ddot(p,mu,1,mu,1) cmu = dble(p)*(log(pshrnk)-pi2log)/two temp = (dmudmu+pmupmu) - two*ddot(p,pmu,1,mu,1) term = log(sigsq) rmu = -(dble(p)*term + (pshrnk*temp)/sigsq)/two temp = pdof/two cgam = temp*log(pscale/two) - dlngam(temp) rgam = -(temp+one)*term - pscale/(two*sigsq) pdof = (cmu+rmu) + (cgam+rgam) else pdof = FLMAX end if return end subroutine mvnxii( x, n, p, mu, sigsq, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision sigsq, hood c double precision x(n,p), mu(p) double precision x(n,*), mu(*) integer j double precision dnp, scl double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot external ddot c------------------------------------------------------------------------------ dnp = dble(n*p) scl = one/dble(n) do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) end do sigsq = zero do j = 1, p call daxpy( n, (-one), mu(j), 0, x(1,j), 1) sigsq = sigsq + ddot( n, x(1,j), 1, x(1,j), 1) end do sigsq = sigsq/dnp if (sigsq .eq. 
zero) then hood = FLMAX else hood = -dnp*(pi2log + (one + log(sigsq)))/two end if return end subroutine mnxxip( x, n, p, pshrnk, pmu, pscale, pdof, * mu, scale, shape, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p c double precision pshrnk, pmu(p), pscale, pdof double precision pshrnk, pmu(*), pscale, pdof double precision scale, hood c double precision x(n,p), mu(p), shape(p) double precision x(n,*), mu(*), shape(*) integer i, j double precision sum, temp, smin, smax double precision term, const, scl double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ temp = one/dble(n) do j = 1, p mu(j) = ddot( n, temp, 0, x(1,j), 1) shape(j) = zero end do do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j) sum = sum + temp*temp end do shape(j) = shape(j) + sum end do term = pshrnk + dble(n) scl = (pshrnk*dble(n))/term do j = 1, p temp = pmu(j) - mu(j) shape(j) = shape(j) + scl*(temp*temp) + pscale end do call dscal( p, dble(n)/term, mu, 1) call daxpy( p, pshrnk/term, pmu, 1, mu, 1) call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .ge. 
BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = FLMAX hood = FLMAX return end if if (temp .le. SMALOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if temp = exp(temp) term = pdof + dble(n) + two if (pshrnk .gt. zero) term = term + one scale = temp/term if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) hood = FLMAX return end if call dscal( p, one/temp, shape, 1) const = dble(p)*(pi2log+log(scale)) hood = zero do i = 1, n sum = zero do j = 1, p temp = x(i,j) - mu(j) sum = sum + (temp*temp)/shape(j) end do hood = hood - (const+(sum/scale))/two end do c log posterior computation not yet available pdof = FLMAX return end subroutine mvnxxi( x, n, p, mu, scale, shape, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision scale, hood c double precision x(n,p), mu(p), shape(p) double precision x(n,*), mu(*), shape(*) integer i, j double precision dn, scl, sum, temp, smin, smax double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision SMALOG, BIGLOG parameter (SMALOG = -708.d0, BIGLOG = 709.d0) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ dn = dble(n) scl = one/dn do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) shape(j) = zero end do do j = 1, p sum = zero do i = 1, n temp = x(i,j) - mu(j) sum = sum + temp*temp end do shape(j) = shape(j) + sum end do 
call sgnrng(p, shape, 1, smin, smax) if (smin .le. zero) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if sum = zero do j = 1, p sum = sum + log(shape(j)) end do temp = sum/dble(p) if (temp .gt. BIGLOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = FLMAX hood = FLMAX return end if if (temp .lt. SMALOG) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) scale = zero hood = FLMAX return end if temp = exp(temp) scale = temp/dn if (temp .lt. one .and. one .ge. temp*FLMAX) then c call dcopy( p, FLMAX, 0, shape, 1) dummy(1) = FLMAX call dcopy( p, dummy, 0, shape, 1) hood = FLMAX return end if call dscal( p, one/temp, shape, 1) hood = -dble(n*p)*(one + pi2log + log(scale))/two return end subroutine mnxxxp( x, n, p, w, * pshrnk, pmu, pscale, pdof, * mu, U, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p c double precision pshrnk, pmu(p), pscale(p,p), pdof double precision pshrnk, pmu(*), pscale(p,*), pdof double precision hood c double precision x(n,p), w(p), mu(p), U(p,p) double precision x(n,*), w(*), mu(*), U(p,*) integer i, j, j1 double precision dnp, scl, detlog, sum, term, temp double precision umin, umax, cs, sn, const double precision cmu, cgam, rmu, rgam double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision twolog parameter (twolog = 0.6931471805599453d0) double precision pilog parameter (pilog = 1.144729885849400d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot, dlngam external ddot, dlngam 
c------------------------------------------------------------------------------ dnp = dble(n*p) scl = one/dble(n) do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) call dcopy( p, pscale(1,j), 1, U(1,j), 1) end do c mu contains ybar; U contains Cholesky factor of inverse Wishart scale do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu, 1, w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) end do call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu, 1, w, 1) term = (pshrnk*dble(n))/(pshrnk+dble(n)) call dscal( p, sqrt(term), w, 1) j = 1 do j1 = 2, p call drotg( U(j,j), w(j), cs, sn) call drot( p-j, U(j,j1), p, w(j1), 1, cs, sn) j = j1 end do call drotg( U(p,p), w(p), cs, sn) scl = pdof + dble(n+p+1) if (pshrnk .gt. zero) scl = scl + one scl = one/sqrt(scl) do j = 1, p call dscal( j, scl, U(1,j), 1) end do term = pshrnk + dble(n) call dscal( p, dble(n)/term, mu, 1) call daxpy( p, pshrnk/term, pmu, 1, mu, 1) call absrng( p, U, p+1, umin, umax) c rcond = umin / (one + umax) if (umin .eq. zero) then hood = FLMAX return end if detlog = zero do j = 1, p detlog = detlog + log(abs(U(j,j))) end do const = dble(n)*(detlog + dble(p)*pi2log/two) sum = zero do i = 1, n call dcopy( p, x(i,1), n, w, 1) call daxpy( p, (-one), mu, 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) sum = sum + ddot(p, w, 1, w, 1) end do hood = -(const+sum/two) cmu = dble(p)*(log(pshrnk) - pi2log)/two call dcopy( p, pmu, 1, w, 1) call daxpy( p, (-one), mu, 1, w, 1) call dtrsv( 'U', 'T', 'N', p, U, p, w, 1) temp = ddot( p, w, 1, w, 1) sum = zero term = zero do j = 1, p term = term + dlngam((pdof+dble(1-j))/two) call dcopy( p, pscale(j,1), p, pmu, 1) c call dtrsv('U','T','N', p, U, p, pmu, 1) i = p-j+1 c call dtrsv('U','T','N', i, U(j,j),i,pmu(j),1) call dtrsv('U','T','N', i, U(j,j),p,pmu(j),1) sum = sum + ddot(i, pmu(j), 1, pmu(j), 1) end do if (pshrnk .gt. 
zero) then rmu = -(detlog+pshrnk*temp/two) const = -dble(p)*(pdof*twolog+(dble(p)-one)*pilog/two) cgam = (const/two-pdof*detlog) - term rgam = -((pdof+dble(p)+one)*detlog + sum/two) pdof = (cmu+cgam) + (rmu+rgam) else pdof = FLMAX end if return end subroutine mvnxxx( x, n, p, mu, U, hood) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt implicit NONE c integer n, p integer n, p double precision hood c double precision x(n,p), mu(p), U(p,p) double precision x(n,*), mu(*), U(p,*) integer i, j, j1 double precision dn, dnp, scl double precision umin, umax, cs, sn double precision zero, one, two parameter (zero = 0.d0, one = 1.d0, two = 2.d0) double precision pi2log parameter (pi2log = 1.837877066409345d0) double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision ddot external ddot double precision dummy(1) c------------------------------------------------------------------------------ dn = dble(n) dnp = dble(n*p) scl = one/dn do j = 1, p mu(j) = ddot( n, scl, 0, x(1,j), 1) c call dcopy( p, zero, 0, U(1,j), 1) dummy(1) = zero call dcopy( p, dummy, 0, U(1,j), 1) end do do i = 1, n call daxpy( p, (-one), mu, 1, x(i,1), n) j = 1 do j1 = 2, p call drotg( U(j,j), x(i,j), cs, sn) call drot( p-j, U(j,j1), p, x(i,j1), n, cs, sn) j = j1 end do call drotg( U(p,p), x(i,p), cs, sn) end do scl = sqrt(scl) do j = 1, p call dscal( j, scl, U(1,j), 1) end do call absrng( p, U, p+1, umin, umax) c rcond = umin / (one + umax) if (umin .eq. zero) then hood = FLMAX else hood = zero do j = 1, p hood = hood + log(abs(U(j,j))) end do hood = -dn*(hood + dble(p)*(pi2log + one)/two) end if c c do j = 1, p c do i = 1, j c x(i,j) = ddot(i,U(1,i),1,U(1,j),1) c if (i .ne. 
j) x(j,i) = x(i,j) c end do c end do c do j = 1, p c call dcopy( p, x(1,j), 1, U(1,j), 1) c end do return end c Luca: add to check if compile ok subroutine hceee ( x, n, p, ic, ng, ns, io, jo, v, s, u, r) c This function is part of the MCLUST software described at c http://www.stat.washington.edu/mclust c Copyright information and conditions for use of MCLUST are given at c http://www.stat.washington.edu/mclust/license.txt c Gaussian model-based clustering algorithm in clusters share a common c variance (shape, volume, and orientation are the same for all clusters). implicit NONE integer n, p, ic(n), ng, ns, io(*), jo(*) c double precision x(n,p), v(p), s(p,p), u(p,p), r(p,p) double precision x(n,*), v(*), s(*), u(*), r(*) c------------------------------------------------------------------------------ c c x double (input/output) On input, the (n by p) matrix containing c the observations. On output, the first two columns c and ns rows contain the determinant and trace of the c sum of the sample cross product matrices. Columns 3 and 4 c contain the merge indices if p .ge. 4 c n integer (input) number of observations c p integer (input) dimension of the data c ic integer (input) (n) Initial partitioning of the data; groups must c be numbered consecutively. c ng integer (input) Number of groups in initial partition. c ns integer (input) Desired number of stages of clustering. c io,jo integer (output [p .le. 3]) If p .lt. 3, both io and jo must be of c length ns and contain the indices of the merged pairs on c output. If p .eq. 3, jo must be of length ns and contains c an index of each merged on output pair. Otherwise io and c jo are not used and can be of length 1. c v double (scratch/output) (p) On output, algorithm breakpoints; c tells where the algorithm switches from using trace c to trace + det, and from trace + det to det as criterion. 
c s double (scratch/output) (p,p) On output the first column contains c the initial trace and determinant of the sum of sample c cross product matrices. c u,r double (scratch) (p,p) integer q, i, j, k, l, m, i1, i2, l1, l2 integer ni, nj, nij, lw, ls, lg, ici, icj integer nopt, iopt, jopt, idet, jdet, ndet double precision DELOG double precision ri, rj, rij, dij, tij, zij double precision trc0, trc1, trcw, det0, det1, detw double precision si, sj, siop, sjop, sidt, sjdt double precision dopt, zopt, dijo, tijo, tdet double precision zero, one parameter (zero = 0.d0, one = 1.d0) double precision ddot, detmc2 external ddot, detmc2 double precision FLMAX parameter (FLMAX = 1.7976931348623157d308) double precision EPSMIN parameter (EPSMIN = 1.1102230246251565d-16) double precision dummy(1) c------------------------------------------------------------------------------ i1 = 0 i2 = 0 trcw = 0.d0 tijo = 0.d0 tdet = 0.d0 sjdt = 0.d0 sidt = 0.d0 dijo = 0.d0 ndet = 0 jdet = 0 idet = 0 iopt = 0 nopt = 0 jopt = 0 lw = p*p c call intpr('ic', -1, ic, n) c form scaled column sums call dscal( n*p, one/sqrt(dble(n)), x, 1) si = one/sqrt(dble(p)) sj = si / dble(n) c call dcopy( p, zero, 0, v, 1) dummy(1) = zero call dcopy( p, dummy, 0, v, 1) do k = 1, n call daxpy( p, sj, x(k,1), n, v, 1) end do trc0 = zero c call dcopy( lw, zero, 0, r, 1) dummy(1) = zero call dcopy( lw, dummy, 0, r, 1) do k = 1, n call dcopy( p, v, 1, s, 1) call daxpy( p, (-si), x(k,1), n, s, 1) trc0 = trc0 + ddot( p, s, 1, s, 1) call mclrup( (k+1), p, s, r, p) end do det0 = detmc2( p, r) DELOG = log(trc0+EPSMIN) c group heads should be first among rows of x i = 1 j = 2 1 continue icj = ic(j) if (icj .ne. j) goto 2 if (j .eq. ng) goto 3 i = j j = j + 1 goto 1 2 continue k = i m = j + 1 do j = m, n icj = ic(j) if (icj .gt. 
k) then k = k + 1 call dswap( p, x(k,1), n, x(j,1), n) ic(j) = ic(k) ic(k) = icj end if end do 3 continue c call intpr( 'ic', -1, ic, n) c call dcopy( lw, zero, 0, r, 1) dummy(1) = zero call dcopy( lw, dummy, 0, r, 1) q = 1 do j = 1, n i = ic(j) if (i .ne. j) then c update trace and Cholesky factor as if a merge q = q + 2 ni = ic(i) ri = dble(ni) rij = dble(ni+1) sj = sqrt(one/rij) si = sqrt(ri)*sj call dcopy( p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) trcw = trcw + ddot(p, v, 1, v, 1) call mclrup( q, p, v, r, p) ic(j) = 0 ic(i) = ic(i) + 1 call dscal( p, si, x(i,1), n) call daxpy( p, sj, x(j,1), n, x(i,1), n) c call dcopy( p, FLMAX, 0, x(j,1), n) c update column sum in jth row else ic(j) = 1 end if end do c call intpr('ic', -1, ic, n) trc1 = trcw if (q .lt. p) then detw = -FLMAX else detw = detmc2( p, r) end if det1 = detw ls = 1 lg = ng l1 = 0 l2 = 0 100 continue if (q .ge. p) then c if (.false.) c * call intpr('PART 2 --------------------------', -1, ls, 0) if (detw .lt. DELOG) then goto 200 else goto 300 end if end if dopt = FLMAX do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) call dcopy( p, x(i,1), n, s, 1) call dscal( p, sj, s, 1) call daxpy( p, (-si), x(j,1), n, s, 1) tij = trcw + ddot(p, s, 1, s, 1) zij = max(tij,EPSMIN) if (zij .le. dopt) then dopt = zij nopt = nij siop = si sjop = sj iopt = i jopt = j call dcopy( p, s, 1, v, 1) end if end do end do trcw = dopt if (ls .eq. ns) goto 900 call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call dcopy( p, x(lg,1), n, x(jopt,1), n) ic(jopt) = ic(lg) end if ic(iopt) = nopt x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else if (p .eq. 
3) then x(lg,3) = dble(iopt) jo(ls) = jopt else io(ls) = iopt jo(ls) = jopt end if c update the Cholesky factor q = q + 1 call mclrup( q, p, v, r, p) ls = ls + 1 lg = lg - 1 goto 100 200 continue q = q + 1 c call intpr('ic', -1, ic, n) dopt = FLMAX zopt = FLMAX do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) call dcopy( p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) tij = trcw + ddot(p, v, 1, v, 1) call dcopy( lw, r, 1, u, 1) call mclrup( q, p, v, u, p) dij = detmc2( p, u) if (dij .le. dopt) then dopt = dij tdet = tij ndet = nij sidt = si sjdt = sj idet = i jdet = j end if if (tij .eq. zero) then zij = -FLMAX else zij = max(tij,EPSMIN) if (dij .eq. (-FLMAX)) then zij = log(zij) else if (dij .le. zero) then zij = log(exp(dij) + zij) else zij = log(one + zij*exp(-dij)) + dij end if end if if (zij .le. zopt) then zopt = zij dijo = dij tijo = tij nopt = nij siop = si sjop = sj iopt = i jopt = j call dcopy( lw, u, 1, s, 1) end if end do end do if (dopt .lt. DELOG) then if (l1 .eq. 0) l1 = ls trcw = tijo detw = dijo call dcopy( lw, s, 1, r, 1) else l2 = ls trcw = tdet detw = dopt siop = sidt sjop = sjdt nopt = ndet iopt = idet jopt = jdet call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, sjop, v, 1) call daxpy( p, (-siop), x(jopt,1), n, v, 1) call mclrup( q, p, v, r, p) end if if (ls .eq. ns) goto 900 call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call dcopy( p, x(lg,1), n, x(jopt,1), n) ic(jopt) = ic(lg) end if ic(iopt) = nopt x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else if (p .eq. 3) then x(lg,3) = dble(iopt) jo(ls) = jopt else io(ls) = iopt jo(ls) = jopt end if ls = ls + 1 lg = lg - 1 if (detw .ge. DELOG) then c if (.false.) 
c * call intpr('PART 3 --------------------------', -1, ls, 0) goto 300 end if goto 200 300 continue q = q + 1 detw = FLMAX do j = 2, lg nj = ic(j) rj = dble(nj) do i = 1, (j-1) ni = ic(i) ri = dble(ni) nij = ni + nj rij = dble(nij) si = sqrt(ri/rij) sj = sqrt(rj/rij) call dcopy( p, x(i,1), n, v, 1) call dscal( p, sj, v, 1) call daxpy( p, (-si), x(j,1), n, v, 1) call dcopy( lw, r, 1, u, 1) call mclrup( q, p, v, u, p) dij = detmc2( p, u) if (dij .le. detw) then detw = dij nopt = nij siop = si sjop = sj iopt = i jopt = j call dcopy( lw, u, 1, s, 1) end if end do end do c update the trace call dcopy( p, x(iopt,1), n, v, 1) call dscal( p, sjop, v, 1) call daxpy( p, (-siop), x(jopt,1), n, v, 1) trcw = trcw + ddot( p, v, 1, v, 1) if (ls .eq. ns) goto 900 call dcopy( lw, s, 1, r, 1) call dscal( p, siop, x(iopt,1), n) call daxpy( p, sjop, x(jopt,1), n, x(iopt,1), n) if (jopt .ne. lg) then call dcopy( p, x(lg,1), n, x(jopt,1), n) ic(jopt) = ic(lg) end if ic(iopt) = nopt x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else if (p .eq. 3) then x(lg,3) = dble(iopt) jo(ls) = jopt else io(ls) = iopt jo(ls) = jopt end if ls = ls + 1 lg = lg - 1 goto 300 900 continue x(lg,1) = detw x(lg,2) = trcw if (p .ge. 4) then if (iopt .lt. jopt) then x(lg,3) = dble(iopt) x(lg,4) = dble(jopt) else x(lg,3) = dble(jopt) x(lg,4) = dble(iopt) end if else if (p .eq. 3) then if (iopt .lt. jopt) then x(lg,3) = dble(iopt) jo(ls) = jopt else x(lg,3) = dble(jopt) jo(ls) = iopt end if else if (iopt .lt. jopt) then io(ls) = iopt jo(ls) = jopt else io(ls) = jopt jo(ls) = iopt end if end if c decode do k = 1, ng ic(k) = k end do m = ng + 1 if (p .ge. 4) then l = m do k = 1, ns l = l - 1 i = int(x(l,3)) ici = ic(i) j = int(x(l,4)) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(m - k) if (ici .lt. icj) then x(l,3) = dble(ici) x(l,4) = dble(icj) else x(l,3) = dble(icj) x(l,4) = dble(ici) end if end do else if (p .eq. 
3) then l = m do k = 1, ns l = l - 1 i = int(x(l,3)) ici = ic(i) j = jo(k) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(m - k) if (ici .lt. icj) then x(l,3) = dble(ici) jo(k) = icj else x(l,3) = dble(icj) jo(k) = ici end if end do else do k = 1, ns i = io(k) ici = ic(i) j = jo(k) icj = ic(j) if (ici .gt. icj) ic(i) = icj ic(j) = ic(m - k) if (ici .lt. icj) then io(k) = ici jo(k) = icj else io(k) = icj jo(k) = ici end if end do end if l = 2 m = min(p,4) do k = ng, lg, -1 if (k .le. l) goto 950 call dswap( m, x(k,1), n, x(l,1), n) l = l + 1 end do 950 continue x(1,1) = det1 x(1,2) = trc1 v(1) = dble(l1) v(2) = dble(l2) s(1) = det0 s(2) = trc0 return end mclust/vignettes/0000755000176200001440000000000014525075361013601 5ustar liggesusersmclust/vignettes/mclust.Rmd0000644000176200001440000001773414516406623015567 0ustar liggesusers--- title: "A quick tour of mclust" author: "Luca Scrucca" date: "`r format(Sys.time(), '%d %b %Y')`" output: rmarkdown::html_vignette: toc: true number_sections: false css: "vignette.css" vignette: > %\VignetteIndexEntry{A quick tour of mclust} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} library(knitr) opts_chunk$set(fig.align = "center", out.width = "90%", fig.width = 6, fig.height = 5, dev.args = list(pointsize=10), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & output code in chunks warning = FALSE) knit_hooks$set(par = function(before, options, envir) { if(before && options$fig.show != "none") par(family = "sans", mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5) }) set.seed(1) # for exact reproducibility ``` # Introduction **mclust** is a contributed R package for model-based clustering, classification, and density estimation based on finite normal mixture modelling. It provides functions for parameter estimation via the EM algorithm for normal mixture models with a variety of covariance structures, and functions for simulation from these models. 
Also included are functions that combine model-based hierarchical clustering, EM for mixture estimation and the Bayesian Information Criterion (BIC) in comprehensive strategies for clustering, density estimation and discriminant analysis. Additional functionalities are available for displaying and visualizing fitted models along with clustering, classification, and density estimation results. This document gives a quick tour of **mclust** (version `r packageVersion("mclust")`) functionalities. It was written in R Markdown, using the [knitr](https://cran.r-project.org/package=knitr) package for production. See `help(package="mclust")` for further details and references provided by `citation("mclust")`. ```{r, message = FALSE, echo=-2} library(mclust) cat(mclust:::mclustStartupMessage(), sep="") ``` # Clustering ```{r} data(diabetes) class <- diabetes$class table(class) X <- diabetes[,-1] head(X) clPairs(X, class) BIC <- mclustBIC(X) plot(BIC) summary(BIC) mod1 <- Mclust(X, x = BIC) summary(mod1, parameters = TRUE) plot(mod1, what = "classification") table(class, mod1$classification) plot(mod1, what = "uncertainty") ICL <- mclustICL(X) summary(ICL) plot(ICL) LRT <- mclustBootstrapLRT(X, modelName = "VVV") LRT ``` ## Initialisation EM algorithm is used by **mclust** for maximum likelihood estimation. Initialisation of EM is performed using the partitions obtained from agglomerative hierarchical clustering. For details see `help(mclustBIC)` or `help(Mclust)`, and `help(hc)`. 
```{r} (hc1 <- hc(X, modelName = "VVV", use = "SVD")) BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default summary(BIC1) (hc2 <- hc(X, modelName = "VVV", use = "VARS")) BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2)) summary(BIC2) (hc3 <- hc(X, modelName = "EEE", use = "SVD")) BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3)) summary(BIC3) ``` Update BIC by merging the best results: ```{r} BIC <- mclustBICupdate(BIC1, BIC2, BIC3) summary(BIC) plot(BIC) ``` Univariate fit using random starting points obtained by creating random agglomerations (see `help(hcRandomPairs)`) and merging best results: ```{r, echo=-1} set.seed(20181116) data(galaxies, package = "MASS") galaxies <- galaxies / 1000 BIC <- NULL for(j in 1:20) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } summary(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) ``` # Classification ## EDDA ```{r} data(iris) class <- iris$Species table(class) X <- iris[,1:4] head(X) mod2 <- MclustDA(X, class, modelType = "EDDA") summary(mod2) plot(mod2, what = "scatterplot") plot(mod2, what = "classification") ``` ## MclustDA ```{r} data(banknote) class <- banknote$Status table(class) X <- banknote[,-1] head(X) mod3 <- MclustDA(X, class) summary(mod3) plot(mod3, what = "scatterplot") plot(mod3, what = "classification") ``` ## Cross-validation error ```{r} cv <- cvMclustDA(mod2, nfold = 10) str(cv) unlist(cv[3:6]) cv <- cvMclustDA(mod3, nfold = 10) str(cv) unlist(cv[3:6]) ``` # Density estimation ## Univariate ```{r} data(acidity) mod4 <- densityMclust(acidity) summary(mod4) plot(mod4, what = "BIC") plot(mod4, what = "density", data = acidity, breaks = 15) plot(mod4, what = "diagnostic", type = "cdf") plot(mod4, what = "diagnostic", type = "qq") ``` ## Multivariate ```{r} data(faithful) mod5 <- densityMclust(faithful) summary(mod5) plot(mod5, what = "BIC") plot(mod5, what = "density", 
type = "hdr", data = faithful, points.cex = 0.5) plot(mod5, what = "density", type = "persp") ``` # Bootstrap inference ```{r} boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs") summary(boot1, what = "se") summary(boot1, what = "ci") ``` ```{r, echo=-1, fig.width=6, fig.height=7} par(mfrow=c(4,3)) plot(boot1, what = "pro") plot(boot1, what = "mean") ``` ```{r} boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs") summary(boot4, what = "se") summary(boot4, what = "ci") ``` ```{r, echo=-1} par(mfrow=c(2,2)) plot(boot4, what = "pro") plot(boot4, what = "mean") ``` # Dimension reduction ## Clustering ```{r} mod1dr <- MclustDR(mod1) summary(mod1dr) plot(mod1dr, what = "pairs") plot(mod1dr, what = "boundaries", ngrid = 200) mod1dr <- MclustDR(mod1, lambda = 1) summary(mod1dr) plot(mod1dr, what = "scatterplot") plot(mod1dr, what = "boundaries", ngrid = 200) ``` ## Classification ```{r} mod2dr <- MclustDR(mod2) summary(mod2dr) plot(mod2dr, what = "scatterplot") plot(mod2dr, what = "boundaries", ngrid = 200) mod3dr <- MclustDR(mod3) summary(mod3dr) plot(mod3dr, what = "scatterplot") plot(mod3dr, what = "boundaries", ngrid = 200) ```
# Using colorblind-friendly palettes Most of the graphs produced by **mclust** use colors that by default are defined in the following options: ```{r} mclust.options("bicPlotColors") mclust.options("classPlotColors") ``` The first option controls colors used for plotting BIC, ICL, etc. curves, whereas the second option is used to assign colors for indicating clusters or classes when plotting data. Starting with R version 4.0, the function \code{palette.colors()} can be used for retrieving colors from some pre-defined palettes. For instance ```{r, eval=FALSE} palette.colors(palette = "Okabe-Ito") ``` returns a color-blind-friendly palette for individuals suffering from protanopia or deuteranopia, the two most common forms of inherited color blindness. For earlier versions of R such palette can be defined as: ```{r} cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") ``` and then assigned to the **mclust** options as follows: ```{r} bicPlotColors <- mclust.options("bicPlotColors") bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5]) mclust.options("bicPlotColors" = bicPlotColors) mclust.options("classPlotColors" = cbPalette[-1]) clPairs(iris[,-5], iris$Species) mod <- Mclust(iris[,-5]) plot(mod, what = "BIC") plot(mod, what = "classification") ``` If needed, users can easily define their own palettes following the same procedure outlined above.

# References Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) *Model-Based Clustering, Classification, and Density Estimation Using mclust in R*. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, *The R Journal*, 8/1, pp. 205-233. https://journal.r-project.org/archive/2016/RJ-2016-021/RJ-2016-021.pdf Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, *Journal of the American Statistical Association*, 97/458, pp. 611-631. ---- ```{r} sessionInfo() ```mclust/vignettes/vignette.css0000644000176200001440000001152413766666463016162 0ustar liggesusers@charset "UTF-8"; body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", Helvetica, sans-serif; font-size: 14px; line-height: 1.35; } #header { text-align: center; } #TOC { clear: both; margin: 10px 10px 20px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 12px; line-height: 1.3; } #TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; } #TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; } #TOC ul ul { margin-left: -2em; } #TOC li { line-height: 16px; list-style: square outside; } table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-top: 1px solid #111; border-bottom: 1px solid #111; } th { border-bottom: 1px solid #111; } table th { border-width: 1px; padding: 5px; } table td { border-width: 1px; line-height: 16px; padding: 5px 5px; } table thead, table tr.even { background-color: #f7f7f7; } p { margin: 0.5em 0; } blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; } hr { border-style: solid; border: none; border-top: 1px solid 
#777; margin: 28px 0; } dl { margin-left: 0; } dl dd { margin-bottom: 13px; margin-left: 13px; } dl dt { font-weight: bold; } ul { margin-top: 0; } ul li { list-style: circle outside; } ul ul { margin-bottom: 0; } figure { margin: 0; text-align: center; } div.figure { text-align: center; } img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; /* border: 1px solid #CCCCCC; */ margin: 0 5px; } h1 { padding-top: 10px; padding-bottom: 10px; border-bottom: 3px solid #f7f7f7; margin-top: 0; font-size: 120%; line-height: 10px; color: rgb(33,33,33); } h1.title { font-size: 200%; line-height: 40px; } h2 { padding-top: 10px; padding-bottom: 5px; border-bottom: 3px solid #f7f7f7; margin-left: 4px; font-size: 110%; color: rgb(33,33,33); } h2.title { font-size: 110%; line-height: 10px; } h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; margin-left: 8px; font-size: 105%; color: rgb(33,33,33); } h4 { border-bottom: 1px solid #f7f7f7; margin-left: 12px; font-size: 100%; color: rgb(33,33,33); } h4.author { border-bottom: 0; color: rgb(77,77,77); } h4.date { border-bottom: 1px solid #f7f7f7; font-size: 100%; color: rgb(77,77,77); } h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; color: rgb(33,33,33); } address{ font-weight: bold; color: rgb(77,77,77); margin-left: 8px; font-size: 100%; } a { color: rgb(24,116,205); text-decoration: none; } a:hover { color: rgb(28,134,238); } a:visited { color: rgb(24,116,205); } a:visited:hover { color: rgb(28,134,238); } a[href^="http:"] { text-decoration: underline; } a[href^="https:"] { text-decoration: underline; } pre, code { background-color: #F8F8F8; border-radius: 3px; color: #333333; white-space: pre-wrap; /* Wrap long lines */ } pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; } pre:not([class]) { background-color: #F8F8F8; } code { font-family: Consolas, Monaco, monospace; color: rgb(0,0,0); font-size: 85%; } p > code, li > code { padding: 2px 0; } strong { 
font-weight: bold; } hi { font-weight: bold; color: rgb(28,134,238); } /* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css Colours from https://gist.github.com/robsimmons/1172277 */ code span.co { color: rgb(112,112,112); font-style: normal; } /* Comment */ code span.do { color: rgb(50,50,50); font-style: normal; } /* Documentation */ code span.an { font-style: italic; } /* Annotation */ code span.cf { font-weight: bold; } /* ControlFlow */ code span.cv { font-style: italic; } /* CommentVar */ code span.dt { color: #4075AD; } /* DataType */ code span.at { color: #4075AD; } /* Function args */ code span.dv { color: rgb(85,85,85); } /* DecVal (decimal values) */ code span.er { color: rgb(166,23,23); font-weight: bold; } /* Error */ code span.in { font-style: italic; } /* Information */ code span.kw { color: rgb(23,74,133); font-weight: bold; } /* Keyword */ code span.fu { color: rgb(23,74,133); font-weight: bold; } /* Function calls */ code span.al { color: rgb(255,255,255); font-weight: bold; } /* Alert */ code span.pp { font-weight: bold; } /* Preprocessor */ code span.cn { color: rgb(0,0,0); font-weight: normal; } /* Logical */ code span.st { color: rgb(85,85,85); font-style: italic; } /* String */ code span.ot { color: rgb(0,0,0); font-style: normal; } /* R code */ code span.wa { font-style: italic; } /* Warning */ mclust/R/0000755000176200001440000000000014476324343011774 5ustar liggesusersmclust/R/mclustssc.R0000644000176200001440000002565014515743156014150 0ustar liggesusers# Semi-Supervised Classification MclustSSC <- function(data, class, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), warn = mclust.options("warn"), verbose = interactive(), ...) 
{ call <- match.call() data <- data.matrix(data) n <- nrow(data) d <- ncol(data) oneD <- if(d==1) TRUE else FALSE # class <- factor(class, exclude = NA) nclass <- nlevels(class) # if(is.null(G)) G <- nclass if(any(G < nclass)) stop("G cannot be smaller than the number of classes") G <- G[G >= nclass][1] # if(is.null(modelNames)) { modelNames <- if(oneD) c("E", "V") else mclust.options("emModelNames") } # if(n <= d) { m <- match(c("EEE","EEV","VEV","VVV"), mclust.options("emModelNames"), nomatch=0) modelNames <- modelNames[-m] } nModelNames <- length(modelNames) if(verbose) { cat("fitting ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = nModelNames, style = 3) on.exit(close(pbar)) ipbar <- 0 } args <- list(data = data, class = class, G = G, ...) Model <- NULL BIC <- rep(as.double(NA), length(modelNames)) for(m in seq(nModelNames)) { mod <- try(do.call("MclustSSC.fit", c(args, list(modelName = modelNames[m]))), silent = TRUE) if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } if(inherits(mod, "try-error")) next() BIC[m] <- mod$bic if(!is.na(BIC[m]) && BIC[m] >= max(BIC, na.rm = TRUE)) Model <- mod } if(all(is.na(BIC))) { warning("No model(s) can be estimated!!") return() } BIC <- matrix(BIC, nrow = 1, dimnames = list(G, modelNames)) out <- c(list(call = call, data = data, class = class, BIC = BIC, control = control), Model) orderedNames <- c("call", "data", "class", "modelName", "G", "n", "d", "BIC", "loglik", "df", "bic", "parameters", "z", "classification", "prior", "control") out <- structure(out[orderedNames], class = "MclustSSC") return(out) } print.MclustSSC <- function(x, ...) { cat("\'", class(x)[1], "\' model object:\n", sep = "") cat("\n") catwrap("\nAvailable components:\n") print(names(x)) # str(x, max.level = 2, give.attr = FALSE, strict.width = "wrap") invisible(x) } summary.MclustSSC <- function(object, parameters = FALSE, ...) 
{ # collect info nclass <- nlevels(object$class) classes <- levels(object$class) G <- object$G printParameters <- parameters class <- object$class classif <- object$classification classifNames <- levels(object$classification) err <- classError(class[!is.na(class)], classif[!is.na(class)])$errorRate # n <- c(table(class, useNA = "always")) n <- tabulate(class, nbins = G) names(n) <- classifNames if(any(is.na(class))) n <- c(n, "" = sum(is.na(class))) tab <- table("Class" = class, "Predicted" = classif, useNA = "ifany") noise <- FALSE # todo: # noise <- if(is.na(object$hypvol)) FALSE else object$hypvol pro <- object$parameters$pro if(is.null(pro)) pro <- 1 names(pro) <- if(noise) c(classifNames,0) else classifNames mean <- object$parameters$mean colnames(mean) <- names(pro) if(object$d > 1) { sigma <- object$parameters$variance$sigma dimnames(sigma)[[3]] <- names(pro) } else { sigma <- rep(object$parameters$variance$sigmasq, object$G)[1:object$G] names(sigma) <- names(mean) } obj <- list(n = n, d = object$d, loglik = object$loglik, df = object$df, bic = object$bic, nclass = nclass, classes = classes, G = object$G, modelName = object$modelName, pro = pro, mean = mean, variance = sigma, noise = noise, prior = object$prior, tab = tab, err = err, printParameters = printParameters) class(obj) <- "summary.MclustSSC" return(obj) } print.summary.MclustSSC <- function(x, digits = getOption("digits"), ...) 
{ title <- paste("Gaussian finite mixture model for semi-supervised classification") txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "") catwrap(txt) catwrap(title) catwrap(txt) cat("\n") tab <- data.frame("log-likelihood" = x$loglik, "n" = sum(x$n), "df" = x$df, "BIC" = x$bic, row.names = "", check.names = FALSE) print(tab, digits = digits) tab <- data.frame("n" = x$n, "%" = round(x$n/sum(x$n)*100,2), "Model" = c(rep(x$modelName, x$G), ""), "G" = c(rep(1, x$G), ""), check.names = FALSE, row.names = ifelse(is.na(names(x$n)), "", names(x$n))) tab <- as.matrix(tab) names(dimnames(tab)) <- c("Classes", "") print(tab, quote = FALSE, right = TRUE) if(!is.null(x$prior)) { cat("\nPrior: ") cat(x$prior$functionName, "(", paste(names(x$prior[-1]), x$prior[-1], sep = " = ", collapse = ", "), ")", sep = "") cat("\n") } if(x$printParameters) { cat("\nMixing probabilities:\n") print(x$pro, digits = digits) cat("\nMeans:\n") print(x$mean, digits = digits) cat("\nVariances:\n") if(x$d > 1) { for(g in 1:x$G) { cat(names(x$pro)[g], "\n") print(x$variance[,,g], digits = digits) } } else print(x$variance, digits = digits) if(x$noise) { cat("\nHypervolume of noise component:\n") cat(signif(x$noise, digits = digits), "\n") } } cat("\nClassification summary:\n") print(x$tab) invisible(x) } MclustSSC.fit <- function(data, class, G = NULL, modelName = NULL, prior = NULL, control = emControl(), warn = NULL, .verbose = FALSE, ...) 
{ data <- data.matrix(data) n <- nrow(data) p <- ncol(data) class <- factor(class, exclude = NA) nclass <- nlevels(class) known.class <- which(!is.na(class)) unknown.class <- which(is.na(class)) if(is.null(G)) G <- nclass if(is.null(modelName)) stop("modelName must be specified!") #browser() # initialization of z matrix by filling with 0/1 for observations # with known labels z <- matrix(0.0, nrow = n, ncol = G) for(k in 1:nclass) z[class == levels(class)[k], k] <- 1 # store the z which should not be updated z0 <- z[known.class,,drop=FALSE] # initialization of unlabeled data... if(G > nclass) { # via k-means if unobserved classes km <- kmeans(data[unknown.class,,drop=FALSE], centers = G, nstart = 25, iter.max = 100) # z[unknown.class,] <- unmap(km$cluster) z[unknown.class,] <- rep(1,length(unknown.class)) %o% km$size/sum(km$size) } else { # by equal proportion otherwise z[unknown.class,] <- 1/G } loglik0 <- -Inf criterion <- TRUE iter <- 0 if(.verbose) cat("\nmodelName =", modelName, "\n") # while(criterion) { iter <- iter + 1 fit.m <- do.call("mstep", list(data = data, z = z, modelName = modelName, prior = prior, control = control, warn = warn)) fit.e <- do.call("estep", c(list(data = data, control = control, warn = warn), fit.m)) z <- fit.e$z z[known.class,] <- z0 ldens <- do.call("dens", c(list(data = data[-known.class,,drop=FALSE], logarithm = TRUE), fit.m)) lcdens <- do.call("cdens", c(list(data = data[known.class,,drop=FALSE], logarithm = TRUE), fit.m)) lcdens <- sweep(lcdens, MARGIN = 2, FUN = "+", STATS = log(fit.m$parameters$pro)) loglik <- sum(ldens) + sum(lcdens * z0) criterion <- ( iter < control$itmax[1] & (loglik - loglik0) > control$tol[1] ) # print(loglik - loglik0) loglik0 <- loglik if(.verbose) cat("iter =", iter, " loglik =", loglik0, "\n") } fit <- fit.m fit$loglik <- loglik fitclass <- map(fit$z, warn = FALSE) # assign labels of known classes fitclass <- factor(fitclass) labels <- levels(class) if(G > nclass) labels <- c(labels, paste0("class", 
seq(nclass+1,G))) levels(fitclass) <- labels fit$classification <- fitclass fit$df <- (G-1) + p*nclass + nVarParams(fit$modelName, d = p, G = nclass) fit$bic <- 2*fit$loglik - fit$df*log(n) # return(fit) } plot.MclustSSC <- function(x, what = c("BIC", "classification", "uncertainty"), ...) { object <- x # Argh. Really want to use object anyway if(!inherits(object, "MclustSSC")) stop("object not of class 'MclustSSC'") class(object) <- c(class(object), "Mclust") what <- match.arg(what, several.ok = TRUE) oldpar <- par(no.readonly = TRUE) plot.MclustSSC.bic <- function(...) { dotchart(rev(object$BIC[1,]), pch = 19, xlab = paste("BIC for G =", object$G), ...) } if(interactive() & length(what) > 1) { title <- "Model-based semi-supervised classification plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "BIC") plot.MclustSSC.bic(...) if(what[choice] == "classification") plot.Mclust(object, what = "classification", ...) if(what[choice] == "uncertainty") plot.Mclust(object, what = "uncertainty", ...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "BIC")) plot.MclustSSC.bic(...) if(any(what == "classification")) plot.Mclust(object, what = "classification", ...) if(any(what == "uncertainty")) plot.Mclust(object, what = "uncertainty", ...) } invisible() } predict.MclustSSC <- function(object, newdata, ...) 
{ if(!inherits(object, "MclustSSC")) stop("object not of class 'MclustSSC'") if(missing(newdata)) { newdata <- object$data } newdata <- as.matrix(newdata) if(ncol(object$data) != ncol(newdata)) { stop("newdata must match ncol of object data") } # object$data <- newdata z <- do.call("cdens", c(object, list(logarithm = TRUE))) pro <- object$parameters$pro logpro <- log(pro) - log(sum(pro)) noise <- FALSE # (!is.na(object$hypvol)) z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes z <- sweep(z, MARGIN = 2, FUN = "+", STATS = logpro) z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp)) z <- exp(z) cl <- c(levels(object$classification), if(noise) 0) colnames(z) <- cl cl <- factor(cl[apply(z, 1, which.max)], levels = cl) out <- list(classification = cl, z = z) return(out) } mclust/R/gmmhd.R0000644000176200001440000004444014404653616013220 0ustar liggesusers###################################################### ## ## ## Identifying Connected Components in Gaussian ## ## Finite Mixture Models for Clustering ## ## ## ## Author: Luca Scrucca ## ###################################################### gmmhd <- function(object, ngrid = min(round((log(nrow(data)))*10), nrow(data)), dr = list(d = 3, lambda = 1, cumEvalues = NULL, mindir = 2), classify = list(G = 1:5, modelNames = mclust.options("emModelNames")[-c(8,10)]), ...) { if(!inherits(object, "Mclust")) stop("first argument must be an object of class 'Mclust'") if(!requireNamespace("geometry", quietly = TRUE)) stop("Package 'geometry' is required. 
Please install it.") data <- object$data n <- nrow(data) if(ngrid > n) { warning("ngrid too large, set equal to n") n.grid <- n } mNames <- attr(object$BIC, "modelNames") if(is.null(dr$d)) dr$d <- 2 if(is.null(dr$lambda)) dr$lambda <- 1 if(is.null(classify$G)) classify$G <- 1:5 if(is.null(classify$modelNames)) classify$modelNames <- mNames classify$modelNames <- intersect(classify$modelNames, mNames) if(is.null(dr$mindir)) dr$mindir <- 2 if(ncol(data) >= dr$d) { # compute GMMDR directions DR <- MclustDR(object, lambda = dr$lambda) # subset selection of GMMDR directions evalues <- DR$evalues[seq(DR$numdir)] if(is.null(dr$cumEvalues)) { # if dr$cumEvalues not provided # perform suset selection of GMMDR directions DR <- MclustDRsubsel(DR, G = attr(object$BIC, "G"), modelNames = mNames, mindir = dr$mindir, verbose = FALSE) dims <- seq(DR$numdir) } else { # select the smallest subset with cumsum eigenvalues > dr$cumEvalues dims <- min(which(cumsum(evalues/sum(evalues)) > dr$cumEvalues)) dims <- seq(min(dr$mindir, dims)) } # estimate the density from Mclust model on the selected directions x <- DR$dir[,dims,drop=FALSE] colnames(x) <- paste("GMMDR dir", 1:ncol(x), sep = "") mc <- object$call mc$data <- x mc$modelNames <- mNames mc$verbose <- FALSE obj <- eval(mc, parent.frame()) DR$parameters <- obj$parameters fdens <- dens(data = x, modelName = obj$modelName, parameters = obj$parameters) } else { x <- data DR <- NULL fdens <- dens(data = x, modelName = object$modelName, parameters = object$parameters) } p <- ncol(x) xscaled <- scale(x, colMeans(x), apply(x,2,sd)) # if to add vertices of convex envelope # xrange <- apply(x, 2, range) # xbound <- do.call("expand.grid", matrix2list(xrange)) # x <- rbind(as.matrix(x), as.matrix(xbound*1.1)) # fdens <- c(fdens, rep(0,nrow(xbound))) # uniform grid of proportions for which quantiles are calculated pn <- seq(0, 1, length = ngrid) qn <- as.numeric(quantile(fdens[1:n], 1-pn)) nc <- pc <- rep(0, length(qn)) con <- vector("list", 
length = length(qn)) # Delaunay triangulation matrix of dim (m x p+1), where each row provides a # set of indices to the points describing a simplex of dimension p mode(xscaled) <- "double" # delaunayn requires a real matrix DT <- suppressMessages(geometry::delaunayn(xscaled, options="QJ")) # plot(x); for(l in 1:nrow(DT)) polygon(x[DT[l,],], border = grey(.8)) on.exit(unlink("qhull_out.txt")) # Graph of neighborhood for each point NB <- vector(mode = "list", length = n) for(i in seq(n)) { NB[[i]] <- sort(unique(as.vector(DT[rowSums(DT==i)>0,]))) } for(i in seq(length(qn))) { c <- qn[i] Sc <- which(fdens[1:n] > c); names(Sc) <- NULL if(length(Sc) < 1) next() pc[i] <- length(Sc)/n # select neighborhoods of edges with density > c level nb <- NB[Sc] # select within neighborhoods those edges whose density > c level nb <- lapply(nb, function(nb) sort(intersect(nb, Sc))) nb <- nb[!duplicated(nb)] # table(sapply(nb,length)) # remove neighborhoods which do not share any facet, i.e. having # less than p edges/obs # nb <- nb[sapply(nb, length) >= p] # remove neighborhoods which are not simplices of dim (p+1) nb <- nb[sapply(nb, length) > p] # get connected components ConComp <- ConnectComp(nb) # sapply(ConComp,length); ConComp if(length(ConComp) < 1) next() nc[i] <- length(ConComp) con[[i]] <- ConComp # lapply(ConComp, sort) } # obj <- list(Mclust = object, MclustDA = NULL, MclustDR = DR, x = x, # i.e. 
the input data or GMMDR directions density = fdens[1:n], con = con, nc = structure(nc, names = format(pn, digit = 3)), pc = pc, pn = pn, qn = structure(qn, names = format(pn, digit = 3)), clusterCores = NULL, cluster = NULL, numClusters = NULL) class(obj) <- "gmmhd" # cluster cores obj$clusterCores <- gmmhdClusterCores(obj) # semi-supervised classification modClass <- gmmhdClassify(obj, G = classify$G, modelNames = classify$modelNames, verbose = FALSE) obj$MclustDA <- modClass$model obj$cluster <- modClass$cluster obj$numClusters <- length(tabulate(obj$cluster)) return(obj) } print.gmmhd <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' model object:\n", sep = "") cat(paste0(" Mclust initial model = (", x$Mclust$modelName, ",", x$Mclust$G, ")\n")) if(!is.null(x$MclustDR)) cat(paste0(" MclustDR projection = (", x$MclustDR$modelName, ",", x$MclustDR$G, ")\n")) cat(paste0(" GMMHD final number of clusters = ", x$numClusters, "\n")) invisible() } summary.gmmhd <- function(object, ...) { title <- paste("GMM with high-density connected components for clustering") out <- with(object, list(title = title, "Mclust" = list("G" = Mclust$G, "modelName" = Mclust$modelName), "MclustDR" = list("G" = MclustDR$G, "modelName" = MclustDR$modelName), "clusterCores" = table(clusterCores, useNA = "ifany", dnn = NULL), "cluster" = table(cluster, useNA = "ifany", dnn = NULL))) if(is.null(object$MclustDR)) out$MclustDR <- NULL class(out) <- "summary.gmmhd" return(out) } print.summary.gmmhd <- function(x, digits = getOption("digits"), ...) 
{ cat(rep("-", nchar(x$title)),"\n",sep="") cat(x$title, "\n") cat(rep("-", nchar(x$title)),"\n",sep="") # cat("\nInitial model: Mclust (", x$Mclust$modelName, ",", x$Mclust$G, ")", "\n", sep = "") # if(!is.null(x$MclustDR)) cat("\nModel on projection subspace: (", x$MclustDR$modelName, ",", x$MclustDR$G, ")", "\n", sep = "") # cat("\nCluster cores:\n") print(x$clusterCores) # cat("\nFinal clustering:\n") print(x$cluster) # invisible() } plot.gmmhd <- function(x, what = c("mode", "cores", "clusters"), ...) { object <- x what <- match.arg(what, choices = eval(formals(plot.gmmhd)$what), several.ok = TRUE) if(interactive() & length(what) > 1) { title <- "GMM high-density connected components:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "mode") plot.gmmhd.mode(object, ...) if(what[choice] == "cores") plot.gmmhd.cores(object, ...) if(what[choice] == "clusters") plot.gmmhd.clusters(object, ...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "mode")) plot.gmmhd.mode(object, ...) if(any(what == "cores")) plot.gmmhd.cores(object, ...) if(any(what == "clusters")) plot.gmmhd.clusters(object, ...) } invisible() } plot.gmmhd.mode <- function(x, ...) { object <- x # Argh. Really want to use object anyway plot(c(object$pc,1), c(object$nc,0), type = "S", xlab = "Proportion of observed data", ylab = "Mode function", yaxt = "n") axis(side = 2, at = seq(0, max(object$nc, na.rm = TRUE))) } plot.gmmhd.cores <- function(x, col = c("grey50", mclust.options("classPlotColors")), pch = c(1, mclust.options("classPlotSymbols")), ...) { object <- x # Argh. 
Really want to use object anyway x <- object$x p <- ncol(x) n <- nrow(x) clCores <- object$clusterCores numClusters <- object$numClusters colCores <- col[1] col <- col[-1] col <- col[clCores] col[is.na(col)] <- colCores pch <- unique(pch) pchCores <- pch[1] pch <- pch[-1] pch <- pch[clCores] pch[is.na(pch)] <- pchCores cex <- rep(par("cex"), length(pch)) cex[is.na(clCores)] <- par("cex")/2 if(p == 1) { plot(x, object$density, col = col, pch = pch, cex = cex, ylim = range(0,object$density), xlab = colnames(x)[1], ylab = "Density", ...) } else if(p == 2) { plot(x[,1:2,drop=FALSE], col = col, pch = pch, cex = cex, ...) } else if(p > 2) { pairs(x, col = col, pch = pch, cex = cex, gap = 0, ...) } invisible() } plot.gmmhd.clusters <- function(x, col = mclust.options("classPlotColors"), pch = mclust.options("classPlotSymbols"), ...) { object <- x # Argh. Really want to use object anyway x <- object$x p <- ncol(x) n <- nrow(x) cluster <- object$cluster numClusters <- object$numClusters col <- col[cluster] pch <- setdiff(pch,22)[cluster] if(p == 1) { plot(x, object$density, col = col, pch = pch, ylim = range(0,object$density), xlab = colnames(x)[1], ylab = "Density", ...) } else if(p == 2) { plot(x[,1:2,drop=FALSE], col = col, pch = pch, ...) } else if(p > 2) { pairs(x, col = col, pch = pch, cex = 0.8, gap = 0, ...) } invisible() } gmmhdClusterCores <- function(object, tails = FALSE, ...) { # Identify cluster cores as the first subset of connected components # corresponding to the largest local mode n <- nrow(object$x) nc <- object$nc pc <- object$pc conComp <- object$con # select the subset with largest number of modes ... i <- which(diff(c(nc,0)) < 0) # i <- i[which(nc[i] == max(nc[i]))] # no to consider only the highest mode # remove spurius local modes, i.e. 
those not identified by at least # two consecutive density level # LS:20150107 okmode <- which(nc[i] == nc[i-1])[1] # LS:20150107 i <- if(length(okmode) > 0) i[okmode] else length(nc) # plot(pc, nc); abline(v = pc[i]) # ... and consider multiplicity of modes # LS: 20150107 i <- which(nc == max(nc[i])) # cc <- conComp[i] clusterCores <- matrix(as.double(NA), n, length(i)) for(j in 1:ncol(clusterCores)) for(cl in 1:length(cc[[j]])) { clusterCores[cc[[j]][[cl]],j] <- cl } while(ncol(clusterCores) > 1) { ncl <- length(unique(na.omit(clusterCores[,2]))) tmp <- rep(NA, n) for(cl in 1:ncl) { l <- which(clusterCores[,2] == cl) if(all(is.na(clusterCores[l,1]))) { tmp[l] <- paste(clusterCores[l,2],"*",sep="") } else { if(length(unique(na.omit(clusterCores[l,1]))) > 1) tmp[l] <- clusterCores[l,1] else tmp[l] <- paste(clusterCores[l,2],"*",sep="") } } clusterCores[,2] <- unclass(as.factor(tmp)) clusterCores <- clusterCores[,-1,drop=FALSE] } clusterCores <- as.vector(clusterCores) return(clusterCores) # select the last subset with largest number of modes # i <- max(which(nc == max(nc))) # select the first subset with largest number of modes i <- which(diff(c(nc,0)) < 0) i <- i[which(nc[i] == max(nc[i]))[1]] # select the largest subset with the largest number of modes # i <- i[max(which(nc[i] == max(nc[i])))] conComp <- object$con[[i]] clusterCores <- rep(NA, n) for(cl in 1:length(conComp)) { clusterCores[conComp[[cl]]] <- cl } return(clusterCores) } gmmhdClassify <- function(object, G = 1:5, modelNames = mclust.options("emModelNames"), verbose = TRUE, ...) 
{ if(!inherits(object, "gmmhd")) stop("object is not of class 'gmmhd'") x <- object$x n <- nrow(x) p <- ncol(x) if(p == 1) modelNames <- unique(substr(modelNames, 1, 1)) clusterCores <- object$clusterCores numClusters <- length(tabulate(clusterCores)) con <- object$con # classify unclustered obs based on training cluster cores isCore <- (!is.na(clusterCores)) logRatio <- function(p) { p <- pmax(pmin(p, 1-sqrt(.Machine$double.eps)),sqrt(.Machine$double.eps)) log(p)-log(1-p) } # select num. components G to guarantee at least minSize obs per class numCompClass <- function(class, G, minSize = 10) { classSize <- tabulate(class) Gin <- as.vector(G) maxG <- classSize %/% minSize maxG <- pmin(maxG, max(G)) G <- vector(length = length(maxG), mode = "list") for(k in 1:length(G)) { G[[k]] <- intersect(Gin, seq(maxG[k])) } return(G) } inc <- isCore cluster <- clusterCores while(sum(inc) < n) { mod <- MclustDA(data = x[inc,,drop=FALSE], class = as.character(cluster[inc]), G = numCompClass(cluster[inc], G), modelNames = modelNames, verbose = verbose) unallocated <- which(!inc) # remove those obs with density ~ 0 dens <- density.MclustDA(mod, newdata=x[unallocated,,drop=FALSE]) dens <- pmax(dens, .Machine$double.eps) i <- (dens/max(dens) > sqrt(.Machine$double.eps)) if(sum(i) > 0) unallocated <- unallocated[i] # pred <- predict(mod, newdata = x[unallocated,,drop=FALSE]) # questa versione puo' non allocare obs ai clusterCores piccoli # zmax <- apply(pred$z,1,max) # zclass <- apply(pred$z,1,which.max) # log.ratio <- logRatio(zmax) # alloc <- (log.ratio >= quantile(log.ratio, prob = sum(inc)/n)) # questa versione cerca di ctr per dim clusters e alloca alla classe # predicted iff logRatio is larger than sqrt(sum(inc)/n) quantile z <- pred$z zclass <- apply(z,1,which.max) alloc <- matrix(NA, nrow(z), ncol(z)) for(k in seq(ncol(z))) { log.ratio <- logRatio(z[,k]) alloc[,k] <- (log.ratio >= quantile(log.ratio, prob = sqrt(sum(inc)/n))) & (zclass == k) } alloc <- apply(alloc, 1, any) 
toclass <- unallocated[alloc] cluster[toclass] <- zclass[alloc] inc <- (!is.na(cluster)) } mod <- MclustDA(data = x, class = cluster, G = numCompClass(cluster[inc], G), modelNames = modelNames, verbose = verbose) cluster <- predict(mod, x)$classification out <- list(model = mod, clusterCores = clusterCores, cluster = cluster) return(out) } density.MclustDA <- function(x, newdata, prior, logarithm = FALSE, ...) { # Compute the density based on a MclustDA model # (later it may be included in the 'mclust' package) # or it can be obtained from predict.MclustDA object <- x # Argh. Really want to use object anyway if(!inherits(object, "MclustDA")) stop("object not of class 'MclustDA'") models <- object$models nclass <- length(models) n <- sapply(1:nclass, function(i) models[[i]]$n) if(missing(newdata)) { newdata <- object$data } if(object$d == 1) newdata <- as.vector(newdata) if(missing(prior)) { prior <- n/sum(n) } else { if(length(prior) != nclass) stop("wrong number of prior probabilities") if(any(prior < 0)) stop("prior must be nonnegative") } # compute on log scale for stability densfun <- function(mod, data) { do.call("dens", c(list(data = data, logarithm = TRUE), mod)) } # cden <- as.matrix(data.frame(lapply(models, densfun, data = newdata))) cden <- sweep(cden, 2, FUN = "+", STATS = log(prior)) maxlog <- apply(cden, 1, max) cden <- sweep(cden, 1, FUN = "-", STATS = maxlog) den <- log(apply(exp(cden), 1, sum)) + maxlog if(!logarithm) den <- exp(den) return(den) } # old version ConnectComp_old <- function(nb) { # Get connected components # Example: # nb <- list(c(1,2,3), c(2,3,4), c(9,10,11), c(9,11,12), c(1,6,5)) # if(length(nb) < 1 | !is.list(nb)) return(NULL) nb <- lapply(nb, function(x) as.integer(x)) n <- length(nb) u <- sort(unique(unlist(nb))) nu <- length(u) cnb <- cnb.old <- nb stable <- FALSE # merge the neighbors until the configuration is stable while(!stable) { i <- 0 while(i < length(cnb)) { i <- i + 1 j <- which(sapply(cnb, function(nbb) 
any(intersect(cnb[[i]], nbb)))) cnb[[i]] <- sort(unique(unlist(cnb[j]))) cnb[setdiff(j, i)] <- NULL } if(identical(cnb, cnb.old)) stable <- TRUE cnb.old <- cnb } return(cnb) } ConnectComp <- function(nb) { # Get connected components # Example: # nb <- list(c(1,2,3), c(2,3,4), c(9,10,11), c(9,11,12), c(1,6,5)) # ConnectComp(nb) if(length(nb) < 1 | !is.list(nb)) return(NULL) nb <- lapply(nb, function(x) as.integer(x)) n <- length(nb) u <- sort(unique(unlist(nb))) nu <- length(u) cnb <- cnb.old <- nb stable <- FALSE # merge the neighbors until the configuration is stable while(!stable) { i <- 0 while(i < length(cnb)) { i <- i + 1 j <- which(sapply(cnb, function(nbb) any(is.element(cnb[[i]], nbb)))) cnb[[i]] <- sort(unique(unlist(cnb[j]))) cnb[setdiff(j, i)] <- NULL } if(identical(cnb, cnb.old)) stable <- TRUE cnb.old <- cnb } return(cnb) } mclust/R/densityMclust.R0000644000176200001440000004045714241626404014771 0ustar liggesusersdensityMclust <- function(data, ..., plot = TRUE) { mc <- match.call() obj <- Mclust(data, ...) if(is.null(obj)) return(obj) obj$call <- mc obj$density <- dens(data = obj$data, modelName = obj$modelName, parameters = obj$parameters, logarithm = FALSE) class(obj) <- c("densityMclust", "Mclust") if(plot) plot(obj, what = "density") return(obj) } predict.densityMclust <- function(object, newdata, what = c("dens", "cdens", "z"), logarithm = FALSE, ...) 
{ if(!inherits(object, "densityMclust")) stop("object not of class 'densityMclust'") if(missing(newdata)) { newdata <- object$data } newdata <- as.matrix(newdata) if(ncol(object$data) != ncol(newdata)) { stop("newdata must match ncol of object data") } what <- match.arg(what, choices = eval(formals(predict.densityMclust)$what)) pro <- object$parameters$pro; pro <- pro/sum(pro) noise <- (!is.na(object$hypvol)) cl <- c(seq(object$G), if(noise) 0) switch(what, "dens" = { out <- dens(data = newdata, modelName = object$modelName, parameters = object$parameters, logarithm = logarithm) }, "cdens" = { z <- cdens(data = newdata, modelName = object$modelName, parameters = object$parameters, logarithm = TRUE) z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes colnames(z) <- cl out <- if(!logarithm) exp(z) else z }, "z" = { z <- cdens(data = newdata, modelName = object$modelName, parameters = object$parameters, logarithm = TRUE) z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(pro)) z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp)) colnames(z) <- cl out <- if(!logarithm) exp(z) else z } ) return(out) } plot.densityMclust <- function(x, data = NULL, what = c("BIC", "density", "diagnostic"), ...) { object <- x # Argh. Really want to use object anyway what <- match.arg(what, several.ok = TRUE) if(object$d > 1) what <- setdiff(what, "diagnostic") oldpar <- par(no.readonly = TRUE) # on.exit(par(oldpar)) plot.densityMclust.density <- function(...) { if(object$d == 1) plotDensityMclust1(object, data = data, ...) else if(object$d == 2) plotDensityMclust2(object, data = data, ...) else plotDensityMclustd(object, data = data, ...) } plot.densityMclust.bic <- function(...) { plot.mclustBIC(object$BIC, ...) } plot.densityMclust.diagnostic <- function(...) { densityMclust.diagnostic(object, ...) 
} if(interactive() & length(what) > 1) { title <- "Model-based density estimation plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "BIC") plot.densityMclust.bic(...) if(what[choice] == "density") plot.densityMclust.density(...) if(what[choice] == "diagnostic") plot.densityMclust.diagnostic(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "BIC")) plot.densityMclust.bic(...) if(any(what == "density")) plot.densityMclust.density(...) if(any(what == "diagnostic")) plot.densityMclust.diagnostic(...) } invisible() } plotDensityMclust1 <- function(x, data = NULL, col = gray(0.3), hist.col = "lightgrey", hist.border = "white", breaks = "Sturges", ...) { object <- x # Argh. Really want to use object anyway mc <- match.call(expand.dots = TRUE) mc$x <- mc$data <- mc$col <- mc$hist.col <- mc$hist.border <- mc$breaks <- NULL xlab <- mc$xlab if(is.null(xlab)) xlab <- deparse(object$call$data) ylab <- mc$ylab if(is.null(ylab)) ylab <- "Density" # xrange <- extendrange(object$data, f = 0.1) xlim <- eval(mc$xlim, parent.frame()) if(!is.null(xlim)) xrange <- range(xlim) ylim <- eval(mc$ylim, parent.frame()) # eval.points <- seq(from = xrange[1], to = xrange[2], length = 1000) d <- predict.densityMclust(object, eval.points) # if(!is.null(data)) { h <- hist(data, breaks = breaks, plot = FALSE) plot(h, freq = FALSE, col = hist.col, border = hist.border, main = "", xlim = range(h$breaks, xrange), ylim = if(!is.null(ylim)) range(ylim) else range(0, h$density, d), xlab = xlab, ylab = ylab) box() mc[[1]] <- as.name("lines") mc$x <- eval.points mc$y <- d mc$type <- "l" mc$col <- col eval(mc, parent.frame()) } else { mc[[1]] <- as.name("plot") mc$x <- eval.points mc$y <- d mc$type <- "l" mc$col <- col mc$xlim <- xlim mc$ylim <- if(!is.null(ylim)) range(ylim) else range(0, d) mc$ylab <- ylab mc$xlab <- xlab eval(mc, parent.frame()) } 
invisible() } plotDensityMclust2 <- function(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, ...) { # This function call surfacePlot() with a suitable modification of arguments object <- x # Argh. Really want to use object anyway mc <- match.call(expand.dots = TRUE) mc$x <- mc$points.pch <- mc$points.col <- mc$points.cex <- NULL mc$nlevels <- nlevels mc$levels <- levels if(!is.null(mc$type)) if(mc$type == "level") mc$type <- "hdr" # TODO: to be removed at certain point if(isTRUE(mc$type == "hdr")) { mc$levels <- c(sort(hdrlevels(object$density, prob)), 1.1*max(object$density)) mc$nlevels <- length(mc$levels) } if(is.null(data)) { addPoints <- FALSE mc$data <- object$data } else { data <- as.matrix(data) stopifnot(ncol(data) == ncol(object$data)) addPoints <- TRUE } # set mixture parameters par <- object$parameters # these parameters should be missing par$variance$cholSigma <- par$Sigma <- NULL if(is.null(par$pro)) par$pro <- 1 par$variance$cholsigma <- par$variance$sigma for(k in seq(par$variance$G)) { par$variance$cholsigma[,,k] <- chol(par$variance$sigma[,,k]) } mc$parameters <- par # now surfacePlot() is called mc[[1]] <- as.name("surfacePlot") out <- eval(mc, parent.frame()) if(addPoints) points(data, pch = points.pch, col = points.col, cex = points.cex) # invisible(out) } plotDensityMclustd <- function(x, data = NULL, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), points.pch = 1, points.col = 1, points.cex = 0.8, gap = 0.2, ...) { # This function call surfacePlot() with a suitable modification of arguments object <- x # Argh. 
Really want to use object anyway mc <- match.call(expand.dots = TRUE) mc$x <- mc$points.pch <- mc$points.col <- mc$points.cex <- mc$gap <- NULL mc$nlevels <- nlevels mc$levels <- levels mc$prob <- prob if(!is.null(mc$type)) if(mc$type == "level") mc$type <- "hdr" # TODO: to be removed at certain point if(is.null(data)) { data <- mc$data <- object$data addPoints <- FALSE } else { data <- as.matrix(data) stopifnot(ncol(data) == ncol(object$data)) addPoints <- TRUE } nc <- object$d oldpar <- par(mfrow = c(nc, nc), mar = rep(gap/2,4), oma = rep(3, 4), no.readonly = TRUE) on.exit(par(oldpar)) for(i in seq(nc)) { for(j in seq(nc)) { if(i == j) { plot(data[,c(i,j)], type="n", xlab = "", ylab = "", axes=FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), colnames(data)[i], cex = 1.5, adj = 0.5) box() } else { # set mixture parameters par <- object$parameters if(is.null(par$pro)) par$pro <- 1 par$mean <- par$mean[c(j,i),,drop=FALSE] par$variance$d <- 2 sigma <- array(dim = c(2, 2, par$variance$G)) for(g in seq(par$variance$G)) sigma[,,g] <- par$variance$sigma[c(j,i),c(j,i),g] par$variance$sigma <- sigma par$variance$Sigma <- NULL par$variance$cholSigma <- NULL par$variance$cholsigma <- NULL mc$parameters <- par mc$data <- object$data[,c(j,i)] mc$axes <- FALSE mc[[1]] <- as.name("surfacePlot") eval(mc, parent.frame()) box() if(addPoints & (j > i)) points(data[,c(j,i)], pch = points.pch, col = points.col, cex = points.cex) } if(i == 1 && (!(j%%2))) axis(3) if(i == nc && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == nc && (i%%2)) axis(4) } } # invisible() } dens <- function(data, modelName, parameters, logarithm = FALSE, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") # aux <- list(...) 
logcden <- cdens(data = data, modelName = modelName, parameters = parameters, logarithm = TRUE, warn = warn) pro <- parameters$pro if(is.null(pro)) stop("mixing proportions must be supplied") noise <- (!is.null(parameters$Vinv)) if(noise) { proNoise <- pro[length(pro)] pro <- pro[-length(pro)] } if(any(proz <- pro == 0)) { pro <- pro[!proz] logcden <- logcden[, !proz, drop = FALSE] } logcden <- sweep(logcden, 2, FUN = "+", STATS = log(pro)) # logsumexp maxlog <- apply(logcden, 1, max) logcden <- sweep(logcden, 1, FUN = "-", STATS = maxlog) logden <- log(apply(exp(logcden), 1, sum)) + maxlog # if(noise) logden <- log(exp(logden) + proNoise*parameters$Vinv) out <- if(logarithm) logden else exp(logden) return(out) } cdens <- function(data, modelName, parameters, logarithm = FALSE, warn = NULL, ...) { modelName <- switch(EXPR = modelName, X = "E", XII = "EII", XXI = "EEI", XXX = "EEE", modelName) checkModelName(modelName) funcName <- paste("cdens", modelName, sep = "") mc <- match.call(expand.dots = TRUE) mc[[1]] <- as.name(funcName) mc$modelName <- NULL eval(mc, parent.frame()) } densityMclust.diagnostic <- function(object, type = c("cdf", "qq"), col = c("black", "black"), lwd = c(2,1), lty = c(1,1), legend = TRUE, grid = TRUE, ...) { # Diagnostic plots for density estimation # (only available for the one-dimensional case) # # Arguments: # object = a 'densityMclust' object # type = type of diagnostic plot: # "cdf" = fitted CDF vs empirical CDF # "qq" = fitted CDF evaluated over the observed data points vs # the quantile from a uniform distribution # # Reference: # Loader C. (1999), Local Regression and Likelihood. New York, Springer, # pp. 
87-90) stopifnot("first argument must be an object of class 'densityMclust'" = inherits(object, "densityMclust")) if(object$d > 1) { warning("only available for one-dimensional data") return() } type <- match.arg(type, c("cdf", "qq"), several.ok = TRUE) # main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) data <- as.numeric(object$data) n <- length(data) cdf <- cdfMclust(object, data = data, ngrid = min(n*10,1000), ...) oldpar <- par(no.readonly = TRUE) if(interactive() & length(type) > 1) { par(ask = TRUE) on.exit(par(oldpar)) } if(any(type == "cdf")) { # Fitted CDF vs Emprical CDF empcdf <- ecdf(data) plot(empcdf, do.points = FALSE, verticals = TRUE, col = col[2], lwd = lwd[2], lty = lty[2], xlab = deparse(object$call$data), ylab = "Cumulative Distribution Function", panel.first = if(grid) grid(equilogs=FALSE) else NULL, main = NULL, ...) # if(main) title(main = "CDF plot", cex.main = 1.1) lines(cdf, col = col[1], lwd = lwd[1], lty = lty[1]) rug(data) if(legend) { legend("bottomright", legend = c("Estimated CDF", "Empirical CDF"), ncol = 1, inset = 0.05, cex = 0.8, col = col, lwd = lwd, lty = lty) } } if(any(type == "qq")) { # Q-Q plot q <- quantileMclust(object, p = ppoints(n)) plot(q, sort(data), xlab = "Quantiles from estimated density", ylab = "Sample Quantiles", panel.first = if(grid) grid(equilogs=FALSE) else NULL, main = NULL, ...) # add qq-line Q.y <- quantile(sort(data), probs = c(.25,.75)) Q.x <- quantileMclust(object, p = c(.25,.75)) b <- (Q.y[2] - Q.y[1])/(Q.x[2] - Q.x[1]) a <- Q.y[1] - b*Q.x[1] abline(a, b, untf = TRUE, col = 1, lty = 2) # old method to draw qq-line # with(list(y = sort(data), x = q), # { i <- (y > quantile(y, 0.25) & y < quantile(y, 0.75)) # abline(lm(y ~ x, subset = i), lty = 2) # }) # P-P plot # cdf <- cdfMclust(object, data, ...) 
# P-P plot (kept commented out for reference)
# plot(seq(1,n)/(n+1), cdf$y, xlab = "Uniform quantiles",
#      ylab = "Cumulative Distribution Function",
#      panel.first = if(grid) grid(equilogs=FALSE) else NULL)
# abline(0, 1, untf = TRUE, col = col[2], lty = lty[1])
  } # end of "qq" branch
  invisible()
}

cdfMclust <- function(object, data, ngrid = 100, ...)
{
# Cumulative Density Function
# (only available for the one-dimensional case)
#
# Returns the estimated CDF evaluated at points given by the optional
# argument data. If not provided, a regular grid of ngrid points is used.
#
# Arguments:
# object = a 'densityMclust' object
# data = the data vector
# ngrid = the length of rectangular grid
#
# Value: a list with components x (the evaluation points, sorted when
# data is supplied) and y (the CDF values at those points).
#
# NOTE(review): no explicit check that object$d == 1 is made here; the CDF
# is computed as a mixture of univariate Gaussian components, so callers
# are expected to pass one-dimensional fits only — confirm at call sites.
  stopifnot("first argument must be an object of class 'densityMclust'" =
            inherits(object, "densityMclust"))
  if(missing(data))
    {
      # default: regular grid of ngrid points over a slightly extended
      # range of the observed data
      eval.points <- extendrange(object$data, f = 0.1)
      eval.points <- seq(eval.points[1], eval.points[2], length.out = ngrid)
    }
  else
    {
      # user-supplied evaluation points (sorted); ngrid is overridden
      eval.points <- sort(as.vector(data))
      ngrid <- length(eval.points)
    }
  G <- object$G
  pro <- object$parameters$pro
  mean <- object$parameters$mean
  var <- object$parameters$variance$sigmasq
  # equal-variance models ("E") store a single sigmasq: recycle over groups
  if(length(var) < G) var <- rep(var, G)
  noise <- (!is.null(object$parameters$Vinv))
  cdf <- rep(0, ngrid)
  # mixture CDF = sum_k pro[k] * Phi((x - mean[k]) / sd[k])
  for(k in seq(G))
    { cdf <- cdf + pro[k]*pnorm(eval.points, mean[k], sqrt(var[k])) }
  # with a noise component, renormalise over the Gaussian components only
  if(noise) cdf <- cdf/sum(pro[seq(G)])
  out <- list(x = eval.points, y = cdf)
  return(out)
}

quantileMclust <- function(object, p, ...)
{
# Calculate the quantile of a univariate mixture corresponding to cdf
# equal to p using bisection line search method.
# # Arguments: # object = a 'densityMclust' object # p = vector of probabilities (0 <= p <= 1) stopifnot(inherits(object, "densityMclust")) if(object$d != 1) { stop("quantile function only available for 1-dimensional data") } p <- as.vector(p) m <- object$parameters$mean s <- sqrt(object$parameters$variance$sigmasq) if(object$modelName == "E") s <- rep(s, object$G) r <- matrix(as.double(NA), nrow = length(p), ncol = object$G) for(g in 1:object$G) { r[,g] <- qnorm(p, mean = m[g], sd = s[g]) } if(object$G == 1) return(as.vector(r)) q <- rep(as.double(NA), length(p)) for(i in 1:length(p)) { F <- function(x) cdfMclust(object, x)$y - p[i] q[i] <- uniroot(F, interval = range(r[i,]), tol = sqrt(.Machine$double.eps))$root } q[ p < 0 | p > 1] <- NaN q[ p == 0 ] <- -Inf q[ p == 1 ] <- Inf return(q) } mclust/R/clustCombi.R0000644000176200001440000004607314053123511014216 0ustar liggesusersclustCombi <- function(object = NULL, data = NULL, ...) { if(is.null(object) & is.null(data)) stop("An object or class 'Mclust' or data as matrix/data.frame must be provided!") if(is.null(object)) { object <- Mclust(data, ...) 
} else { if(!inherits(object, "Mclust")) stop("object not of class 'Mclust'") data <- object$data } combiRes <- combi(data, object) return(combiRes) } combMat <- function(K,l1,l2) { l=c(min(l1,l2), max(l1,l2)) if(any(length(l1) == 0, length(l2) == 0)){ l1 = numeric(0) l2 = l[2]} else { l1 = l[1] l2 = l[2]} M <- rbind(cbind(diag(l2-1), matrix(rep(0,(K-l2+1)*(l2-1)), nrow=l2-1, ncol=K-l2+1)), cbind(matrix(rep(0,l2*(K-l2)), nrow=K-l2, ncol=l2), diag(K-l2))) M[l1,l2] <- 1 return(M) } ## Define xlog to handle x*log(x) as x=0 xlog <- function(x) { xlog1d <- function (xi) if (xi == 0) 0 else (xi*log(xi)) if (is.null(dim(x))) { return(sapply(x,xlog1d)) } else { return(matrix(sapply(x,xlog1d),dim(x))) } } combi <- function(data, MclustOutput, n = nrow(data), d = ncol(data)) { combiM <- list() combiM[[MclustOutput$G]] <- diag(MclustOutput$G) tau <- list() tau[[MclustOutput$G]] = MclustOutput$z classif <- list() classif[[MclustOutput$G]] = map(tau[[MclustOutput$G]]) for (K in MclustOutput$G:2) { dEnt <- matrix(0,nrow=K-1, ncol=K) preCombiTau <- tau[[K]] for (l1 in 1:(K-1)) { for (l2 in (l1+1):K) { postCombiTau <- t(combMat(K,l1,l2) %*% t(preCombiTau)) dEnt[l1,l2] <- sum(xlog(postCombiTau[,l1])) - sum(xlog(preCombiTau[,l1])+xlog(preCombiTau[,l2])) } } l1=which(dEnt==max(dEnt),arr.ind=TRUE)[1] l2=which(dEnt==max(dEnt),arr.ind=TRUE)[2] combiM[[K-1]] <- combMat(K,l1,l2) tau[[K-1]] = t(combiM[[K-1]] %*% t(tau[[K]])) classif[[K-1]] = map(tau[[K-1]]) } output <- list(classification = classif, combiM = combiM, combiz = tau, MclustOutput = MclustOutput) class(output) <- "clustCombi" return(output) } plot.clustCombi <- function(x, what = c("classification", "entropy", "tree"), ...) { object <- x # Argh. 
Really want to use object anyway if(!inherits(object, "clustCombi")) stop("object not of class 'clustCombi'") data <- object$MclustOutput$data what <- match.arg(what, several.ok = TRUE) oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) plot.clustCombi.classification <- function(...) { # Sort z columns so that one of the two combined column is the last one at # each step (prevents the colors and symbols to be mixed as K -> K-1) curr <- 1:object$MclustOutput$G i <- numeric() j <- numeric() for(K in (object$MclustOutput$G):2) { l1 <- which(!object$combiM[[K-1]] %*% rep(1,K) == 1) l2 <- (object$combiM[[K-1]] %*% curr)[l1] - curr[l1] i <- c(curr[l1],i) j <- c(l2,j) curr <- object$combiM[[K-1]] %*% curr - l2*c(rep(0,(l1-1)),1,rep(0,(K-1-l1))) } permutMat <- function(j,K) { M <- diag(K) M[j,j] <- 0 M[K,K] <- 0 M[j,K] <- 1 M[K,j] <- 1 return(M) } combiM <- diag(object$MclustOutput$G) j <- c(1,j) i <- c(0,i) permutz <- object$MclustOutput$z[,j] par(ask=TRUE) for(K in object$MclustOutput$G:1) { curr_title <- if(K == object$MclustOutput$G) paste0("BIC solution (", as.character(K), " clusters)") else paste0("Combined solution with ", as.character(K), " clusters") if(ncol(as.matrix(data)) > 2) { par(oma = c(0,0,2,0), mar = { mar <- oldpar$mar; mar[3] <- 0.1; mar }) } else { par(mar = { mar <- oldpar$mar; mar[3] <- 2.1; mar }) } combiPlot(data = data, z = permutz, combiM = combiM, ...) if(ncol(as.matrix(data)) > 2) { title(curr_title, outer = TRUE, cex.main = 1) } else { title(curr_title, cex.main = 1) } combiM <- combMat(K,which(j==i[K]),K) %*% combiM } par(ask=FALSE) } if(interactive() & length(what) > 1) { title <- "Combined clusterings plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "classification") plot.clustCombi.classification(...) if(what[choice] == "entropy") entPlot(z = object$MclustOutput$z, combiM = object$combiM, ...) if(what[choice] == "tree") combiTree(object, ...) 
      # re-present menu waiting user choice; menu() returns 0 when the
      # user exits, which ends the loop
      choice <- menu(what, graphics = FALSE, title = title)
    }
  }
  else
  {
    # non-interactive (or single plot requested): draw everything asked for
    if(any(what == "classification"))
      plot.clustCombi.classification(...)
    if(any(what == "entropy"))
      entPlot(z = object$MclustOutput$z, combiM = object$combiM, ...)
    if(any(what == "tree"))
      combiTree(object, ...)
  }
  invisible()
}

# combiPlot plots the classification obtained after applying the combining
# matrix combiM to the posterior probabilities z.
#
# Arguments:
#   data   = the data used to fit the model
#   z      = n x K matrix of posterior membership probabilities
#   combiM = combining matrix mapping the K columns of z onto the merged
#            clusters
# Dispatches on dimensionality: pairs plot (at most the first 5 variables),
# 2D classification plot, or 1D classification plot.
combiPlot <- function(data, z, combiM, ...)
{
  p <- ncol(as.matrix(data))
  if (p > 2)
  {
    clPairs(data[,1:min(5,p)],
            classification = map(t(combiM %*% t(z))), ...)
  }
  else if (p == 2)
  {
    mclust2Dplot(data = data, parameters = NULL,
                 classification = map(t(combiM %*% t(z))),
                 what = "classification", ...)
  }
  else
  {
    mclust1Dplot(data = as.matrix(data), parameters = NULL,
                 classification = map(t(combiM %*% t(z))),
                 what = "classification", ...)
  }
}

# entPlot plots the entropy of the combined solutions along the merging
# hierarchy (and optionally entropy against the cumulated number of merged
# observations), with optional 2- or 3-piece piecewise-linear fits, to help
# select the number of clusters.
#
# Arguments:
#   z      = posterior probabilities of the original Mclust solution
#   combiM = list of combining matrices as produced by clustCombi
#   abc    = which plots to draw ("standard" and/or "normalized")
#   reg    = number of pieces (2 and/or 3) for the piecewise linear fits
entPlot <- function(z, combiM, abc = c("standard", "normalized"), reg = 2, ...)
{
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  if(length(abc) > 1) par(ask=TRUE)
  ent <- numeric()
  Kmax <- ncol(z)
  z0 <- z
  # ent[K] = entropy of the soft classification with K clusters
  for(K in Kmax:1)
  {
    z0 <- t(combiM[[K]] %*% t(z0))
    ent[K] <- -sum(xlog(z0))
  }
  if(any(abc == "normalized"))
  {
    # mergedn[K] = number of observations assigned to the pair of clusters
    # merged at step K (used to normalise the entropy differences)
    mergedn <- numeric()
    z0 <- z
    for(K in (Kmax-1):1)
    {
      z0 <- t(combiM[[K+1]] %*% t(z0))
      mergedn[K] = sum(sapply(map(z0), function(x)
        any(which(as.logical(combiM[[K]][rowSums(combiM[[K]])==2,]))==x)))
    }
  }
  # with only 2 clusters there are too few points for a piecewise fit
  if(Kmax == 2) reg <- NULL
  if(any(abc == "standard"))
  {
    par(mfrow=c(1,2), oma=c(0,0,3,0),
        mar = { mar <- oldpar$mar; mar[3] <- 0.1; mar })
    plot(1:Kmax, ent, xlab = "Number of clusters", ylab = "Entropy",
         xaxt = "n", ...)
axis(side = 1, at = 1:Kmax) if(any(reg == 2)) { pcwsreg <- pcws2_reg(1:Kmax,ent) lines(1:pcwsreg$c, pcwsreg$a1*(1:pcwsreg$c) + pcwsreg$b1, lty = 2, col = "red") lines(pcwsreg$c:Kmax, pcwsreg$a2*(pcwsreg$c:Kmax) + pcwsreg$b2, lty = 2, col = "red") } if(any(reg == 3)) { pcwsreg <- pcws3_reg(1:Kmax,ent) lines(1:pcwsreg$c1, pcwsreg$a1*(1:pcwsreg$c1) + pcwsreg$b1, lty = 2, col = "blue") lines(pcwsreg$c1:pcwsreg$c2, pcwsreg$a2*(pcwsreg$c1:pcwsreg$c2) + pcwsreg$b2, lty = 2, col = "blue") lines(pcwsreg$c2:Kmax, pcwsreg$a3*(pcwsreg$c2:Kmax) + pcwsreg$b3, lty = 2, col = "blue") } plot(1:(Kmax-1), ent[2:Kmax]-ent[1:(Kmax-1)], xlab = "Number of clusters", ylab = "Difference in entropy", xaxt = "n", ...) axis(side = 1, at = 1:(Kmax-1)) title("Entropy plot", outer=TRUE, cex.main = 1) } if(any(abc == "normalized")) { par(mfrow=c(1,2), oma=c(0,0,3,0), mar = { mar <- oldpar$mar; mar[3] <- 0.1; mar }) plot(cumsum(c(0,mergedn)), ent, xlab = "Cumul. count of merged obs.", ylab = "Entropy", ...) if(any(reg == 2)) { X <- cumsum(c(0,mergedn)) pcwsreg <- pcws2_reg(X,ent) lines(X[1:pcwsreg$c], pcwsreg$a1*(X[1:pcwsreg$c]) + pcwsreg$b1, lty = 2, col = "red") lines(X[pcwsreg$c:Kmax], pcwsreg$a2*(X[pcwsreg$c:Kmax]) + pcwsreg$b2, lty = 2, col = "red") } if(any(reg == 3)) { X <- cumsum(c(0,mergedn)) pcwsreg <- pcws3_reg(X,ent) lines(X[1:pcwsreg$c1], pcwsreg$a1*(X[1:pcwsreg$c1]) + pcwsreg$b1, lty = 2, col = "blue") lines(X[pcwsreg$c1:pcwsreg$c2], pcwsreg$a2*(X[pcwsreg$c1:pcwsreg$c2]) + pcwsreg$b2, lty = 2, col = "blue") lines(X[pcwsreg$c2:Kmax], pcwsreg$a3*(X[pcwsreg$c2:Kmax]) + pcwsreg$b3, lty = 2, col = "blue") } plot(1:(Kmax-1), (ent[2:Kmax]-ent[1:(Kmax-1)])/mergedn, xlab = "Number of clusters", ylab = "Normalized difference in entropy", xaxt = "n", ...) 
axis(side = 1, at = 1:(Kmax-1)) title("Normalized entropy plot", outer=TRUE, cex.main = 1) } invisible() } combiTree <- function(object, type = c("triangle", "rectangle"), yaxis = c("entropy", "step"), edgePar = list(col = "darkgray", lwd = 2), ...) { if(!inherits(object, "clustCombi")) stop("object not of class 'clustCombi'") yaxis <- match.arg(yaxis, eval(formals(combiTree)$yaxis), several.ok = FALSE) type <- match.arg(type, eval(formals(combiTree)$type), several.ok = FALSE) G <- object$MclustOutput$G combiM <- object$combiM # combiZ <- object$combiz # define merging pattern: # - negative numbers are leaves, # - positive are merged clusters (defined by row number in merge) curr <- 1:G merged <- -(1:G) merge <- matrix(NA, G-1, 2) for(k in 1:(G-1)) { Kp <- G - k + 1 l1 <- which(!combiM[[Kp-1]] %*% rep(1,Kp) == 1) l2 <- (combiM[[Kp-1]] %*% curr)[l1] - curr[l1] curr <- setdiff(curr, max(l1, l2)) merge[k,] <- merged[c(l1,l2)] merged[merged == merged[l1] | merged == merged[l2]] <- k } # order along the x-axis sel <- function(x) { if(x < 0) return(abs(x)) else return(c(sel(merge[x,1]), sel(merge[x,2]))) } ord <- abs(c(sel(merge[nrow(merge),1]), sel(merge[nrow(merge),2]))) if(yaxis == "step") { # step h <- 1:(G-1) ylab <- "Steps" } else { # entropy entropy <- sapply(rev(object$combiz), function(z) -sum(xlog(z))) # normalized negentropy h <- entropy; h <- 1 - (h - min(h))/(max(h)-min(h)); h <- h[-1] ylab <- "1 - normalised entropy" } # hclust object (see help(hclust)) hc <- list(merge = merge, # mergin matrix height = h, # define merge heights order = ord, # order of leaves labels = 1:G) # labels of leaves class(hc) <- "hclust" # make it an hclust object # plot(hc, hang = -1) # look at the result # convert to a dendrogram object dendro <- as.dendrogram(hc) plot(dendro, type = type, edgePar = edgePar, ylab = ylab, ...) 
invisible(dendro) } # pcws2_reg computes the piecewise linear regression -- with two pieces -- to (x,y), for any possible change point and chooses the one leading to the smallest least-square error. pcws2_reg <- function(x, y) { C <- length(x) ssBest = Inf for (c in 2:(C-1)) { x1 <- x[1:c] y1 <- y[1:c] x2 <- x[c:C] y2 <- y[c:C] a1 <- sum((x1-mean(x1))*(y1-mean(y1)))/sum((x1-mean(x1))^2) b1 <- -a1 * mean(x1) + mean(y1) a2 <- sum((x2-mean(x2))*(y2-mean(y2)))/sum((x2-mean(x2))^2) b2 <- -a2 * mean(x2) + mean(y2) ss <- sum((a1*x1+b1-y1)^2) + sum((a2*x2+b2-y2)^2) if (ss < ssBest) { ssBest <- ss cBest <- c a1Best <- a1 a2Best <- a2 b1Best <- b1 b2Best <- b2 } } return(list(c=cBest, a1=a1Best, b1=b1Best, a2=a2Best, b2=b2Best, residuals = c(a1*x1+b1-y1,a2*x2+b2-y2))) } # pcws3_reg computes the piecewise linear regression -- with three pieces -- to (x,y), for any possible change points and chooses the ones leading to the smallest least-square error. pcws3_reg <- function(x, y) { C <- length(x) ssBest = Inf for (c1 in 2:(C-2)) { for (c2 in (c1+1):(C-1)) { x1 <- x[1:c1] y1 <- y[1:c1] x2 <- x[c1:c2] y2 <- y[c1:c2] x3 <- x[c2:C] y3 <- y[c2:C] a1 <- sum((x1-mean(x1))*(y1-mean(y1)))/sum((x1-mean(x1))^2) b1 <- -a1 * mean(x1) + mean(y1) a2 <- sum((x2-mean(x2))*(y2-mean(y2)))/sum((x2-mean(x2))^2) b2 <- -a2 * mean(x2) + mean(y2) a3 <- sum((x3-mean(x3))*(y3-mean(y3)))/sum((x3-mean(x3))^2) b3 <- -a3 * mean(x3) + mean(y3) ss <- sum((a1*x1+b1-y1)^2) + sum((a2*x2+b2-y2)^2) + sum((a3*x3+b3-y3)^2) if (ss < ssBest) { ssBest <- ss c1Best <- c1 c2Best <- c2 a1Best <- a1 b1Best <- b1 a2Best <- a2 b2Best <- b2 a3Best <- a3 b3Best <- b3 } } } return(list(c1=c1Best, c2=c2Best, a1=a1Best, b1=b1Best, a2=a2Best, b2=b2Best, a3=a3Best, b3=b3Best, residuals = c(a1*x1+b1-y1,a2*x2+b2-y2,a3*x3+b3-y3))) } # print.clustCombi <- function(x, ...) # { # output <- x # Argh. 
Really want to use 'output' # cat("\n EM/BIC Solution\n") # cat(" --------------- \n\n") # cat("Number of components: ", as.character(output$MclustOutput$G), "\n", sep = "") # # cat("Model name: ", output$MclustOutput$parameters$var$modelName, "\n\n", sep="") # for (K in 1:output$MclustOutput$G) # { # cat("Component num.", as.character(K),": ", "\n", sep="") # cat(" proportion: ", sprintf(fmt = "%4.2f ", output$MclustOutput$parameters$pro[K]), "\n", sep="") # if (output$Mclust$d == 1) cat(" mean: ", sprintf(fmt = "%4.2f ", output$MclustOutput$parameters$mean[K]), "\n", sep="") else cat(" mean: ", sprintf(fmt = "%4.2f ", output$MclustOutput$parameters$mean[,K]), "\n", sep="") # } # # cat("\n Combining steps \n") # cat(" --------------- \n\n") # # cl = paste(rep(" ", max(output$MclustOutput$G-4,0)), "Classes labels after this step", rep(" ", max(output$MclustOutput$G-4,0)), sep="") # # if (output$MclustOutput$G>4) for (K in 5:output$MclustOutput$G) cl = paste(" ", cl, " ", sep="") # # cat(" Step | Classes combined at this step | Classes labels after this step", "\n", sep="") # cat("-------|-------------------------------|-------------------------------", "\n", sep="") # curr = 1:output$MclustOutput$G # # cat(" 0 | --- |", sprintf(fmt = "%2d ", curr), "\n", sep="") # # for (K in 1:(output$MclustOutput$G-1)) # { # Kp = output$MclustOutput$G - K + 1 # l1 = which(!output$combiM[[Kp-1]] %*% rep(1,Kp) == 1) # l2 = (output$combiM[[Kp-1]] %*% curr)[l1] - curr[l1] # # nc1 = floor((7-nchar(as.character(K)))/2) # nc2 = (7-nchar(as.character(K))) - nc1 # nc3 = floor((33-nchar(paste(as.character(c(l1)), " & ", as.character(l2))))/2) # nc4 = 33-nchar(paste(as.character(c(l1)), " & ", as.character(l2))) - nc3 # # curr <- output$combiM[[Kp-1]] %*% curr - l2*c(rep(0,(l1-1)),1,rep(0,(Kp-1-l1))) # # cat(rep(" ", nc1), as.character(K), rep(" ", nc2), "|", rep(" ", nc3), as.character(l1), " & ", as.character(l2), rep(" ", nc4), "|", sprintf(fmt = "%2d ", curr), "\n", sep="") # # } # # 
cat("\n Classification for K classes: output$classification[[K]]\n") # cat(" Combining matrix (K classes -> (K-1) classes): output$combiM[[K]]\n\n") # } print.clustCombi <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' object:\n", sep = "") cat(paste0(" Mclust model: (", x$MclustOutput$modelName, ",", x$MclustOutput$G, ")\n")) cat(" Available object components: ") cat(names(x), "\n") cat(" Combining matrix (K+1 classes -> K classes): $combiM[[K]]\n") cat(" Classification for K classes: $classification[[K]]\n") invisible() } summary.clustCombi <- function(object, ...) { title <- paste("Combining Gaussian mixture components for clustering") out <- with(object, list(title = title, MclustModelName = object$MclustOutput$modelName, MclustG = object$MclustOutput$G, combiM = object$combiM)) class(out) <- "summary.clustCombi" return(out) } print.summary.clustCombi <- function(x, digits = getOption("digits"), ...) { cat(rep("-", nchar(x$title)),"\n",sep="") cat(x$title, "\n") cat(rep("-", nchar(x$title)),"\n",sep="") # cat("\nMclust model name:", x$MclustModelName, "\n") cat("Number of components:", x$MclustG, "\n") # cat("\nCombining steps:\n\n") # cl <- paste(rep(" ", max(x$MclustG-4,0)), # "Class labels after this step", # rep(" ", max(x$MclustG-4,0)), sep="") # # if(x$MclustG>4) # for(K in 5:x$MclustG) # cl <- paste(" ", cl, " ", sep="") cat(" Step | Classes combined at this step | Class labels after this step", "\n", sep="") cat("-------|-------------------------------|-----------------------------", "\n", sep="") curr <- 1:x$MclustG cat(" 0 | --- | ", sprintf(fmt = "%d ", curr), "\n", sep="") for(K in 1:(x$MclustG-1)) { Kp = x$MclustG - K + 1 l1 = which(!x$combiM[[Kp-1]] %*% rep(1,Kp) == 1) l2 = (x$combiM[[Kp-1]] %*% curr)[l1] - curr[l1] nc1 = floor((7-nchar(as.character(K)))/2) nc2 = (7-nchar(as.character(K))) - nc1 nc3 = floor((33-nchar(paste(as.character(c(l1)), " & ", as.character(l2))))/2) nc4 = 33-nchar(paste(as.character(c(l1)), " & 
", as.character(l2))) - nc3 curr <- x$combiM[[Kp-1]] %*% curr - l2*c(rep(0,(l1-1)),1,rep(0,(Kp-1-l1))) cat(rep(" ", nc1), as.character(K), rep(" ", nc2), "|", rep(" ", nc3), as.character(l1), " & ", as.character(l2), rep(" ", nc4), "| ", sprintf(fmt = "%d ", curr), "\n", sep="") } invisible() } clustCombiOptim <- function(object, reg = 2, plot = FALSE, ...) { # Return the optimal number of clusters suggested by the method based on the # entropy and discussed in reference given in help(clustCombi). # # object = "clustCombi" object # reg = see help(entPlot) z <- object$MclustOutput$z combiM <- object$combiM ent <- rep(as.double(NA, nrow(z))) Kmax <- ncol(z) z0 <- z for(K in Kmax:1) { z0 <- t(combiM[[K]] %*% t(z0)) ent[K] <- -sum(xlog(z0)) } if(Kmax == 2) { # reg <- NULL # in the original code # my modification to get however a result reg <- 1 pcwsreg <- list(K = Kmax) } if(reg == 2) { pcwsreg <- pcws2_reg(1:Kmax, ent) } if(reg == 3) { pcwsreg <- pcws3_reg(1:Kmax, ent) } if(plot) { plot(1:Kmax, ent, xlab = "Number of clusters", ylab = "Entropy", panel.first = grid(), xaxt = "n", ...) 
axis(side = 1, at = 1:Kmax) if(reg == 2) { lines(1:pcwsreg$c, pcwsreg$a1 * (1:pcwsreg$c) + pcwsreg$b1, lty = 2, col = "red") lines(pcwsreg$c:Kmax, pcwsreg$a2 * (pcwsreg$c:Kmax) + pcwsreg$b2, lty = 2, col = "red") } if(reg == 3) { lines(1:pcwsreg$c1, pcwsreg$a1 * (1:pcwsreg$c1) + pcwsreg$b1, lty = 2, col = "blue") lines(pcwsreg$c1:pcwsreg$c2, pcwsreg$a2 * (pcwsreg$c1:pcwsreg$c2) + pcwsreg$b2, lty = 2, col = "blue") lines(pcwsreg$c2:Kmax, pcwsreg$a3 * (pcwsreg$c2:Kmax) + pcwsreg$b3, lty = 2, col = "blue") } } K <- pcwsreg[[1]] z0 <- z for(K in Kmax:K) { z0 <- t(combiM[[K]] %*% t(z0)) } out <- list(numClusters.combi = K, z.combi = z0, cluster.combi = map(z0)) return(out) } mclust/R/mclustaddson.R0000644000176200001440000023653513547605004014627 0ustar liggesusers############################################################################## ### EVV model #### ############################################################################## emEVV <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepEVV(data, parameters = parameters, warn = warn)$z meEVV(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } #### meEVV <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K # if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVV", d = p, G = G, scale = NA, shape = rep(NA,p), orientation = array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EVV", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p)) storage.mode(z) <- "double" # # # MICHAEL from here------------------------------------------ # # without prior specification if(is.null(prior)) { temp <- .Fortran( "meevv", x = as.double(data), z = as.double(z), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(K), mu = double(p*G), O = double(p*p*G), shape.o = double(p*p*G), scale = double(G), shape = double(p*G), pro = double(K), Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ), loglik = double(1), eqpro = as.logical(control$equalPro), itmax = as.integer(control$itmax[1]), tol = as.double(control$tol[1]), eps = as.double(control$eps), niterout = integer(1), errout = double(1), lwork = as.integer(lwork), info = as.integer(0), PACKAGE = "mclust") } else { # with prior priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVV"), prior[names(prior) != "functionName"])) # temp <- .Fortran("meevvp", ...) 
temp <- list(x = data, z = z, n = n, p = p, G = G, mu = double(p*G), O = double(p*p*G), shape.o = double(p*p*G), scale = double(G), shape = double(p*G), pro = double(K), loglik = NA, eqpro = as.logical(control$equalPro), itmax = as.integer(control$itmax[1]), tol = as.double(control$tol[1]), eps = as.double(control$eps), niterout = integer(1), errout = double(1), lwork = as.integer(lwork), info = FALSE) WARNING <- "EVV model is not available with prior" if(warn) warning(WARNING) temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1) return(temp) } # z <- matrix(temp$z, n,K) loglik <- temp$loglik mu <- matrix(temp$mu, p,G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp$scale[1] shape <- matrix(temp$shape, p,G) O <- aperm( array(temp$O, c(p,p,G)), c(2,1,3) ) shape.o <- array( temp$shape.o, c(p,p,G) ) pro <- temp$pro niterout <- temp$niterout errout <- temp$errout lapackSVDinfo <- temp$info if( !is.finite(loglik) | any(scale > signif(.Machine$double.xmax, 6)) | any(shape > signif(.Machine$double.xmax, 6)) | any(O > signif(.Machine$double.xmax, 6)) | any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) ) { loglik <- .Machine$double.xmax } # WARNING <- NULL if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DGESVD fails to converge" } else { WARNING <- "input error for LAPACK DGESVD" } if(warn) warning(WARNING) z[] <- O[] <- shape[] <- NA scale <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -9 # } else if( loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) shape[] <- NA mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "a z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- 
if(control$equalPro) -2 else -3
    }
  else
    { ## EM finished normally: assemble the EVV covariance estimates.
      ## 'shape.o' holds the per-component O_k D_k t(O_k) factors, so a single
      ## common volume 'scale' multiplies every slice.
      sigma <- scale * shape.o
      if(niterout >= control$itmax[1])
        { WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          # a negative iteration count flags non-convergence to the caller
          niterout <- - niterout
          ret <- 1
        }
      else ret <- 0
    }
  info <- list(iterations = niterout, error = errout)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]],
                                         dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "EVV", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro = pro, mean = mu, variance = variance, Vinv = Vinv)
  structure(list(modelName = "EVV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

####
## M-step for the EVV model (equal volume, varying shape and orientation).
## Given data (n x p matrix) and conditional probabilities z (n x G),
## estimates mixing proportions, means, and the EVV covariance decomposition
## via the Fortran routine 'msevv'.  Conjugate priors are not implemented
## for this model.  Returns a classed-list with parameters, a WARNING
## attribute and a returnCode attribute (0 = success).
mstepEVV <- function(data, z, prior = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { # nothing to estimate from: return an all-NA parameter skeleton
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EVV", d = p, G = G, scale = NA,
                       shape = rep(NA,p), orientation = array(NA,c(p,p,G)))
      parameters <- list(pro = rep(NA,G), mean = matrix(NA,p,G),
                         variance = variance)
      return(structure(list(modelName = "EVV", prior = prior, n = n, d = p,
                            G = G, z = z, parameters = parameters),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK workspace size for the SVD computed inside the Fortran code
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), G)
  if(is.null(prior))
    { # without prior specification
      temp <- .Fortran("msevv",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       mu = double(p*G),
                       O = double(p*p*G),
                       shape.o = double(p*p*G),
                       scale = double(G),
                       shape = double(p*G),
                       pro = double(G),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       eps = as.double(.Machine$double.eps),
                       PACKAGE = "mclust")
    }
  else
    { # with prior: not implemented for EVV -- mimic the Fortran output
      # shape with placeholders and warn
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "EVV"),
                               prior[names(prior) != "functionName"]))
      # temp <- .Fortran("msevvp", ...)
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), O = double(p*p*G),
                   shape.o = double(p*p*G), scale = double(G),
                   shape = double(p*G), pro = double(G),
                   lwork = as.integer(lwork), info = FALSE,
                   eps = as.double(.Machine$double.eps))
      WARNING <- "EVV model is not available with prior"
      if(warn) warning(WARNING)
    }
  lapackSVDinfo <- temp$info
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale[1]  # common volume (lambda)
  # transpose each p x p orientation slice returned by Fortran
  O <- aperm( array(temp$O, c(p,p,G)), c(2,1,3) )
  shape.o <- array( temp$shape.o, c(p,p,G) )
  shape <- matrix(temp$shape, p,G)
  pro <- temp$pro
  WARNING <- NULL
  if(lapackSVDinfo)
    { # nonzero info from LAPACK: the decomposition failed
      if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DGESVD fails to converge"
          ret <- -4
        }
      else
        { WARNING <- "input error for LAPACK DGESVD"
          ret <- -5
        }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
    }
  else if( any(abs(c(scale, shape)) > signif(.Machine$double.xmax, 6)) )
    { WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- scale <- shape[] <- O[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else
    { sigma <- scale * shape.o
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- dimnames(shape) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]],
                                         dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EVV", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro = pro, mean = mu, variance = variance)
  structure(list(modelName = "EVV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

####
## E-step for the EVV model: computes the n x K matrix of conditional
## membership probabilities z and the log-likelihood via the Fortran routine
## 'esevv'.  A trailing noise component is supported through parameters$Vinv
## (one extra element in parameters$pro signals it).
estepEVV <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  noise <- l == G + 1  # one extra mixing proportion => noise component
  if(!noise)
    { if(l != G) stop("pro improperly specified")
      K <- G
      Vinv <- NULL
    }
  else
    { K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE)
    }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "EVV", n = n, d = p, G = G, z = z,
                            parameters = parameters, loglik = NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(is.null(parameters$variance$scale) ||
     is.null(parameters$variance$shape) ||
     is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  temp <- .Fortran("esevv",
                   x = as.double(data),
                   z = double(n*K),
                   n = as.integer(n),
                   p = as.integer(p),
                   G = as.integer(G),
                   Gnoise = as.integer(K),
                   mu = as.double(mu),
                   O = as.double( aperm(O, c(2,1,3)) ),
                   scale = as.double(scale),
                   shape = as.double(shape),
                   pro = as.double(pro),
                   Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                   loglik = double(1),
                   eps = as.double(.Machine$double.eps),
                   PACKAGE = "mclust")
  loglik <- temp$loglik
  z <- matrix(temp$z, n,K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from the Fortran code
      WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(list(modelName = "EVV", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

####
## Component densities for the EVV model: returns the n x G matrix of
## per-component (log-)densities computed by 'esevv' with the mixing
## proportions and noise term disabled (pro = -1, Vinv = -1).
cdensEVV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "EVV",
                       WARNING = WARNING, returnCode = 9))
    }
  if(is.null(parameters$variance$scale) ||
     is.null(parameters$variance$shape) ||
     is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  temp <- .Fortran("esevv",
                   x = as.double(data),
                   z = double(n*G),
                   n = as.integer(n),
                   p = as.integer(p),
                   G = as.integer(G),
                   Gnoise = as.integer(G),
                   mu = as.double(mu),
                   O = as.double( aperm(O, c(2,1,3)) ),
                   scale = as.double(scale),
                   shape = as.double(shape),
                   pro = as.double(-1),
                   Vinv = as.double(-1),
                   loglik = double(1),
                   eps = as.double(.Machine$double.eps),
                   PACKAGE = "mclust")
  loglik <- temp$loglik
  z <- matrix(temp$z, n,G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
    }
  else
    { # Fortran returns log-densities; exponentiate unless logs requested
      if(!logarithm) z <- exp(z)
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(z, logarithm = logarithm, modelName = "EVV",
            WARNING = WARNING, returnCode = ret)
}

###
## Simulate n observations from an EVV mixture.  Rows are drawn per
## component using Sigma_k = scale * O_k diag(shape_k) t(O_k); the first
## column of the result labels the generating component.
simEVV <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    { warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "EVV"))
    }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if(dim(rtshape)[1] != d | dim(rtshape)[2] != G)
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  for(k in 1:G)
     { m <- ctabel[k]
       # row-scaled t(O_k) acts as a matrix square root of Sigma_k
       sss <- rtscale * rtshape[,k]
       cholSigma <- t(parameters$variance$orientation[,,k]) * sss
       x[clabels == k, ] <- sweep(matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma,
                                  MARGIN = 2, STATS = mu[,k], FUN = "+")
     }
  dimnames(x) <- list(NULL, 1:d)
  structure(cbind(group = clabels, x), modelName = "EVV")
}

##############################################################################
### VEE model                                                             ####
##############################################################################

## One EM cycle for VEE: an E-step from the supplied parameters followed by
## iterated M/E steps in meVEE.
emVEE <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepVEE(data, parameters = parameters, warn = warn)$z
  meVEE(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

####
meVEE <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
{ ## ME algorithm (EM started from an M-step) for the VEE model:
  ## ellipsoidal components with equal shape and orientation but varying
  ## volume.  Iterations are performed by the Fortran routine 'mevee'.
  ## z may carry an extra final column for a noise component (see Vinv).
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  if(!is.null(Vinv))
    { # last column of z corresponds to a noise component
      G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
    }
  else G <- K
  if(all(is.na(z)))
    { WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VEE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro = rep(NA,G), mean = matrix(NA,p,G),
                         variance = variance)
      return(structure(list(modelName = "VEE", prior = prior, n = n, d = p,
                            G = G, z = z, parameters = parameters,
                            control = control, loglik = NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK workspace for the decompositions done in the Fortran code
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  if(is.null(prior))
    { # without prior specification
      temp <- .Fortran("mevee",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       Gnoise = as.integer(K),
                       mu = double(p*G),
                       C = double(p*p),
                       U = double(p*p*G),
                       scale = double(G),
                       shape = double(p),
                       pro = double(K),
                       Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                       loglik = double(1),
                       eqpro = as.logical(control$equalPro),
                       itmaxin = as.integer(control$itmax[2]),
                       tolin = as.double(control$tol[2]),
                       itmaxout = as.integer(control$itmax[1]),
                       tolout = as.double(control$tol[1]),
                       eps = as.double(control$eps),
                       niterin = integer(1),
                       errin = double(1),
                       niterout = integer(1),
                       errout = double(1),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       PACKAGE = "mclust")
    }
  else
    { # with prior: not implemented for VEE -- return placeholders and warn
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VEE"),
                               prior[names(prior) != "functionName"]))
      # temp <- .Fortran("meveep", ...)
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), C = double(p*p), U = double(p*p*G),
                   scale = double(G), shape = double(p), pro = double(K),
                   loglik = NA,
                   eqpro = as.logical(control$equalPro),
                   itmaxin = as.integer(control$itmax[2]),
                   tolin = as.double(control$tol[2]),
                   itmaxout = as.integer(control$itmax[1]),
                   tolout = as.double(control$tol[1]),
                   eps = as.double(control$eps),
                   niterin = integer(1), errin = double(1),
                   niterout = integer(1), errout = double(1),
                   lwork = as.integer(lwork), info = FALSE)
      WARNING <- "VEE model is not available with prior"
      if(warn) warning(WARNING)
      temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1)
      return(temp)
    }
  z <- matrix(temp$z, n,K)
  niterin <- temp$niterin
  errin <- temp$errin
  niterout <- temp$niterout
  errout <- temp$errout
  loglik <- temp$loglik
  lapackSVDinfo <- temp$info
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  shape <- temp$shape
  shape.o <- matrix(temp$C, p,p)
  # orientation: right singular vectors of the common matrix C
  O <- if(any(is.nan(shape.o))) shape.o else svd(shape.o, nu = 0)$v
  pro <- temp$pro
  # fold any overflowed/NaN estimate into the overflow sentinel handled below
  if( !is.finite(loglik) |
      any(scale > signif(.Machine$double.xmax, 6)) |
      any(shape > signif(.Machine$double.xmax, 6)) |
      any(O > signif(.Machine$double.xmax, 6)) |
      any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) )
    { loglik <- .Machine$double.xmax }
  WARNING <- NULL
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DPOTRI fails to converge" }
      else
        { WARNING <- "input error for LAPACK DPOTRF, DSYEV or DPOTRI" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { if(control$equalPro)
        { WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING)
        }
      else
        { WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING)
        }
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- if(control$equalPro) -2 else -3
    }
  else
    { # Sigma_k = scale_k * C with C common to all components
      sigma <- sweep( array(shape.o, c(p,p,G)), 3, FUN = "*", STATS = scale )
      if(niterin >= control$itmax[2])
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
          ret <- 2
        }
      else if(niterout >= control$itmax[1])
        { WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          niterout <- - niterout
          ret <- 1
        }
      else ret <- 0
    }
  info <- structure(c(niterout = niterout, errout = errout),
                    inner = c(niterin = niterin, errin = errin))
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "VEE", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro = pro, mean = mu, variance = variance, Vinv = Vinv)
  structure(list(modelName = "VEE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

####
mstepVEE <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{ ## M-step for the VEE model (varying volume, equal shape and orientation),
  ## delegated to the Fortran routine 'msvee'.  The shape/orientation are
  ## recovered afterwards from the common matrix C by an SVD in R.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { # nothing to estimate from: return an all-NA parameter skeleton
      # (an unreachable duplicate return that followed this one was removed)
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VEE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro = rep(NA,G), mean = matrix(NA,p,G),
                         variance = variance)
      return(structure(list(modelName = "VEE", prior = prior, n = n, d = p,
                            G = G, z = z, parameters = parameters,
                            control = control, loglik = NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(control)) control <- emControl()
  # the M-step only uses the inner-loop limits of emControl()
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  if(is.null(prior))
    { # without prior specification; note: 'msvee' takes no eps argument
      temp <- .Fortran("msvee",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       mu = double(p*G),
                       U = double(p*p*G),
                       C = double(p*p),
                       scale = as.double( rep(1,G) ),
                       pro = double(G),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       itmax = as.integer(itmax),
                       tol = as.double(tol),
                       niterin = integer(1),
                       errin = double(1),
                       PACKAGE = "mclust")
    }
  else
    { # with prior: not implemented for VEE -- placeholders plus a warning
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VEE"),
                               prior[names(prior) != "functionName"]))
      # temp <- .Fortran("msveep", ...)
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), U = double(p*p*G), C = double(p*p),
                   scale = double(G), pro = double(G),
                   lwork = as.integer(lwork), info = FALSE,
                   itmax = as.integer(itmax), tol = as.double(tol),
                   niterin = integer(1), errin = double(1),
                   eps = as.double(control$eps))
      WARNING <- "VEE model is not available with prior"
      if(warn) warning(WARNING)
    }
  lapackSVDinfo <- temp$info
  errin <- temp$errin
  niterin <- temp$niterin
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  shape.o <- matrix(temp$C, p,p)
  # decompose the common matrix C into shape (singular values) and
  # orientation (right singular vectors)
  SVD <- svd(shape.o, nu = 0)
  shape <- SVD$d
  O <- SVD$v
  pro <- temp$pro
  WARNING <- NULL
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DPOTRI fails to converge" }
      else
        { WARNING <- "input error for LAPACK DPOTRF, DSYEV or DPOTRI" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
    }
  else if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)))
    { WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else
    { sigma <- sweep( array(shape.o, c(p,p,G)), 3, FUN = "*", STATS = scale )
      if(niterin >= itmax)
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
        }
      # NOTE(review): ret is 2 (the inner-iteration-limit code) even when the
      # inner loop converged; sibling msteps (e.g. mstepEVV) return 0 on
      # success -- confirm whether this is intended upstream
      ret <- 2
    }
  info <- c(iteration = niterin, error = errin)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VEE", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro = pro, mean = mu, variance = variance)
  structure(list(modelName = "VEE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

###
## E-step for the VEE model via the Fortran routine 'esvee'.  Computes the
## n x K conditional probability matrix z and the log-likelihood; an extra
## element in parameters$pro signals a noise component (see Vinv).
estepVEE <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  noise <- l == G + 1
  if(!noise)
    { if(l != G) stop("pro improperly specified")
      K <- G
      Vinv <- NULL
    }
  else
    { K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE)
    }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "VEE", n = n, d = p, G = G, z = z,
                            parameters = parameters, loglik = NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(is.null(parameters$variance$scale) ||
     is.null(parameters$variance$shape) ||
     is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  temp <- .Fortran("esvee",
                   x = as.double(data),
                   z = double(n*K),
                   n = as.integer(n),
                   p = as.integer(p),
                   G = as.integer(G),
                   Gnoise = as.integer(K),
                   mu = as.double(mu),
                   O = as.double( t(O) ),
                   scale = as.double(scale),
                   shape = as.double(shape),
                   pro = as.double(pro),
                   Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                   loglik = double(1),
                   eps = as.double(.Machine$double.eps),
                   PACKAGE = "mclust")
  loglik <- temp$loglik
  z <- matrix(temp$z, n,K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(list(modelName = "VEE", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

####
## Component densities for the VEE model: n x G matrix of per-component
## (log-)densities, computed by 'esvee' with proportions and noise disabled.
cdensVEE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "VEE",
                       WARNING = WARNING, returnCode = 9))
    }
  if(is.null(parameters$variance$scale) ||
     is.null(parameters$variance$shape) ||
     is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  temp <- .Fortran("esvee",
                   x = as.double(data),
                   z = double(n*G),
                   n = as.integer(n),
                   p = as.integer(p),
                   G = as.integer(G),
                   Gnoise = as.integer(G),
                   mu = as.double(mu),
                   O = as.double( t(O) ),
                   scale = as.double(scale),
                   shape = as.double(shape),
                   pro = as.double(-1),
                   Vinv = as.double(-1),
                   loglik = double(1),
                   eps = as.double(.Machine$double.eps),
                   PACKAGE = "mclust")
  loglik <- temp$loglik
  z <- matrix(temp$z, n,G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
    }
  else
    { if(!logarithm) z <- exp(z)
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(z, logarithm = logarithm, modelName = "VEE",
            WARNING = WARNING, returnCode = ret)
}

###
## Simulate n observations from a VEE mixture: all components share shape
## and orientation, and Sigma_k = scale_k * O diag(shape) t(O).  The first
## column of the result labels the generating component.
simVEE <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    { warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "VEE"))
    }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if(length(rtshape) != d)
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if(length(rtscale) != G)
    stop("scale incompatible with mean")
  for(k in 1:G)
     { m <- ctabel[k]
       # row-scaled t(O) acts as a matrix square root of Sigma_k
       sss <- rtscale[k] * rtshape
       cholSigma <- t(parameters$variance$orientation) * sss
       x[clabels == k, ] <- sweep(matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma,
                                  MARGIN = 2, STATS = mu[,k], FUN = "+")
     }
  dimnames(x) <- list(NULL, 1:d)
  structure(cbind(group = clabels, x), modelName = "VEE")
}

##############################################################################
### EVE model                                                             ####
##############################################################################

## One EM cycle for EVE: an E-step from the supplied parameters followed by
## iterated M/E steps in meEVE.
emEVE <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepEVE(data, parameters = parameters, warn = warn)$z
  meEVE(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

####
meEVE <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
{ ## ME algorithm (EM started from an M-step) for the EVE model:
  ## ellipsoidal components with equal volume and orientation but varying
  ## shape.  Iterations are performed by the Fortran routine 'meeve'.
  ## z may carry an extra final column for a noise component (see Vinv).
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  if(!is.null(Vinv))
    { # last column of z corresponds to a noise component
      G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
    }
  else G <- K
  if(all(is.na(z)))
    { WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EVE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro = rep(NA,G), mean = matrix(NA,p,G),
                         variance = variance)
      return(structure(list(modelName = "EVE", prior = prior, n = n, d = p,
                            G = G, z = z, parameters = parameters,
                            control = control, loglik = NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK workspace for the decompositions done in the Fortran code
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  if(is.null(prior))
    { # without prior specification; O and shape are seeded with the
      # identity / unit values as starting points for the inner iteration
      temp <- .Fortran("meeve",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       Gnoise = as.integer(K),
                       mu = double(p*G),
                       O = as.double( diag(p) ),
                       U = double(p*p*G),
                       scale = double(1),
                       shape = as.double( matrix(1, p,G) ),
                       pro = double(K),
                       Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                       loglik = double(1),
                       eqpro = as.logical(control$equalPro),
                       itmaxin = as.integer(control$itmax[2]),
                       tolin = as.double(control$tol[2]),
                       itmaxout = as.integer(control$itmax[1]),
                       tolout = as.double(control$tol[1]),
                       eps = as.double(control$eps),
                       niterin = integer(1),
                       errin = double(1),
                       niterout = integer(1),
                       errout = double(1),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       PACKAGE = "mclust")
    }
  else
    { # with prior: not implemented for EVE -- return placeholders and warn
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "EVE"),
                               prior[names(prior) != "functionName"]))
      # temp <- .Fortran("meevep", ...)
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), O = double(p*p), U = double(p*p*G),
                   scale = double(1), shape = double(p*G), pro = double(G),
                   loglik = NA,
                   eqpro = as.logical(control$equalPro),
                   itmaxin = as.integer(control$itmax[2]),
                   tolin = as.double(control$tol[2]),
                   itmaxout = as.integer(control$itmax[1]),
                   tolout = as.double(control$tol[1]),
                   eps = as.double(control$eps),
                   niterin = integer(1), errin = double(1),
                   niterout = integer(1), errout = double(1),
                   lwork = as.integer(lwork), info = FALSE)
      WARNING <- "EVE model is not available with prior"
      if(warn) warning(WARNING)
      temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1)
      return(temp)
    }
  z <- matrix(temp$z, n,K)
  niterin <- temp$niterin
  errin <- temp$errin
  niterout <- temp$niterout
  errout <- temp$errout
  loglik <- temp$loglik
  lapackSVDinfo <- temp$info
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  shape <- matrix(temp$shape, p,G)
  # common orientation matrix, transposed back from Fortran storage
  O <- t( matrix(temp$O, p,p) )
  pro <- temp$pro
  # fold any overflowed/NaN estimate into the overflow sentinel handled below
  if( !is.finite(loglik) |
      any(scale > signif(.Machine$double.xmax, 6)) |
      any(shape > signif(.Machine$double.xmax, 6)) |
      any(O > signif(.Machine$double.xmax, 6)) |
      any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) )
    { loglik <- .Machine$double.xmax }
  WARNING <- NULL
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { if(control$equalPro)
        { WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING)
        }
      else
        { WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING)
        }
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- if(control$equalPro) -2 else -3
    }
  else
    { # Sigma_k = scale * O diag(shape_k) t(O); build one slice per component
      sigma <- array( apply(shape, 2, function(sh)
                            scale * O %*% diag(sh) %*% t(O)), c(p,p,G) )
      if(niterin >= control$itmax[2])
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
          ret <- 2
        }
      else if(niterout >= control$itmax[1])
        { WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          niterout <- - niterout
          ret <- 1
        }
      else ret <- 0
    }
  info <- structure(c(niterout = niterout, errout = errout),
                    inner = c(niterin = niterin, errin = errin))
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "EVE", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape, orientation = O)
  parameters <- list(pro = pro, mean = mu, variance = variance, Vinv = Vinv)
  structure(list(modelName = "EVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

####
mstepEVE <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{ ## M-step for the EVE model (equal volume and orientation, varying shape),
  ## delegated to the Fortran routine 'mseve'.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { # nothing to estimate from: return an all-NA parameter skeleton
      # (an unreachable duplicate return that followed this one was removed)
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "EVE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro = rep(NA,G), mean = matrix(NA,p,G),
                         variance = variance)
      return(structure(list(modelName = "EVE", prior = prior, n = n, d = p,
                            G = G, z = z, parameters = parameters,
                            control = control, loglik = NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(control)) control <- emControl()
  # the M-step only uses the inner-loop limits of emControl()
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  if(is.null(prior))
    { # without prior specification; O and shape are seeded with the
      # identity / unit values as starting points for the inner iteration
      temp <- .Fortran("mseve",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       mu = double(p*G),
                       U = double(p*p*G),
                       O = as.double( diag(p) ),
                       scale = as.double(1),
                       shape = as.double( matrix(1, p,G) ),
                       pro = double(G),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       itmax = as.integer(itmax),
                       tol = as.double(tol),
                       niterin = integer(1),
                       errin = double(1),
                       eps = as.double(.Machine$double.eps),
                       PACKAGE = "mclust")
    }
  else
    { # with prior: not implemented for EVE -- placeholders plus a warning
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "EVE"),
                               prior[names(prior) != "functionName"]))
      # temp <- .Fortran("msevep", ...)
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), U = double(p*p*G), O = double(p*p),
                   scale = double(1), pro = double(G), shape = double(p*G),
                   lwork = as.integer(lwork), info = FALSE,
                   itmax = as.integer(itmax), tol = as.double(tol),
                   niterin = integer(1), errin = double(1),
                   eps = as.double(.Machine$double.eps))
      WARNING <- "EVE model is not available with prior"
      if(warn) warning(WARNING)
    }
  lapackSVDinfo <- temp$info
  errin <- temp$errin
  niterin <- temp$niterin
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  # common orientation matrix, transposed back from Fortran storage
  O <- t( matrix(temp$O, p,p) )
  shape <- matrix(temp$shape, p,G)
  pro <- temp$pro
  WARNING <- NULL
  if(lapackSVDinfo)
    { if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
    }
  else if( any(c(scale, shape) > signif(.Machine$double.xmax, 6)) )
    { WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else
    { # Sigma_k = scale * O diag(shape_k) t(O); build one slice per component
      sigma <- array( apply(shape, 2, function(sh)
                            scale * O %*% diag(sh) %*% t(O)), c(p,p,G) )
      if(niterin >= itmax)
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
        }
      # NOTE(review): ret is 2 (the inner-iteration-limit code) even when the
      # inner loop converged; sibling msteps (e.g. mstepEVV) return 0 on
      # success -- confirm whether this is intended upstream
      ret <- 2
    }
  info <- c(iteration = niterin, error = errin)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EVE", d = p, G = G, sigma = sigma,
scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "EVE", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control), info = info, WARNING = WARNING, returnCode = ret) } ### estepEVE <- function(data, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) scale <- parameters$variance$scale shape <- parameters$variance$shape O <- parameters$variance$orientation G <- ncol(mu) noise <- l == G + 1 if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(NA,n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EVE", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") # # MICHAEL from here------------------------------------------------------- # temp <- .Fortran( "eseve", x = as.double(data), z = double(n*K), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(K), mu = as.double(mu), O = as.double( t(O) ), scale = as.double(scale), shape = as.double(shape), pro = as.double(pro), Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ), loglik = double(1), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") # loglik <- 
temp$loglik z <- matrix(temp$z, n,K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EVE", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } #### cdensEVE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) scale <- parameters$variance$scale shape <- parameters$variance$shape O <- parameters$variance$orientation p <- ncol(data) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(NA,n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EVE", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") # # MICHAEL from here------------------------------------------------------- # temp <- .Fortran( "eseve", x = as.double(data), z = double(n*G), n = as.integer(n), p = as.integer(p), G = as.integer(G), Gnoise = as.integer(G), mu = as.double(mu), O = as.double( t(O) ), scale = as.double(scale), shape = as.double(shape), pro = as.double(-1), Vinv = as.double(-1), loglik = double(1), eps = as.double(.Machine$double.eps), PACKAGE = "mclust") # loglik <- temp$loglik z <- matrix(temp$z, n,G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot 
compute E-step" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "EVE", WARNING = WARNING, returnCode = ret) } ### simEVE <- function(parameters, n, seed = NULL, ...) { if (!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if (any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warn <- "parameters are missing" warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "EVE")) } pro <- parameters$pro if (is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins = G) x <- matrix(0, n, d) rtshape <- sqrt(parameters$variance$shape) if (dim(rtshape)[1] != d | dim(rtshape)[2] != G) stop("shape incompatible with mean") rtscale <- sqrt(parameters$variance$scale) for (k in 1:G) { m <- ctabel[k] sss <- rtscale * rtshape[,k] cholSigma <- t(parameters$variance$orientation) * sss x[clabels == k, ] <- sweep( matrix(rnorm(m*d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[,k], FUN = "+" ) } dimnames(x) <- list(NULL, 1:d) structure(cbind(group = clabels, x), modelName = "EVE") } ############################################################################## ### VVE model #### ############################################################################## emVVE <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepVVE(data, parameters = parameters, warn = warn)$z meVVE(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } #### meVVE <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) 
{
  ## EM iterations (performed in Fortran routine "mevve") for the VVE
  ## model: ellipsoidal components with varying volume, varying shape and
  ## equal orientation, i.e.
  ##   Sigma_g = scale[g] * O %*% diag(shape[,g]) %*% t(O)
  ## Arguments:
  ##   data    - numeric (n x p) matrix; one-dimensional data are rejected
  ##   z       - (n x K) matrix of initial conditional probabilities
  ##   prior   - prior specification; NOT available for VVE (stub branch below)
  ##   control - EM control parameters from emControl()
  ##   Vinv    - if non-NULL, a noise component is assumed (K = G + 1)
  ##   warn    - logical; issue warnings?
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  ## NOTE(review): message has a typo ("should in the form") -- runtime
  ## string left unchanged here; fix upstream
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  ## non-NULL Vinv signals a noise component occupying the last column of z
  if (!is.null(Vinv))
    { G <- K - 1
      if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) }
  else G <- K
  if(all(is.na(z)))
    { # no usable conditional probabilities: return an NA-filled model
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVE", d = p, G = G,
                       scale=rep(NA,G), shape=rep(NA,p),
                       orientation=array(NA,c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G),
                         variance=variance)
      return(structure(list(modelName="VVE", prior=prior, n=n, d=p,
                            G=G, z=z, parameters=parameters,
                            control=control, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  ## LAPACK workspace size for the decompositions done in Fortran
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  # MICHAEL from here-------------------------------------------------------
  #
  # without prior specification
  if(is.null(prior))
    {
      temp <- .Fortran("mevve",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       Gnoise = as.integer(K),
                       mu = double(p*G),
                       O = as.double( diag(p) ),
                       U = double(p*p*G),
                       scale = as.double( rep(1, G) ),
                       shape = as.double( matrix(1, p,G) ),
                       pro = double(K),
                       Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                       loglik = double(1),
                       eqpro = as.logical(control$equalPro),
                       itmaxin = as.integer(control$itmax[2]),
                       tolin = as.double(control$tol[2]),
                       itmaxout = as.integer(control$itmax[1]),
                       tolout = as.double(control$tol[1]),
                       eps = as.double(control$eps),
                       niterin = integer(1),
                       errin = double(1),
                       niterout = integer(1),
                       errout = double(1),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       PACKAGE = "mclust")
      #
    } else { # with prior
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VVE"),
                               prior[names(prior) != "functionName"]))
      # temp <- .Fortran("mevvep", ...)
      ## placeholder mimicking the Fortran output layout; the function
      ## warns and returns immediately below, so it is never iterated on
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), O = double(p*p), U = double(p*p*G),
                   scale = as.double(rep(1, G)), shape = double(p*G),
                   pro = double(G), loglik = NA,
                   eqpro = as.logical(control$equalPro),
                   itmaxin = as.integer(control$itmax[2]),
                   tolin = as.double(control$tol[2]),
                   itmaxout = as.integer(control$itmax[1]),
                   tolout = as.double(control$tol[1]),
                   eps = as.double(control$eps),
                   niterin = integer(1), errin = double(1),
                   niterout = integer(1), errout = double(1),
                   lwork = as.integer(lwork), info = FALSE)
      WARNING <- "VVE model is not available with prior"
      if(warn) warning(WARNING)
      temp <- structure(temp, info = NA, WARNING = WARNING, returnCode = -1)
      return(temp)
    }
  ## unpack Fortran results
  z <- matrix(temp$z, n,K)
  niterin <- temp$niterin
  errin <- temp$errin
  niterout <- temp$niterout
  errout <- temp$errout
  loglik <- temp$loglik
  lapackSVDinfo <- temp$info
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp$scale
  shape <- matrix(temp$shape, p,G)
  O <- t( matrix(temp$O, p,p) )
  pro <- temp$pro
  ## map any non-finite/overflowed estimate onto the overflow sentinel so
  ## it is handled by the "singular covariance" branch below
  if( !is.finite(loglik) |
      any(scale > signif(.Machine$double.xmax, 6)) |
      any(shape > signif(.Machine$double.xmax, 6)) |
      any(O > signif(.Machine$double.xmax, 6)) |
      any(is.nan(scale)) | any(is.nan(shape)) | any(is.nan(O)) )
    { loglik <- .Machine$double.xmax }
  #
  WARNING <- NULL
  if(lapackSVDinfo)
    { # LAPACK failure: info > 0 means no convergence, < 0 an input error
      if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
      #
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else if(loglik < - signif(.Machine$double.xmax, 6))
    { # underflow sentinel: a mixing proportion (or z column sum when
      # equalPro) fell below threshold during the iterations
      if(control$equalPro)
        { WARNING <- "z column sum fell below threshold"
          if(warn) warning(WARNING) }
      else
        { WARNING <- "mixing proportion fell below threshold"
          if(warn) warning(WARNING) }
      mu[] <- pro[] <- z[] <- loglik <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- if(control$equalPro) -2 else -3
    }
  else
    { # success: rebuild the covariance matrices from the decomposition
      sigma <- array(NA, c(p,p,G))
      for ( g in 1:G )
        sigma[,,g] <- scale[g] * O %*% diag(shape[,g]) %*% t(O)
      if(niterin >= control$itmax[2])
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin
          ret <- 2 }
      else if(niterout >= control$itmax[1])
        { WARNING <- "iteration limit reached"
          if(warn) warning(WARNING)
          niterout <- - niterout
          ret <- 1 }
      else ret <- 0
    }
  info <- structure(c(niterout = niterout, errout = errout),
                    inner = c(niterin = niterin, errin = errin))
  # info <- structure(c(iterations = its, error = err),
  #                   inner = c(iterations = inner, error = inerr))
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  ## Sigma = scale * O %*% diag(shape) %*% t(O)
  variance <- list(modelName = "VVE", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape,
                   orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

####

mstepVVE <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{
  ## M-step for the VVE model: given data and conditional probabilities z,
  ## estimate mixing proportions, means, and covariances decomposed as
  ##   Sigma_g = scale[g] * O %*% diag(shape[,g]) %*% t(O)
  ## (varying volume, varying shape, common orientation O).
  ## Returns a structure() with the model name, parameters, and a
  ## returnCode (2 on success; negative on numerical failure; 9 when z
  ## is entirely missing).
  if (is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  ##
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z)))
    { # no usable conditional probabilities: return an NA-filled model.
      # Fix: an unreachable second warning/return block (legacy code that
      # followed this return statement) has been removed.
      WARNING <- "z is missing"
      if(warn) warning(WARNING)
      variance <- list(modelName = "VVE", d = p, G = G,
                       scale = rep(NA,G), shape = rep(NA,p),
                       orientation = array(NA,c(p,p,G)))
      parameters <- list(pro=rep(NA,G), mean=matrix(NA,p,G),
                         variance=variance)
      return(structure(list(modelName="VVE", prior=prior, n=n, d=p,
                            G=G, z=z, parameters=parameters,
                            control=control, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if (is.null(control)) control <- emControl()
  ## inner-loop controls (second element when a length-2 vector is given)
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  ## LAPACK workspace size for the decompositions performed in Fortran
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  #
  # MICHAEL from here-------------------------------------------------------
  #
  # without prior specification
  if(is.null(prior))
    {
      temp <- .Fortran("msvve",
                       x = as.double(data),
                       z = as.double(z),
                       n = as.integer(n),
                       p = as.integer(p),
                       G = as.integer(G),
                       mu = double(p*G),
                       U = double(p*p*G),
                       O = as.double( diag(p) ),
                       scale = as.double( rep(1, G) ),
                       shape = as.double( matrix(1, p,G) ),
                       pro = double(G),
                       lwork = as.integer(lwork),
                       info = as.integer(0),
                       itmax = as.integer(itmax),
                       tol = as.double(tol),
                       niterin = integer(1),
                       errin = double(1),
                       eps = as.double(.Machine$double.eps),
                       PACKAGE = "mclust")
    } else { # with prior
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = G, modelName = "VVE"),
                               prior[names(prior) != "functionName"]))
      #
      # temp <- .Fortran("msvvep", ...)
      ## placeholder mimicking the Fortran output layout; no prior-based
      ## M-step is implemented for VVE, so this only feeds the common
      ## unpacking code below with zero-filled values
      temp <- list(x = data, z = z, n = n, p = p, G = G,
                   mu = double(p*G), U = double(p*p*G), O = double(p*p),
                   scale = double(1), pro = double(G), shape = double(p*G),
                   lwork = as.integer(lwork), info = FALSE,
                   itmax = as.integer(itmax), tol = as.double(tol),
                   niterin = integer(1), errin = double(1),
                   eps = as.double(.Machine$double.eps))
      WARNING <- "VVE model is not available with prior"
      if(warn) warning(WARNING)
    }
  ## unpack Fortran results
  lapackSVDinfo <- temp$info
  errin <- temp$errin
  niterin <- temp$niterin
  mu <- matrix(temp$mu, p,G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  O <- t( matrix(temp$O, p,p) )
  shape <- matrix(temp$shape, p,G)
  scale <- temp$scale
  pro <- temp$pro
  WARNING <- NULL
  #
  if(lapackSVDinfo)
    { # LAPACK failure: info > 0 means no convergence, < 0 an input error
      if(lapackSVDinfo > 0)
        { WARNING <- "LAPACK DSYEV or DGESVD fails to converge" }
      else
        { WARNING <- "input error for LAPACK DSYEV or DGESVD" }
      if(warn) warning(WARNING)
      O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -9
      #
    }
  else if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)))
    { # overflow sentinel from Fortran
      WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA
      sigma <- array(NA, c(p, p, G))
      ret <- -1
    }
  else
    { # sigma <- array( apply(shape, 2, function(sh) O%*%diag(sh)%*%t(O)), c(p,p,G) )
      sigma <- array(NA, c(p,p,G))
      for ( g in 1:G )
        sigma[,,g] <- scale[g] * O %*% diag(shape[,g]) %*% t(O)
      if(niterin >= itmax)
        { WARNING <- "inner iteration limit reached"
          if(warn) warning(WARNING)
          niterin <- - niterin }
      ret <- 2
    }
  info <- c(iteration = niterin, error = errin)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVE", d = p, G = G,
                   sigma = sigma, scale = scale, shape = shape,
                   orientation = O)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VVE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

###

## E-step for the VVE model (computed in the Fortran routine "esvve");
## the .Fortran call continues in the next chunk of the file.
estepVVE <- function(data, parameters, warn = NULL, ...)
{
  if (is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  G <- ncol(mu)
  ## one extra mixing proportion indicates a noise component
  noise <- l == G + 1
  if(!noise)
    { if(l != G)
        stop("pro improperly specified")
      K <- G
      Vinv <- NULL }
  else
    { K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE) }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "VVE", n=n, d=p, G=G, z=z,
                            parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  #
  # MICHAEL from here-------------------------------------------------------
  #
  temp <- .Fortran( "esvve",
                    x = as.double(data),
                    z = double(n*K),
                    n = as.integer(n),
                    p = as.integer(p),
                    G = as.integer(G),
                    Gnoise = as.integer(K),
                    mu = as.double(mu),
                    O = as.double( t(O) ),
                    scale = as.double(scale),
                    shape = as.double(shape),
                    pro = as.double(pro),
                    Vinv = as.double( if (is.null(Vinv)) -1 else Vinv ),
                    loglik = double(1),
                    eps =
                    as.double(.Machine$double.eps),
                    PACKAGE = "mclust")
  #
  loglik <- temp$loglik
  z <- matrix(temp$z, n,K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran: E-step could not be computed
      WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VVE", n = n, d = p, G = G,
                 z = z, parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

####

## Component densities for the VVE model: returns the (n x G) matrix of
## per-component densities (log scale if logarithm = TRUE), computed by
## the same Fortran routine as the E-step but with the mixing
## proportions and noise component disabled.
cdensVVE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  scale <- parameters$variance$scale
  shape <- parameters$variance$shape
  O <- parameters$variance$orientation
  p <- ncol(data)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(NA,n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "VVE",
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape) ||
      is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  #
  # MICHAEL from here-------------------------------------------------------
  #
  ## pro = -1 and Vinv = -1 tell the Fortran code to skip the mixing
  ## proportions and the noise component, yielding plain log-densities
  temp <- .Fortran( "esvve",
                    x = as.double(data),
                    z = double(n*G),
                    n = as.integer(n),
                    p = as.integer(p),
                    G = as.integer(G),
                    Gnoise = as.integer(G),
                    mu = as.double(mu),
                    O = as.double( t(O) ),
                    scale = as.double(scale),
                    shape = as.double(shape),
                    pro = as.double(-1),
                    Vinv = as.double(-1),
                    loglik = double(1),
                    eps = as.double(.Machine$double.eps),
                    PACKAGE = "mclust")
  #
  loglik <- temp$loglik
  z <- matrix(temp$z, n,G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { # overflow sentinel from Fortran: densities could not be computed
      WARNING <- "cannot compute E-step"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
    }
  else
    { # Fortran returns log-densities; exponentiate unless logs requested
      if (!logarithm) z <- exp(z)
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "VVE",
            WARNING = WARNING, returnCode = ret)
}

###

## Simulate n observations from a VVE mixture.
## Returns an (n x (d+1)) matrix whose first column ("group") holds the
## sampled component labels and whose remaining d columns hold the draws.
simVVE <- function(parameters, n, seed = NULL, ...)
{
  if (!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if (any(is.na(parameters[c("mean", "variance")])) ||
      any(is.null(parameters[c("mean", "variance")])))
    { # NOTE(review): local 'warn' assigned but never used here
      warn <- "parameters are missing"
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1),
                       modelName = "VVE"))
    }
  pro <- parameters$pro
  if (is.null(pro)) pro <- rep(1/G, G)
  ## draw component memberships from the mixing proportions
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if (dim(rtshape)[1] != d | dim(rtshape)[2] != G)
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if (length(rtscale) != G)
    stop("scale incompatible with mean")
  for (k in 1:G)
    { m <- ctabel[k]
      ## rows of t(O) scaled by sss give a matrix square root of
      ## Sigma_k = scale[k] * O %*% diag(shape[,k]) %*% t(O)
      sss <- rtscale[k] * rtshape[,k]
      cholSigma <- t(parameters$variance$orientation) * sss
      x[clabels == k, ] <- sweep( matrix(rnorm(m*d), nrow = m, ncol = d) %*%
                                  cholSigma,
                                  MARGIN = 2, STATS = mu[,k], FUN = "+" )
    }
  dimnames(x) <- list(NULL, 1:d)
  structure(cbind(group = clabels, x), modelName = "VVE")
}

#############################################################################
# Examples of some simple R wrapper functions

## Cross-product t(X) %*% Y computed by the Fortran routine "crossprodf".
fcrossprod <- function(X, Y, ...)
{ out <- .Fortran("crossprodf", X = as.matrix(X), Y = as.matrix(Y), n = as.integer(nrow(X)), p = as.integer(ncol(X)), q = as.integer(ncol(Y)), XTY = matrix(0, ncol(X), ncol(Y)), PACKAGE = "mclust") return(out$XTY) }mclust/R/zzz.R0000644000176200001440000000222714234415473012754 0ustar liggesusers# .onLoad <- function(libname, pkgname) # { # library.dynam("mclust", pkgname, libname) # } mclustStartupMessage <- function() { # Startup message obtained as # > figlet -f slant MCLUST # msg <- c(paste0( # " __ ___________ __ _____________ # / |/ / ____/ / / / / / ___/_ __/ # / /|_/ / / / / / / / /\\__ \\ / / # / / / / /___/ /___/ /_/ /___/ // / # /_/ /_/\\____/_____/\\____//____//_/ version ", # # Startup message obtained as # > figlet -f slant mclust msg <- c(paste0( " __ __ ____ ___ _____/ /_ _______/ /_ / __ `__ \\/ ___/ / / / / ___/ __/ / / / / / / /__/ / /_/ (__ ) /_ /_/ /_/ /_/\\___/_/\\__,_/____/\\__/ version ", packageVersion("mclust")), "\nType 'citation(\"mclust\")' for citing this R package in publications.") return(msg) } .onAttach <- function(lib, pkg) { # unlock .mclust variable allowing its modification unlockBinding(".mclust", asNamespace("mclust")) # startup message msg <- mclustStartupMessage() if(!interactive()) msg[1] <- paste("Package 'mclust' version", packageVersion("mclust")) packageStartupMessage(msg) invisible() } mclust/R/impute.R0000644000176200001440000002030713477457724013436 0ustar liggesusersimputeData <- function(data, categorical = NULL, seed = NULL, verbose = interactive()) { if(!requireNamespace("mix", quietly = TRUE)) stop("imputeData function require 'mix' package to be installed!") fac <- apply(data, 2, is.factor) if(is.null(categorical)) { categorical <- fac } else { if(any(!categorical & fac)) { stop("data has a factor that is not designated as categorical") } if(any(categorical | !fac)) { warning("a categorical is not designated as a factor") for(i in which(categorical | !fac)) data[[i]] <- as.factor(data[[i]]) } } # remove 
categorical variables and add a dummy variable if(nocat <- !any(categorical)) { data <- cbind(as.factor(1), data) categorical <- c(TRUE, categorical) } ord <- c(which(categorical), which(!categorical)) # do the imputations s <- mix::prelim.mix(data[,ord], p = sum(categorical)) if(is.null(seed)) seed <- runif(1, min = .Machine$integer.max/1024, max = .Machine$integer.max) # find ML estimate thetahat <- mix::em.mix(s, showits = verbose) # set random number generator seed mix::rngseed(seed) # data augmentation from posterior newtheta <- mix::da.mix(s, thetahat, steps = 100, showits = verbose) # impute under newtheta dataImp <- mix::imp.mix(s, newtheta) # there is a bug, so it needs to refix the seed and impute again mix::rngseed(seed) dataImp <- mix::imp.mix(s, newtheta) if(nocat) dataImp[,-1] else dataImp[,order(ord)] } imputePairs <- function(data, dataImp, symbols = c(1, 16), colors = c("black", "red"), labels, panel = points, ..., lower.panel = panel, upper.panel = panel, diag.panel = NULL, text.panel = textPanel, label.pos = 0.5 + has.diag/3, cex.labels = NULL, font.labels = 1, row1attop = TRUE, gap = 0.2) { textPanel <- function(x = 0.5, y = 0.5, txt, cex, font) text(x, y, txt, cex = cex, font = font) localAxis <- function(side, x, y, xpd, bg, col = NULL, main, oma, ...) { if (side%%2 == 1) Axis(x, side = side, xpd = NA, ...) else Axis(y, side = side, xpd = NA, ...) } localPlot <- function(..., main, oma, font.main, cex.main) plot(...) localLowerPanel <- function(..., main, oma, font.main, cex.main) lower.panel(...) localUpperPanel <- function(..., main, oma, font.main, cex.main) upper.panel(...) localDiagPanel <- function(..., main, oma, font.main, cex.main) diag.panel(...) dots <- list(...) 
nmdots <- names(dots) if (!is.matrix(data)) { data <- as.data.frame(data) for (i in seq_along(names(data))) { if (is.factor(data[[i]]) || is.logical(data[[i]])) data[[i]] <- as.numeric(data[[i]]) if (!is.numeric(unclass(data[[i]]))) stop("non-numeric argument to 'pairs'") } } else if (!is.numeric(data)) stop("non-numeric argument to 'pairs'") panel <- match.fun(panel) if ((has.lower <- !is.null(lower.panel)) && !missing(lower.panel)) lower.panel <- match.fun(lower.panel) if ((has.upper <- !is.null(upper.panel)) && !missing(upper.panel)) upper.panel <- match.fun(upper.panel) if ((has.diag <- !is.null(diag.panel)) && !missing(diag.panel)) diag.panel <- match.fun(diag.panel) if (row1attop) { tmp <- lower.panel lower.panel <- upper.panel upper.panel <- tmp tmp <- has.lower has.lower <- has.upper has.upper <- tmp } nc <- ncol(data) if (nc < 2) stop("only one column in the argument to 'pairs'") has.labs <- TRUE if (missing(labels)) { labels <- colnames(data) if (is.null(labels)) labels <- paste("var", 1:nc) } else if (is.null(labels)) has.labs <- FALSE oma <- if ("oma" %in% nmdots) dots$oma else NULL main <- if ("main" %in% nmdots) dots$main else NULL if (is.null(oma)) { oma <- c(4, 4, 4, 4) if (!is.null(main)) oma[3] <- 6 } opar <- par(mfrow = c(nc, nc), mar = rep(gap/2, 4), oma = oma) on.exit(par(opar)) for (i in if (row1attop) 1:nc else nc:1) for (j in 1:nc) { localPlot(dataImp[, j], dataImp[, i], xlab = "", ylab = "", axes = FALSE, type = "n", ...) if (i == j || (i < j && has.lower) || (i > j && has.upper)) { box() if (i == 1 && (!(j%%2) || !has.upper || !has.lower)) localAxis(1 + 2 * row1attop, dataImp[, j], dataImp[, i], ...) if (i == nc && (j%%2 || !has.upper || !has.lower)) localAxis(3 - 2 * row1attop, dataImp[, j], dataImp[, i], ...) if (j == 1 && (!(i%%2) || !has.upper || !has.lower)) localAxis(2, dataImp[, j], dataImp[, i], ...) if (j == nc && (i%%2 || !has.upper || !has.lower)) localAxis(4, dataImp[, j], dataImp[, i], ...) 
mfg <- par("mfg") if (i == j) { if (has.diag) localDiagPanel(as.vector(dataImp[, i]), ...) if (has.labs) { par(usr = c(0, 1, 0, 1)) if (is.null(cex.labels)) { l.wid <- strwidth(labels, "user") cex.labels <- max(0.8, min(2, 0.9/max(l.wid))) } text.panel(0.5, label.pos, labels[i], cex = cex.labels, font = font.labels) } } else if (i < j) { classification <- as.numeric(apply(data[,c(i,j)], 1, function(x) any(is.na(x)))) + 1 localLowerPanel(as.vector(dataImp[, j]), as.vector(dataImp[,i]), pch = symbols[classification], col = colors[classification], ...) } else { classification <- as.numeric(apply(data[,c(i,j)], 1, function(x) any(is.na(x)))) + 1 localUpperPanel(as.vector(dataImp[, j]), as.vector(dataImp[, i]), pch = symbols[classification], col = colors[classification], ...) } if (any(par("mfg") != mfg)) stop("the 'panel' function made a new plot") } else par(new = FALSE) } if (!is.null(main)) { font.main <- if ("font.main" %in% nmdots) dots$font.main else par("font.main") cex.main <- if ("cex.main" %in% nmdots) dots$cex.main else par("cex.main") mtext(main, 3, 3, TRUE, 0.5, cex = cex.main, font = font.main) } invisible(NULL) } # LS: old to be removed # matchCluster <- function(group, cluster) # { # if(length(group) != length(cluster)) # stop("arguments must be vector of the same length") # group <- as.factor(group) # cluster <- as.factor(cluster) # tab <- table(group,cluster) # j <- apply(tab,2,which.max) # cluster <- factor(cluster, labels = levels(group)[j]) # cluster <- as.character(cluster) # group <- as.character(group) # misclassified <- !(cluster == group) # out <- list(cluster = cluster, misclassified = misclassified, ord = j) # return(out) # } matchCluster <- function(group, cluster) { if(length(group) != length(cluster)) stop("arguments must be vector of the same length") group <- as.factor(group) cluster <- as.factor(cluster) map <- mapClass(as.numeric(group), as.numeric(cluster)) map1 <- unlist(map[[1]]); names(map1) <- NULL map2 <- unlist(map[[2]]); 
names(map2) <- NULL cl <- cluster levels(cl) <- map2 cl <- as.character(levels(cl)[as.numeric(cl)]) cl <- as.character(cl) group <- as.character(group) misclassified <- !(cluster == group) out <- list(cluster = cl, misclassified = misclassified, ord = map1) return(out) } majorityVote <- function(x) { # local function to find the maximum position in a vector, # breaking ties at random whichMax <- function (x) { m <- seq_along(x)[x == max(x, na.rm = TRUE)] if(length(m) > 1) sample(m, size = 1) else m } x <- as.vector(x) tab <- table(x) m <- whichMax(tab) out <- list(table = tab, ind = m, majority = names(tab)[m]) return(out) } mclust/R/toremove.R0000644000176200001440000002435413324447600013760 0ustar liggesusers# functions to be removed?? EMclust <- function(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = list(hcPairs=NULL, subset=NULL, noise=NULL), Vinv = NULL, warn = FALSE, x = NULL, ...) { if (!is.null(x)) { if (!missing(prior) || !missing(control) || !missing(initialization) || !missing(Vinv)) stop("only G and modelNames may be specified as arguments when x is supplied") prior <- attr(x,"prior") control <- attr(x,"control") initialization <- attr(x,"initialization") Vinv <- attr(x,"Vinv") warn <- attr(x,"warn") } dimData <- dim(data) oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1 if(!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if(oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } if (is.null(x)) { if (is.null(modelNames)) { if (d == 1) { modelNames <- c("E", "V") } else { modelNames <- mclust.options("emModelNames") if (n <= d) { # select only spherical and diagonal models m <- match(modelNames, c("EII", "VII", "EEI", "VEI", "EVI", "VVI"), nomatch = 0) modelNames <- modelNames[m] } } } if (is.null(G)) { G <- if (is.null(initialization$noise)) 1:9 else 0:9 } else { G <- sort(as.numeric(G)) } Gall <- G 
Mall <- modelNames } else { Glabels <- dimnames(x)[[1]] Mlabels <- dimnames(x)[[2]] if (is.null(G)) G <- Glabels if (is.null(modelNames)) modelNames <- Mlabels Gmatch <- match(as.character(G), Glabels, nomatch = 0) Mmatch <- match(modelNames, Mlabels, nomatch = 0) if (all(Gmatch) && all(Mmatch)) { attr( x, "G") <- as.numeric(G) attr( x, "modelNames") <- modelNames attr( x, "returnCodes") <- attr(x, "returnCodes")[as.character(G),modelNames,drop=FALSE] return(x[as.character(G),modelNames,drop=FALSE]) } Gall <- sort(as.numeric(unique(c(as.character(G), Glabels)))) Mall <- unique(c(modelNames, Mlabels)) } if (any(as.logical(as.numeric(G))) < 0) { if (is.null(initialization$noise)) { stop("G must be positive") } else { stop("G must be nonnegative") } } if (d == 1 && any(nchar(modelNames) > 1)) { Emodel <- any(sapply(modelNames, function(x) charmatch("E", x, nomatch = 0)[1]) == 1) Vmodel <- any(sapply(modelNames, function(x) charmatch("V", x, nomatch = 0)[1]) == 1) modelNames <- c("E", "V")[c(Emodel, Vmodel)] } l <- length(Gall) m <- length(Mall) EMPTY <- -.Machine$double.xmax BIC <- RET <- matrix(EMPTY, nrow = l, ncol = m, dimnames = list(as.character(Gall), as.character(Mall))) if (!is.null(x)) { BIC[dimnames(x)[[1]],dimnames(x)[[2]]] <- x RET[dimnames(x)[[1]],dimnames(x)[[2]]] <- attr(x, "returnCodes") BIC <- BIC[as.character(G),modelNames,drop=FALSE] RET <- RET[as.character(G),modelNames,drop=FALSE] } G <- as.numeric(G) Glabels <- as.character(G) Gout <- G if (is.null(initialization$noise)) { if (G[1] == 1) { for (mdl in modelNames[BIC["1",] == EMPTY]) { out <- mvn(modelName = mdl, data = data, prior = prior) BIC["1", mdl] <- bic(modelName = mdl, loglik = out$loglik, n = n, d = d, G = 1, equalPro = FALSE) RET["1", mdl] <- attr(out, "returnCode") } if (l == 1) { BIC[BIC == EMPTY] <- NA return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = initialization, warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, 
class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$subset)) { ####################################################### # all data in initial hierarchical clustering phase ####################################################### if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(modelName = mclust.options("hcModelName"), data = data) } else { hcPairs <- hc(modelName = "EII", data = data) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { z <- unmap(clss[, g]) } else { z <- unmap(qclass( data, as.numeric(g))) } for (modelName in modelNames[BIC[g,] == EMPTY]) { out <- me(modelName = modelName, data = data, z = z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } } } else { ###################################################### # initial hierarchical clustering phase on a subset ###################################################### if (is.logical(initialization$subset)) initialization$subset <- (1:n)[initialization$subset] if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[initialization$subset, ], modelName = mclust.options("hcModelName")) } else { hcPairs <- hc(data = data[initialization$subset,], modelName = "EII") } } else { hcPairs <- NULL # hcPairs <- hc(data = data[initialization$subset], # modelName = "E") } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { z <- unmap(clss[, g]) } else { z <- unmap(qclass(data[initialization$subset], as.numeric(g))) } dimnames(z) <- list(as.character(initialization$subset), 
NULL) for (modelName in modelNames[!is.na(BIC[g,])]) { ms <- mstep(modelName = modelName, z = z, data = as.matrix(data)[initialization$subset, ], prior = prior, control = control, warn = warn) # # ctrl <- control # ctrl$itmax[1] <- 1 # ms <- me(modelName = modelName, data = as.matrix(data)[ # initialization$subset, ], z = z, prior = prior, control = ctrl) # es <- do.call("estep", c(list(data = data, warn = warn), ms)) out <- me(modelName = modelName, data = data, z = es$z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } } } } else { ###################################################### # noise case ###################################################### if (!is.null(initialization$subset)) stop("subset option not implemented with noise") if (is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) noise <- initialization$noise if (!is.logical(noise)) noise <- as.logical(match(1:n, noise, nomatch = 0)) if (!G[1]) { hood <- n * log(Vinv) BIC["0", ] <- 2 * hood - log(n) if (l == 1) { return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset), warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[!noise,], modelName = mclust.options("hcModelName")) } else { hcPairs <- hc(data = data[!noise,], modelName = "EII") } } else { hcPairs <- NULL # hcPairs <- hc(data = data[!noise], modelName = "E") } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) z <- matrix(0, n, max(G) + 1) for (g in Glabels) { z[] <- 0 k <- as.numeric(g) if (d > 1 || !is.null(hcPairs)) { z[!noise, 1:k] <- 
unmap(clss[, g]) } else { z[!noise, 1:k] <- unmap(qclass(data[!noise])) } z[noise, k+1] <- 1 K <- 1:(k+1) for (modelName in modelNames[BIC[g,] == EMPTY]) { out <- me(modelName = modelName, data = data, z = z[, K], prior = prior, control = control, Vinv = Vinv, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = k, noise = TRUE, equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") } } } structure(BIC, G = Gout, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset, noise = initialization$noise), Vinv = Vinv, warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC") } # EMclust <- function(...) .Defunct("mclustBIC", PACKAGE = "mclust") mclust/R/mclust.R0000644000176200001440000100431614404653154013427 0ustar liggesusersMclust <- function(data, G = NULL, modelNames = NULL, prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive(), ...) { call <- match.call() data <- data.matrix(data) d <- ncol(data) if(!is.null(x)) { if(!inherits(x, "mclustBIC")) stop("If provided, argument x must be an object of class 'mclustBIC'") } mc <- match.call(expand.dots = TRUE) mc[[1]] <- as.name("mclustBIC") mc[[2]] <- data BIC <- eval(mc, parent.frame()) # get the best model from BIC table G <- attr(BIC, "G") modelNames <- attr(BIC, "modelNames") Sumry <- summary(BIC, data, G = G, modelNames = modelNames) if(length(Sumry)==0) { if(warn) warning("no model(s) could be fitted. 
Try adjusting G and modelNames arguments")
      return()
    }
  if(!(length(G) == 1))
    { bestG <- length(tabulate(Sumry$cl))
      if(warn)
        { if(bestG == max(G) & warn)
            warning("optimal number of clusters occurs at max choice")
          else if(bestG == min(G) & warn)
            warning("optimal number of clusters occurs at min choice")
        }
    }
  oldClass(Sumry) <- NULL
  Sumry$bic <- Sumry$bic[1]
  Sumry$icl <- icl.Mclust(Sumry)
  # hypervolume of the noise component (NA when no noise term was fitted)
  Sumry$hypvol <- if(is.null(attr(BIC, "Vinv"))) as.double(NA) else 1/attr(BIC, "Vinv")
  # df <- (2*Sumry$loglik - Sumry$bic)/log(Sumry$n)
  df <- if(is.null(Sumry$modelName)) NULL else
        with(Sumry, nMclustParams(modelName, d, G,
                                  noise = (!is.na(hypvol)),
                                  equalPro = attr(Sumry, "control")$equalPro))
  ans <- c(list(call = call, data = data, BIC = BIC, df = df), Sumry)
  orderedNames <- c("call", "data", "modelName", "n", "d", "G",
                    "BIC", "loglik", "df", "bic", "icl", "hypvol",
                    "parameters", "z", "classification", "uncertainty")
  structure(ans[orderedNames], class = "Mclust")
}

# Compact printout of a 'Mclust' object: model name, number of mixture
# components, optional noise term, and the available component names.
print.Mclust <- function(x, digits = getOption("digits"), ...)
{
  txt <- paste0("\'", class(x)[1], "\' model object: ")
  noise <- !is.null(attr(x$BIC, "Vinv"))
  if(x$G == 0 & noise)
    { txt <- paste0(txt, "single noise component")
    }
  else
    { # NOTE(review): 'x$model' relies on R's partial matching of list
      # element 'x$modelName' -- confirm no other element starts with "model"
      txt <- paste0(txt, "(", x$model, ",", x$G, ")",
                    if(noise) " + noise component")
    }
  catwrap(txt)
  cat("\n")
  catwrap("\nAvailable components:\n")
  print(names(x))
  invisible(x)
}

# Build a 'summary.Mclust' object collecting fit statistics (loglik, df,
# BIC, ICL) and, optionally, the estimated parameters and classification.
summary.Mclust <- function(object, classification = TRUE, parameters = FALSE, ...)
{
  classification <- as.logical(classification)
  parameters <- as.logical(parameters)
  # collect info
  G <- object$G
  # 'noise' is FALSE when there is no noise component, otherwise it holds
  # the (nonzero) hypervolume, so it can be used directly as a condition
  noise <- if(is.na(object$hypvol)) FALSE else object$hypvol
  pro <- object$parameters$pro
  if(is.null(pro)) pro <- 1
  names(pro) <- if(noise) c(seq_len(G),0) else seq(G)
  mean <- object$parameters$mean
  if(object$d > 1)
    { sigma <- object$parameters$variance$sigma
    }
  else
    { # univariate case: recycle sigmasq so each component has a variance
      sigma <- rep(object$parameters$variance$sigmasq, object$G)[1:object$G]
      names(sigma) <- names(mean)
    }
  if(is.null(object$density))
    { title <- paste("Gaussian finite mixture model fitted by EM algorithm")
      printClassification <- classification
      classification <- if(printClassification)
        { # keep all component levels (and 0 for noise) even if empty
          factor(object$classification,
                 levels = { l <- seq_len(object$G)
                            if(is.numeric(noise)) l <- c(l,0)
                            l })
        } else NULL
    }
  else
    { title <- paste("Density estimation via Gaussian finite mixture modeling")
      printClassification <- FALSE
      classification <- NULL
    }
  #
  obj <- list(title = title, n = object$n, d = object$d,
              G = G, modelName = object$modelName,
              loglik = object$loglik, df = object$df,
              bic = object$bic, icl = object$icl,
              pro = pro, mean = mean, variance = sigma,
              noise = noise,
              prior = attr(object$BIC, "prior"),
              printParameters = parameters,
              printClassification = printClassification,
              classification = classification)
  class(obj) <- "summary.Mclust"
  return(obj)
}

print.summary.Mclust <- function(x, digits = getOption("digits"), ...)
{ txt <- paste(rep("-", min(nchar(x$title), getOption("width"))), collapse = "") catwrap(txt) catwrap(x$title) catwrap(txt) # cat("\n") if(x$G == 0) { catwrap("Mclust model with only a noise component:") } else { catwrap(paste0("Mclust ", x$modelName, " (", mclustModelNames(x$modelName)$type, ") model with ", x$G, ifelse(x$G > 1, " components", " component"), if(x$noise) " and a noise term", ":")) } cat("\n") # if(!is.null(x$prior)) { catwrap(paste0("Prior: ", x$prior$functionName, "(", paste(names(x$prior[-1]), x$prior[-1], sep = " = ", collapse = ", "), ")", sep = "")) cat("\n") } # tab <- data.frame("log-likelihood" = x$loglik, "n" = x$n, "df" = x$df, "BIC" = x$bic, "ICL" = x$icl, row.names = "", check.names = FALSE) print(tab, digits = digits) # if(x$printClassification) { cat("\nClustering table:") print(table(x$classification), digits = digits) } # if(x$printParameters) { cat("\nMixing probabilities:\n") print(x$pro, digits = digits) cat("\nMeans:\n") print(x$mean, digits = digits) cat("\nVariances:\n") if(x$d > 1) { for(g in 1:x$G) { cat("[,,", g, "]\n", sep = "") print(x$variance[,,g], digits = digits) } } else print(x$variance, digits = digits) if(x$noise) { cat("\nHypervolume of noise component:\n") cat(signif(x$noise, digits = digits), "\n") } } # invisible(x) } plot.Mclust <- function(x, what = c("BIC", "classification", "uncertainty", "density"), dimens = NULL, xlab = NULL, ylab = NULL, addEllipses = TRUE, main = FALSE, ...) { object <- x # Argh. Really want to use object anyway if(!inherits(object, "Mclust")) stop("object not of class 'Mclust'") data <- object$data p <- ncol(data) if(p == 1) colnames(data) <- deparse(object$call$data) dimens <- if(is.null(dimens)) seq(p) else dimens[dimens <= p] d <- length(dimens) main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) what <- match.arg(what, several.ok = TRUE) oldpar <- par(no.readonly = TRUE) plot.Mclust.bic <- function(...) plot.mclustBIC(object$BIC, xlab = xlab, ...) 
plot.Mclust.classification <- function(...) { if(d == 1) { mclust1Dplot(data = data[,dimens,drop=FALSE], what = "classification", classification = object$classification, z = object$z, xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, main = main, ...) } if(d == 2) { pars <- object$parameters pars$mean <- pars$mean[dimens,,drop=FALSE] pars$variance$d <- length(dimens) pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE] mclust2Dplot(data = data[,dimens,drop=FALSE], what = "classification", classification = object$classification, parameters = if(addEllipses) pars else NULL, xlab = if(is.null(xlab)) colnames(data)[dimens][1] else xlab, ylab = if(is.null(ylab)) colnames(data)[dimens][2] else ylab, main = main, ...) } if(d > 2) { pars <- object$parameters pars$mean <- pars$mean[dimens,,drop=FALSE] pars$variance$d <- length(dimens) pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE] on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0.2/2,4), oma = rep(3,4)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(data[, dimens[c(j, i)]], type = "n", xlab = "", ylab = "", axes = FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), labels = colnames(data[, dimens])[i], cex = 1.5, adj = 0.5) box() } else { coordProj(data = data, dimens = dimens[c(j,i)], what = "classification", classification = object$classification, parameters = object$parameters, addEllipses = addEllipses, main = FALSE, xaxt = "n", yaxt = "n", ...) } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } } } plot.Mclust.uncertainty <- function(...) 
{ pars <- object$parameters if(d > 1) { pars$mean <- pars$mean[dimens,,drop=FALSE] pars$variance$d <- length(dimens) pars$variance$sigma <- pars$variance$sigma[dimens,dimens,,drop=FALSE] } # if(p == 1 || d == 1) { mclust1Dplot(data = data[,dimens,drop=FALSE], what = "uncertainty", parameters = pars, z = object$z, xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, main = main, ...) } if(p == 2 || d == 2) { mclust2Dplot(data = data[,dimens,drop=FALSE], what = "uncertainty", parameters = pars, # uncertainty = object$uncertainty, z = object$z, classification = object$classification, xlab = if(is.null(xlab)) colnames(data)[dimens][1] else xlab, ylab = if(is.null(ylab)) colnames(data)[dimens][2] else ylab, addEllipses = addEllipses, main = main, ...) } if(p > 2 && d > 2) { on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0,4), mar = rep(0.2/2,4), oma = rep(3,4)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(data[, dimens[c(j, i)]], type="n", xlab = "", ylab = "", axes = FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), labels = colnames(data[,dimens])[i], cex = 1.5, adj = 0.5) box() } else { coordProj(data = data, what = "uncertainty", parameters = object$parameters, # uncertainty = object$uncertainty, z = object$z, classification = object$classification, dimens = dimens[c(j,i)], main = FALSE, addEllipses = addEllipses, xaxt = "n", yaxt = "n", ...) } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } } } plot.Mclust.density <- function(...) { if(p == 1) { objdens <- as.densityMclust(object) plotDensityMclust1(objdens, xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, main = if(main) main else NULL, ...) # mclust1Dplot(data = data, # parameters = object$parameters, # # z = object$z, # what = "density", # xlab = if(is.null(xlab)) colnames(data)[dimens] else xlab, # main = main, ...) 
} if(p == 2) { surfacePlot(data = data, parameters = object$parameters, what = "density", xlab = if(is.null(xlab)) colnames(data)[1] else xlab, ylab = if(is.null(ylab)) colnames(data)[2] else ylab, main = main, ...) } if(p > 2) { objdens <- as.densityMclust(object) objdens$data <- objdens$data[,dimens,drop=FALSE] objdens$varname <- colnames(data)[dimens] objdens$range <- apply(data, 2, range) objdens$d <- d objdens$parameters$mean <- objdens$parameters$mean[dimens,,drop=FALSE] objdens$parameters$variance$d <- d objdens$parameters$variance$sigma <- objdens$parameters$variance$sigma[dimens,dimens,,drop=FALSE] # if (d == 1) plotDensityMclust1(objdens, ...) else if (d == 2) plotDensityMclust2(objdens, ...) else plotDensityMclustd(objdens, ...) } } if(interactive() & length(what) > 1) { title <- "Model-based clustering plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "BIC") plot.Mclust.bic(...) if(what[choice] == "classification") plot.Mclust.classification(...) if(what[choice] == "uncertainty") plot.Mclust.uncertainty(...) if(what[choice] == "density") plot.Mclust.density(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "BIC")) plot.Mclust.bic(...) if(any(what == "classification")) plot.Mclust.classification(...) if(any(what == "uncertainty")) plot.Mclust.uncertainty(...) if(any(what == "density")) plot.Mclust.density(...) } invisible() } logLik.Mclust <- function(object, ...) 
{
  # Extract the log-likelihood of a fitted 'Mclust' model as a 'logLik'
  # object carrying 'df' and 'nobs' attributes; recomputes loglik and df
  # from the model when they are missing from the object.
  if(is.null(object$loglik))
    l <- sum(do.call("dens", c(object, logarithm = TRUE)))
  else
    l <- object$loglik
  if(is.null(object$df))
    { noise <- if(is.null(object$hypvol)) FALSE else (!is.na(object$hypvol))
      equalPro <- if(is.null(object$BIC)) FALSE else
                  attr(object$BIC, "control")$equalPro
      df <- with(object, nMclustParams(modelName, d, G,
                                       noise = noise, equalPro = equalPro))
    }
  else df <- object$df
  attr(l, "nobs") <- object$n
  attr(l, "df") <- df
  class(l) <- "logLik"
  return(l)
}

# Predict the classification and the matrix z of conditional component
# probabilities for (new) data from a fitted 'Mclust' model.
predict.Mclust <- function(object, newdata, ...)
{
  if(!inherits(object, "Mclust"))
    stop("object not of class 'Mclust'")
  if(missing(newdata))
    { newdata <- object$data }
  newdata <- as.matrix(newdata)
  if(ncol(object$data) != ncol(newdata))
    { stop("newdata must match ncol of object data") }
  #
  # evaluate component log-densities at the (new) data
  object$data <- newdata
  z <- do.call("cdens", c(object, list(logarithm = TRUE)))
  pro <- object$parameters$pro
  pro <- pro/sum(pro)
  noise <- (!is.na(object$hypvol))
  # append the (constant) noise log-density as an extra column if needed
  z <- if(noise) cbind(z, log(object$parameters$Vinv)) else cbind(z) # drop redundant attributes
  # combine with mixing proportions in log scale, then normalise each row
  # via log-sum-exp for numerical stability
  z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(pro))
  z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp))
  z <- exp(z)
  # class labels are 1..G, with 0 for the noise component
  cl <- c(seq(object$G), if(noise) 0)
  colnames(z) <- cl
  cl <- cl[apply(z, 1, which.max)]
  out <- list(classification = cl, z = z)
  return(out)
}

mclustBIC <- function(data, G = NULL, modelNames = NULL,
                      prior = NULL, control = emControl(),
                      initialization = list(hcPairs = NULL,
                                            subset = NULL,
                                            noise = NULL),
                      Vinv = NULL, warn = mclust.options("warn"),
                      x = NULL, verbose = interactive(), ...)
{ dimData <- dim(data) oneD <- (is.null(dimData) || length(dimData[dimData > 1]) == 1) if(!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if(oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } if(is.null(x)) { if(is.null(modelNames)) { if(d == 1) { modelNames <- c("E", "V") } else { modelNames <- mclust.options("emModelNames") if(n <= d) { # select only spherical and diagonal models m <- match(modelNames, c("EII", "VII", "EEI", "VEI", "EVI", "VVI"), nomatch = 0) modelNames <- modelNames[m] } } } if(!is.null(prior)) { # remove models not available with prior modelNames <- setdiff(modelNames, c("EVE","VEE","VVE","EVV")) } if(is.null(G)) { G <- if (is.null(initialization$noise)) 1:9 else 0:9 } else { G <- sort(as.integer(unique(G))) } if(is.null(initialization$noise)) { if (any(G > n)) G <- G[G <= n] } else { noise <- initialization$noise if(is.logical(noise)) noise <- which(noise) if(any(match(noise, 1:n, nomatch = 0) == 0)) stop("numeric or logical vector for noise must correspond to row indexes of data") initialization$noise <- noise nnoise <- length(noise) if(any(G > (n-nnoise))) G <- G[G <= n-nnoise] } if(!is.null(initialization$subset)) { subset <- initialization$subset if(is.logical(subset)) subset <- which(subset) initialization$subset <- subset if(any(G > n)) G <- G[G <= n] } Gall <- G Mall <- modelNames } else { if(!missing(prior) || !missing(control) || !missing(initialization) || !missing(Vinv)) stop("only G and modelNames may be specified as arguments when x is supplied") prior <- attr(x,"prior") control <- attr(x,"control") initialization <- attr(x,"initialization") Vinv <- attr(x,"Vinv") warn <- attr(x,"warn") Glabels <- dimnames(x)[[1]] Mlabels <- dimnames(x)[[2]] if(is.null(G)) G <- Glabels if(is.null(modelNames)) modelNames <- Mlabels Gmatch <- match(as.character(G), Glabels, nomatch = 0) Mmatch <- match(modelNames, Mlabels, nomatch = 0) 
if(all(Gmatch) && all(Mmatch)) { out <- x[as.character(G),modelNames,drop=FALSE] mostattributes(out) <- attributes(x) attr(out, "dim") <- c(length(G), length(modelNames)) attr(out, "dimnames") <- list(G, modelNames) attr(out, "G") <- as.numeric(G) attr(out, "modelNames") <- modelNames attr(out, "returnCodes") <- attr(x, "returnCodes")[as.character(G),modelNames,drop=FALSE] return(out) } Gall <- sort(as.numeric(unique(c(as.character(G), Glabels)))) Mall <- unique(c(modelNames, Mlabels)) } if(any(as.logical(as.numeric(G))) < 0) { if(is.null(initialization$noise)) { stop("G must be positive") } else { stop("G must be nonnegative") } } if(d == 1 && any(nchar(modelNames) > 1)) { Emodel <- any(sapply(modelNames, function(x) charmatch("E", x, nomatch = 0)[1]) == 1) Vmodel <- any(sapply(modelNames, function(x) charmatch("V", x, nomatch = 0)[1]) == 1) modelNames <- c("E", "V")[c(Emodel, Vmodel)] } # set subset for initialization when subset is not, no hcPairs is provided, and # data size is larger than the value specified in mclust.options() if(is.null(initialization$subset) & is.null(initialization$hcPairs) & n > mclust.options("subset")) { initialization$subset <- sample(seq.int(n), size = mclust.options("subset"), replace = FALSE) } l <- length(Gall) m <- length(Mall) if(verbose) { cat("fitting ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = l*m+1, style = 3) on.exit(close(pbar)) ipbar <- 0 } EMPTY <- -.Machine$double.xmax BIC <- RET <- matrix(EMPTY, nrow = l, ncol = m, dimnames = list(as.character(Gall), as.character(Mall))) if(!is.null(x)) { BIC[dimnames(x)[[1]],dimnames(x)[[2]]] <- x RET[dimnames(x)[[1]],dimnames(x)[[2]]] <- attr(x, "returnCodes") BIC <- BIC[as.character(G),modelNames,drop=FALSE] RET <- RET[as.character(G),modelNames,drop=FALSE] } G <- as.numeric(G) Glabels <- as.character(G) Gout <- G if(is.null(initialization$noise)) { ## standard case ---- if (G[1] == 1) { for(mdl in modelNames[BIC["1",] == EMPTY]) { out <- mvn(modelName = mdl, data = 
data, prior = prior) BIC["1", mdl] <- bic(modelName = mdl, loglik = out$loglik, n = n, d = d, G = 1, equalPro = FALSE) RET["1", mdl] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } if (l == 1) { BIC[BIC == EMPTY] <- NA if(verbose) { ipbar <- l*m+1; setTxtProgressBar(pbar, ipbar) } return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = initialization, warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$subset)) { ## all data in initial hierarchical clustering phase (no subset) ---- if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data, modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data, modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(data = data, modelName = "E") } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { cl <- clss[,g] } else { cl <- qclass(data, as.numeric(g)) } if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } z <- unmap(cl, groups = 1:max(cl)) if(any(apply( z, 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") small <- sqrt(.Machine$double.neg.eps) z[z < small] <- small z <- t(apply( z, 1, function(x) x/sum(x))) } for(modelName in na.omit(modelNames[BIC[g,] == EMPTY])) { out <- me(data = data, modelName = modelName, z = z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } else { ## initial hierarchical clustering phase on a subset ---- 
subset <- initialization$subset if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[subset,], modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data[subset,], modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data[subset]) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) for (g in Glabels) { if (d > 1 || !is.null(hcPairs)) { cl <- clss[, g] } else { cl <- qclass(data[subset], as.numeric(g)) } if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } z <- unmap(cl, groups = 1:max(cl)) if(any(apply( z, 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") small <- sqrt(.Machine$double.neg.eps) z[z < small] <- small z <- t(apply( z, 1, function(x) x/sum(x))) } for (modelName in modelNames[!is.na(BIC[g,])]) { ms <- mstep(data = as.matrix(data)[initialization$subset,], modelName = modelName, z = z, prior = prior, control = control, warn = warn) # # ctrl <- control # ctrl$itmax[1] <- 1 # ms <- me( data = as.matrix(data)[initialization$subset, ], # modelName = modelName, z = z, prior = prior, control = ctrl) # es <- do.call("estep", c(list(data = data, warn = warn), ms)) out <- me(data = data, modelName = modelName, z = es$z, prior = prior, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = as.numeric(g), equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } } else { ## noise case ---- noise <- initialization$noise if (is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) if (is.null(initialization$subset)) { ## all data in initial hierarchical clustering phase (no subset) ---- if(nnoise == n) stop("All observations cannot be initialised as noise!") if 
(!G[1]) { hood <- n * log(Vinv) BIC["0",] <- 2 * hood - log(n) if (l == 1) { return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, noise = initialization$noise), warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[-noise,], modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data[-noise,], modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data[-noise]) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } z <- matrix(0, n, max(G) + 1) for (g in Glabels) { z[] <- 0 k <- as.numeric(g) if(d > 1 || !is.null(hcPairs)) { cl <- clss[,g] } else { cl <- qclass(data[-noise], k = k) } z[-noise,1:k] <- unmap(cl, groups = 1:max(cl)) if(any(apply(z[-noise,1:k,drop=FALSE], 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") # todo: should be pmax(...) qui sotto?? z[-noise,1:k] <- max(z[-noise,1:k], sqrt(.Machine$double.neg.eps)) # todo: should be t(...) qui sotto?? 
z[-noise,1:k] <- apply(z[-noise,1:k,drop=FALSE], 1, function(z) z/sum(z)) } z[noise, k+1] <- 1 K <- 1:(k+1) for (modelName in na.omit(modelNames[BIC[g,] == EMPTY])) { out <- me(data = data, modelName = modelName, z = z[, K], prior = prior, Vinv = Vinv, control = control, warn = warn) BIC[g, modelName] <- bic(modelName = modelName, loglik = out$loglik, n = n, d = d, G = k, noise = TRUE, equalPro = control$equalPro) RET[g, modelName] <- attr(out, "returnCode") if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } } } else { ## initial hierarchical clustering phase on a subset ---- subset <- initialization$subset subset <- setdiff(subset, noise) # remove from subset noise obs initialization$subset <- subset if(length(subset) == 0) stop("No observations in the initial subset after removing the noise!") if (!G[1]) { hood <- n * log(Vinv) BIC["0",] <- 2 * hood - log(n) if (l == 1) { return(structure(BIC, G = G, modelNames = modelNames, prior = prior, control = control, initialization = list(hcPairs = hcPairs, subset = initialization$subset), warn = warn, n = n, d = d, oneD = oneD, returnCodes = RET, class = "mclustBIC")) } G <- G[-1] Glabels <- Glabels[-1] } if (is.null(initialization$hcPairs)) { if (d != 1) { if (n > d) { hcPairs <- hc(data = data[subset,], modelName = mclust.options("hcModelName"), use = mclust.options("hcUse")) } else { hcPairs <- hc(data = data[subset,], modelName = "EII", use = mclust.options("hcUse")) } } else { hcPairs <- NULL # hcPairs <- hc(modelName = "E", data = data[subset]) } } else hcPairs <- initialization$hcPairs if (d > 1 || !is.null(hcPairs)) clss <- hclass(hcPairs, G) if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } for (g in Glabels) { k <- as.numeric(g) if (d > 1 || !is.null(hcPairs)) { cl <- clss[, g] } else { cl <- qclass(data[subset], k = k) } z <- unmap(cl, groups = 1:max(cl)) if(any(apply(z, 2, max) == 0) & warn) { # missing groups if(warn) warning("there are missing groups") small <- 
sqrt(.Machine$double.neg.eps)
            z[z < small] <- small
            z <- t(apply( z, 1, function(x) x/sum(x)))
          }
        for (modelName in na.omit(modelNames[BIC[g,] == EMPTY]))
          { # initialise parameters on the subset, then a single E-step on
            # the full data to get starting probabilities for EM
            ms <- mstep(data = as.matrix(data)[subset,],
                        modelName = modelName, z = z,
                        prior = prior, control = control, warn = warn)
            es <- do.call("estep", c(list(data = data, warn = warn), ms))
            if(is.na(es$loglik))
              { BIC[g, modelName] <- NA
                RET[g, modelName] <- attr(es, "returnCode")
              }
            else
              { # append the noise component column and run EM on all data
                es$z <- cbind(es$z, 0)
                es$z[noise,] <- matrix(c(rep(0,k),1), byrow = TRUE,
                                       nrow = length(noise), ncol = k+1)
                out <- me(data = data, modelName = modelName, z = es$z,
                          prior = prior, Vinv = Vinv,
                          control = control, warn = warn)
                BIC[g, modelName] <- bic(modelName = modelName,
                                         loglik = out$loglik,
                                         n = n, d = d, G = k, noise = TRUE,
                                         equalPro = control$equalPro)
                RET[g, modelName] <- attr(out, "returnCode")
              }
            if(verbose)
              { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) }
          }
      }
    }
  }
  if(verbose)
    { ipbar <- l*m+1; setTxtProgressBar(pbar, ipbar) }
  if(!is.null(prior) & any(is.na(BIC)))
    warning("The presence of BIC values equal to NA is likely due to one or more of the mixture proportions being estimated as zero, so that the model estimated reduces to one with a smaller number of components.")
  # return the BIC matrix with all fitting settings stored as attributes
  structure(BIC,
            G = Gout, modelNames = modelNames,
            prior = prior, Vinv = Vinv, control = control,
            initialization = list(hcPairs = hcPairs,
                                  subset = initialization$subset,
                                  noise = initialization$noise),
            warn = warn, n = n, d = d, oneD = oneD,
            criterion = "BIC", returnCodes = RET,
            class = "mclustBIC")
}

# Print method for 'mclustBIC' objects: shows the BIC matrix followed by
# the top 'pick' models according to BIC.
print.mclustBIC <- function(x, pick = 3, ...)
{
  # NOTE(review): 'subset', 'ret', 'n' and 'd' are computed below but never
  # used in this method -- possibly leftovers; confirm before removing
  subset <- !is.null(attr(x, "subset"))
  # strip class and attributes so NextMethod("print") shows the bare matrix
  oldClass(x) <- attr(x, "args") <- NULL
  attr(x, "criterion") <- NULL
  attr(x, "control") <- attr(x, "initialization") <- NULL
  attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL
  attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL
  ret <- attr(x, "returnCodes") == -3
  n <- attr(x, "n")
  d <- attr(x, "d")
  attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL
  catwrap("Bayesian Information Criterion (BIC):")
  NextMethod("print")
  cat("\n")
  catwrap(paste("Top", pick, "models based on the BIC criterion:"))
  print(pickBIC(x, pick), ...)
  invisible()
}

# Summary method for 'mclustBIC': without 'data' it simply picks the best
# BIC values; with 'data' it re-fits the best model by dispatching the
# (modified) call to summaryMclustBIC() or, in the noise case,
# summaryMclustBICn().
summary.mclustBIC <- function(object, data, G, modelNames, ...)
{
  mc <- match.call(expand.dots = FALSE)
  if(missing(data))
    { if(!missing(G))
        object <- object[rownames(object) %in% G,,drop=FALSE]
      if(!missing(modelNames))
        object <- object[,colnames(object) %in% modelNames,drop=FALSE]
      ans <- pickBIC(object, ...)
      class(ans) <- "summary.mclustBIC"
    }
  else
    { # dispatch on presence of a noise component in the initialization
      if(is.null(attr(object,"initialization")$noise))
        { mc[[1]] <- as.name("summaryMclustBIC") }
      else
        { mc[[1]] <- as.name("summaryMclustBICn") }
      warn <- attr(object, "warn")
      ans <- eval(mc, parent.frame())
      if(length(ans) == 0) return(ans)
      Glabels <- dimnames(object)[[1]]
      if(length(Glabels) != 1 && (!missing(G) && length(G) > 1))
        { Grange <- range(as.numeric(Glabels))
          # warn when the chosen G sits on the boundary of the range tried
          if(match(ans$G, Grange, nomatch = 0) & warn)
            warning("best model occurs at the min or max of number of components considered!")
        }
    }
  ans
}

summaryMclustBIC <- function (object, data, G = NULL, modelNames = NULL, ...)
{ dimData <- dim(data) oneD <- (is.null(dimData) || length(dimData[dimData > 1]) == 1) if (!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if (oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } initialization <- attr(object, "initialization") hcPairs <- initialization$hcPairs subset <- initialization$subset prior <- attr(object, "prior") control <- attr(object, "control") warn <- attr(object, "warn") oldClass(object) <- NULL attr(object, "prior") <- attr(object, "warn") <- NULL attr(object, "modelNames") <- attr(object, "oneD") <- NULL attr(object, "initialization") <- attr(object, "control") <- NULL d <- if (is.null(dim(data))) 1 else ncol(data) if(is.null(G)) G <- dimnames(object)[[1]] if(is.null(modelNames)) modelNames <- dimnames(object)[[2]] bestBICs <- pickBIC(object[as.character(G), modelNames, drop = FALSE], k = 3) if(all(is.na(bestBICs))) { return(structure(list(), bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } temp <- unlist(strsplit(names(bestBICs)[1], ",")) bestModel <- temp[1] G <- as.numeric(temp[2]) if(G == 1) { out <- mvn(modelName = bestModel, data = data, prior = prior) ans <- c(list(bic = bestBICs, z = unmap(rep(1,n)), classification = rep(1, n), uncertainty = rep(0, n)), out) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "parameters", "z", "classification", "uncertainty") return(structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } if(is.null(subset)) { if(d > 1 || !is.null(hcPairs)) { z <- unmap(hclass(hcPairs, G)) } else { z <- unmap(qclass(data, G), groups = 1:G) } out <- me(data = data, modelName = bestModel, z = z, prior = prior, control = control, warn = warn) if(sum((out$parameters$pro - colMeans(out$z))^2) > sqrt(.Machine$double.eps)) { # 
perform extra M-step and update parameters ms <- mstep(data = data, modelName = bestModel, z = out$z, prior = prior, warn = warn) if(attr(ms, "returnCode") == 0) out$parameters <- ms$parameters } } else { if(d > 1 || !is.null(hcPairs)) { z <- unmap(hclass(hcPairs, G)) } else { z <- unmap(qclass(data[subset], G)) } ms <- mstep(data = as.matrix(data)[subset,], modelName = bestModel, prior = prior, z = z, control = control, warn = warn) es <- do.call("estep", c(list(data = data), ms)) out <- me(data = data, modelName = bestModel, z = es$z, prior = prior, control = control, warn = warn) # perform extra M-step and update parameters ms <- mstep(data = data, modelName = bestModel, z = out$z, prior = prior, warn = warn) if(attr(ms, "returnCode") == 0) out$parameters <- ms$parameters } obsNames <- if (is.null(dim(data))) names(data) else dimnames(data)[[1]] classification <- map(out$z, warn = warn) uncertainty <- 1 - apply(out$z, 1, max) names(classification) <- names(uncertainty) <- obsNames ans <- c(list(bic = bic(bestModel, out$loglik, out$n, out$d, out$G, noise = FALSE, equalPro = control$equalPro), # bic = as.vector(bestBICs[1]), classification = classification, uncertainty = uncertainty), out) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "parameters", "z", "classification", "uncertainty") structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC") } summaryMclustBICn <- function(object, data, G = NULL, modelNames = NULL, ...) 
{ dimData <- dim(data) oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1 if(!oneD && length(dimData) != 2) stop("data must be a vector or a matrix") if(oneD) { data <- drop(as.matrix(data)) n <- length(data) d <- 1 } else { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) } initialization <- attr(object, "initialization") hcPairs <- initialization$hcPairs subset <- initialization$subset noise <- initialization$noise if(is.logical(noise)) noise <- which(noise) prior <- attr(object, "prior") control <- attr(object, "control") warn <- attr(object, "warn") Vinv <- attr(object, "Vinv") oldClass(object) <- NULL attr(object, "control") <- attr(object, "initialization") <- NULL attr(object, "prior") <- attr(object, "Vinv") <- NULL attr(object, "warn") <- NULL ## if (is.null(G)) G <- dimnames(object)[[1]] if (is.null(modelNames)) modelNames <- dimnames(object)[[2]] bestBICs <- pickBIC(object[as.character(G), modelNames, drop = FALSE], k = 3) if(all(is.na(bestBICs))) { return(structure(list(), bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } temp <- unlist(strsplit(names(bestBICs)[1], ",")) bestModel <- temp[1] G <- as.numeric(temp[2]) if(G == 0) { ans <- list(bic = bestBICs[1], z = unmap(rep(0,n)), classification = rep(0, n), uncertainty = rep(0, n), n = n, d = ncol(data), modelName = bestModel, G = 0, loglik = n * log(Vinv), Vinv = Vinv, parameters = NULL) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "Vinv", "parameters", "z", "classification", "uncertainty") return(structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC")) } G1 <- G + 1 if(is.null(subset)) { z <- matrix(0, n, G1) if(d > 1 || !is.null(hcPairs)) { z[-noise, 1:G] <- unmap(hclass(hcPairs, G)) } else { z[-noise, 1:G] <- unmap(qclass(data[-noise], G)) } z[noise, G1] <- 1 out <- me(data = data, modelName = 
bestModel, z = z, prior = prior, Vinv = Vinv, control = control, warn = warn) } else { subset <- setdiff(subset, noise) # set subset among those obs not noise if(d > 1 || !is.null(hcPairs)) { z <- unmap(hclass(hcPairs, G)) } else { z <- unmap(qclass(data[subset], G)) } ms <- mstep(data = as.matrix(data)[subset,], modelName = bestModel, z = z, prior = prior, control = control, warn = warn) es <- do.call("estep", c(list(data = data, warn = warn), ms)) es$z <- cbind(es$z, 0) es$z[noise,] <- matrix(c(rep(0,G),1), byrow = TRUE, nrow = length(noise), ncol = G+1) out <- me(data = data, modelName = bestModel, z = es$z, prior = prior, Vinv = Vinv, control = control, warn = warn) } obsNames <- if(is.null(dim(data))) names(data) else dimnames(data)[[1]] classification <- map(out$z, warn = warn) classification[classification == G1] <- 0 uncertainty <- 1 - apply(out$z, 1, max) names(classification) <- names(uncertainty) <- obsNames ans <- c(list(bic = as.vector(bestBICs[1]), classification = classification, uncertainty = uncertainty, Vinv = Vinv), out) orderedNames <- c("modelName", "n", "d", "G", "bic", "loglik", "parameters", "Vinv", "z", "classification", "uncertainty") structure(ans[orderedNames], bestBICvalues = bestBICs, prior = prior, control = control, initialization = initialization, class = "summary.mclustBIC") } print.summary.mclustBIC <- function(x, digits = getOption("digits"), ...) { if("classification" %in% names(x)) { bic <- attr(x,"bestBICvalues") l <- length(bic) if(l == 1) { cat("BIC value:\n") print(bic, digits = digits) } else { cat("Best BIC values:\n") bic <- drop(as.matrix(bic)) bic <- rbind(BIC = bic, "BIC diff" = bic - max(bic)) print(bic, digits = digits) } cat("\n") catwrap(paste0("Classification table for model (", if(l == 1) names(bic)[1] else colnames(bic)[1], "):")) print(table(x$classification), digits = digits, ...) 
} else { cat("Best BIC values:\n") x <- if(length(x) == 0) attr(x,"bestBICvalues") else drop(as.matrix(x)) x <- rbind(BIC = x, "BIC diff" = x - max(x)) print(x, digits = digits) } invisible() } plot.mclustBIC <- function(x, G = NULL, modelNames = NULL, symbols = NULL, colors = NULL, xlab = NULL, ylab = "BIC", legendArgs = list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01), ...) { args <- list(...) if(is.null(xlab)) xlab <- "Number of components" subset <- !is.null(attr(x, "initialization")$subset) noise <- !is.null(attr(x, "initialization")$noise) ret <- attr(x, "returnCodes") == -3 # legendArgsDefault <- list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01) legendArgsDefault <- eval(formals(plot.mclustBIC)$legendArgs) legendArgs <- append(as.list(legendArgs), legendArgsDefault) legendArgs <- legendArgs[!duplicated(names(legendArgs))] n <- ncol(x) dnx <- dimnames(x) x <- matrix(as.vector(x), ncol = n) dimnames(x) <- dnx if(is.null(modelNames)) modelNames <- dimnames(x)[[2]] if(is.null(G)) G <- as.numeric(dimnames(x)[[1]]) # BIC <- x[as.character(G), modelNames, drop = FALSE] # X <- is.na(BIC) # nrowBIC <- nrow(BIC) # ncolBIC <- ncol(BIC) if(is.null(symbols)) { colNames <- dimnames(x)[[2]] m <- length(modelNames) if(is.null(colNames)) { symbols <- if(m > 9) LETTERS[1:m] else as.character(1:m) names(symbols) <- modelNames } else { symbols <- mclust.options("bicPlotSymbols")[modelNames] } } if(is.null(colors)) { colNames <- dimnames(x)[[2]] if(is.null(colNames)) { colors <- 1:m names(colors) <- modelNames } else { # colors <- mclust.options("bicPlotColors")[modelNames] colors <- mclust.options("bicPlotColors") if(!is.null(names(colors)) & !any(names(colors) == "")) colors <- colors[modelNames] } } x <- x[,modelNames, drop = FALSE] ylim <- if(is.null(args$ylim)) range(as.vector(x[!is.na(x)])) else args$ylim matplot(as.numeric(dnx[[1]]), x, type = "b", xaxt = "n", xlim = range(G), ylim = ylim, pch = symbols, col = colors, lty = 1, xlab = xlab, ylab = ylab, main 
= "") axis(side = 1, at = as.numeric(dnx[[1]])) if(!is.null(legendArgs)) { do.call("legend", c(list(legend = modelNames, col = colors, pch = symbols), legendArgs)) } invisible(symbols) } pickBIC <- function(x, k = 3, ...) { if(!is.matrix(x)) { warning("sorry, the pickBIC function cannot be applied to the provided argument!") return() } Glabels <- dimnames(x)[[1]] modelNames <- dimnames(x)[[2]] mis <- is.na(x) if(all(mis) & mclust.options("warn")) { warning("none of the selected models could be fitted") return(rep(NA,k)) } x[mis] <- - .Machine$double.xmax x <- data.frame(as.vector(x), Glabels[as.vector(row(x))], modelNames[as.vector(col(x))]) # x <- x[rev(order(x[,1])),] # order by including first simpler models if ties are present x <- x[order(-x[, 1], x[,2], x[,3]),] namesx <- apply(x[,-1,drop = FALSE], 1, function(z) paste(as.character(z[2]), as.character(z[1]), sep = ",")) k <- min(k, nrow(x)) x <- x[1:k,1] x[x == - .Machine$double.xmax] <- NA namesx <- namesx[1:k] namesx[is.na(x)] <- " " names(x) <- namesx x } mclustBICupdate <- function(BIC, ...) { args <- list(...) 
nargs <- length(args) BIC1 <- BIC if(length(args) > 1) { # recursively call the function when multiple arguments BIC2 <- mclustBICupdate(args[[1]], args[[-1]]) } else { BIC2 <- args[[1]] } if(is.null(BIC1)) return(BIC2) if(is.null(BIC2)) return(BIC1) stopifnot(inherits(BIC1, c("mclustBIC", "mclustSBIC", "mclustICL")) & inherits(BIC2, c("mclustBIC", "mclustSBIC", "mclustICL"))) stopifnot(all.equal(attributes(BIC1)[c("n", "d")], attributes(BIC2)[c("n", "d")])) G <- unique(c(rownames(BIC1), rownames(BIC2))) modelNames <- unique(c(colnames(BIC1), colnames(BIC2))) BIC <- matrix(as.double(NA), nrow = length(G), ncol = length(modelNames), dimnames = list(G, modelNames)) BIC[rownames(BIC1),colnames(BIC1)] <- BIC1[rownames(BIC1),colnames(BIC1)] BIC[rownames(BIC2),colnames(BIC2)] <- BIC2[rownames(BIC2),colnames(BIC2)] r <- intersect(rownames(BIC1), rownames(BIC2)) c <- intersect(colnames(BIC1), colnames(BIC2)) BIC[r,c] <- pmax(BIC1[r,c], BIC2[r,c], na.rm = TRUE) attr <- if(pickBIC(BIC2,1) > pickBIC(BIC1,1)) attributes(BIC2) else attributes(BIC1) attr$dim <- dim(BIC) attr$dimnames <- dimnames(BIC) attr$G <- as.numeric(G) attr$modelNames <- modelNames attr$returnCodes <- NULL attributes(BIC) <- attr return(BIC) } mclustLoglik <- function(object, ...) { stopifnot(inherits(object, "mclustBIC")) BIC <- object G <- as.numeric(rownames(BIC)) modelNames <- colnames(BIC) n <- attr(BIC, "n") d <- attr(BIC, "d") noise <- if(is.null(attr(BIC, "noise"))) FALSE else TRUE loglik <- matrix(as.double(NA), nrow = length(G), ncol = length(modelNames), dimnames = list(G, modelNames)) for(i in seq_along(G)) for(j in seq_along(modelNames)) { npar <- nMclustParams(G = G[i], modelName = modelNames[j], d = d, noise = noise) loglik[i,j] <- 0.5*(BIC[i,j] + npar*log(n)) } mostattributes(loglik) <- attributes(BIC) attr(loglik, "criterion") <- "loglik" class(loglik) <- "mclustLoglik" return(loglik) } print.mclustLoglik <- function(x, ...) 
{
  # strip bookkeeping attributes so the matrix prints cleanly
  oldClass(x) <- attr(x, "args") <- NULL
  attr(x, "criterion") <- NULL
  attr(x, "control") <- attr(x, "initialization") <- NULL
  attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL
  attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL
  attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL
  catwrap("Log-likelihood:")
  NextMethod("print")
  invisible()
}

# mclustModel: best-model parameter estimates for given data and a table of
# BIC values, obtained by redirecting the call to summaryMclustBIC() or
# summaryMclustBICn() (the latter when a noise component was initialized).
mclustModel <- function(data, BICvalues, G=NULL, modelNames=NULL, ...)
{
  mc <- match.call(expand.dots = FALSE)
  if (is.null(attr(BICvalues,"initialization")$noise))
    { mc[[1]] <- as.name("summaryMclustBIC") }
  else
    { mc[[1]] <- as.name("summaryMclustBICn") }
  # the summary functions expect (object, data): swap the first two call
  # arguments and rename 'BICvalues' to 'object' before re-evaluating
  nm <- names(mc)
  mc[1:3] <- mc[c(1,3,2)]
  nm[1:3] <- nm[c(1,3,2)]
  nm[nm == "BICvalues"] <- "object"
  names(mc) <- nm
  ans <- eval(mc, parent.frame())
  # drop classification output and summary attributes; keep only the model fit
  ans$classification <- ans$uncertainty <- NULL
  attr( ans, "bestBICvalues") <- NULL
  attr( ans, "prior") <- NULL
  attr( ans, "control") <- NULL
  attr( ans, "initialization") <- NULL
  oldClass(ans) <- "mclustModel"
  ans
}

# mclustModelNames: human-readable description of an mclust model-name code.
mclustModelNames <- function(model)
{
  type <- switch(EXPR = as.character(model),
                 "E" = "univariate, equal variance",
                 "V" = "univariate, unequal variance",
                 "EII" = "spherical, equal volume",
                 "VII" = "spherical, varying volume",
                 "EEI" = "diagonal, equal volume and shape",
                 "VEI" = "diagonal, equal shape",
                 "EVI" = "diagonal, equal volume, varying shape",
                 "VVI" = "diagonal, varying volume and shape",
                 "EEE" = "ellipsoidal, equal volume, shape and orientation",
                 "EVE" = "ellipsoidal, equal volume and orientation",
                 "VEE" = "ellipsoidal, equal shape and orientation",
                 "VVE" = "ellipsoidal, equal orientation",
                 "EEV" = "ellipsoidal, equal volume and shape",
                 "VEV" = "ellipsoidal, equal shape",
                 "EVV" = "ellipsoidal, equal volume",
                 "VVV" = "ellipsoidal, varying volume, shape, and orientation",
                 "X" = "univariate normal",
                 "XII" = "spherical multivariate normal",
                 "XXI" = "diagonal multivariate normal",
                 "XXX" = "ellipsoidal multivariate normal",
                 warning("invalid model"))
  return(list(model = model, type = type))
}

# defaultPrior: construct the default conjugate-prior hyperparameters
# (shrinkage, mean, dof, scale) for the given data, number of components G,
# and model name; '...' may override shrinkage, mean, and scale.
defaultPrior <- function(data, G, modelName, ...)
{
  aux <- list(...)
  # shrinkage: default 0.01; NA/0 disables it; negative values are an error
  if(is.null(aux$shrinkage))
    { shrinkage <- 0.01 }
  else if(is.na(aux$shrinkage) || !aux$shrinkage)
    { shrinkage <- 0 }
  else if(aux$shrinkage < 0)
    { stop("negative value given for shrinkage") }
  else
    { shrinkage <- aux$shrinkage }
  # prior mean: defaults to the data mean; a user-specified mean requires a
  # positive shrinkage (and vice versa)
  if(is.null(aux$mean))
    { mean <- if (is.null(dim(data))) mean(data) else colMeans(data) }
  else if(any(is.na(aux$mean)))
    { if(shrinkage) stop("positive shrinkage with no prior mean specified")
      mean <- if (is.null(dim(data))) mean(data) else colMeans(data) }
  else
    { if(!shrinkage) stop("prior mean specified but not shrinkage")
      mean <- aux$mean }
  # scale and dof depend on the model family (univariate / diagonal-spherical /
  # ellipsoidal); empty switch branches fall through to the next one
  switch(EXPR = modelName,
         E = ,
         V = ,
         X = {
           dof <- 3
           if(is.null(aux$scale))
             { scale <- var(data)/G^2 }
           else
             { scale <- aux$scale }
           list(shrinkage = shrinkage, mean = mean, dof = dof, scale = scale)
         },
         ##
         EII = ,
         VII = ,
         XII = ,
         EEI = ,
         EVI = ,
         VEI = ,
         VVI = ,
         XXI = {
           n <- nrow(data)
           p <- ncol(data)
           dof <- p + 2
           if(is.null(aux$scale))
             { fac <- (1/G)^(2/p)
               scale <- (fac * sum(apply(data, 2, var)))/ p }
           else
             { scale <- aux$scale }
           list(shrinkage = shrinkage, mean = mean, dof = dof, scale = scale)
         },
         ##
         EEE = ,
         EVE = ,
         VEE = ,
         VVE = ,
         EEV = ,
         VEV = ,
         EVV = ,
         VVV = ,
         XXX = {
           n <- nrow(data)
           p <- ncol(data)
           dof <- p + 2
           if(is.null(aux$scale))
             { fac <- (1/G)^(2/p)
               # full covariance only when it is nonsingular (n > p);
               # otherwise fall back to its diagonal
               if(n > p)
                 { scale <- fac * var(data) }
               else
                 { scale <- fac * diag(apply(data, 2, var)) } }
           else
             { scale <- aux$scale }
           list(shrinkage = shrinkage, mean = mean, dof = dof, scale = scale)
         },
         stop("no default prior for this model"))
}

# emControl: validated list of EM algorithm control settings (tolerances,
# iteration limits, equal mixing proportions flag).
emControl <- function(eps = .Machine$double.eps,
                      tol = c(1.0e-05, sqrt(.Machine$double.eps)),
                      itmax = c(.Machine$integer.max, .Machine$integer.max),
                      equalPro = FALSE)
{
  if(any(eps < 0)) stop("eps is negative")
  if(any(eps >= 1)) stop("eps is not less than 1")
  if(any(tol < 0)) stop("tol is negative")
  if(any(tol >= 1)) stop("tol is not less than 1")
  if(any(itmax < 0)) stop("itmax is negative")
  if(length(tol) == 1) tol <- rep(tol, 2)
  if(length(itmax) ==
1) itmax <- c(itmax, .Machine$integer.max)
  # infinite iteration limits map to the largest representable integer
  i <- is.infinite(itmax)
  if(any(i)) itmax[i] <- .Machine$integer.max
  list(eps = eps, tol = tol, itmax = itmax, equalPro = equalPro)
}

# priorControl: bundle the name of a prior-generating function together with
# its optional arguments, for use as the 'prior' argument of the ME functions.
priorControl <- function(functionName = "defaultPrior", ...)
{
  c(list(functionName = functionName), list(...))
}

# cdensEEE: component densities (optionally on the log scale) for the EEE
# model at the given parameters; returns an n x G matrix with attributes
# 'modelName', 'WARNING', and 'returnCode'.
cdensEEE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # fix: default 'warn' from mclust.options() as the other cdens* functions
  # do; previously 'if(warn)' raised an error whenever a warning branch was
  # reached with the default warn = NULL
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) > 2)
    stop("data must be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,G)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(z, logarithm = logarithm, modelName = "EEE",
                       WARNING = WARNING, returnCode = 9))
    }
  if(is.null(parameters$variance$cholSigma))
    stop("variance parameters are missing")
  # compiled kernel returns [10:12]: LAPACK info, loglik, log-density matrix
  temp <- .Fortran("eseee",
                   as.logical(1),
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$cholSigma),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(p),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:12]
  lapackCholInfo <- temp[[1]][1]
  loglik <- temp[[2]]
  z <- matrix(temp[[3]], n, G)
  WARNING <- NULL
  if(lapackCholInfo)
    { if(lapackCholInfo > 0)
        { WARNING <- "sigma is not positive definite"
          if(warn) warning(WARNING)
        }
      else
        { WARNING <- "input error for LAPACK DPOTRF"
          if(warn) warning(WARNING)
        }
      z[] <- NA
      ret <- -9
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      z[] <- NA
      ret <- -1
    }
  else
    { if (!logarithm) z <- exp(z)
      ret <- 0
    }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "EEE",
            WARNING = WARNING, returnCode = ret)
}

emEEE <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  # EM for EEE = one E-step from the given parameters, then iterate via meEEE
  z <- estepEEE(data, parameters = parameters, warn = warn)$z
  meEEE(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# estepEEE: E-step for the EEE model; returns the conditional probabilities z
# (with an extra noise column when pro has G+1 entries) and the log-likelihood.
estepEEE <- function(data, parameters, warn = NULL, ...)
{
  if (is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) > 2)
    stop("data must be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # a noise component is signalled by one extra mixing proportion
  noise <- (l == G + 1)
  if(!noise)
    { if(l != G) stop("pro improperly specified")
      K <- G
      Vinv <- NULL }
  else
    { K <- G + 1
      Vinv <- parameters$Vinv
      if(is.null(Vinv) || Vinv <= 0)
        Vinv <- hypvol(data, reciprocal = TRUE) }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")])))
    { WARNING <- "parameters are missing"
      if(warn) warning(WARNING)
      z <- matrix(as.double(NA),n,K)
      dimnames(z) <- list(dimnames(data)[[1]], NULL)
      return(structure(list(modelName = "EEE", n=n, d=p, G=G, z=z,
                            parameters=parameters, loglik=NA),
                       WARNING = WARNING, returnCode = 9))
    }
  if (is.null(parameters$variance$cholSigma))
    stop("variance parameters are missing")
  # compiled kernel returns [10:12]: LAPACK info, loglik, z matrix
  temp <- .Fortran("eseee",
                   as.logical(1),
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$cholSigma),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(p),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[10:12]
  lapackCholInfo <- temp[[1]][1]
  loglik <- temp[[2]]
  z <- matrix(temp[[3]], n, K)
  WARNING <- NULL
  if(lapackCholInfo)
    { if(lapackCholInfo > 0)
        { WARNING <- "sigma is not positive definite"
          # fix: warnings were raised unconditionally here, ignoring the
          # 'warn' flag used everywhere else in this function and its siblings
          if(warn) warning(WARNING)
          ret <- -4
        }
      else
        { WARNING <- "input error for LAPACK DPOTRF"
          if(warn) warning(WARNING)
          ret <- -5
        }
      z[] <- loglik <- NA
    }
  else if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      z[] <- loglik <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <-
list(dimnames(data)[[1]],NULL) structure(list(modelName = "EEE", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meEEE <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEE", d = p, G = G, Sigma = matrix(as.double(NA), p, p), cholSigma = matrix(as.double(NA), p, p)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EEE", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("meeee", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(p * p), double(K), double(p), PACKAGE = "mclust")[7:13] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEE"), prior[names(prior) != "functionName"])) temp <- .Fortran("meeeep", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), 
as.double(priorParams$mean), as.double(if(any(priorParams$scale != 0)) chol(priorParams$ scale) else priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(p * p), double(K), double(p), PACKAGE = "mclust")[c(11:17, 10)] } z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] mu <- matrix(temp[[5]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) cholSigma <- matrix(temp[[6]], p, p) pro <- temp[[7]] WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) Sigma <- matrix( NA, p, p) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- logprior <- NA sigma <- array(NA, c(p, p, G)) Sigma <- matrix(as.double(NA), p, p) ret <- if(control$equalPro) -2 else -3 } else { Sigma <- unchol(cholSigma, upper = TRUE) sigma <- array(0, c(p, p, G)) for(k in 1:G) sigma[, , k] <- Sigma if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(Sigma) <- dimnames(cholSigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EEE", d = p, G = G, sigma = sigma, Sigma = Sigma, cholSigma = cholSigma) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "EEE", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), 
info = info, WARNING = WARNING, returnCode = ret) } mstepEEE <- function(data, z, prior = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEE", d = p, G = G, sigma <- array(NA, c(p,p, G)), Sigma = matrix(as.double(NA), p, p), cholSigma = matrix(as.double(NA), p, p)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EEE", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if(is.null(prior)) { temp <- .Fortran("mseee", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(p), double(p * G), double(p * p), double(G), PACKAGE = "mclust")[7:9] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEE"), prior[names(prior) != "functionName"])) temp <- .Fortran("mseeep", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(if(any(priorParams$scale != 0)) chol(priorParams$scale) else priorParams$scale), as.double(priorParams$dof), double(p), double(p * G), double(p * p), double(G), PACKAGE = "mclust")[11:13] } mu <- matrix(temp[[1]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) cholSigma <- matrix(temp[[2]], p, p) pro <- temp[[3]] sigma <- array(0, c(p, p, G)) Sigma <- unchol(cholSigma, upper = TRUE) for(k in 1:G) sigma[, , k] <- Sigma WARNING <- NULL if(any(mu > 
signif(.Machine$double.xmax, 6)))
    { # M-step failed numerically: invalidate all estimates
      WARNING <- "cannot compute M-step"
      if(warn) warning(WARNING)
      mu[] <- sigma[] <- Sigma[] <- cholSigma[] <- NA
      ret <- -1
    }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(Sigma) <- dimnames(cholSigma) <- list(dimnames(data)[[2]],
                                                 dimnames(data)[[2]])
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EEE", d = p, G = G,
                   sigma = sigma, Sigma = Sigma, cholSigma= cholSigma)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "EEE", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# simEEE: draw n observations from an EEE Gaussian mixture with the given
# parameters; returns an n x (d+1) matrix whose first column ('group') is the
# component label of each observation.
simEEE <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    {
      warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1), modelName = "EEE"))
    }
  # missing mixing proportions default to equal weights
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  # use the stored Cholesky factor when available, otherwise factor Sigma
  if(is.null(cholSigma <- parameters$variance$cholSigma))
    {
      if(is.null(Sigma <- parameters$variance$Sigma))
        {
          # fix: corrected typo "inlcude" in the error message
          stop("variance parameters must include either Sigma or cholSigma")
        }
      cholSigma <- chol(Sigma)
    }
  for(k in 1:G)
    {
      m <- ctabel[k]
      # transform iid N(0,1) draws by the shared Cholesky factor, then shift
      # by the component mean
      x[clabels == k,] <- sweep(matrix(rnorm(m * d), nrow = m,
                                       ncol = d) %*% cholSigma,
                                MARGIN = 2, STATS = mu[,k], FUN = "+")
    }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "EEE")
}

cdensEEI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EEI", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("eseei", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "EEI", WARNING = WARNING, returnCode = ret) } cdensEII <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EII", WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(sigmasq < 0) stop("sigma-squared is negative") if(!sigmasq) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EII", WARNING = WARNING, returnCode = 9)) } temp <- .Fortran("eseii", as.double(data), as.double(mu), as.double(sigmasq), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[9:10] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "EII", WARNING = WARNING, returnCode = ret) } emEEI <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepEEI(data, parameters = parameters, warn = warn)$z meEEI(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepEEI <- function(data, parameters, warn = NULL, ...) 
# E-step body for the EEI (diagonal, equal volume and shape) model; the
# function header precedes this chunk. Normalizes the mixing proportions,
# detects an optional noise component (length(pro) == G + 1) whose precision
# Vinv defaults to hypvol(data, reciprocal = TRUE), then calls the Fortran
# routine 'eseei' to compute conditional membership probabilities z and the
# log-likelihood. Return codes: 9 = parameters missing, -1 = singular
# covariance (loglik overflow sentinel), 0 = success. Ends with the header of
# meEEI (EM iteration for the EEI model).
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EEI", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("eseei", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(1), double(n * K), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EEI", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meEEI <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
# meEEI body: ME (M-step then E-step) iteration for the EEI model starting
# from membership matrix z. A non-NULL Vinv signals a noise component, so the
# number of Gaussian components is G = K - 1. Dispatches to Fortran 'meeei'
# (no prior) or 'meeeip' (conjugate prior built by prior$functionName).
# returnCode 9 = z entirely missing.
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) > 2) stop("data should be in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEI", d = p, G = G, scale = NA, shape = rep(NA,p)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EEI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("meeei", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(p), double(K), PACKAGE = "mclust")[7:14] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEI"), prior[names(prior) != "functionName"])) temp <- .Fortran("meeeip", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(p), double(K), PACKAGE = "mclust")[11:18] } z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] mu <- 
# meEEI continuation: unpacks Fortran outputs (mu, scale, shape, pro),
# classifies failures via loglik sentinels (> xmax: singular covariance ->
# ret -1; < -xmax: a z column sum / mixing proportion fell below threshold ->
# ret -2/-3), flags iteration-limit hit with negated 'its' and ret 1, and
# builds the EEI variance structure Sigma = diag(scale * shape) replicated
# across the G components. Ends with the mstepEEI header.
matrix(temp[[5]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp[[6]] shape <- temp[[7]] pro <- temp[[8]] WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) sigma <- array(NA, c(p, p, G)) Sigma <- matrix(as.double(NA), p, p) mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } sigma <- array(NA, c(p, p, G)) Sigma <- matrix(as.double(NA), p, p) mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA ret <- if(control$equalPro) -2 else -3 } else { sigma <- array(0, c(p, p, G)) Sigma <- diag(scale * shape) for(k in 1:G) sigma[, , k] <- Sigma if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EEI", d = p, G = G, sigma = sigma, Sigma = Sigma, scale = scale, shape = shape) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "EEI", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepEEI <- function(data, z, prior = NULL, warn = NULL, ...)
# mstepEEI body: one M-step for the EEI model given data and membership
# matrix z. Dispatches to Fortran 'mseei' (no prior) or 'mseeip' (conjugate
# prior). Note: 'any(!c(scale, shape))' tests for exact zeros in the variance
# decomposition (numeric coerced to logical), which together with overflow
# marks "cannot compute M-step" (ret -1). returnCode 9 = z entirely missing.
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEI", d = p, G = G, scale = NA, shape = rep(NA,p)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EEI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if(is.null(prior)) { temp <- .Fortran("mseei", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(p * G), double(1), double(p), double(G), PACKAGE = "mclust")[6:9] } else { storage.mode(z) <- "double" priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEI"), prior[names( prior) != "functionName"])) temp <- .Fortran("mseeip", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), double(p * G), double(1), double(p), double(G), PACKAGE = "mclust")[10:13] } mu <- matrix(temp[[1]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp[[2]] shape <- temp[[3]] pro <- temp[[4]] WARNING <- NULL if(any(c(shape, scale) > signif(.Machine$double.xmax, 6)) || any(!c( scale, shape))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- scale <- shape[] <- NA sigma <- Sigma <- array(NA, c(p, p, G)) ret <- -1 } else { sigma <- array(0, c(p, p, G)) Sigma <- diag(scale * shape) for(k in 1:G) sigma[, , k] <- Sigma ret <- 0 } dimnames(z) <- 
# mstepEEI continuation: attaches dimnames and assembles the returned
# parameter list. Then simEEI: draws n observations from an EEI mixture --
# samples component labels from pro (uniform if pro is NULL), then generates
# each component's points via rnorm %*% cholSigma with the common diagonal
# Cholesky factor diag(sqrt(scale * shape)); returns cbind(group, x). Ends
# with the cdensE header (component densities for the 1-D equal-variance
# model).
list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EEI", d = p, G = G, sigma = sigma, Sigma = Sigma, scale = scale, shape = shape) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "EEI", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simEEI <- function(parameters, n, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "EEI")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) shape <- parameters$variance$shape if(length(shape) != d) stop("shape incompatible with mean") cholSigma <- diag(sqrt(parameters$variance$scale * shape)) for(k in 1:G) { m <- ctabel[k] x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "EEI") } cdensE <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- drop(data) n <- length(data) mu <- drop(parameters$mean) G <- length(mu) if(any(is.na(unlist(parameters[c("mean", "variance")]))) || any(is.null(parameters[c("mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(names(data), NULL) return(structure(z, logarithm = logarithm, modelName = "E", WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(is.null(sigmasq)) stop("variance parameters are missing") if(length(sigmasq) > 1) if(warn) warning("more than one sigma-squared given") if(sigmasq < 0) stop("sigma-squared is negative") if(!sigmasq) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(names(data), NULL) return(structure(z, logarithm = logarithm, modelName = "E", WARNING = WARNING, returnCode = 9)) } temp <- .Fortran("es1e", as.double(data), as.double(mu), as.double(sigmasq), as.double(-1), as.integer(n), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[8:9] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(names(data),NULL) structure(z, logarithm = logarithm, modelName = "E", WARNING = WARNING, returnCode = ret) } emE <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepE(data, parameters = parameters, warn = warn)$z meE(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepE <- function(data, parameters, warn = NULL, ...) 
# estepE body: E-step for the univariate "E" model. Same noise-component
# convention as the multivariate E-steps (length(pro) == G + 1 adds a uniform
# noise term with precision Vinv, defaulting to hypvol). Calls Fortran 'es1e'
# with the normalized mixing proportions. Guards: missing parameters (ret 9),
# NULL sigmasq -> stop, zero sigmasq -> ret -1 with NA z; overflow sentinel
# -> "cannot compute E-step" (ret -1).
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- drop(data) n <- length(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- drop(parameters$mean) G <- length(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(names(data), NULL) return(structure(list(modelName = "E", n=n, d=1, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(is.null(sigmasq)) stop("variance parameters are missing") if(length(sigmasq) > 1) if(warn) warning("more than one sigma-squared specified") if(sigmasq < 0) stop("sigma-squared is negative") if(!sigmasq) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(names(data), NULL) return(structure(list(modelName = "E", n=n, d=1, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = -1)) } temp <- .Fortran("es1e", as.double(data), as.double(mu), as.double(sigmasq), as.double(pro), as.integer(n), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(1), double(n * K), PACKAGE = "mclust")[8:9] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(names(data),NULL) structure(list(modelName = "E", n = n, d = 1, G = G, z = z, parameters = parameters, loglik = loglik), 
# End of estepE. Then cdensEEV: component densities for the EEV model
# (equal volume and shape, varying orientation) via Fortran 'eseev'; the
# orientation array is transposed with aperm(..., c(2,1,3)) before the call,
# and pro/Vinv slots are -1 placeholders as in cdensE. Then emEEV: EM pass
# as estepEEV followed by meEEV. Ends with the estepEEV header.
WARNING = WARNING, returnCode = ret) } cdensEEV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EEV", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") temp <- .Fortran("eseev", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(aperm(parameters$variance$orientation,c(2,1,3))), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(p), double(p), double(1), double(n * G), PACKAGE = "mclust")[13:14] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "EEV", WARNING = WARNING, returnCode = ret) } emEEV <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepEEV(data, parameters = parameters, warn = warn)$z meEEV(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepEEV <- function(data, parameters, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EEV", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape) || is.null(parameters$variance$orientation)) stop("variance parameters are missing") temp <- .Fortran("eseev", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(aperm(parameters$variance$orientation,c(2,1,3))), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(p), double(p), double(1), double(n * K), PACKAGE = "mclust")[13:14] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EEV", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meEEV <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) 
# meEEV body: ME iteration for the EEV model. 'lwork' sizes the LAPACK SVD
# workspace used inside the Fortran routines 'meeev' / 'meeevp'; with a
# prior, priorParams$scale is passed through chol() when nonzero. Noise
# handling via Vinv as in meEEI.
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEV", d = p, G = G, scale = NA, shape = rep(NA,p), orientation = array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EEV", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p)) storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("meeev", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), as.integer(lwork), double(p * G), double(1), double(p), double(p * p * G), double(K), double(lwork), double(p), PACKAGE = "mclust")[7:16] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEV"), prior[names(prior) !="functionName"])) temp <- .Fortran("meeevp", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(if(any(priorParams$scale != 0)) chol(priorParams$ scale) else priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), 
# meEEV continuation: unpacks results including the LAPACK DGESVD status
# (lapackSVDinfo: > 0 no convergence, < 0 bad input -> ret -9), then the
# usual loglik sentinel checks (-1 singular covariance, -2/-3 vanishing
# column sum / mixing proportion, 1 iteration limit). On success the
# covariances are reconstructed as sigma = scale * shapeO(shape, O), i.e.
# Sigma = scale * O %*% diag(shape) %*% t(O) per the inline note.
as.double(control$tol[1]), as.double(control$eps), as.integer(lwork), double(p * G), double(1), double(p), double(p * p * G), double(K), double(lwork), double(p), PACKAGE = "mclust")[11:20] } z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] lapackSVDinfo <- temp[[5]] mu <- matrix(temp[[6]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp[[7]] shape <- temp[[8]] O <- aperm(array(temp[[9]], c(p, p, G)),c(2,1,3)) pro <- temp[[10]] WARNING <- NULL if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DGESVD fails to converge" } else { WARNING <- "input error for LAPACK DGESVD" } z[] <- O[] <- shape[] <- NA scale <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -9 } else if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) shape[] <- NA mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "a z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { sigma <- scale * shapeO(shape, O, transpose = FALSE) if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) ## Sigma = scale * O %*% diag(shape) %*% t(O) variance <- list(modelName = "EEV", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "EEV", prior = prior, n = n, d = p, G = G, z = z, 
# End of meEEV. Then mstepEEV: one M-step for the EEV model via Fortran
# 'mseev' / 'mseevp' (again chol() of a nonzero prior scale), with its own
# lwork including G in the max. returnCode: -4/-5 LAPACK DGESVD failure /
# bad input, -1 cannot compute M-step (overflow in scale/shape), 0 success.
parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepEEV <- function(data, z, prior = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EEV", d = p, G = G, scale = NA, shape = rep(NA,p), orientation=array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EEV", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } # shape <- sqrt(rev(sort(shape/exp(sum(log(shape))/p)))) if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop( "improper specification of z") lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), G) if(is.null(prior)) { temp <- .Fortran("mseev", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(lwork), as.integer(lwork), double(p * G), double(1), double(p), double(p * p * G), double(G), PACKAGE = "mclust")[7:12] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EEV"), prior[names(prior) != "functionName"])) temp <- .Fortran("mseevp", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(if(any(priorParams$scale != 0)) chol(priorParams$ scale) else priorParams$scale), as.double(priorParams$dof), double(lwork), as.integer(lwork), double(p * G), double(1), double(p), double(p * p * G), double(G), PACKAGE = "mclust")[11:16] } lapackSVDinfo <- temp[[1]] mu <- matrix(temp[[2]], 
# mstepEEV continuation: unpack mu/scale/shape/orientation/pro, classify
# LAPACK or overflow failures, otherwise rebuild sigma from the
# scale-shape-orientation decomposition via shapeO. Ends with the simEEV
# header.
p, G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp[[3]] shape <- temp[[4]] O <- aperm( array(temp[[5]], c(p, p, G)), c(2,1,3)) pro <- temp[[6]] WARNING <- NULL if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DGESVD fails to converge" if(warn) warning(WARNING) ret <- -4 } else { WARNING <- "input error for LAPACK DGESVD" if(warn) warning(WARNING) ret <- -5 } O[] <- shape[] <- scale <- NA sigma <- array(NA, c(p, p, G)) } else if(any(c(abs(scale), shape) > signif(.Machine$double.xmax, 6))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- scale <- O[] <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else { sigma <- scale * shapeO(shape, O, transpose = FALSE) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EEV", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "EEV", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simEEV <- function(parameters, n, seed = NULL, ...)
{ if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "EEV")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) shape <- parameters$variance$shape if(length(shape) != d) stop("shape incompatible with mean") sss <- sqrt(parameters$variance$scale * shape) for(k in 1:G) { m <- ctabel[k] cholSigma <- t(parameters$variance$orientation[, , k]) * sss x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "EEV") } emEII <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepEII(data, parameters = parameters, warn = warn)$z meEII(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepEII <- function(data, parameters, warn = NULL, ...) 
# estepEII body: E-step for the EII (spherical, equal volume) model via
# Fortran 'eseii'. Noise handling via Vinv/hypvol as in the other E-steps.
# FIX: a NULL sigmasq previously only triggered an optional warning and then
# fell through to 'if(sigmasq < 0)', where NULL < 0 gives logical(0) and the
# if() fails with an obscure "argument is of length zero" error; now it
# stops with a clear message, consistent with estepE and cdensE.
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) p <- ncol(data) n <- nrow(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EII", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(is.null(sigmasq)) stop("variance parameters are missing") if(sigmasq < 0) stop("sigma-squared is negative") if(!sigmasq) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EII", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = -1)) } temp <- .Fortran("eseii", as.double(data), as.double(mu), as.double(sigmasq), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(1), double(n * K), PACKAGE = "mclust")[9:10] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EII", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING 
# End of estepEII (returnCode attribute closed on the next fragment). Then
# meEII: ME iteration for the EII model via Fortran 'meeii' / 'meeiip',
# with the usual noise (Vinv) and missing-z (ret 9) handling.
= WARNING, returnCode = ret) } meEII <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) > 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] # number of groups if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EII", d = p, G = G, sigmasq = NA) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EII", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("meeii", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(K), PACKAGE = "mclust")[7:13] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EII"), prior[names(prior) != "functionName"])) temp <- .Fortran("meeiip", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(K), PACKAGE = 
# meEII continuation: unpacks Fortran results; sigma-squared below
# max(control$eps, 0) or loglik overflow -> ret -1; loglik underflow
# sentinel -> ret -2/-3 (equalPro vs mixing proportion); iteration limit
# -> negated 'its', ret 1. Sigma is the common spherical covariance
# diag(rep(sigmasq, p)). Ends with the mstepEII header.
"mclust")[c(11:17, 10)] } mu <- matrix(temp[[5]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] sigmasq <- temp[[6]] Sigma <- diag(rep(sigmasq, p)) pro <- temp[[7]] WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6) || sigmasq <= max(control$eps,0)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { sigma <- array(0, c(p, p, G)) for(k in 1:G) sigma[, , k] <- Sigma if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EII", d = p, G = G, sigma = sigma, Sigma = Sigma, sigmasq = sigmasq, scale = sigmasq) parameters <- list(pro=pro, mean=mu, variance = variance, Vinv=Vinv) structure(list(modelName = "EII", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepEII <- function(data, z, prior = NULL, warn = NULL, ...)
# mstepEII body: one M-step for the EII model via Fortran 'mseii' /
# 'mseiip'. On success the common spherical covariance Sigma =
# diag(rep(sigmasq, p)) is replicated across the G components; sigmasq
# overflow -> "cannot compute M-step" (ret -1); all-NA z -> ret 9.
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EII", d = p, G = G, sigmasq = NA) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EII", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("mseii", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(p * G), double(1), double(G), PACKAGE = "mclust")[6:8] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EII"), prior[names(prior) !="functionName"])) temp <- .Fortran("mseiip", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), double(p * G), double(1), double(G), PACKAGE = "mclust")[10:12] } mu <- matrix(temp[[1]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) sigmasq <- temp[[2]] pro <- temp[[3]] sigma <- array(0, c(p, p, G)) Sigma <- diag(rep(sigmasq, p)) for(k in 1:G) sigma[, , k] <- Sigma WARNING <- NULL if(sigmasq > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(Sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]]) dimnames(sigma) <- list(dimnames(data)[[2]], 
# mstepEII continuation: final dimnames and result assembly. Then simEII:
# draws n observations from a spherical EII mixture using the common
# Cholesky factor diag(rep(sqrt(sigmasq), d)); note its missing-parameter
# branch returns an n x d matrix (not n x d+1 as in simEEI/simEEV). Ends
# with the meE header (ME iteration for the 1-D equal-variance model).
dimnames(data)[[2]], NULL) variance <- list(modelName = "EII", d = p, G = G, sigma = sigma, Sigma = Sigma, sigmasq = sigmasq, scale = sigmasq) parameters <- list(pro=pro, mean=mu, variance = variance) structure(list(modelName = "EII", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simEII <- function(parameters, n, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d), modelName = "EII")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) sigmasq <- parameters$variance$sigmasq cholSigma <- diag(rep(sqrt(sigmasq), d)) for(k in 1:G) { m <- ctabel[k] x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "EII") } meE <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
# meE body: ME iteration for the univariate "E" (equal variance) model via
# Fortran 'me1e' / 'me1ep'. Data must be one-dimensional; noise handling
# via Vinv/hypvol and missing-z guard (ret 9) as in the multivariate
# counterparts. The eighth output of 'me1ep' (log posterior, per the
# commented-out '## log post <- temp[[8]]') is requested but not used.
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be 1 dimensional") data <- as.vector(data) n <- length(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal length of data") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if (Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "E", d = 1, G = G, sigmasq = NA) parameters <- list(pro=rep(NA,G), mean=rep(NA,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="E", prior=prior, n=n, d=1, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("me1e", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(G), double(1), double(K), PACKAGE = "mclust")[6:12] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "E"), prior[names(prior) != "functionName"])) temp <- .Fortran("me1ep", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(G), double(1), double(K), PACKAGE = "mclust")[c(10:16, 9)] } mu <- temp[[5]] names(mu) <- as.character(1:G) z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] sigmasq <- temp[[6]] pro <- temp[[7]] ## log post <- temp[[8]] WARNING <- NULL if(loglik > 
# meE continuation: failure classification mirrors meEII (sigmasq below
# max(control$eps, 0) or loglik overflow -> ret -1; underflow sentinel ->
# ret -2/-3; iteration limit -> ret 1). NOTE(review): the first failure
# branch also assigns 'logprior <- NA' although logprior is never defined
# or returned -- apparently a leftover from the commented-out log-posterior
# handling. Ends with the mstepE header.
signif(.Machine$double.xmax, 6) || sigmasq <= max(control$eps,0)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) mu[] <- pro[] <- sigmasq <- z[] <- loglik <- logprior <- NA ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { WARNING <- "z column sum fell below threshold" if(warn) warning(WARNING) } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA ret <- if(control$equalPro) -2 else -3 } else if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 info <- c(iterations = its, error = err) dimnames(z) <- list(names(data), NULL) variance <- list(modelName = "E", d = 1, G = G, sigmasq = sigmasq) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "E", prior = prior, n = n, d = 1, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepE <- function(data, z, prior = NULL, warn = NULL, ...)
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- as.vector(data) n <- length(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") # number of groups G <- dimz[2] ## if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName="E", d=1, G=G, sigmasq=NA) parameters <- list(pro=rep(NA,G), mean=rep(NA,G), variance=variance) return(structure(list(modelName="E", prior=prior, n=n, d=1, G=G, z = z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if(is.null(prior)) { temp <- .Fortran("ms1e", as.double(data), as.double(z), as.integer(n), as.integer(G), double(G), double(1), double(G), PACKAGE = "mclust")[5:7] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "E"), prior[names(prior) != "functionName"])) storage.mode(z) <- "double" temp <- .Fortran("ms1ep", as.double(data), z, as.integer(n), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), double(G), double(1), double(G), PACKAGE = "mclust")[9:11] } mu <- temp[[1]] names(mu) <- as.character(1:G) sigmasq <- temp[[2]] pro <- temp[[3]] WARNING <- NULL if(sigmasq > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) pro[] <- mu[] <- sigmasq <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(names(data), NULL) variance <- list(modelName = "E", d = 1, G = G, sigmasq = sigmasq) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "E", prior = prior, n = n, d = 1, G = G, z = z, parameters = parameters), WARNING = WARNING, returnCode = ret) } simE <- function(parameters, n, seed = NULL, ...) 
{ if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, 2), modelName = "E")) } if(!is.null(seed)) set.seed(seed) mu <- parameters$mean G <- length(mu) pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- rep(0, n) sd <- sqrt(parameters$variance$sigmasq) for(k in 1:G) { x[clabels == k] <- mu[k] + rnorm(ctabel[k], sd = sd) } structure(cbind(group = clabels, "1" = x), modelName = "E") } cdensEVI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- parameters$mean G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "EVI", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("esevi", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, 
modelName = "EVI", WARNING = WARNING, returnCode = ret) } emEVI <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepEVI(data, parameters = parameters, warn = warn)$z meEVI(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepEVI <- function(data, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "EVI", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("esevi", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(1), double(n * K), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "EVI", n = n, d = 
p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meEVI <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) > 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if (Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVI", d = p, G = G, scale = NA, shape = matrix(as.double(NA),p,G)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="EVI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("meevi", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax[1]), as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(p * G), double(K), PACKAGE = "mclust")[7:14] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVI"), prior[names(prior) != "functionName"])) temp <- .Fortran("meevip", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax[1]), 
as.double(control$tol[1]), as.double(control$eps), double(p * G), double(1), double(p * G), double(K), PACKAGE = "mclust")[11:18] } z <- temp[[1]] its <- temp[[2]] err <- temp[[3]] loglik <- temp[[4]] mu <- matrix(temp[[5]], p, G) scale <- temp[[6]] shape <- matrix(temp[[7]], p, G) dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G)) pro <- temp[[8]] WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else if(loglik < - signif(.Machine$double.xmax, 6)) { if(control$equalPro) { if(warn) warning("z column sum fell below threshold") WARNING <- "z column sum fell below threshold" } else { WARNING <- "mixing proportion fell below threshold" if(warn) warning(WARNING) } mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- if(control$equalPro) -2 else -3 } else { sigma <- array(apply(scale * shape, 2, diag), c(p, p, G)) if(its >= control$itmax[1]) { WARNING <- "iteration limit reached" if(warn) warning(WARNING) its <- - its ret <- 1 } else ret <- 0 } info <- c(iterations = its, error = err) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "EVI", d = p, G = G, sigma = sigma, scale = scale, shape = shape) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "EVI", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepEVI <- function(data, z, prior = NULL, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "EVI", d = p, G = G, scale = NA, shape = matrix(as.double(NA),p,G)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="EVI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") if(is.null(prior)) { temp <- .Fortran("msevi", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(p * G), double(1), double(p * G), double(G), PACKAGE = "mclust")[6:9] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "EVI"), prior[names( prior) != "functionName"])) temp <- .Fortran("msevip", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), double(p * G), double(1), double(p * G), double(G), PACKAGE = "mclust")[10:13] } mu <- matrix(temp[[1]], p, G) scale <- temp[[2]] shape <- matrix(temp[[3]], p, G) dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G)) pro <- temp[[4]] WARNING <- NULL if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) || any(!c( scale, shape))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- scale <- shape[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else { sigma <- array(apply(scale * shape, 2, diag), c(p, p, G)) ret <- 0 } dimnames(z) <- 
list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "EVI", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "EVI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}

# Simulate n observations from a G-component EVI Gaussian mixture
# (equal volume, varying diagonal shape, axis-aligned orientation).
# Returns an n x (d+1) matrix: column "group" holds the component
# labels, the remaining d columns the simulated coordinates.
simEVI <- function(parameters, n, seed = NULL, ...)
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")])))
    { warning("parameters are missing")
      return(structure(matrix(as.double(NA), n, d + 1),
                       modelName = "EVI")) }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  # draw component memberships from the mixing proportions
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  shape <- as.matrix(parameters$variance$shape)
  # bug fix: compare against the mean matrix "mu", not the base R
  # function "mean" (dim of a function is NULL, so the old check
  # "dim(shape) == dim(mean)" never triggered)
  if(!all(dim(shape) == dim(mu)))
    stop("shape incompatible with mean")
  # per-coordinate standard deviations for each component
  sss <- sqrt(parameters$variance$scale * shape)
  for(k in 1:G)
     { m <- ctabel[k]
       # nrow = d guards against diag() building an identity matrix
       # from a single number when d == 1
       x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                                  diag(sss[, k], nrow = d),
                                  MARGIN = 2, STATS = mu[, k], FUN = "+")
     }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "EVI")
}

# old version: LS 20150317
sigma2decomp <- function(sigma, G = NULL, tol = sqrt(.Machine$double.eps), ...)
{ dimSigma <- dim(sigma) if(is.null(dimSigma)) stop("sigma improperly specified") d <- dimSigma[1] if(dimSigma[2] != d) stop("sigma improperly specified") l <- length(dimSigma) if(l < 2 || l > 3) stop("sigma improperly specified") if(is.null(G)) { if(l == 2) { G <- 1 sigma <- array(sigma, c(dimSigma, 1)) } else { G <- dimSigma[3] } } else { if(l == 3 && G != dimSigma[3]) stop("sigma and G are incompatible") if(l == 2 && G != 1) sigma <- array(sigma, c(d,d,G)) } # angle between subspaces subspace <- function(A, B) { for(k in 1:ncol(A)) { B <- B - A[,k,drop=FALSE] %*% (t(A[,k,drop=FALSE]) %*% B) } norm(B, type = "2") } # check equality of values uniq <- function(x) { abs(max(x) - min(x)) < tol } decomp <- list(d = d, G = G, scale = rep(0, G), shape = matrix(0, d, G), orientation = array(0, c(d, d, G))) for(k in 1:G) { ev <- eigen(sigma[,,k], symmetric = TRUE) temp <- log(ev$values); temp[!is.finite(temp)] <- 0 logScale <- sum(temp)/d decomp$scale[k] <- exp(logScale) decomp$shape[,k] <- exp(temp - logScale) decomp$orientation[,,k] <- ev$vectors } scaleName <- "V" shapeName <- "V" orientName <- "V" # check scale/volume if(uniq(decomp$scale)) { decomp$scale <- decomp$scale[1] scaleName <- "E" } # check shape if(all(apply(decomp$shape, 1, uniq))) { decomp$shape <- decomp$shape[, 1] if(all(uniq(decomp$shape))) { shapeName <- "I" decomp$shape <- rep(1, d) } else { shapeName <- "E" } } # check orientation eqOrientation <- { if(d == 2) all(apply(matrix(decomp$orientation, nrow = d * d, ncol = G), 1, uniq)) else all(apply(decomp$orientation[,,-1,drop=FALSE], 3, function(o) subspace(decomp$orientation[,,1],o)) < tol) } if(eqOrientation) { decomp$orientation <- decomp$orientation[,,1] if(all(apply(cbind(decomp$orientation, diag(d)), 1, uniq))) { orientName <- "I" decomp$orientation <- NULL } else { orientName <- "E" } } decomp$modelName <- paste0(scaleName, shapeName, orientName) decomp$sigma <- sigma orderedNames <- c("sigma", "d", "modelName", "G", "scale", "shape", 
"orientation") return(decomp[orderedNames]) } sigma2decomp <- function(sigma, G = NULL, tol = sqrt(.Machine$double.eps), ...) { dimSigma <- dim(sigma) if(is.null(dimSigma)) stop("sigma improperly specified") d <- dimSigma[1] if(dimSigma[2] != d) stop("sigma improperly specified") l <- length(dimSigma) if(l < 2 || l > 3) stop("sigma improperly specified") if(is.null(G)) { if(l == 2) { G <- 1 sigma <- array(sigma, c(dimSigma, 1)) } else { G <- dimSigma[3] } } else { if(l == 3 && G != dimSigma[3]) stop("sigma and G are incompatible") if(l == 2 && G != 1) sigma <- array(sigma, c(d,d,G)) } # angle between subspaces subspace <- function(A, B) { for(k in 1:ncol(A)) { B <- B - A[,k,drop=FALSE] %*% (t(A[,k,drop=FALSE]) %*% B) } norm(B, type = "2") } # check equality of values uniq <- function(x) { abs(max(x) - min(x)) < tol } decomp <- list(d = d, G = G, scale = rep(0, G), shape = matrix(0, d, G), orientation = array(0, c(d, d, G))) for(k in 1:G) { ev <- eigen(sigma[,,k], symmetric = TRUE) temp <- log(ev$values); temp[!is.finite(temp)] <- 0 logScale <- sum(temp)/d decomp$scale[k] <- exp(logScale) decomp$shape[,k] <- exp(temp - logScale) decomp$orientation[,,k] <- ev$vectors } scaleName <- "V" shapeName <- "V" orientName <- "V" # check scale/volume if(uniq(decomp$scale)) { decomp$scale <- decomp$scale[1] scaleName <- "E" } # check shape if(all(apply(decomp$shape, 1, uniq))) { decomp$shape <- decomp$shape[, 1] if(all(uniq(decomp$shape))) { shapeName <- "I" decomp$shape <- rep(1, d) } else { shapeName <- "E" } } # check orientation D <- decomp$orientation eqOrientation <- all(apply(D, 3, function(d) any(apply(d, 2, function(x) cor(D[,,1], x)^2) > (1-tol)))) if(eqOrientation) { decomp$orientation <- decomp$orientation[,,1] orientName <- "E" if(sum(abs(svd(decomp$orientation)$v) - diag(d)) < tol) { orientName <- "I" # decomp$orientation <- NULL } } decomp$modelName <- paste0(scaleName, shapeName, orientName) decomp$sigma <- sigma orderedNames <- c("sigma", "d", "modelName", "G", 
"scale", "shape", "orientation")
  return(decomp[orderedNames])
}

# Construct a d x d x G array of covariance matrices from their
# eigenvalue decomposition sigma_k = scale_k * O_k diag(shape_k) t(O_k).
# The dimension d, the number of groups G, and the scale/shape/
# orientation parameterization letters ("E", "V", "I"/"X") are inferred
# from the arguments; the resulting model name is attached as the
# "modelName" attribute of the returned array.
decomp2sigma <- function(d, G, scale, shape, orientation = NULL, ...)
{
  nod <- missing(d)
  noG <- missing(G)
  lenScale <- length(scale)
  if(lenScale != 1)
    { # fix typo in error message: "incompatibile" -> "incompatible"
      if(!noG && G != lenScale)
        stop("scale incompatible with G")
      G <- lenScale }
  shape <- as.matrix(shape)
  p <- nrow(shape)
  if(!nod && p != d)
    stop("shape incompatible with d")
  d <- p
  g <- ncol(shape)
  if(g != 1)
    { if(!is.null(G) && g != G)
        stop("shape incompatible with scale")
      if(!noG && g != G)
        stop("shape incompatible with G")
      G <- g }
  if(is.null(orientation))
    { # no orientation supplied: axis-aligned (identity) orientation
      orientName <- "I"
      if(is.null(G)) { G <- if(noG) 1 else G }
      orientation <- array(diag(d), c(d, d, G)) }
  else
    { dimO <- dim(orientation)
      l <- length(dimO)
      if(is.null(dimO) || l < 2 || l > 3 || dimO[1] != dimO[2])
        stop("orientation improperly specified")
      if(dimO[1] != d)
        stop("orientation incompatible with shape")
      if(l == 3)
        { # one orientation matrix per group: varying orientation
          orientName <- "V"
          if(is.null(G))
            { if(!noG && dimO[3] != G)
                stop("orientation incompatible with G")
              G <- dimO[3] }
          else if(G != dimO[3])
            stop("orientation incompatible with scale and/or shape" ) }
      else
        { # a single orientation matrix shared by all groups
          orientName <- "E"
          if(is.null(G)) { G <- if(noG) 1 else G }
          orientation <- array(orientation, c(d, d, G)) } }
  if(G == 1)
    { scaleName <- shapeName <- "X" }
  else
    { scaleName <- if(lenScale == 1) "E" else "V"
      shapeName <- if(g == 1) "E" else "V"
      # recycle equal parameters across all G groups
      scale <- rep(scale, G)
      shape <- matrix(shape, nrow = d, ncol = G) }
  sigma <- array(0, c(d, d, G))
  for(k in 1:G)
     { sigma[,,k] <- crossprod(t(orientation[,,k]) * sqrt(scale[k] * shape[,k])) }
  structure(sigma, modelName = paste0(scaleName, shapeName, orientName))
}

# Equally spaced grid of n points over a range; with edge = TRUE the
# grid includes the endpoints, otherwise the points are centered within
# n equal-width bins.
grid1 <- function (n, range = c(0, 1), edge = TRUE)
{
  # fix error message: the check rejects negative or non-integer n, so
  # n must be NONNEGATIVE (message previously said "nonpositive")
  if (any(n < 0 | round(n) != n))
    stop("n must be nonnegative and integer")
  G <- rep(0, n)
  if (edge) {
    G <- seq(from = min(range), to = max(range),
             by = abs(diff(range))/(n - 1))
  }
  else {
    lj <- abs(diff(range))
    incr <- lj/(2 * n)
    G <- seq(from = min(range) + incr, to = max(range) - incr,
             by = 2 * incr)
  }
  G
}

grid2 <- function (x,
y)
{
  # grid2: Cartesian product of two coordinate vectors, returned as a
  # (length(x) * length(y)) x 2 matrix with x varying fastest.
  lx <- length(x)
  ly <- length(y)
  xy <- matrix(0, nrow = lx * ly, ncol = 2)
  l <- 0
  for (j in 1:ly) {
    for (i in 1:lx) {
      l <- l + 1
      xy[l,] <- c(x[i], y[j])
    }
  }
  xy
}

# Volume of the smallest axis-aligned box containing the data, computed
# both in the original coordinates and in principal-component
# coordinates (the smaller of the two log-volumes is used).  With
# reciprocal = TRUE returns 1/volume instead; both directions guard
# against exceeding the machine-representable range.
hypvol <- function (data, reciprocal = FALSE)
{
  dimdat <- dim(data)
  oneD <- ((is.null(dimdat) || NCOL(data) == 1))
  if (oneD) {
    # univariate case: the "volume" is just the range of the data
    n <- length(as.vector(data))
    ans <- if (reciprocal) 1/diff(range(data)) else diff(range(data))
    return(ans)
  }
  if (length(dimdat) != 2)
    stop("data must be a vector or a matrix")
  data <- as.matrix(data)
  # sum of log column ranges = log volume of the bounding box
  sumlogdifcol <- function(x)
    sum(log(apply(x, 2, function(colm) diff(range(colm)))))
  bdvolog <- sumlogdifcol(data)
  pcvolog <- sumlogdifcol(princomp(data)$scores)
  # take the tighter (smaller) of the two bounding boxes, on log scale
  volog <- min(bdvolog, pcvolog)
  if(reciprocal)
    { minlog <- log(.Machine$double.xmin)
      if (-volog < minlog)
        { warning("hypervolume smaller than smallest machine representable positive number")
          ans <- 0 }
      else ans <- exp(-volog)
    }
  else
    { maxlog <- log(.Machine$double.xmax)
      if (volog > maxlog)
        { warning("hypervolume greater than largest machine representable number")
          ans <- Inf }
      else ans <- exp(volog)
    }
  return(ans)
}

# Subscript method for "mclustBIC" objects: subsets the BIC matrix
# while preserving the object's attributes, updating G, modelNames and
# returnCodes to match the selected rows/columns.  When the result has
# no dim (a single row/column dropped to a vector) it is returned as a
# plain vector without the mclustBIC attributes.
"[.mclustBIC" <- function (x, i, j, drop = FALSE)
{
  ATTR <- attributes(x)[c("G", "modelNames", "prior", "control",
                          "initialization", "Vinv", "warn", "n", "d",
                          "oneD", "returnCodes", "class")]
  oldClass(x) <- NULL
  x <- NextMethod("[")
  if (is.null(dim(x))) return(x)
  ATTR$G <- as.numeric(dimnames(x)[[1]])
  ATTR$modelNames <- dimnames(x)[[2]]
  ATTR$returnCodes <- ATTR$returnCodes[dimnames(x)[[1]],dimnames(x)[[2]], drop=FALSE]
  do.call("structure", c(list(.Data = x), ATTR))
}

bic <- function(modelName, loglik, n, d, G, noise = FALSE, equalPro = FALSE, ...)
{ nparams <- nMclustParams(modelName = modelName, d = d, G = G, noise = noise, equalPro = equalPro) 2 * loglik - nparams * log(n) } checkModelName <- function(modelName) { switch(EXPR = modelName, "X" = , "E" = , "V" = , "XII" = , "XXI" = , "XXX" = , "EII" = , "VII" = , "EEI" = , "VEI" = , "EVI" = , "VVI" = , "EEE" = , "EVE" = , "VEE" = , "VVE" = , "EEV" = , "VEV" = , "EVV" = , "VVV" = TRUE, stop("invalid model name")) } em <- function(data, modelName, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { checkModelName(modelName) funcName <- paste("em", modelName, sep = "") mc <- match.call(expand.dots = TRUE) mc[[1]] <- as.name(funcName) mc$modelName <- NULL eval(mc, parent.frame()) } estep <- function(data, modelName, parameters, warn = NULL, ...) { checkModelName(modelName) funcName <- paste("estep", modelName, sep = "") mc <- match.call(expand.dots = TRUE) mc[[1]] <- as.name(funcName) mc$modelName <- NULL eval(mc, parent.frame()) } mclustVariance <- function(modelName, d=NULL, G=2) { x <- -1 if (nchar(modelName) == 1) { if (!is.null(d) && d != 1) stop("modelName and d are incompatible") varList <- switch(EXPR = modelName, "X" = list(sigmasq = x), "E" = list(sigmasq = x), "V" = list(sigmasq = rep(x,G)), stop("modelName not recognized")) } else { if (nchar(modelName) != 3) stop("modelName is misspecified") if (is.null(d)) d <- 3 varList <- switch(EXPR = modelName, "XII" = list(sigmasq = x), "EII" = list(sigmasq = x, scale = x, shape = rep(x,d)), "VII" = list(sigmasq = rep(x,G), scale = rep(x,G), shape = rep(x,d)), "XXI" = list(scale = x, shape = rep(x,d)), "EEI" = list(scale = x, shape = rep(x,d)), "EVI" = list(scale = x, shape = matrix(x,d,G)), "VEI" = list(scale = rep(x,G), shape = rep(x,d)), "VVI" = list(scale = rep(x,G), shape = matrix(x,d,G)), "XXX" = { M <- matrix(x,d,d); M[row(M) > col(M)] <- 0; list(cholSigma = M) }, "EEE" = { M <- matrix(x,d,d); M[row(M) > col(M)] <- 0; list(cholSigma = M) }, "VEE" = list(scale = rep(x,G), shape = 
rep(x,d), orientation = matrix(x,d,d)),
           "VVE" = list(scale = rep(x,G), shape = matrix(x,d,G),
                        orientation = matrix(x,d,d)),
           "EVV" = list(scale = x, shape = matrix(x,d,G),
                        orientation = array(x,c(d,d,G))),
           "EVE" = list(scale = x, shape = matrix(x,d,G),
                        orientation = matrix(x,d,d)),
           "EEV" = list(scale = x, shape = rep(x,d),
                        orientation = array(x,c(d,d,G))),
           # bug fix: VEV has varying volume and equal shape, so the
           # template needs G scale values and one length-d shape vector
           # (it previously had a scalar scale and a d x G shape matrix,
           # contradicting nVarParams("VEV") = G + (d-1) + G*d*(d-1)/2)
           "VEV" = list(scale = rep(x,G), shape = rep(x,d),
                        orientation = array(x,c(d,d,G))),
           "VVV" = { A <- array(x,c(d,d,G))
                     I <- row(A[,,1]) > col(A[,,1])
                     for (k in 1:G) A[,,k][I] <- 0
                     list(cholsigma = A)},
           stop("modelName not recognized"))
  }
  c(modelName = modelName, d = d, G = G, varList)
}

# Dispatch to the model-specific me* function (e.g. meVVV) after
# validating the model name; the call is rebuilt with the model name
# removed and evaluated in the caller's frame so that all remaining
# arguments pass through unchanged.
me <- function(data, modelName, z, prior = NULL, control = emControl(),
               Vinv = NULL, warn = NULL, ...)
{
  checkModelName(modelName)
  funcName <- paste("me", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

# Dispatch to the model-specific mstep* function (e.g. mstepVVV),
# using the same call-rewriting scheme as me().
mstep <- function(data, modelName, z, prior = NULL, warn = NULL, ...)
{
  checkModelName(modelName)
  funcName <- paste("mstep", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name(funcName)
  mc$modelName <- NULL
  eval(mc, parent.frame())
}

mvn <- function(modelName, data, prior = NULL, warn = NULL, ...)
{ modelName <- switch(EXPR = modelName, "E" = "X", "V" = "X", "X" = "X", "Spherical" = "XII", "EII" = "XII", "VII" = "XII", "XII" = "XII", "Diagonal" = "XXI", "EEI" = "XXI", "VEI" = "XXI", "EVI" = "XXI", "VVI" = "XXI", "XXI" = "XXI", "Ellipsoidal" = "XXX", "EEE" = "XXX", "VEE" = "XXX", "EVE" = "XXX", "EVV" = "XXX", "VVE" = "XXX", "EEV" = "XXX", "VEV" = "XXX", "VVV" = "XXX", "XXX" = "XXX", stop("invalid model name")) funcName <- paste("mvn", modelName, sep = "") mc <- match.call() mc[[1]] <- as.name(funcName) mc[[2]] <- NULL out <- eval(mc, parent.frame()) varnames <- colnames(as.matrix(data)) if(!all(is.null(varnames))) { rownames(out$parameters$mean) <- varnames dimnames(out$parameters$variance$Sigma) <- list(varnames, varnames) dimnames(out$parameters$variance$sigma) <- list(varnames, varnames, NULL) } return(out) } nVarParams <- function(modelName, d, G, ...) { modelName <- switch(EXPR = modelName, X = "E", XII = "EII", XXI = "EEI", XXX = "EEE", modelName) # checkModelName(modelName) switch(EXPR = modelName, "E" = 1, "V" = G, "EII" = 1, "VII" = G, "EEI" = d, "VEI" = G + (d-1), "EVI" = 1 + G * (d-1), "VVI" = G * d, "EEE" = d*(d+1)/2, "EVE" = 1 + G*(d-1) + d*(d-1)/2, "VEE" = G + (d-1) + d*(d-1)/2, "VVE" = G + G * (d-1) + d*(d-1)/2, "EEV" = 1 + (d-1) + G * d*(d-1)/2, "VEV" = G + (d-1) + G * d*(d-1)/2, "EVV" = 1 - G + G * d*(d+1)/2, "VVV" = G * d*(d+1)/2, stop("invalid model name")) } nMclustParams <- function(modelName, d, G, noise = FALSE, equalPro = FALSE, ...) { modelName <- switch(EXPR = modelName, X = "E", XII = "EII", XXI = "EEI", XXX = "EEE", modelName) checkModelName(modelName) if(G == 0) { ## one noise cluster case if(!noise) stop("undefined model") nparams <- 1 } else { nparams <- nVarParams(modelName, d = d, G = G) + G*d if(!equalPro) nparams <- nparams + (G - 1) if(noise) nparams <- nparams + 2 } return(nparams) } sim <- function(modelName, parameters, n, seed = NULL, ...) 
{ modelName <- switch(EXPR = modelName, X = "E", XII = "EII", XXI = "EEI", XXX = "EEE", modelName) checkModelName(modelName) funcName <- paste("sim", modelName, sep = "") mc <- match.call(expand.dots = TRUE) mc[[1]] <- as.name(funcName) mc$modelName <- NULL eval(mc, parent.frame()) } cdensVEI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "VEI", WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("esvei", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "cannot compute E-step" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],dimnames(mu)[[2]]) structure(z, logarithm = logarithm, modelName = "VEI", WARNING = WARNING, returnCode = ret) } emVEI <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) 
{ z <- estepVEI(data, parameters = parameters, warn = warn)$z meVEI(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepVEI <- function(data, parameters, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) pro <- parameters$pro pro <- pro/sum(pro) l <- length(pro) mu <- as.matrix(parameters$mean) G <- ncol(mu) noise <- (l == G + 1) if(!noise) { if(l != G) stop("pro improperly specified") K <- G Vinv <- NULL } else { K <- G + 1 Vinv <- parameters$Vinv if(is.null(Vinv) || Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,K) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(list(modelName = "VEI", n=n, d=p, G=G, z=z, parameters=parameters, loglik=NA), WARNING = WARNING, returnCode = 9)) } if (is.null(parameters$variance$scale) || is.null(parameters$variance$shape)) stop("variance parameters are missing") temp <- .Fortran("esvei", as.double(data), as.double(mu), as.double(parameters$variance$scale), as.double(parameters$variance$shape), as.double(pro), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), double(1), double(n * K), PACKAGE = "mclust")[10:11] loglik <- temp[[1]] z <- matrix(temp[[2]], n, K) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "singular covariance" if(warn) warning(WARNING) z[] <- loglik <- NA ret <- -1 } else ret <- 0 dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(list(modelName = "VEI", n = n, d = p, G = G, z = z, parameters = parameters, loglik = loglik), WARNING = WARNING, returnCode = ret) } meVEI <- function(data, z, prior = NULL, control = 
emControl(), Vinv = NULL, warn = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) > 2) stop("data should in the form of a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("data and z should have the same row dimension") K <- dimz[2] if (!is.null(Vinv)) { G <- K - 1 if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE) } else G <- K if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "VEI", d = p, G = G, scale = rep(NA,G), shape = rep(NA,p)) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance, Vinv=Vinv) return(structure(list(modelName="VEI", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) } if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop("improper specification of z") storage.mode(z) <- "double" if(is.null(prior)) { temp <- .Fortran("mevei", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), z, as.integer(control$itmax), as.double(control$tol), as.double(control$eps), double(p * G), double(G), double(p), double(K), double(G), double(p), double(p * G), PACKAGE = "mclust")[7:14] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "VEI"), prior[names(prior) != "functionName"])) temp <- .Fortran("meveip", as.logical(control$equalPro), as.double(data), as.integer(n), as.integer(p), as.integer(G), as.double(if (is.null(Vinv)) -1 else Vinv), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(priorParams$scale), as.double(priorParams$dof), z, as.integer(control$itmax), as.double(control$tol), as.double(control$eps), double(p * G), double(G), double(p), double(K), double(G), double(p), double(p * G), 
                     PACKAGE = "mclust")[11:18]
  }
  # unpack Fortran results: itmax/tol slots carry (outer, inner) counts/errors
  z <- temp[[1]]
  its <- temp[[2]][1]
  inner <- temp[[2]][2]
  err <- temp[[3]][1]
  inerr <- temp[[3]][2]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  scale <- temp[[6]]
  shape <- temp[[7]]
  dimnames(mu) <- list(NULL, as.character(1:G))
  pro <- temp[[8]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    # +Inf sentinel: singular covariance
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6)) {
    # -Inf sentinel: a component collapsed during EM
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- if(control$equalPro) -2 else -3
  }
  else {
    # VEI covariance: per-component volume scale[k] times common
    # diagonal shape
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- diag(scale[k] * shape)
    if(inner >= control$itmax[2]) {
      WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
      ret <- 2
    }
    else if(its >= control$itmax[1]) {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  attr(info, "inner") <- c(iterations = inner, error = inerr)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VEI", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro = pro, mean = mu, variance = variance, Vinv = Vinv)
  structure(list(modelName = "VEI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# M-step for the VEI model: maximum-likelihood (or MAP, when a prior is
# given) estimates of pro, mean, scale and shape from data and the
# conditional probabilities z.  The VEI shape estimate is itself iterative,
# so 'control' supplies the inner iteration limit and tolerance.
mstepVEI <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VEI", d = p, G = G,
                     scale = rep(NA, G), shape = rep(NA, p))
    parameters <- list(pro = rep(NA, G), mean = matrix(as.double(NA), p, G),
                       variance = variance)
    return(structure(list(modelName = "VEI", prior = prior, n = n, d = p,
                          G = G, z = z, parameters = parameters,
                          control = control, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(control)) control <- emControl()
  # a length-2 itmax/tol refers to (outer EM, inner M-step): use the inner one
  itmax <- if(length(control$itmax) == 1) control$itmax else control$itmax[2]
  tol <- if(length(control$tol) == 1) control$tol else control$tol[2]
  if(is.null(prior)) {
    # slots 6:11 return the inner iteration count, inner error, mean,
    # scale, shape and pro
    temp <- .Fortran("msvei",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.integer(itmax),
                     as.double(tol),
                     double(p * G),
                     double(G),
                     double(p),
                     double(G),
                     double(G),
                     double(p),
                     double(p * G),
                     PACKAGE = "mclust")[6:11]
  }
  else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VEI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("msveip",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     as.integer(itmax),
                     as.double(tol),
                     double(p * G),
                     double(G),
                     double(p),
                     double(G),
                     double(G),
                     double(p),
                     double(p * G),
                     PACKAGE = "mclust")[10:15]
  }
  inner <- temp[[1]]
  inerr <- temp[[2]]
  mu <- matrix(temp[[3]], p, G)
  scale <- temp[[4]]
  shape <- temp[[5]]
  dimnames(mu) <- list(NULL, as.character(1:G))
  pro <- temp[[6]]
  WARNING <- NULL
  # overflow or exact zeros in scale/shape mean the M-step failed
  if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) ||
     any(! c(scale, shape))) {
    WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- shape <- scale[] <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else {
    ret <- 0
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- diag(scale[k] * shape)
    if(inner >= itmax) {
      WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
    }
  }
  info <- c(iterations = inner, error = inerr)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VEI", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro = pro, mean = mu, variance = variance)
  structure(list(modelName = "VEI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control),
            info = info, WARNING = WARNING, returnCode = ret)
}

# Simulate n observations from a VEI mixture; first column of the result
# is the component label.
simVEI <- function(parameters, n, seed = NULL, ...)
# simVEI (body, signature above): draws component labels from pro, then
# transforms iid normals by the component standard deviations
# sqrt(scale[k]) * sqrt(shape) (diagonal model, so no rotation needed).
{
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")]))) {
    warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "VEI"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins = G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if(length(rtshape) != d)
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if(length(rtscale) != G)
    stop("scale incompatible with mean")
  for(k in 1:G) {
    m <- ctabel[k]
    x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                               diag(rtscale[k] * rtshape),
                               MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VEI")
}

# Component densities for the one-dimensional "V" model (unequal variances):
# returns an n x G matrix of (log-)densities of each observation under each
# component.
cdensV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- drop(data)
  n <- length(data)
  mu <- drop(parameters$mean)
  G <- length(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, G)
    dimnames(z) <- list(names(data), NULL)
    return(structure(z, logarithm = logarithm, modelName = "V",
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(is.null(sigmasq))
    stop("variance parameters are missing")
  if(any(sigmasq < 0))
    stop("sigma-squared is negative")
  if(any(!sigmasq)) {
    WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, G)
    dimnames(z) <- list(names(data), NULL)
    return(structure(z, logarithm = logarithm, modelName = "V",
                     WARNING = WARNING, returnCode = 9))
  }
  # recycle a common variance over all components
  if(length(sigmasq) == 1) sigmasq <- rep(sigmasq, G)
  # pro = -1 tells the Fortran code to skip the mixture weighting and
  # return per-component log-densities
  temp <- .Fortran("es1v",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(-1),
                   as.integer(n),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[8:9]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  }
  else {
    if(!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(names(data), NULL)
  structure(z, logarithm = logarithm, modelName = "V",
            WARNING = WARNING, returnCode = ret)
}

# One EM cycle for the one-dimensional "V" model: E-step then M-step.
emV <- function(data, parameters, prior = NULL, control = emControl(),
                warn = NULL, ...)
{
  z <- estepV(data, parameters = parameters, warn = warn)$z
  meV(data, z = z, prior = prior, control = control,
      Vinv = parameters$Vinv, warn = warn)
}

# E-step for the one-dimensional "V" model (unequal variances).
estepV <- function(data, parameters, warn = NULL, ...)
# estepV (body, signature above): conditional probabilities z and
# log-likelihood for the 1-D unequal-variance model, with optional noise
# component when length(pro) == G + 1.
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- drop(data)
  n <- length(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- drop(parameters$mean)
  G <- length(mu)
  # one extra mixing proportion signals a noise component
  noise <- (l == G + 1)
  if(!noise) {
    if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else {
    K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, K)
    dimnames(z) <- list(names(data), NULL)
    return(structure(list(modelName = "V", n = n, d = 1, G = G, z = z,
                          parameters = parameters, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(is.null(sigmasq))
    stop("variance parameters are missing")
  if(any(sigmasq < 0))
    stop("sigma-squared is negative")
  if(any(!sigmasq)) {
    WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, K)
    dimnames(z) <- list(names(data), NULL)
    return(structure(list(modelName = "V", n = n, d = 1, G = G, z = z,
                          parameters = parameters, loglik = NA),
                     WARNING = WARNING, returnCode = -1))
  }
  # Fortran E-step; slots 8:9 return the log-likelihood and z
  temp <- .Fortran("es1v",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(pro),
                   as.integer(n),
                   as.integer(G),
                   as.double(if(is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[8:9]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(names(data), NULL)
  structure(list(modelName = "V", n = n, d = 1, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# Component densities for the VEV model (ellipsoidal; equal shape, varying
# volume and orientation): n x G matrix of (log-)densities.
cdensVEV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # FIX: initialize 'warn' from mclust.options() as in every other cdens*
  # function; previously 'warn' stayed NULL and if(warn) below raised
  # "argument is of length zero" on the missing-parameters path.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "VEV",
                     WARNING = WARNING, returnCode = 9))
  }
  if(is.null(parameters$variance$scale) ||
     is.null(parameters$variance$shape) ||
     is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  # orientation is transposed per component (aperm) for the Fortran storage
  # convention; pro = -1 requests per-component log-densities
  temp <- .Fortran("esvev",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(aperm(parameters$variance$orientation, c(2, 1, 3))),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(p),
                   double(p),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[13:14]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  }
  else {
    if(!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(z, logarithm = logarithm, modelName = "VEV",
            WARNING = WARNING, returnCode = ret)
}

# One EM cycle for the VEV model: E-step then M-step.
emVEV <- function(data, parameters, prior = NULL, control = emControl(),
                  warn = NULL, ...)
{
  z <- estepVEV(data, parameters = parameters, warn = warn)$z
  meVEV(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}

# E-step for the VEV model, with optional noise component.
estepVEV <- function(data, parameters, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  noise <- (l == G + 1)
  if(!noise) {
    if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else {
    K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VEV", n = n, d = p, G = G, z = z,
                          parameters = parameters, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(is.null(parameters$variance$scale) ||
     is.null(parameters$variance$shape) ||
     is.null(parameters$variance$orientation))
    stop("variance parameters are missing")
  temp <- .Fortran("esvev",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(aperm(parameters$variance$orientation, c(2, 1, 3))),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if(is.null(Vinv)) -1 else Vinv),
                   double(p),
                   double(p),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[13:14]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(list(modelName = "VEV", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

# EM iteration for the VEV model started from the M-step.
meVEV <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
# meVEV (body, signature above): EM for the VEV model from an initial z,
# optional conjugate prior and noise component.  The Fortran code uses
# LAPACK DGESVD internally, so a workspace of size lwork is allocated.
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a supplied Vinv means the last column of z is the noise component
  if(!is.null(Vinv)) {
    G <- K - 1
    if(Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VEV", d = p, G = G,
                     scale = rep(NA, G), shape = rep(NA, p),
                     orientation = array(NA, c(p, p, G)))
    parameters <- list(pro = rep(NA, G), mean = matrix(as.double(NA), p, G),
                       variance = variance)
    return(structure(list(modelName = "VEV", prior = prior, n = n, d = p,
                          G = G, z = z, parameters = parameters,
                          control = control, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  # LAPACK DGESVD workspace size
  lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G)
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    # slots 7:16 return z, (its, inner), (err, inerr), loglik, SVD info,
    # mean, scale, shape and orientation
    temp <- .Fortran("mevev",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if(is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax),
                     as.double(control$tol),
                     as.double(control$eps),
                     as.integer(lwork),
                     double(p * G),
                     double(G),
                     double(p),
                     double(p * p * G),
                     double(K),
                     double(lwork),
                     double(p),
                     PACKAGE = "mclust")[7:16]
  }
  else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VEV"),
                             prior[names(prior) != "functionName"]))
    # a nonzero prior scale is passed as its Cholesky factor
    temp <- .Fortran("mevevp",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if(is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax),
                     as.double(control$tol),
                     as.double(control$eps),
                     as.integer(lwork),
                     double(p * G),
                     double(G),
                     double(p),
                     double(p * p * G),
                     double(K),
                     double(lwork),
                     double(p),
                     PACKAGE = "mclust")[11:20]
  }
  # unpack Fortran results
  z <- temp[[1]]
  its <- temp[[2]][1]
  inner <- temp[[2]][2]
  err <- temp[[3]][1]
  inerr <- temp[[3]][2]
  loglik <- temp[[4]]
  lapackSVDinfo <- temp[[5]]
  mu <- matrix(temp[[6]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  scale <- temp[[7]]
  shape <- temp[[8]]
  # undo the per-component transposition used for Fortran storage
  O <- aperm(array(temp[[9]], c(p, p, G)), c(2, 1, 3))
  pro <- temp[[10]]
  WARNING <- NULL
  if(lapackSVDinfo) {
    # nonzero SVD info: LAPACK failure (positive = no convergence,
    # negative = bad input)
    if(lapackSVDinfo > 0) {
      WARNING <- "LAPACK DGESVD fails to converge"
    }
    else {
      WARNING <- "input error for LAPACK DGESVD"
    }
    if(warn) warning(WARNING)
    O[] <- shape[] <- scale[] <- NA
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -9
  }
  else if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    O[] <- shape[] <- scale[] <- NA
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6)) {
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- if(control$equalPro) -2 else -3
  }
  else {
    # Sigma_k = scale_k * O_k diag(shape) t(O_k)
    sigma <- shapeO(shape, O, transpose = FALSE)
    sigma <- sweep(sigma, MARGIN = 3, STATS = scale, FUN = "*")
    if(inner >= control$itmax[2]) {
      WARNING <- "inner iteration limit reached"
      if(warn) warning(WARNING)
      inner <- - inner
      ret <- 2
    }
    else if(its >= control$itmax[1]) {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- structure(c(iterations = its, error = err),
                    inner = c(iterations = inner, error = inerr))
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(O) <-
list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) ## Sigma = scale * O %*% diag(shape) %*% t(O) variance <- list(modelName = "VEV", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv) structure(list(modelName = "VEV", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control, loglik = loglik), info = info, WARNING = WARNING, returnCode = ret) } mstepVEV <- function(data, z, prior = NULL, warn = NULL, control = NULL, ...) { if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(oneD || length(dimdat) != 2) stop("data should be a matrix or a vector") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) ## z <- as.matrix(z) dimz <- dim(z) if(dimz[1] != n) stop("row dimension of z should equal data length") G <- dimz[2] if(all(is.na(z))) { WARNING <- "z is missing" if(warn) warning(WARNING) variance <- list(modelName = "VEV", d = p, G = G, scale = rep(NA,G), shape = rep(NA,p), orientation = array(NA,c(p,p,G))) parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G), variance=variance) return(structure(list(modelName="VEV", prior=prior, n=n, d=p, G=G, z=z, parameters=parameters, control=control, loglik=NA), WARNING = WARNING, returnCode = 9)) WARNING <- "z is missing" if(warn) warning(WARNING) return(structure(list(n = n, d = p, G = G, mu = matrix(as.double(NA), p, G), sigma = array(NA, c(p, p, G)), decomp = list( d = p, G = G, scale = rep(NA, G), shape = rep(NA, p), orientation = array(NA, c(p, p, G))), pro = rep(NA, G), modelName = "VEV", prior = prior), WARNING = WARNING)) } # shape <- sqrt(rev(sort(shape/exp(sum(log(shape))/p)))) if(any(is.na(z)) || any(z < 0) || any(z > 1)) stop( "improper specification of z") if (is.null(control)) control <- emControl() itmax <- if(length(control$itmax) == 1) control$itmax else control$ itmax[2] tol <- if(length(control$tol) == 1) 
control$tol else control$tol[2] lwork <- max(3 * min(n, p) + max(n, p), 5 * min(n, p), p + G) if(is.null(prior)) { temp <- .Fortran("msvev", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), double(lwork), as.integer(lwork), as.integer(itmax), as.double(tol), double(p * G), double(G), double(p), double(p * p * G), double(G), PACKAGE = "mclust")[7:14] } else { priorParams <- do.call(prior$functionName, c(list(data = data, G = G, modelName = "VEV"), prior[names(prior) != "functionName"])) temp <- .Fortran("msvevp", as.double(data), as.double(z), as.integer(n), as.integer(p), as.integer(G), as.double(priorParams$shrinkage), as.double(priorParams$mean), as.double(if(any(priorParams$scale != 0)) chol(priorParams$ scale) else priorParams$scale), as.double(priorParams$dof), double(lwork), as.integer(lwork), as.integer(itmax), as.double(tol), double(p * G), double(G), double(p), double(p * p * G), double(G), PACKAGE = "mclust")[11:18] } lapackSVDinfo <- temp[[1]] inner <- temp[[2]] inerr <- temp[[3]] mu <- matrix(temp[[4]], p, G) dimnames(mu) <- list(NULL, as.character(1:G)) scale <- temp[[5]] shape <- temp[[6]] O <- aperm(array(temp[[7]], c(p, p, G)),c(2,1,3)) pro <- temp[[8]] WARNING <- NULL if(lapackSVDinfo) { if(lapackSVDinfo > 0) { WARNING <- "LAPACK DGESVD fails to converge" if(warn) warning(WARNING) } else { WARNING <- "input error for LAPACK DGESVD" if(warn) warning(WARNING) } O[] <- shape[] <- scale[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -9 } else if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) || any( !c(scale, shape))) { WARNING <- "cannot compute M-step" if(warn) warning(WARNING) mu[] <- pro[] <- O[] <- shape[] <- scale[] <- NA sigma <- array(NA, c(p, p, G)) ret <- -1 } else { sigma <- sweep(shapeO(shape, O, transpose = FALSE), MARGIN = 3, STATS = scale, FUN = "*") if(inner >= itmax) { WARNING <- "inner iteration limit reached" if(warn) warning(WARNING) inner <- - inner } ret <- 2 } info <- c(iteration = inner, error = 
inerr) dimnames(z) <- list(dimnames(data)[[1]], NULL) dimnames(mu) <- list(dimnames(data)[[2]], NULL) dimnames(sigma) <- dimnames(O) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL) variance <- list(modelName = "VEV", d = p, G = G, sigma = sigma, scale = scale, shape = shape, orientation = O) parameters <- list(pro=pro, mean=mu, variance=variance) structure(list(modelName = "VEV", prior = prior, n = n, d = p, G = G, z = z, parameters = parameters, control = control), info = info, WARNING = WARNING, returnCode = ret) } simVEV <- function(parameters, n, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) mu <- as.matrix(parameters$mean) d <- nrow(mu) G <- ncol(mu) if(any(is.na(parameters[c("mean", "variance")])) || any(is.null(parameters[c("mean", "variance")]))) { warning("parameters are missing") return(structure(matrix(as.double(NA), n, d + 1), modelName = "VEV")) } pro <- parameters$pro if(is.null(pro)) pro <- rep(1/G, G) clabels <- sample(1:G, size = n, replace = TRUE, prob = pro) ctabel <- tabulate(clabels, nbins=G) x <- matrix(0, n, d) rtshape <- sqrt(parameters$variance$shape) if(length(rtshape) != d) stop("shape incompatible with mean") rtscale <- sqrt(parameters$variance$scale) if(length(rtscale) != G) stop("scale incompatible with mean") for(k in 1:G) { m <- ctabel[k] sss <- rtscale[k] * rtshape cholSigma <- t(parameters$variance$orientation[, , k]) * sss x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*% cholSigma, MARGIN = 2, STATS = mu[, k], FUN = "+") } dimnames(x) <- list(NULL, paste0("x", 1:d)) structure(cbind(group = clabels, x), modelName = "VEV") } cdensVII <- function(data, logarithm = FALSE, parameters, warn = NULL, ...) 
{ if(is.null(warn)) warn <- mclust.options("warn") dimdat <- dim(data) if(is.null(dimdat) || length(dimdat) != 2) stop("data must be a matrix") data <- as.matrix(data) n <- nrow(data) p <- ncol(data) mu <- as.matrix(parameters$mean) G <- ncol(mu) if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) || any(is.null(parameters[c("pro", "mean", "variance")]))) { WARNING <- "parameters are missing" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "VII", WARNING = WARNING, returnCode = 9)) } sigmasq <- parameters$variance$sigmasq if(any(sigmasq < 0)) stop("sigma-squared is negative") if(any(!sigmasq)) { WARNING <- "sigma-squared vanishes" if(warn) warning(WARNING) z <- matrix(as.double(NA),n,G) dimnames(z) <- list(dimnames(data)[[1]], NULL) return(structure(z, logarithm = logarithm, modelName = "VII", WARNING = WARNING, returnCode = 9)) } temp <- .Fortran("esvii", as.double(data), as.double(mu), as.double(sigmasq), as.double(-1), as.integer(n), as.integer(p), as.integer(G), as.double(-1), double(1), double(n * G), PACKAGE = "mclust")[9:10] loglik <- temp[[1]] z <- matrix(temp[[2]], n, G) WARNING <- NULL if(loglik > signif(.Machine$double.xmax, 6)) { WARNING <- "sigma-squared falls below threshold" if(warn) warning(WARNING) z[] <- NA ret <- -1 } else { if (!logarithm) z <- exp(z) ret <- 0 } dimnames(z) <- list(dimnames(data)[[1]],NULL) structure(z, logarithm = logarithm, modelName = "VII", WARNING = WARNING, returnCode = ret) } emVII <- function(data, parameters, prior = NULL, control = emControl(), warn = NULL, ...) { z <- estepVII(data, parameters = parameters, warn = warn)$z meVII(data, z = z, prior = prior, control = control, Vinv = parameters$Vinv, warn = warn) } estepVII <- function(data, parameters, warn = NULL, ...) 
# estepVII (body, signature above): conditional probabilities z and
# log-likelihood for the spherical unequal-volume model; a noise component
# is assumed when length(pro) == G + 1.
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  noise <- (l == G + 1)
  if(!noise) {
    if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  }
  else {
    K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VII", n = n, d = p, G = G, z = z,
                          parameters = parameters, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  sigmasq <- parameters$variance$sigmasq
  if(is.null(sigmasq))
    stop("variance parameters are missing")
  if(any(sigmasq < 0))
    stop("sigma-squared is negative")
  if(any(!sigmasq)) {
    WARNING <- "sigma-squared vanishes"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA), n, K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VII", n = n, d = p, G = G, z = z,
                          parameters = parameters, loglik = NA),
                     WARNING = WARNING, returnCode = -1))
  }
  # Fortran E-step; slots 9:10 return the log-likelihood and z
  temp <- .Fortran("esvii",
                   as.double(data),
                   as.double(mu),
                   as.double(sigmasq),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if(is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[9:10]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  }
  else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  structure(list(modelName = "VII", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING,
            returnCode = ret)
}

# EM iteration for the VII model started from the M-step, given an initial
# z; optional conjugate prior and noise component.
meVII <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
{
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data must be in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a supplied Vinv means the last column of z is the noise component
  if(!is.null(Vinv)) {
    G <- K - 1
    if(Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  else G <- K
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VII", d = p, G = G, sigmasq = rep(NA, G))
    parameters <- list(pro = rep(NA, G), mean = matrix(as.double(NA), p, G),
                       variance = variance, Vinv = Vinv)
    return(structure(list(modelName = "VII", prior = prior, n = n, d = p,
                          G = G, z = z, parameters = parameters,
                          control = control, loglik = NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    # slots 7:13 return z, its, err, loglik, mean, sigmasq and pro
    temp <- .Fortran("mevii",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if(is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(K),
                     PACKAGE = "mclust")[7:13]
  }
  else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VII"),
                             prior[names(prior) != "functionName"]))
    storage.mode(z) <- "double"
    # same outputs as above, with the prior dof slot appended last
    temp <- .Fortran("meviip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if(is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(K),
                     PACKAGE =
                       "mclust")[c(11:17, 10)]
  }
  mu <- matrix(temp[[5]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  sigmasq <- temp[[6]]
  pro <- temp[[7]]
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6) ||
     any(sigmasq <= max(control$eps, 0))) {
    WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  }
  else if(loglik < - signif(.Machine$double.xmax, 6)) {
    # -Inf sentinel: a component collapsed during EM
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    }
    else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- sigmasq <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- if(control$equalPro) -2 else -3
  }
  else {
    # spherical covariance: sigmasq_k * I
    sigma <- array(0, c(p, p, G))
    for(k in 1:G)
      sigma[, , k] <- diag(rep(sigmasq[k], p))
    if(its >= control$itmax[1]) {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its
      ret <- 1
    }
    else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VII", d = p, G = G, sigma = sigma,
                   sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro = pro, mean = mu, variance = variance, Vinv = Vinv)
  structure(list(modelName = "VII", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}

# EM iteration for the VVI model (diagonal; varying volume and shape)
# started from the M-step.
meVVI <- function(data, z, prior = NULL, control = emControl(),
                  Vinv = NULL, warn = NULL, ...)
{
  # meVVI body (the function header is on the preceding source line):
  # one EM run for the VVI model (diagonal covariance, varying volume
  # and shape), delegating the iteration to compiled Fortran.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # a non-NULL Vinv signals a uniform noise component: one column of z
  # is the noise class, leaving K - 1 Gaussian components
  if (!is.null(Vinv)) {
    G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  } else G <- K
  if(all(is.na(z))) {
    # nothing to fit: return an NA-filled result flagged returnCode 9
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VVI", d = p, G = G,
                     scale = rep(NA,G), shape = matrix(as.double(NA),p,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="VVI", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    # unpenalized EM; [7:14] keeps z, iteration count, error, loglik,
    # mean, scale, shape and pro from the Fortran workspace
    temp <- .Fortran("mevvi",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(K),
                     PACKAGE = "mclust")[7:14]
  } else {
    # MAP/EM with a conjugate prior; the prior hyperparameters are
    # produced by the function named in the prior specification
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("mevvip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(K),
                     PACKAGE = "mclust")[11:18]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  scale <- temp[[6]]
  shape <- matrix(temp[[7]], p, G)
  dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G))
  pro <- temp[[8]]
  WARNING <- NULL
  # a huge positive loglik is the Fortran code's failure sentinel
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- -1
  } else if(loglik < - signif(.Machine$double.xmax, 6)) {
    # a huge negative loglik flags a vanishing group
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    } else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- if(control$equalPro) -2 else -3
  } else {
    # rebuild the diagonal covariances: Sigma_k = scale_k * diag(shape_k)
    sigma <- array(apply(sweep(shape, MARGIN = 2, STATS = scale,
                               FUN = "*"), 2, diag), c(p, p, G))
    if(its >= control$itmax[1]) {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its   # negative iteration count marks non-convergence
      ret <- 1
    } else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVI", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}
mstepVII <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # mstepVII body (header on the preceding line): M-step for the VII
  # model (spherical covariance, varying volume).
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal number of observations")
  G <- dimz[2]
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VII", d=p, G=G, sigmasq=rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="VII", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    temp <- .Fortran("msvii",
                     as.double(data),
                     z,
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(p * G),
                     double(G),
                     double(G),
                     PACKAGE = "mclust")[6:8]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VII"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("msviip",
                     as.double(data),
                     z,
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(p * G),
                     double(G),
                     double(G),
                     PACKAGE = "mclust")[10:12]
  }
  mu <- matrix(temp[[1]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  sigmasq <- temp[[2]]
  pro <- temp[[3]]
  # spherical covariances: Sigma_k = sigmasq_k * I_p
  sigma <- array(0, c(p, p, G))
  for(k in 1:G)
    sigma[, , k] <- diag(rep(sigmasq[k], p))
  WARNING <- NULL
  if(any(sigmasq > signif(.Machine$double.xmax, 6))) {
    WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    ret <- -1
  } else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VII", d = p, G = G, sigma =
                   sigma, sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VII", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}
simVII <- function(parameters, n, seed = NULL, ...)
{
  # Simulate n observations from a VII mixture; returns an n x (d+1)
  # matrix whose first column is the component label.
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")]))) {
    warning("parameters are missing")
    # FIX: was matrix(as.double(NA), n, d) — the NA placeholder must have
    # d + 1 columns (group label + d coordinates) like simV/simVVI/simVVV
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "VII"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  sigmasq <- parameters$variance$sigmasq
  for(k in 1:G) {
    m <- ctabel[k]
    x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                               diag(rep(sqrt(sigmasq[k]), d)),
                               MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VII")
}
meV <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
{
  # meV body (header on the preceding line): EM for the univariate
  # unequal-variance model "V".
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- as.vector(data)
  n <- length(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal length of data")
  K <- dimz[2]
  # non-NULL Vinv marks a noise component (one extra column of z)
  if(!is.null(Vinv)) {
    G <- K - 1
    if (Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  } else G <- K
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "V", d=1, G=G, sigmasq = rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=rep(NA,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="V", prior=prior, n=n, d=1, G=G,
                          z=z, parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    temp <- .Fortran("me1v",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(G),
                     as.double(if(is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(G),
                     double(G),
                     double(K),
                     PACKAGE = "mclust")[6:12]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "V"),
                             prior[names(prior) != "functionName"]))
    # index 9 (the prior eps slot, reused as the log posterior) is kept
    # last; it is only read by the commented-out logpost line below
    temp <- .Fortran("me1vp",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(G),
                     double(G),
                     double(K),
                     PACKAGE = "mclust")[c(10:16, 9)]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- temp[[5]]
  names(mu) <- as.character(1:G)
  sigmasq <- temp[[6]]
  pro <- temp[[7]]
  ## logpost <- temp[[8]]
  WARNING <- NULL
  # failure sentinels from the Fortran code, plus a direct check that
  # no component variance collapsed below the tolerance
  if(loglik > signif(.Machine$double.xmax, 6) ||
     any(sigmasq <= max(control$eps, 0))) {
    WARNING <- "sigma-squared falls below threshold"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- sigmasq[] <- z[] <- loglik <- NA
    ret <- -1
  } else if(loglik < - signif(.Machine$double.xmax, 6)) {
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    } else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- sigmasq[] <- z[] <- loglik <- NA
    ret <- if(control$equalPro) -2 else -3
  } else if(its >= control$itmax[1]) {
    WARNING <- "iteration limit reached"
    if(warn) warning(WARNING)
    its <- - its   # negative iteration count marks non-convergence
    ret <- 1
  } else ret <- 0
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(names(data),NULL)
  variance = list(modelName = "V", d = 1, G = G,
                  sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "V", prior = prior, n = n, d = 1, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}
mstepV <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # M-step for the univariate "V" model: weighted means/variances per
  # component, computed in Fortran.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one-dimensional")
  data <- as.vector(data)
  n <- length(data)
  ##
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  # number of groups
  G <- dimz[2]
  ##
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "V", d=1, G=G, sigmasq=rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=rep(NA,G), variance=variance)
    return(structure(list(modelName="V", prior=prior, n=n, d=1, G=G,
                          z=z, parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior)) {
    temp <- .Fortran("ms1v",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(G),
                     double(G),
                     double(G),
                     double(G),
                     PACKAGE = "mclust")[5:7]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "V"),
                             prior[names(prior) != "functionName"]))
    storage.mode(z) <- "double"
    temp <- .Fortran("ms1vp",
                     as.double(data),
                     z,
                     as.integer(n),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(G),
                     double(G),
                     double(G),
                     PACKAGE = "mclust")[9:11]
  }
  mu <- temp[[1]]
  names(mu) <- as.character(1:G)
  sigmasq <- temp[[2]]
  pro <- temp[[3]]
  WARNING <- NULL
  if(any(sigmasq > signif(.Machine$double.xmax, 6))) {
    WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    # NOTE(review): `loglik` is not defined elsewhere in this function;
    # the assignment below only creates an unused local
    mu[] <- pro[] <- sigmasq[] <- z[] <- loglik <- NA
    ret <- -1
  } else ret <- 0
  dimnames(z) <- list(names(data),NULL)
  variance = list(modelName = "V", d = 1, G = G,
                  sigmasq = sigmasq, scale = sigmasq)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "V", prior = prior, n = n, d = 1, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}
simV <- function(parameters, n, seed =
NULL, ...)
{
  # simV body (signature begins on the preceding line): draw n points
  # from a univariate "V" mixture; returns cbind(group, x).
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")]))) {
    warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, 2), modelName = "V"))
  }
  if(!is.null(seed)) set.seed(seed)
  mu <- parameters$mean
  G <- length(mu)
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- rep(0, n)
  sd <- sqrt(parameters$variance$sigmasq)
  for(k in 1:G) {
    x[clabels == k] <- mu[k] + rnorm(ctabel[k], sd = sd[k])
  }
  structure(cbind(group = clabels, "1" = x), modelName = "V")
}
cdensVVI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # Component densities for the VVI model at each observation.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # FIX: the element is named "mean", not "mu"; with "mu" a missing mean
  # was never detected here (cf. cdensVVV, which checks "mean")
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(z, logarithm = logarithm, modelName = "VVI",
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  # pro = -1 / Vinv = -1 ask the Fortran E-step for per-component
  # (unweighted) log densities rather than posterior probabilities
  temp <- .Fortran("esvvi",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, G)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  } else {
    if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z,
logarithm = logarithm, modelName = "VVI",
            WARNING = WARNING, returnCode = ret)   # FIX: was `retrinCode`
}
emVVI <- function(data, parameters, prior = NULL,
                  control = emControl(), warn = NULL, ...)
{
  # One EM cycle for VVI: E-step from `parameters`, then fit via meVVI.
  z <- estepVVI(data, parameters = parameters, warn = warn)$z
  meVVI(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}
estepVVI <- function(data, parameters, warn = NULL, ...)
{
  # E-step for the VVI model: posterior membership probabilities z and
  # the observed-data log-likelihood.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # one extra mixing proportion signals a uniform noise component
  noise <- (l == G + 1)
  if(!noise) {
    if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  } else {
    K <- G + 1
    Vinv <- parameters$Vinv
    if (is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  # FIX: "mean" (not "mu") is the actual element name, as in estepVVV;
  # with "mu" a missing mean slipped past this guard
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VVI", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$scale) ||
      is.null(parameters$variance$shape))
    stop("variance parameters are missing")
  temp <- .Fortran("esvvi",
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$scale),
                   as.double(parameters$variance$shape),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[10:11]
  loglik <- temp[[1]]
  z <- matrix(temp[[2]], n, K)
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  } else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VVI", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}
# NOTE(review): this is a second, textually identical definition of
# meVVI (the first appears earlier in the file); when sourced, it
# harmlessly masks the earlier copy.
meVVI <- function(data, z, prior = NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
{
  # EM for the VVI model (diagonal covariance, varying volume/shape).
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) > 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # non-NULL Vinv: one column of z is a uniform noise component
  if (!is.null(Vinv)) {
    G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  } else G <- K
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VVI", d = p, G = G,
                     scale = rep(NA,G), shape = matrix(as.double(NA),p,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="VVI", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    temp <- .Fortran("mevvi",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(K),
                     PACKAGE = "mclust")[7:14]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("mevvip",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(K),
                     PACKAGE = "mclust")[11:18]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  scale <- temp[[6]]
  shape <- matrix(temp[[7]], p, G)
  dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G))
  pro <- temp[[8]]
  WARNING <- NULL
  # huge |loglik| values are the Fortran failure sentinels
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- -1
  } else if(loglik < - signif(.Machine$double.xmax, 6)) {
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    } else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    sigma <- array(NA, c(p, p, G))
    mu[] <- pro[] <- z[] <- loglik <- shape[] <- NA
    ret <- if(control$equalPro) -2 else -3
  } else {
    # Sigma_k = scale_k * diag(shape_k)
    sigma <- array(apply(sweep(shape, MARGIN = 2, STATS = scale,
                               FUN = "*"), 2, diag), c(p, p, G))
    if(its >= control$itmax[1]) {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its   # negative iteration count marks non-convergence
      ret <- 1
    } else ret <- 0
  }
  info <- c(iterations = its, error = err)
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVI", d = p, G = G, sigma = sigma,
                   scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}
mstepVVI <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # mstepVVI body (header on the preceding line): M-step for the VVI
  # model from data and membership probabilities z.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    # FIX: this is the VVI M-step; the placeholder results previously
    # carried the copy-pasted model name "VII"
    # NOTE(review): the NA variance still uses the sigmasq field of the
    # VII template; confirm whether scale/shape placeholders (as in
    # meVVI's missing-z branch) are expected here instead
    variance <- list(modelName = "VVI", d=p, G=G, sigmasq=rep(NA,G))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="VVI", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior)) {
    temp <- .Fortran("msvvi",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(G),
                     PACKAGE = "mclust")[6:9]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVI"),
                             prior[names(prior) != "functionName"]))
    temp <- .Fortran("msvvip",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(priorParams$scale),
                     as.double(priorParams$dof),
                     double(p * G),
                     double(G),
                     double(p * G),
                     double(G),
                     PACKAGE = "mclust")[10:13]
  }
  mu <- matrix(temp[[1]], p, G)
  scale <- temp[[2]]
  shape <- matrix(temp[[3]], p, G)
  dimnames(mu) <- dimnames(shape) <- list(NULL, as.character(1:G))
  pro <- temp[[4]]
  WARNING <- NULL
  # overflow sentinel, or any zero scale/shape entry, means failure
  if(any(c(scale, shape) > signif(.Machine$double.xmax, 6)) || any(!
c(scale, shape))) {
    WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    # FIX: was `shape <- NA`, which collapsed the shape matrix to a
    # scalar; use subassignment so dimensions are kept, as the sibling
    # me/mstep functions do
    mu[] <- pro[] <- shape[] <- scale[] <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  } else {
    # Sigma_k = scale_k * diag(shape_k)
    sigma <- array(apply(sweep(shape, MARGIN = 2, STATS = scale,
                               FUN = "*"), 2, diag), c(p, p, G))
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVI", d = p, G = G, sigma = sigma,
                   sigmasq = scale, scale = scale, shape = shape)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VVI", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}
simVVI <- function(parameters, n, seed = NULL, ...)
{
  # Simulate n observations from a VVI mixture; returns cbind(group, x).
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")]))) {
    warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "VVI"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  rtshape <- sqrt(parameters$variance$shape)
  if(!all(dim(rtshape) == dim(mu)))
    stop("shape incompatible with mean")
  rtscale <- sqrt(parameters$variance$scale)
  if(length(rtscale) != G)
    stop("scale incompatible with mean")
  for(k in 1:G) {
    m <- ctabel[k]
    x[clabels == k, ] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                               diag(rtscale[k] * rtshape[, k]),
                               MARGIN = 2, STATS = mu[, k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VVI")
}
cdensVVV <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # cdensVVV body (header on the preceding line): component densities
  # for the fully general VVV model.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,G)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    # FIX: cdens* functions return a density matrix; this branch
    # previously returned an estep-style structure(list(...)), unlike
    # cdensVVI and this function's own success path
    return(structure(z, logarithm = logarithm, modelName = "VVV",
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$cholsigma))
    stop("variance parameters are missing")
  # pro = -1 / Vinv = -1 request unweighted per-component log densities
  temp <- .Fortran("esvvv",
                   as.logical(1),
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$cholsigma),
                   as.double(-1),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(-1),
                   double(p),
                   double(1),
                   double(n * G),
                   PACKAGE = "mclust")[10:12]
  lapackCholInfo <- temp[[1]][1]
  loglik <- temp[[2]]
  z <- matrix(temp[[3]], n, G)
  WARNING <- NULL
  if(lapackCholInfo) {
    if(lapackCholInfo > 0) {
      WARNING <- "sigma is not positive definite"
      if(warn) warning(WARNING)
    } else {
      WARNING <- "input error for LAPACK DPOTRF"
      if(warn) warning(WARNING)
    }
    z[] <- NA
    ret <- -9
  } else if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- NA
    ret <- -1
  } else {
    if (!logarithm) z <- exp(z)
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(z, logarithm = logarithm, modelName = "VVV",
            WARNING = WARNING, returnCode = ret)
}
emVVV <- function(data, parameters, prior = NULL,
                  control = emControl(), warn = NULL, ...)
{
  # One EM cycle for VVV: E-step from `parameters`, then fit via meVVV.
  z <- estepVVV(data, parameters = parameters, warn = warn)$z
  meVVV(data, z = z, prior = prior, control = control,
        Vinv = parameters$Vinv, warn = warn)
}
estepVVV <- function(data, parameters, warn = NULL, ...)
{
  # E-step for the VVV model: posterior membership probabilities z and
  # the observed-data log-likelihood.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  if(is.null(dimdat) || length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  pro <- parameters$pro
  pro <- pro/sum(pro)
  l <- length(pro)
  mu <- as.matrix(parameters$mean)
  G <- ncol(mu)
  # one extra mixing proportion signals a uniform noise component
  noise <- (l == G + 1)
  if(!noise) {
    if(l != G)
      stop("pro improperly specified")
    K <- G
    Vinv <- NULL
  } else {
    K <- G + 1
    Vinv <- parameters$Vinv
    if(is.null(Vinv) || Vinv <= 0)
      Vinv <- hypvol(data, reciprocal = TRUE)
  }
  if(any(is.na(unlist(parameters[c("pro", "mean", "variance")]))) ||
     any(is.null(parameters[c("pro", "mean", "variance")]))) {
    WARNING <- "parameters are missing"
    if(warn) warning(WARNING)
    z <- matrix(as.double(NA),n,K)
    dimnames(z) <- list(dimnames(data)[[1]], NULL)
    return(structure(list(modelName = "VVV", n=n, d=p, G=G, z=z,
                          parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if (is.null(parameters$variance$cholsigma))
    stop("variance parameters are missing")
  temp <- .Fortran("esvvv",
                   as.logical(1),
                   as.double(data),
                   as.double(mu),
                   as.double(parameters$variance$cholsigma),
                   as.double(pro),
                   as.integer(n),
                   as.integer(p),
                   as.integer(G),
                   as.double(if (is.null(Vinv)) -1 else Vinv),
                   double(p),
                   double(1),
                   double(n * K),
                   PACKAGE = "mclust")[10:12]
  lapackCholInfo <- temp[[1]][1]
  loglik <- temp[[2]]
  z <- matrix(temp[[3]], n, K)
  WARNING <- NULL
  if(lapackCholInfo) {
    if(lapackCholInfo > 0) {
      WARNING <- "sigma is not positive definite"
      if(warn) warning(WARNING)
    } else {
      WARNING <- "input error for LAPACK DPOTRF"
      if(warn) warning(WARNING)
    }
    z[] <- loglik <- NA
    ret <- -9
  } else if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "cannot compute E-step"
    if(warn) warning(WARNING)
    z[] <- loglik <- NA
    ret <- -1
  } else ret <- 0
  dimnames(z) <- list(dimnames(data)[[1]],NULL)
  structure(list(modelName = "VVV", n = n, d = p, G = G, z = z,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}
meVVV <- function(data, z, prior =
NULL, control = emControl(), Vinv = NULL, warn = NULL, ...)
{
  # meVVV body (signature begins on the preceding line): EM for the
  # fully general ellipsoidal VVV model.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should in the form of a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("data and z should have the same row dimension")
  K <- dimz[2]
  # non-NULL Vinv: one column of z is a uniform noise component
  if (!is.null(Vinv)) {
    G <- K - 1
    if(Vinv <= 0) Vinv <- hypvol(data, reciprocal = TRUE)
  } else G <- K
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    variance <- list(modelName = "VVV", d = p, G = G,
                     sigma = array(NA, c(p,p,G)),
                     cholsigma = array(NA, c(p,p,G)))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance, Vinv=Vinv)
    return(structure(list(modelName="VVV", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters, control=control,
                          loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  storage.mode(z) <- "double"
  if(is.null(prior)) {
    temp <- .Fortran("mevvv",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(p * p * G),
                     double(K),
                     double(p),
                     double(p*p),
                     PACKAGE = "mclust")[7:13]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVV"),
                             prior[names(prior) != "functionName"]))
    # the prior scale matrix is passed as its Cholesky factor unless it
    # is identically zero
    temp <- .Fortran("mevvvp",
                     as.logical(control$equalPro),
                     as.double(data),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(if (is.null(Vinv)) -1 else Vinv),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     z,
                     as.integer(control$itmax[1]),
                     as.double(control$tol[1]),
                     as.double(control$eps),
                     double(p * G),
                     double(p * p * G),
                     double(K),
                     double(p),
                     double(p*p),
                     PACKAGE = "mclust")[c(11:17, 10)]
  }
  z <- temp[[1]]
  its <- temp[[2]]
  err <- temp[[3]]
  loglik <- temp[[4]]
  mu <- matrix(temp[[5]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  cholsigma <- array(temp[[6]], c(p, p, G))
  pro <- temp[[7]]
  WARNING <- NULL
  # huge |loglik| values are the Fortran failure sentinels
  if(loglik > signif(.Machine$double.xmax, 6)) {
    WARNING <- "singular covariance"
    if(warn) warning(WARNING)
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- -1
  } else if(loglik < - signif(.Machine$double.xmax, 6)) {
    if(control$equalPro) {
      WARNING <- "z column sum fell below threshold"
      if(warn) warning(WARNING)
    } else {
      WARNING <- "mixing proportion fell below threshold"
      if(warn) warning(WARNING)
    }
    mu[] <- pro[] <- z[] <- loglik <- NA
    sigma <- array(NA, c(p, p, G))
    ret <- if(control$equalPro) -2 else -3
  } else {
    # reassemble each covariance from its Cholesky factor
    sigma <- array(apply(cholsigma, 3, unchol, upper = TRUE), c(p,p,G))
    if(its >= control$itmax[1]) {
      WARNING <- "iteration limit reached"
      if(warn) warning(WARNING)
      its <- - its   # negative iteration count marks non-convergence
      ret <- 1
    } else ret <- 0
  }
  info <- c(iterations = its, error = abs(err))
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- dimnames(cholsigma) <-
    list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVV", d = p, G = G,
                   sigma = sigma, cholsigma = cholsigma)
  parameters <- list(pro=pro, mean=mu, variance=variance, Vinv=Vinv)
  structure(list(modelName = "VVV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters, control = control,
                 loglik = loglik),
            info = info, WARNING = WARNING, returnCode = ret)
}
mstepVVV <- function(data, z, prior = NULL, warn = NULL, ...)
{
  # mstepVVV body (header on the preceding line): M-step for the fully
  # general VVV model.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD || length(dimdat) != 2)
    stop("data should be a matrix or a vector")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  z <- as.matrix(z)
  dimz <- dim(z)
  if(dimz[1] != n)
    stop("row dimension of z should equal data length")
  G <- dimz[2]
  if(all(is.na(z))) {
    WARNING <- "z is missing"
    if(warn) warning(WARNING)
    # FIX: the sigma element was created with `sigma <- array(...)`
    # inside list(), which yields an *unnamed* list element (and a stray
    # local variable); use `=` so the element is actually named "sigma"
    variance <- list(modelName = "VVV", d = p, G = G,
                     sigma = array(NA, c(p,p, G)),
                     cholsigma = array(NA, c(p,p,G)))
    parameters <- list(pro=rep(NA,G), mean=matrix(as.double(NA),p,G),
                       variance=variance)
    return(structure(list(modelName="VVV", prior=prior, n=n, d=p, G=G,
                          z=z, parameters=parameters, loglik=NA),
                     WARNING = WARNING, returnCode = 9))
  }
  if(any(is.na(z)) || any(z < 0) || any(z > 1))
    stop("improper specification of z")
  if(is.null(prior)) {
    temp <- .Fortran("msvvv",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     double(p),
                     double(p * G),
                     double(p * p * G),
                     double(G),
                     double(p * p),
                     PACKAGE = "mclust")[7:9]
  } else {
    priorParams <- do.call(prior$functionName,
                           c(list(data = data, G = G, modelName = "VVV"),
                             prior[names(prior) != "functionName"]))
    # the prior scale matrix is passed as its Cholesky factor unless it
    # is identically zero
    temp <- .Fortran("msvvvp",
                     as.double(data),
                     as.double(z),
                     as.integer(n),
                     as.integer(p),
                     as.integer(G),
                     as.double(priorParams$shrinkage),
                     as.double(priorParams$mean),
                     as.double(if(any(priorParams$scale != 0))
                                 chol(priorParams$scale)
                               else priorParams$scale),
                     as.double(priorParams$dof),
                     double(p),
                     double(p * G),
                     double(p * p * G),
                     double(G),
                     double(p * p),
                     PACKAGE = "mclust")[11:13]
  }
  mu <- matrix(temp[[1]], p, G)
  dimnames(mu) <- list(NULL, as.character(1:G))
  cholsigma <- array(temp[[2]], c(p, p, G))
  pro <- temp[[3]]
  WARNING <- NULL
  if(any(c(mu, cholsigma) > signif(.Machine$double.xmax, 6))) {
    WARNING <- "cannot compute M-step"
    if(warn) warning(WARNING)
    # FIX: `sigma` did not exist yet in this branch, so the original
    # `sigma[] <- NA` subassignment raised "object 'sigma' not found";
    # allocate the NA array instead
    sigma <- array(NA, c(p, p, G))
    mu[] <- cholsigma[] <- NA
    ret <- -1
  } else {
    # reassemble each covariance from its Cholesky factor
    sigma <- array(apply(cholsigma, 3, unchol, upper = TRUE),
c(p,p,G))
    ret <- 0
  }
  dimnames(z) <- list(dimnames(data)[[1]], NULL)
  dimnames(mu) <- list(dimnames(data)[[2]], NULL)
  dimnames(sigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  dimnames(cholsigma) <- list(dimnames(data)[[2]], dimnames(data)[[2]], NULL)
  variance <- list(modelName = "VVV", d = p, G = G,
                   sigma = sigma, cholsigma= cholsigma)
  parameters <- list(pro=pro, mean=mu, variance=variance)
  structure(list(modelName = "VVV", prior = prior, n = n, d = p, G = G,
                 z = z, parameters = parameters),
            WARNING = WARNING, returnCode = ret)
}
simVVV <- function(parameters, n, seed = NULL, ...)
{
  # Simulate n observations from a VVV mixture; returns cbind(group, x).
  if(!is.null(seed)) set.seed(seed)
  mu <- as.matrix(parameters$mean)
  d <- nrow(mu)
  G <- ncol(mu)
  if(any(is.na(parameters[c("mean", "variance")])) ||
     any(is.null(parameters[c("mean", "variance")]))) {
    warning("parameters are missing")
    return(structure(matrix(as.double(NA), n, d + 1), modelName = "VVV"))
  }
  pro <- parameters$pro
  if(is.null(pro)) pro <- rep(1/G, G)
  clabels <- sample(1:G, size = n, replace = TRUE, prob = pro)
  ctabel <- tabulate(clabels, nbins=G)
  x <- matrix(0, n, d)
  if(is.null(cholsigma <- parameters$variance$cholsigma)) {
    if(is.null(sigma <- parameters$variance$sigma)) {
      # FIX: corrected "inlcude" -> "include" in the error message
      stop("variance parameters must include either sigma or cholsigma")
    }
    # apply() flattens each p x p Cholesky factor into one column; copy
    # the columns back into the p x p x G array layout
    cholsigma <- apply(sigma, 3, chol)
    for(k in 1:ncol(cholsigma))
      sigma[, , k] <- cholsigma[, k]
    cholsigma <- sigma
  }
  if(dim(cholsigma)[3] != G)
    stop("variance incompatible with mean")
  for(k in 1:G) {
    m <- ctabel[k]
    x[clabels == k,] <- sweep(matrix(rnorm(m * d), nrow = m, ncol = d) %*%
                              cholsigma[,,k],
                              MARGIN = 2, STATS = mu[,k], FUN = "+")
  }
  dimnames(x) <- list(NULL, paste0("x", 1:d))
  structure(cbind(group = clabels, x), modelName = "VVV")
}
# single component univariate case
mvnX <- function(data, prior = NULL, warn = NULL, ...)
{
  # Body of mvnX (header on the preceding line): fit a single-component
  # univariate Gaussian, optionally with a conjugate prior.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD)
    stop("data must be one dimensional")
  data <- as.vector(data)
  n <- length(data)
  if(is.null(prior))
    { # maximum-likelihood estimates via Fortran 'mvn1d'
      temp <- .Fortran("mvn1d",
                       as.double(data),
                       as.integer(n),
                       double(1),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[3:5]
      logpost <- NULL
    }
  else
    { # MAP estimates under the supplied conjugate prior via 'mvn1p'
      priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "X"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mvn1p",
                       as.double(data),
                       as.integer(n),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(1),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[c(7:9, 6)]
      logpost <- temp[[4]]
    }
  mu <- temp[[1]]
  sigmasq <- temp[[2]]
  loglik <- temp[[3]]
  ret <- 0
  WARNING <- NULL
  # a loglik near double.xmax is the Fortran code's degenerate-fit signal
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "sigma-squared vanishes"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance = list(modelName= "X", d = 1, G = 1, sigmasq = sigmasq)
  parameters <- list(pro = 1, mean = mu, variance = variance)
  structure(list(modelName = "X", prior = prior, n = n, d = 1, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

cdensX <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # Component density for the one-component "X" model: delegates to cdensE
  # (equal-variance univariate model) and relabels the result.
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensE")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "X"
  return(z)
}

emX <- function(data, prior = NULL, warn = NULL, ...)
{
  # "EM" for a single-component model reduces to direct estimation.
  mvnX(data = data, prior = prior, warn = warn, ...)
}
meX <- emX

# single component multivariate case with diagonal common variance
mvnXII <- function(data, prior = NULL, warn = NULL, ...)
{
  # Single multivariate Gaussian with spherical covariance sigmasq * I,
  # fitted via Fortran 'mvnxii' (ML) or 'mnxiip' (conjugate prior).
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD)
    stop("for multidimensional data only")
  if(length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  if(is.null(prior))
    { temp <- .Fortran("mvnxii",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[4:6]
      logpost <- NULL
    }
  else
    { priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "XII"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mnxiip",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(1),
                       double(1),
                       PACKAGE = "mclust")[c(8:10, 7)]
      logpost <- temp[[4]]
    }
  mu <- temp[[1]]
  sigmasq <- temp[[2]]
  loglik <- temp[[3]]
  Sigma <- sigmasq * diag(p)
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance <- list(modelName = "XII", d = p, G = 1,
                   sigmasq = sigmasq, Sigma = Sigma,
                   sigma = array(Sigma, c(p, p, 1)), scale = sigmasq)
  parameters <- list(pro = 1, mean = matrix(mu, ncol = 1),
                     variance = variance)
  structure(list(modelName = "XII", prior = prior, n = n, d = p, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

cdensXII <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # Component density for "XII": delegates to cdensEII and relabels.
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensEII")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "XII"
  return(z)
}

emXII <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnXII(data = data, prior = prior, warn = warn, ...)
}
meXII <- emXII

# single component multivariate case with diagonal different variances
mvnXXI <- function(data, prior = NULL, warn = NULL, ...)
{
  # Single multivariate Gaussian with diagonal covariance parameterized as
  # scale * diag(shape); fitted via Fortran 'mvnxxi' / 'mnxxip'.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD)
    stop("for multidimensional data only")
  if(length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  if(is.null(prior))
    { temp <- .Fortran("mvnxxi",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       double(1),
                       double(p),
                       double(1),
                       PACKAGE = "mclust")[4:7]
      logpost <- NULL
    }
  else
    { priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "XXI"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mnxxip",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(1),
                       double(p),
                       double(1),
                       PACKAGE = "mclust")[c(8:11, 7)]
      logpost <- temp[[5]]
    }
  mu <- temp[[1]]
  scale <- temp[[2]]
  shape <- temp[[3]]
  loglik <- temp[[4]]
  Sigma <- diag(scale * shape)
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance <- list(modelName = "XXI", d = p, G = 1,
                   Sigma = Sigma, sigma = array(Sigma, c(p, p, 1)),
                   scale = scale, shape = shape)
  parameters <- list(pro = 1, mean = matrix(mu, ncol = 1),
                     variance = variance)
  structure(list(modelName = "XXI", prior = prior, n = n, d = p, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

cdensXXI <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # Component density for "XXI": delegates to cdensEEI and relabels.
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensEEI")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "XXI"
  return(z)
}

emXXI <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnXXI(data = data, prior = prior, warn = warn, ...)
}
meXXI <- emXXI

# single component multivariate case with full covariance matrix
mvnXXX <- function(data, prior = NULL, warn = NULL, ...)
{
  # Single multivariate Gaussian with full covariance; the Fortran routines
  # 'mvnxxx' / 'mnxxxp' return the upper Cholesky factor of Sigma.
  if(is.null(warn)) warn <- mclust.options("warn")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(oneD)
    stop("for multidimensional data only")
  if(length(dimdat) != 2)
    stop("data must be a matrix")
  data <- as.matrix(data)
  n <- nrow(data)
  p <- ncol(data)
  if(is.null(prior))
    { temp <- .Fortran("mvnxxx",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       double(p * p),
                       double(1),
                       PACKAGE = "mclust")[c(4:6)]
      logpost <- NULL
    }
  else
    { priorParams <- do.call(prior$functionName,
                             c(list(data = data, G = 1, modelName = "XXX"),
                               prior[names(prior) != "functionName"]))
      temp <- .Fortran("mnxxxp",
                       as.double(data),
                       as.integer(n),
                       as.integer(p),
                       double(p),
                       as.double(priorParams$shrinkage),
                       as.double(priorParams$mean),
                       as.double(if(any(priorParams$scale != 0))
                                   chol(priorParams$scale)
                                 else priorParams$scale),
                       as.double(priorParams$dof),
                       double(p),
                       double(p * p),
                       double(1),
                       PACKAGE = "mclust")[c(9:11, 8)]
      logpost <- temp[[4]]
    }
  mu <- temp[[1]]
  cholSigma <- matrix(temp[[2]], p, p)
  Sigma <- unchol(cholSigma, upper = TRUE)
  loglik <- temp[[3]]
  ## Sigma = t(cholSigma) %*% cholSigma
  ret <- 0
  WARNING <- NULL
  if(loglik > signif(.Machine$double.xmax, 6))
    { WARNING <- "singular covariance"
      if(warn) warning(WARNING)
      loglik <- NA
      ret <- -1
    }
  variance <- list(modelName = "XXX", d = p, G = 1,
                   Sigma = Sigma, cholSigma = cholSigma,
                   cholsigma = cholSigma,
                   sigma = array(Sigma, c(p, p, 1)))
  parameters <- list(pro = 1, mean = matrix(mu, ncol = 1),
                     variance = variance)
  structure(list(modelName = "XXX", prior = prior, n = n, d = p, G = 1,
                 parameters = parameters, loglik = loglik),
            WARNING = WARNING, returnCode = ret)
}

cdensXXX <- function(data, logarithm = FALSE, parameters, warn = NULL, ...)
{
  # Component density for "XXX": delegates to cdensEEE and relabels.
  call <- match.call()
  mc <- match.call(expand.dots = FALSE)
  mc[[1]] <- as.name("cdensEEE")
  z <- eval(mc, parent.frame())
  attr(z, "modelName") <- "XXX"
  return(z)
}

emXXX <- function(data, prior = NULL, warn = NULL, ...)
{
  mvnXXX(data = data, prior = prior, warn = warn, ...)
} meXXX <- emXXX mclust/R/util.R0000644000176200001440000003414614064570074013101 0ustar liggesusers adjustedRandIndex <- function (x, y) { x <- as.vector(x) y <- as.vector(y) if(length(x) != length(y)) stop("arguments must be vectors of the same length") tab <- table(x,y) if(all(dim(tab)==c(1,1))) return(1) a <- sum(choose(tab, 2)) b <- sum(choose(rowSums(tab), 2)) - a c <- sum(choose(colSums(tab), 2)) - a d <- choose(sum(tab), 2) - a - b - c ARI <- (a - (a + b) * (a + c)/(a + b + c + d)) / ((a + b + a + c)/2 - (a + b) * (a + c)/(a + b + c + d)) return(ARI) } classError <- function(classification, class) { q <- function(map, len, x) { x <- as.character(x) map <- lapply(map, as.character) y <- sapply(map, function(x) x[1]) best <- y != x if(all(len) == 1) return(best) errmin <- sum(as.numeric(best)) z <- sapply(map, function(x) x[length(x)]) mask <- len != 1 counter <- rep(0, length(len)) k <- sum(as.numeric(mask)) j <- 0 while(y != z) { i <- k - j m <- mask[i] counter[m] <- (counter[m] %% len[m]) + 1 y[x == names(map)[m]] <- map[[m]][counter[m]] temp <- y != x err <- sum(as.numeric(temp)) if(err < errmin) { errmin <- err best <- temp } j <- (j + 1) %% k } best } if(any(isNA <- is.na(classification))) { classification <- as.character(classification) nachar <- paste(unique(classification[!isNA]),collapse="") classification[isNA] <- nachar } MAP <- mapClass(classification, class) len <- sapply(MAP[[1]], length) if(all(len) == 1) { CtoT <- unlist(MAP[[1]]) I <- match(as.character(classification), names(CtoT), nomatch= 0) one <- CtoT[I] != class } else { one <- q(MAP[[1]], len, class) } len <- sapply(MAP[[2]], length) if(all(len) == 1) { TtoC <- unlist(MAP[[2]]) I <- match(as.character(class), names(TtoC), nomatch = 0) two <- TtoC[I] != classification } else { two <- q(MAP[[2]], len, classification) } err <- if(sum(as.numeric(one)) > sum(as.numeric(two))) as.vector(one) else as.vector(two) bad <- seq(along = classification)[err] list(misclassified = bad, errorRate = 
length(bad)/length(class))
}

mapClass <- function(a, b)
{
  # Best correspondence between the labels of two classifications a and b.
  # Returns list(aTOb, bTOa): for each label of one vector, the label(s)
  # of the other with maximal co-occurrence.
  l <- length(a)
  x <- y <- rep(NA, l)
  if(l != length(b))
    { warning("unequal lengths")
      return(x)
    }
  # LS: new - check if both a & b are factors or character vectors
  # with the same levels then assume they are known classes and
  # match by level names
  if(is.factor(a) & is.factor(b) & nlevels(a) == nlevels(b))
    { aTOb <- as.list(levels(b))
      names(aTOb) <- levels(a)
      bTOa <- as.list(levels(a))
      names(bTOa) <- levels(b)
      out <- list(aTOb = aTOb, bTOa = bTOa)
      return(out)
    }
  if(is.character(a) & is.character(b) &
     length(unique(a)) == length(unique(b)))
    { aTOb <- as.list(unique(b))
      names(aTOb) <- unique(a)
      bTOa <- as.list(unique(a))
      names(bTOa) <- unique(b)
      out <- list(aTOb = aTOb, bTOa = bTOa)
      return(out)
    }
  # otherwise match by closest class correspondence
  Tab <- table(a, b)
  Ua <- dimnames(Tab)[[1]]
  Ub <- dimnames(Tab)[[2]]
  aTOb <- rep(list(Ub), length(Ua))
  names(aTOb) <- Ua
  bTOa <- rep(list(Ua), length(Ub))
  names(bTOa) <- Ub
  #
  k <- nrow(Tab)
  # NOTE(review): 'Map' is assigned here and below but never used —
  # apparently leftover code.
  Map <- rep(0, k)
  Max <- apply(Tab, 1, max)
  for(i in 1:k)
    { # all b-labels achieving the maximal co-occurrence with a-label i
      I <- match(Max[i], Tab[i, ], nomatch = 0)
      aTOb[[i]] <- Ub[I]
    }
  if(is.numeric(b)) aTOb <- lapply(aTOb, as.numeric)
  #
  k <- ncol(Tab)
  Map <- rep(0, k)
  Max <- apply(Tab, 2, max)
  for(j in (1:k))
    { J <- match(Max[j], Tab[, j])
      bTOa[[j]] <- Ua[J]
    }
  if(is.numeric(a)) bTOa <- lapply(bTOa, as.numeric)
  #
  out <- list(aTOb = aTOb, bTOa = bTOa)
  return(out)
}

map <- function(z, warn = mclust.options("warn"), ...)
{
  # Maximum a posteriori classification from a matrix z of conditional
  # probabilities: for each row, the index of the (first) largest entry.
  nrowz <- nrow(z)
  cl <- numeric(nrowz)
  I <- 1:nrowz
  J <- 1:ncol(z)
  for(i in I)
    { cl[i] <- (J[z[i, ] == max(z[i, ])])[1]
    }
  if(warn)
    { # warn about components that receive no observations
      K <- as.logical(match(J, sort(unique(cl)), nomatch = 0))
      if(any(!K))
        warning(paste("no assignment to", paste(J[!K], collapse = ",")))
    }
  return(cl)
}

unmap <- function(classification, groups=NULL, noise=NULL, ...)
Please use 'nrow'") nrow <- n } if(missing(ncol) & missing(d)) stop() if(missing(ncol)) { warning("Use of argument 'd' is deprecated. Please use 'ncol'") ncol <- d } Q <- qr.Q(qr(matrix(rnorm(nrow*ncol), nrow = nrow, ncol = ncol))) return(Q) } logsumexp <- function(x) { # Numerically efficient implementation of log(sum(exp(x))) max <- max(x) max + log(sum(exp(x-max))) } partconv <- function(x, consec = TRUE) { n <- length(x) y <- numeric(n) u <- unique(x) if(consec) { # number groups in order of first row appearance l <- length(u) for(i in 1:l) y[x == u[i]] <- i } else { # represent each group by its lowest-numbered member for(i in u) { l <- x == i y[l] <- (1:n)[l][1] } } y } partuniq <- function(x) { # finds the classification that removes duplicates from x charconv <- function(x, sep = "001") { if(!is.data.frame(x)) x <- data.frame(x) do.call("paste", c(as.list(x), sep = sep)) } n <- nrow(x) x <- charconv(x) k <- duplicated(x) partition <- 1.:n partition[k] <- match(x[k], x) partition } dmvnorm <- function(data, mean, sigma, log = FALSE) { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) if(missing(mean)) mean <- rep(0, length = d) mean <- as.vector(mean) if(length(mean) != d) stop("data and mean have non-conforming size") if(missing(sigma)) sigma <- diag(d) sigma <- as.matrix(sigma) if(ncol(sigma) != d) stop("data and sigma have non-conforming size") if(max(abs(sigma - t(sigma))) > sqrt(.Machine$double.eps)) stop("sigma must be a symmetric matrix") # - 1st approach # cholsigma <- chol(sigma) # logdet <- 2 * sum(log(diag(cholsigma))) # md <- mahalanobis(data, center = mean, # cov = chol2inv(cholsigma), inverted = TRUE) # logdens <- -(ncol(data) * log(2 * pi) + logdet + md)/2 # # - 2nd approach # cholsigma <- chol(sigma) # logdet <- 2 * sum(log(diag(cholsigma))) # mean <- outer(rep(1, nrow(data)), as.vector(matrix(mean,d))) # data <- t(data - mean) # conc <- chol2inv(cholsigma) # Q <- colSums((conc %*% data)* data) # logdens <- as.vector(Q + d*log(2*pi) + 
logdet)/(-2) # # - 3rd approach (via Fortran code) logdens <- .Fortran("dmvnorm", as.double(data), # x as.double(mean), # mu as.double(sigma), # Sigma as.integer(n), # n as.integer(d), # p double(d), # w double(1), # hood double(n), # logdens PACKAGE = "mclust")[[8]] # if(log) logdens else exp(logdens) } shapeO <- function(shape, O, transpose = FALSE) { dimO <- dim(O) if(dimO[1] != dimO[2]) stop("leading dimensions of O are unequal") if((ldO <- length(dimO)) != 3) { if(ldO == 2) { dimO <- c(dimO, 1) O <- array(O, dimO) } else stop("O must be a matrix or an array") } l <- length(shape) if(l != dimO[1]) stop("dimension of O and length s are unequal") storage.mode(O) <- "double" .Fortran("shapeo", as.logical(transpose), as.double(shape), O, as.integer(l), as.integer(dimO[3]), double(l * l), integer(1), PACKAGE = "mclust")[[3]] } traceW <- function(x) { # sum(as.vector(sweep(x, 2, apply(x, 2, mean)))^2) dimx <- dim(x) n <- dimx[1] p <- dimx[2] .Fortran("mcltrw", as.double(x), as.integer(n), as.integer(p), double(p), double(1), PACKAGE = "mclust")[[5]] } unchol <- function(x, upper = NULL) { if(is.null(upper)) { upper <- any(x[row(x) < col(x)]) lower <- any(x[row(x) > col(x)]) if(upper && lower) stop("not a triangular matrix") if(!(upper || lower)) { x <- diag(x) return(diag(x * x)) } } dimx <- dim(x) storage.mode(x) <- "double" .Fortran("uncholf", as.logical(upper), x, as.integer(nrow(x)), as.integer(ncol(x)), integer(1), PACKAGE = "mclust")[[2]] } vecnorm <- function (x, p = 2) { if (is.character(p)) { if (charmatch(p, "maximum", nomatch = 0) == 1) p <- Inf else if (charmatch(p, "euclidean", nomatch = 0) == 1) p <- 2 else stop("improper specification of p") } if (!is.numeric(x) && !is.complex(x)) stop("mode of x must be either numeric or complex") if (!is.numeric(p)) stop("improper specification of p") if (p < 1) stop("p must be greater than or equal to 1") if (is.numeric(x)) x <- abs(x) else x <- Mod(x) if (p == 2) return(.Fortran("d2norm", as.integer(length(x)), 
as.double(x), as.integer(1), double(1), PACKAGE = "mclust")[[4]]) if (p == Inf) return(max(x)) if (p == 1) return(sum(x)) xmax <- max(x) if (!xmax) xmax <- max(x) if (!xmax) return(xmax) x <- x/xmax xmax * sum(x^p)^(1/p) } errorBars <- function(x, upper, lower, width = 0.1, code = 3, angle = 90, horizontal = FALSE, ...) { # Draw error bars at x from upper to lower. If horizontal = FALSE (default) # bars are drawn vertically, otherwise horizontally. if(horizontal) arrows(upper, x, lower, x, length = width, angle = angle, code = code, ...) else arrows(x, upper, x, lower, length = width, angle = angle, code = code, ...) } covw <- function(X, Z, normalize = TRUE) # Given data matrix X(n x p) and weight matrix Z(n x G) computes # weighted means(p x G), weighted covariance matrices S(p x p x G) and # weighted scattering matrices W(p x p x G) { X <- as.matrix(X) Z <- as.matrix(Z) n <- nrow(X) p <- ncol(X) nZ <- nrow(Z) G <- ncol(Z) if(n != nZ) stop("X and Z must have same number of rows") if(normalize) Z <- t( apply(Z, 1, function(z) z/sum(z)) ) tmp <- .Fortran("covwf", X = as.double(X), Z = as.double(Z), n = as.integer(n), p = as.integer(p), G = as.integer(G), mean = double(p*G), S = double(p*p*G), W = double(p*p*G), PACKAGE = "mclust") out <- list(mean = matrix(tmp$mean, p,G), S = array(tmp$S, c(p,p,G)), W = array(tmp$W, c(p,p,G)) ) return(out) } hdrlevels <- function(density, prob) { # Compute the levels for Highest Density Levels (HDR) for estimated 'density' # values and probability levels 'prob'. # # Reference: Hyndman (1996) Computing and Graphing Highest Density Regions if(missing(density) | missing(prob)) stop("Please provide both 'density' and 'prob' arguments to function call!") density <- as.vector(density) prob <- pmin(pmax(as.numeric(prob), 0), 1) alpha <- 1-prob lev <- quantile(density, alpha, na.rm = TRUE) names(lev) <- paste0(round(prob*100),"%") return(lev) } catwrap <- function(x, width = getOption("width"), ...) 
{
  # version of cat with wrapping at specified width
  cat(paste(strwrap(x, width = width, ...), collapse = "\n"), "\n")
}

##
## Convert to and from classes 'Mclust' and 'densityMclust'
##
as.Mclust <- function(x, ...)
{
  UseMethod("as.Mclust")
}

as.Mclust.default <- function(x, ...)
{
  if(inherits(x, "Mclust")) x
  else stop("argument 'x' cannot be coerced to class 'Mclust'")
}

as.Mclust.densityMclust <- function(x, ...)
{
  # reclass a density fit as a clustering fit
  class(x) <- c("Mclust", class(x)[1])
  return(x)
}

as.densityMclust <- function(x, ...)
{
  UseMethod("as.densityMclust")
}

as.densityMclust.default <- function(x, ...)
{
  if(inherits(x, "densityMclust")) x
  else stop("argument 'x' cannot be coerced to class 'densityMclust'")
}

as.densityMclust.Mclust <- function(x, ...)
{
  # reclass a clustering fit as a density fit and add fitted densities
  class(x) <- c("densityMclust", class(x))
  x$density <- dens(data = x$data, modelName = x$modelName,
                    parameters = x$parameters, logarithm = FALSE)
  return(x)
}
mclust/R/weights.R0000644000176200001440000000410414125667133013566 0ustar liggesusers
###############################################################################
## Weights for MCLUST
##
## Written by Thomas Brendan Murphy
## Bugs fix by Luca Scrucca
#############################################################################
me.weighted <- function(data, modelName, z, weights = NULL, prior = NULL,
                        control = emControl(), Vinv = NULL, warn = NULL, ...)
{
  # Weighted EM: alternates weighted M-steps and E-steps until the
  # weighted log-likelihood improves by less than control$tol[1] or
  # control$itmax[1] iterations are reached.
  data <- as.matrix(data)
  N <- nrow(data)
  if(is.null(warn)) warn <- mclust.options("warn")
  if(is.null(weights))
    { weights <- rep(1,N)
    }
  if(any(weights < 0)| any(!is.finite(weights)))
    { stop("Weights must be positive and finite")
    }
  if(!is.vector(weights))
    { stop("Weights must be a vector")
    }
  if(max(weights) > 1)
    { if(warn) warning("Weights rescaled to have maximum equal to 1")
      weights <- weights/max(weights)
    }
  zw <- z*weights
  llold <- -Inf
  eps <- .Machine$double.eps
  criterion <- TRUE
  iter <- 0
  while(criterion)
    { iter <- iter+1
      fit.m <- do.call("mstep", list(data = data, z = zw,
                                     modelName = modelName,
                                     prior = prior, control = control,
                                     Vinv = Vinv, warn = warn))
      # compensate the proportions for the weight rescaling
      fit.m$parameters$pro <- fit.m$parameters$pro/mean(weights)
      fit.e <- do.call("estep", c(list(data = data, control = control,
                                       Vinv = Vinv, warn = warn),
                                  fit.m))
      # floor at eps to avoid zero responsibilities
      zw <- pmax(fit.e$z*weights, eps)
      criterion <- criterion & (iter < control$itmax[1])
      ldens <- do.call("dens", c(list(data = data, logarithm=TRUE,
                                      warn = warn), fit.m))
      ll <- sum(weights*ldens)
      criterion <- criterion & (ll-llold > control$tol[1])
      llold <- ll
    }
  fit <- fit.m
  fit$z <- fit.e$z
  fit$weights <- weights
  fit$loglik <- ll
  return(fit)
}
mclust/R/options.R0000644000176200001440000000516614210126033013600 0ustar liggesusers
#############################################################################
# Package-wide default options, stored in the namespace and accessed /
# modified through mclust.options() below.
.mclust <- structure(list(
  emModelNames = c("EII", "VII", "EEI", "VEI", "EVI", "VVI",
                   "EEE", "VEE", "EVE", "VVE", "EEV", "VEV", "EVV", "VVV"),
  # in mclust version <= 4.x
  # emModelNames = c("EII", "VII", "EEI", "VEI", "EVI", "VVI", "EEE", "EEV", "VEV", "VVV"),
  hcModelName = "VVV",
  hcUse = "SVD",
  subset = 2000,
  fillEllipses = FALSE,
  bicPlotSymbols = structure(c(17, 2, 16, 10, 13, 1, 15, 8, 5, 9, 12, 7,
                               14, 0, 17, 2),
                             .Names = c("EII", "VII", "EEI", "EVI", "VEI",
                                        "VVI", "EEE", "VEE", "EVE", "VVE",
                                        "EEV", "VEV", "EVV", "VVV",
                                        "E", "V")),
  bicPlotColors = structure( {
    pal <- grDevices::colorRampPalette(c("forestgreen", "royalblue1",
                                         "red3"),
                                       space = "Lab")
c("gray", "black", pal(12), "gray", "black")
  },
  .Names = c("EII", "VII", "EEI", "EVI", "VEI", "VVI", "EEE", "VEE",
             "EVE", "VVE", "EEV", "VEV", "EVV", "VVV", "E", "V")),
  classPlotSymbols = c(16, 0, 17, 3, 15, 4, 1, 8, 2, 7, 5, 9, 6, 10,
                       11, 18, 12, 13, 14),
  classPlotColors = c("dodgerblue2", "red3", "green3", "slateblue",
                      "darkorange", "skyblue1", "violetred4",
                      "forestgreen", "steelblue4", "slategrey", "brown",
                      "black", "darkseagreen", "darkgoldenrod3",
                      "olivedrab", "royalblue", "tomato4", "cyan2",
                      "springgreen2"),
  warn = FALSE))

mclust.options <- function(...)
{
  # Get or set mclust package options:
  #   mclust.options()            — all current options
  #   mclust.options("name")      — one option by name
  #   mclust.options(name = val)  — set option(s) in the package namespace
  current <- get(".mclust", envir = asNamespace("mclust"))
  if(nargs() == 0) return(current)
  args <- list(...)
  if(length(args) == 1 && is.null(names(args)))
    { arg <- args[[1]]
      switch(mode(arg),
             list = args <- arg,
             character = return(.mclust[[arg]]),
             stop("invalid argument: ", dQuote(arg)))
    }
  if(length(args) == 0) return(current)
  n <- names(args)
  if (is.null(n)) stop("options must be given by name")
  changed <- current[n]
  current[n] <- args
  assign(".mclust", current, envir = asNamespace("mclust"))
  # assignInNamespace(".mclust", current, ns = asNamespace("mclust"))
  # invisible(changed)
  # bettina suggestion...
  invisible(current)
}
mclust/R/mclustdr.R0000644000176200001440000011624614241627102013753 0ustar liggesusers
######################################################
##                                                  ##
##  Dimension reduction for model-based             ##
##  clustering and classification                   ##
##                                                  ##
##  Author: Luca Scrucca                            ##
##                                                  ##
######################################################

# GMMDR dimension reduction -----------------------------------------------

MclustDR <- function(object, lambda = 1, normalized = TRUE, Sigma,
                     tol = sqrt(.Machine$double.eps))
{
  # Dimension reduction for model-based clustering and classification
  stopifnot("first argument must be an object of class 'Mclust' or 'MclustDA'" =
              inherits(object, c("Mclust", "MclustDA")))
  call <- match.call()
  x <- data.matrix(object$data)
  n <- nrow(x)
  p <- ncol(x)
  # clamp lambda into [0,1]
  lambda <- pmax(0, min(lambda, 1))
  #-----------------------------------------------------------------
  # overall parameters
  mu <- colMeans(x)
  if(missing(Sigma))
    Sigma <- var(x)*(n-1)/n
  # within-cluster parameters based on fitted mixture model
  if(inherits(object, "Mclust"))
    { type <- "Mclust"
      G <- object$G
      modelName <- object$modelName
      y <- object$classification
      cl2mc <- seq(G)
      class <- as.factor(y)
      par <- object$parameters
      f <- par$pro
      if(is.null(f)) f <- 1
      # drop the noise-component proportion when a noise term is present
      if(!is.na(object$hypvol)) f <- f[-length(f)]
      # within-group means
      mu.G <- matrix(par$mean,p,G)
      # within-group covars
      if(p == 1)
        { Sigma.G <- array(par$variance$sigmasq, c(p,p,G)) }
      else
        { Sigma.G <- par$variance$sigma }
    }
  else if(inherits(object, "MclustDA"))
    { type <- object$type
      modelName <- sapply(object$models, function(m) m$modelName)
      class <- object$class
      class <- factor(class, levels = names(object$models))
      # label each observation by "class:component"
      y <- rep(NA, length(class))
      for(i in 1:nlevels(class))
         { y[class == levels(class)[i]] <-
             paste(levels(class)[i],
                   object$models[[i]]$classification, sep =":") }
      y <- as.numeric(factor(y))
      # map mixture components to their owning class
      cl2mc <- rep(seq(length(object$models)),
                   sapply(object$models, function(m) m$G))
      m <- sapply(object$models, function(mod) mod$n)
      ncomp <-
sapply(object$models, function(mod) mod$G)
      G <- sum(ncomp)
      f <- vector(length = G)
      mu.G <- matrix(as.double(NA), nrow = p, ncol = G)
      Sigma.G <- array(NA, dim = c(p,p,G))
      for(i in 1:length(object$models))
         { # slots of the pooled parameter arrays owned by model i
           ii <- seq(c(0,cumsum(ncomp))[i]+1,c(0,cumsum(ncomp))[i+1])
           par <- object$models[[i]]$parameters
           if(is.null(par$pro)) par$pro <- 1
           # component proportions weighted by class sample sizes
           f[ii] <- par$pro * m[i]/sum(m)
           # within-group means
           mu.G[,ii] <- par$mean
           # within-group covars
           if(p == 1)
             { Sigma.G[,,ii] <- array(par$variance$sigmasq, c(p,p,1)) }
           else
             { Sigma.G[,,ii] <- par$variance$sigma }
         }
    }
  #-----------------------------------------------------------------
  # (pseudo-)inverse and inverse square root of Sigma via its SVD,
  # keeping only directions with non-negligible singular values
  SVD <- svd(Sigma, nu = 0, nv = min(n,p))
  pos <- which(SVD$d > max(tol*SVD$d[1], 0))
  SVD$d <- SVD$d[pos]
  SVD$v <- SVD$v[,pos,drop=FALSE]
  inv.Sigma <- SVD$v %*% (1/SVD$d * t(SVD$v))
  inv.sqrt.Sigma <- SVD$v %*% (1/sqrt(SVD$d) * t(SVD$v))
  #-----------------------------------------------------------------
  # pooled within-group covariance
  S <- matrix(0, p, p)
  for(j in seq_len(G))
     S <- S + f[j]*Sigma.G[,,j]
  #-----------------------------------------------------------------
  # kernel matrix
  M.I <- crossprod(t(sweep(mu.G, 1, FUN="-", STATS=mu))*sqrt(f))
  M.II <- matrix(0, p, p)
  if(lambda < 1)
    { for(j in seq_len(G))
         M.II <- M.II + f[j]*crossprod(inv.sqrt.Sigma%*%(Sigma.G[,,j]-S))
    }
  # convex combination of M_I and M_II
  M <- 2*lambda*crossprod(inv.sqrt.Sigma %*% M.I) + 2*(1-lambda)*M.II
  # regularize the M_II
  # M <- M.I + lambda*M.II
  # M <- crossprod(inv.sqrt.Sigma %*% M.I) +
  #      (1-lambda)*M.II + lambda/p * diag(p)
  #
  SVD <- eigen.decomp(M, inv.sqrt.Sigma, invsqrt = TRUE)
  # clip negative eigenvalues to zero
  l <- SVD$l; l <- (l+abs(l))/2
  numdir <- min(p, sum(l > sqrt(.Machine$double.eps)))
  basis <- as.matrix(SVD$v)[,seq(numdir),drop=FALSE]
  sdx <- diag(Sigma)
  std.basis <- as.matrix(apply(basis, 2, function(x) x*sdx))
  if(normalized)
    { basis <- as.matrix(apply(basis, 2, normalize))
      std.basis <- as.matrix(apply(std.basis, 2, normalize))
    }
  dimnames(basis) <- list(colnames(x), paste("Dir", 1:ncol(basis), sep=""))
  dimnames(std.basis) <- dimnames(basis)
  # projections of the (centered) data onto the estimated directions
  Z <- scale(x, scale = FALSE) %*% basis
  #
  out = list(call = call, type = type,
             x = x, Sigma = Sigma,
             classification = class, mixcomp = y, class2mixcomp = cl2mc,
             G = G, modelName = modelName,
             mu = mu.G, sigma = Sigma.G, pro = f,
             M = M, M.I = M.I, M.II = M.II, lambda = lambda,
             evalues = l, raw.evectors = as.matrix(SVD$v),
             basis = basis, std.basis = std.basis,
             numdir = numdir, dir = Z)
  class(out) = "MclustDR"
  return(out)
}

print.MclustDR <- function(x, digits = getOption("digits"), ...)
{
  # Terse printout: class of the object and its component names.
  txt <- paste0("\'", class(x)[1], "\' model object: ")
  catwrap(txt)
  cat("\n")
  catwrap("\nAvailable components:\n")
  print(names(x))
  # str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap")
  invisible(x)
}

summary.MclustDR <- function(object, numdir, std = FALSE, ...)
{
  # Collect the pieces shown by print.summary.MclustDR for the first
  # 'numdir' estimated directions.
  if(missing(numdir)) numdir <- object$numdir
  dim <- seq(numdir)
  if(object$type == "Mclust")
    { n <- as.vector(table(object$classification))
      G <- object$G }
  else
    { n <- as.vector(table(object$classification))
      G <- as.vector(table(object$class2mixcomp)) }
  obj <- list(type = object$type,
              modelName = object$modelName,
              classes = levels(object$classification),
              n = n, G = G,
              basis = object$basis[,seq(dim),drop=FALSE],
              std = std,
              std.basis = object$std.basis[,seq(dim),drop=FALSE],
              evalues = object$evalues[seq(dim)],
              evalues.cumperc = with(object,
                { evalues <- evalues[seq(numdir)]
                  cumsum(evalues)/sum(evalues)*100 })
              )
  class(obj) <- "summary.MclustDR"
  return(obj)
}

print.summary.MclustDR <- function(x, digits = max(5, getOption("digits") - 3), ...)
{
  # Formatted report: model type, per-class/cluster table, basis vectors
  # (standardized if x$std), eigenvalues and their cumulative percentage.
  title <- paste("Dimension reduction for model-based clustering and classification")
  txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "")
  catwrap(txt)
  catwrap(title)
  catwrap(txt)
  if(x$type == "Mclust")
    { tab <- data.frame(n = x$n)
      rownames(tab) <- x$classes
      tab <- as.matrix(tab)
      names(dimnames(tab)) <- c("Clusters", "")
      cat("\n")
      catwrap(paste0("Mixture model type: ", x$type,
                     " (", x$modelName, ", ", x$G, ")"))
      print(tab, quote = FALSE, right = TRUE)
    }
  else if(x$type == "MclustDA" | x$type == "EDDA")
    { tab <- data.frame(n = x$n, Model = x$modelName, G = x$G)
      rownames(tab) <- x$classes
      tab <- as.matrix(tab)
      names(dimnames(tab)) <- c("Classes", "")
      cat("\n")
      catwrap(paste("Mixture model type:", x$type))
      print(tab, quote = FALSE, right = TRUE)
    }
  else stop("invalid model type")
  cat("\n")
  if(x$std)
    { catwrap("Standardized basis vectors using predictors scaled to have std.dev. equal to one:")
      print(x$std.basis, digits = digits)
    }
  else
    { catwrap("Estimated basis vectors:")
      print(x$basis, digits = digits)
    }
  cat("\n")
  evalues <- rbind("Eigenvalues" = x$evalues,
                   "Cum. %" = x$evalues.cumperc)
  colnames(evalues) <- colnames(x$basis)
  print(evalues, digits=digits)
  invisible()
}

projpar.MclustDR <- function(object, dim, center = TRUE, raw = FALSE)
{
  # Transform estimated parameters to projection subspace given by
  # 'dim' directions
  x <- object$x
  p <- ncol(x)
  n <- nrow(x)
  G <- object$G
  numdir <- object$numdir
  if(missing(dim)) dim <- seq(numdir)
  numdir <- length(dim)
  if(raw) V <- object$raw.evectors[,dim,drop=FALSE]
  else    V <- object$basis[,dim,drop=FALSE]
  # project the component means (optionally after centering)
  mu <- t(object$mu)
  if(center) mu <- scale(mu, center = apply(x,2,mean), scale = FALSE)
  Mu <- mu %*% V
  # project the component covariances via their Cholesky factors
  sigma <- object$sigma
  cho <- array(apply(sigma, 3, chol), c(p, p, G))
  Sigma <- array(apply(cho, 3, function(R) crossprod(R %*% V)),
                 c(numdir, numdir, G))
  #
  return(list(mean = Mu, variance = Sigma))
}

predict.MclustDR <- function(object, dim = 1:object$numdir,
                             newdata, eval.points, ...)
{
  # Posterior class probabilities, densities and classifications on the
  # projection subspace, either for the training data, new observations,
  # or explicit evaluation points.
  dim <- dim[dim <= object$numdir]
  if(missing(newdata) & missing(eval.points))
    { dir <- object$dir[,dim,drop=FALSE] }
  else if(!missing(newdata))
    { newdata <- as.matrix(newdata)
      newdata <- scale(newdata, center = colMeans(object$x), scale = FALSE)
      dir <- newdata %*% object$basis[,dim,drop=FALSE] }
  else if(!missing(eval.points))
    { dir <- as.matrix(eval.points) }
  n <- nrow(dir)
  G <- object$G                              # num. components
  nclass <- nlevels(object$classification)   # num. classes
  par <- projpar.MclustDR(object, dim)
  Mu <- par$mean
  Sigma <- par$variance
  # old version
  # cden <- array(NA, c(n, G))
  # for(j in 1:G)
  #    { cden[,j] <- dmvnorm(dir, Mu[j,], Sigma[,,j], log = FALSE) }
  # z <- sweep(cden, 2, FUN = "*", STATS = object$pro)
  # den <- apply(z, 1, sum)
  # z <- sweep(z, 1, FUN = "/", STATS = den)
  # new version: more efficient and accurate
  z <- array(NA, c(n, G))
  for(j in 1:G)
     { z[,j] <- dmvnorm(dir, Mu[j,], Sigma[,,j], log = TRUE) }
  z <- sweep(z, 2, FUN = "+", STATS = log(object$pro))
  # log-densities via log-sum-exp for numerical stability
  logden <- apply(z, 1, logsumexp)
  z <- sweep(z, 1, FUN = "-", STATS = logden)
  z <- exp(z)
  # aggregate mixture-component probabilities into class probabilities
  zz <- matrix(0, n, nclass)
  for(j in seq(nclass))
     { zz[,j] <- rowSums(z[,object$class2mixcomp == j,drop=FALSE]) }
  z <- zz; rm(zz)
  class <- factor(apply(z,1,which.max),
                  levels = 1:nclass,
                  labels = levels(object$classification))
  out <- list(dir = dir,
              density = exp(logden),
              z = z,
              uncertainty = 1 - apply(z,1,max),
              classification = class)
  return(out)
}

predict2D.MclustDR <- function(object, dim = 1:2, ngrid = 100, xlim, ylim)
{
  # Evaluate predict.MclustDR over an ngrid x ngrid lattice spanning the
  # first two projection directions (used for 2D contour/image plots).
  dim <- dim[1:2]
  dir <- object$dir[,dim,drop=FALSE]
  G <- object$G
  par <- projpar.MclustDR(object, dim)
  Mu <- par$mean
  Sigma <- par$variance
  if(missing(xlim))
    xlim <- range(dir[,1]) # +c(-1,1)*0.05*diff(range(x)))
  if(missing(ylim))
    ylim <- range(dir[,2]) # +c(-1,1)*0.05*diff(range(x)))
  xygrid <- cbind(seq(xlim[1], xlim[2], length = ngrid),
                  seq(ylim[1], ylim[2], length = ngrid))
  grid <- expand.grid(xygrid[,1], xygrid[,2])
  pred <- predict.MclustDR(object, dim = dim, eval.points = grid)
  out <- list(x =
xygrid[,1], y = xygrid[,2],
              density = matrix(pred$density, ngrid, ngrid),
              z = array(pred$z, c(ngrid, ngrid, ncol(pred$z))),
              uncertainty = matrix(pred$uncertainty, ngrid, ngrid),
              classification = matrix(pred$classification, ngrid, ngrid))
  return(out)
}

plot.MclustDR <- function(x, dimens,
                          what = c("scatterplot", "pairs", "contour",
                                   "classification", "boundaries",
                                   "density", "evalues"),
                          symbols, colors,
                          col.contour = gray(0.7), col.sep = grey(0.4),
                          ngrid = 200, nlevels = 5, asp = NULL, ...)
{
  # Plot method for MclustDR objects; dispatches on 'what' to the nested
  # helper functions defined below.
  object <- x
  x <- object$x
  p <- ncol(x)
  n <- nrow(x)
  G <- object$G
  y <- object$mixcomp
  class <- as.numeric(object$classification)
  nclass <- length(table(class))
  dir <- object$dir
  numdir <- object$numdir
  dimens <- if(missing(dimens)) seq(numdir)
            else intersect(as.numeric(dimens), seq(numdir))
  if(length(dimens) == 0)
    stop("invalid 'dimens' value(s) provided")
  if(missing(symbols))
    { if(G <= length(mclust.options("classPlotSymbols")))
        { symbols <- mclust.options("classPlotSymbols") }
      else if(G <= 26)
        { symbols <- LETTERS }
    }
  if(length(symbols) == 1) symbols <- rep(symbols,nclass)
  if(length(symbols) < nclass)
    { warning("more symbols needed to show classification")
      symbols <- rep(16, nclass)
    }
  if(missing(colors))
    { colors <- mclust.options("classPlotColors") }
  if(length(colors) == 1) colors <- rep(colors,nclass)
  if(length(colors) < nclass)
    { warning("more colors needed to show mixture components")
      colors <- rep("black", nclass)
    }
  ####################################################################
  what <- match.arg(what, several.ok = TRUE)
  oldpar <- par(no.readonly = TRUE)
  # on.exit(par(oldpar))
  # with exactly two directions "pairs" degenerates to a scatterplot
  if(any(i <- (what == "pairs")) & (length(dimens) == 2))
    { what[i] <- "scatterplot" }
  # with a single direction only these plot types make sense
  if(length(dimens) == 1)
    { what[!(what == "classification" |
             what == "density" |
             what == "evalues")] <- "classification" }
  what <- unique(what)
  plot.MclustDR.scatterplot <- function(...)
{ dir <- dir[,dimens,drop=FALSE] plot(dir, col = colors[class], pch = symbols[class], xlab = colnames(dir)[1], ylab = colnames(dir)[2], asp = asp, ...) } plot.MclustDR.pairs <- function(...) { dir <- dir[,dimens,drop=FALSE] pairs(dir, col = colors[class], pch = symbols[class], gap = 0.2, asp = asp, ...) } plot.MclustDR.density <- function(...) { dimens <- dimens[1] dir <- object$dir[,dimens,drop=FALSE] par <- projpar.MclustDR(object, dimens) Mu <- par$mean Sigma <- par$variance xgrid <- extendrange(dir, f = 0.1) xgrid <- seq(min(xgrid), max(xgrid), length=2*ngrid) dens <- matrix(as.double(NA), length(xgrid), G) for(j in 1:G) dens[,j] <- dnorm(xgrid, Mu[j,], sqrt(Sigma[,,j])) # if(object$type == "MclustDA") { d <- t(apply(dens, 1, function(x, p = object$pro) p*x)) dens <- matrix(as.double(NA), length(xgrid), nclass) tab <- table(y, class) for(i in 1:ncol(tab)) { j <- which(tab[,i] > 0) dens[,i] <- apply(d[,j,drop=FALSE],1,sum) } } plot(0, 0, type = "n", xlab = colnames(dir), ylab = "Density", xlim = range(xgrid), ylim = range(0, dens*1.1)) for(j in 1:ncol(dens)) lines(xgrid, dens[,j], col = colors[j]) rug(dir, lwd = 1) } plot.MclustDR.contour <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] par <- projpar.MclustDR(object, dimens) Mu <- par$mean Sigma <- par$variance # draw contours for each class or cluster plot(dir, type = "n", asp = asp) for(k in seq(nclass)) { i <- which(object$class2mixcomp == k) parameters <- list(pro = object$pro[i]/sum(object$pro[i]), mean = t(par$mean[i,,drop=FALSE]), variance = list(G = length(i), d = 2, sigma = par$variance[,,i,drop=FALSE])) surfacePlot(dir, parameters, col = col.contour, nlevels = nlevels, grid = ngrid, xlim = par("usr")[1:2], ylim = par("usr")[3:4], asp = asp, add = TRUE) } points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.classification.Mclust <- function(...) 
{ if(object$numdir == 1) { dir <- object$dir[,1] boxplot(dir ~ class, horizontal = TRUE, col = adjustcolor(mclust.options("classPlotColors"), alpha.f = 0.3)[1:nclass], border = mclust.options("classPlotColors")[1:nclass], ylab = "Classification", xlab = "Dir1", xlim = c(0,nclass+0.5)) points(dir, rep(0,n), pch = "|") return() } # numdir >= 2 dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$classification <- apply(pred$z, 1:2, which.max) image(pred$x, pred$y, pred$classification, col = adjustcolor(colors[1:G], alpha.f = 0.1), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.classification.MclustDA <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$classification <- apply(pred$z, 1:2, which.max) image(pred$x, pred$y, pred$classification, col = adjustcolor(colors[1:nclass], alpha.f = 0.1), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.boundaries.Mclust <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$uncertainty[c(1,ngrid),] <- NA pred$uncertainty[,c(1,ngrid)] <- NA image(pred$x, pred$y, pred$uncertainty, col = rev(gray.colors(10, start = 0, end = 1)), breaks = seq(0, 1-1/nclass, length = 11), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) 
} plot.MclustDR.boundaries.MclustDA <- function(...) { dimens <- dimens[1:2] dir <- object$dir[,dimens,drop=FALSE] pred <- predict2D.MclustDR(object, dimens, ngrid, xlim = extendrange(dir[,1], f = 0.05), ylim = extendrange(dir[,2], f = 0.05)) pred$uncertainty[c(1,ngrid),] <- NA pred$uncertainty[,c(1,ngrid)] <- NA image(pred$x, pred$y, pred$uncertainty, col = rev(gray.colors(10, start = 0, end = 1)), breaks = seq(0, 1-1/nclass, length = 11), xlab = colnames(dir)[1], ylab = colnames(dir)[2], xaxs = "i", yaxs = "i", asp = asp) points(dir, col = colors[class], pch = symbols[class], ...) } plot.MclustDR.evalues <- function(...) { plotEvalues.MclustDR(object, numdir = max(dimens), plot = TRUE) } if(interactive() & length(what) > 1) { title <- "Dimension reduction for model-based clustering and classification plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "scatterplot") plot.MclustDR.scatterplot(...) if(what[choice] == "pairs") plot.MclustDR.pairs(...) if(what[choice] == "contour") plot.MclustDR.contour(...) if(what[choice] == "classification" & object$type == "Mclust") plot.MclustDR.classification.Mclust(...) if(what[choice] == "classification" & (object$type == "EDDA" | object$type == "MclustDA")) plot.MclustDR.classification.MclustDA(...) if(what[choice] == "boundaries" & object$type == "Mclust") plot.MclustDR.boundaries.Mclust(...) if(what[choice] == "boundaries" & (object$type == "EDDA" | object$type == "MclustDA")) plot.MclustDR.boundaries.MclustDA(...) if(what[choice] == "density") plot.MclustDR.density(...) if(what[choice] == "evalues") plot.MclustDR.evalues(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "scatterplot")) plot.MclustDR.scatterplot(...) if(any(what == "pairs")) plot.MclustDR.pairs(...) if(any(what == "contour")) plot.MclustDR.contour(...) 
if(any(what == "classification" & object$type == "Mclust")) plot.MclustDR.classification.Mclust(...) if(any(what == "classification" & (object$type == "EDDA" | object$type == "MclustDA"))) plot.MclustDR.classification.MclustDA(...) if(any(what == "boundaries" & object$type == "Mclust")) plot.MclustDR.boundaries.Mclust(...) if(any(what == "boundaries" & (object$type == "EDDA" | object$type == "MclustDA"))) plot.MclustDR.boundaries.MclustDA(...) if(any(what == "density")) plot.MclustDR.density(...) if(any(what == "evalues")) plot.MclustDR.evalues(...) } invisible() } plotEvalues.MclustDR <- function(x, numdir, plot = FALSE, legend = TRUE, ylim, ...) { object <- x G <- object$G f <- object$pro lambda <- object$lambda # dim <- if(missing(numdir)) seq(object$numdir) else seq(numdir) if(missing(numdir)) numdir <- object$numdir dim <- seq(numdir) d <- length(dim) par <- projpar.MclustDR(object, dim = dim, center = TRUE, raw = TRUE) mu <- par$mean Sigma.G <- par$variance # M1 <- t(mu) %*% diag(f) %*% mu l1 <- 2*lambda*diag(crossprod(M1)) # S <- matrix(0, d, d) for(j in seq(G)) S <- S + f[j]*Sigma.G[,,j] M2 <- matrix(0, d, d) for(j in 1:G) { C <- (Sigma.G[,,j]-S) M2 <- M2 + f[j] * tcrossprod(C) } l2 <- 2*(1-lambda)*diag(M2) # l <- object$evalues[dim] # if(plot) { if(missing(ylim)) ylim <- range(0, max(l)+diff(range(l))*0.05) plot(dim, l, type="b", lty = 1, pch = 16, cex = 1.5, xaxt = "n", ylim = ylim, xlab = "MclustDR directions", ylab = "Eigenvalues", panel.first = { abline(v = dim, col = "lightgray", lty = "dotted") abline(h = axTicks(2,par("yaxp")), col = "lightgray", lty = "dotted") } ) axis(1, at = dim, labels = dim) lines(dim, l1, type="b", lty = 2, pch = 22, cex = 1.5) lines(dim, l2, type="b", lty = 2, pch = 2, cex = 1.5) if(legend) { legend("topright", lty = c(1,2,2), pch = c(16,22,2), legend = c("Eigenvalues", "from means", "from vars"), bg = ifelse(par("bg")=="transparent", "white", par("bg")), inset = 0.01, pt.cex = 1.5) } } out <- list(dim = dim, evalues = l, 
mean.contrib = l1, var.contrib = l2) if(plot) invisible(out) else return(out) } # Auxiliary functions ----------------------------------------------------- # TODO: remove # mvdnorm <- function(x, mu, sigma, log = FALSE, tol = sqrt(.Machine$double.eps)) # { # if(is.vector(x)) # { x <- matrix(x, ncol = length(x)) } # else # { x <- as.matrix(x) } # SVD <- svd(sigma) # pos <- (SVD$d > max(tol*SVD$d[1], 0)) # in case of not full rank covar matrix # inv.sigma <- SVD$v[,pos,drop=FALSE] %*% (1/SVD$d[pos] * # t(SVD$u[,pos,drop=FALSE])) # z <- mahalanobis(x, center = mu, cov = inv.sigma, inverted = TRUE) # # logdet <- sum(log(eigen(sigma, symmetric = TRUE, only.values = TRUE)$values)) # logdet <- sum(log(SVD$d[pos])) # logdens <- -(ncol(x) * log(2 * pi) + logdet + z)/2 # if(log) return(logdens) # else return(exp(logdens)) # } ellipse <- function(c, M, r, npoints = 100) { # Returns the cartesian coordinates of points x on the ellipse # (x-c)'M(x-c) = r^2, # where x = x(theta) and theta varies from 0 to 2*pi radians in npoints steps. # local functions circle <- function(theta, r) r*c(cos(theta),sin(theta)) ellip <- function(theta, r, lambda) lambda*circle(theta, r) point <- function(theta) c+c(gamma %*% ellip(theta, r, lam)) # SVD <- svd(M) lam <- 1/sqrt(SVD$d) gamma <- SVD$v coord <- t(sapply(seq(0, 2*pi, length=npoints), function(th) point(th))) return(coord) } eigen.decomp <- function(A, B, invsqrt = FALSE) { # # Generalized eigenvalue decomposition of A with respect to B. # # A generalized eigenvalue problem AV = BLV is said to be symmetric positive # definite if A is symmetric and B is positive definite. V is the matrix of # generalized eigenvectors, and L is the diagonal matrix of generalized # eigenvalues (Stewart, 2001, pag. 229-230). # # Properties: # V'AV = L # V'BV = I # # The algorithm implemented is described in Stewart (2001, pag. 234) and used # by Li (2000). # # References: # Li, K.C., 2000. 
High dimensional data analysis via the SIR-PHD approach, # Stewart, G.W., 2001. Matrix Algorithms: vol II Eigensystems, SIAM. if(!invsqrt) { SVD <- svd(B, nu=0) # in case of not full rank covar matrix tol <- .Machine$double.eps pos <- which(SVD$d > max(tol*SVD$d[1], 0)) SVD$d <- SVD$d[pos] SVD$v <- SVD$v[,pos,drop=FALSE] # Computes inverse square root matrix such that: # t(inv.sqrt.B) %*% inv.sqrt.B = inv.sqrt.B %*% t(inv.sqrt.B) = solve(B) inv.sqrt.B <- SVD$v %*% (1/sqrt(SVD$d) * t(SVD$v)) } else { inv.sqrt.B <- B } # Compute B^(-1/2)' A B^(-1/2) = UDU' # evectors = B^(-1/2) U # evalues = D A <- t(inv.sqrt.B) %*% A %*% inv.sqrt.B SVD <- svd(A, nu=0) list(l = SVD$d, v = inv.sqrt.B %*% SVD$v) } # Subset selection of GMMDR/GMMDRC directions ----------------------------- MclustDRsubsel <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), ..., bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) { # Subset selection for GMMDR directions based on bayes factors. # # object = a MclustDR object # G = a vector of cluster sizes for searching # modelNames = a vector of models for searching # ... = further arguments passed through Mclust/MclustDA # bic.stop = criterion to stop the search. If maximal BIC difference is # less than bic.stop the algorithm stops. # Two tipical values are: # 0 = stops when BIC difference becomes negative (default) # -Inf = stops when all directions have been selected # bic.cutoff = select simplest ``best'' model within bic.cutoff from the # maximum value achieved. Setting this to 0 (default) simply # select the model with the largest BIC difference. # mindir = the minimum number of diretions to be estimated # verbose = if 0 no trace info is shown; if 1 a trace of each step # of the search is printed; if 2 a detailed trace info is # is shown. 
stopifnot("first argument must be an object of class 'MclustDR'" = inherits(object, "MclustDR")) hcUse <- mclust.options("hcUse") mclust.options("hcUse" = "VARS") on.exit(mclust.options("hcUse" = hcUse)) mc <- match.call(expand.dots = TRUE) mc[[1]] <- switch(object$type, "Mclust" = as.name("MclustDRsubsel_cluster"), "EDDA" = as.name("MclustDRsubsel_classif"), "MclustDA" = as.name("MclustDRsubsel_classif"), stop("Not allowed 'MclustDR' type!")) eval(mc, parent.frame()) } MclustDRsubsel_cluster <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), ..., bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) { drmodel <- object mclustType <- drmodel$type lambda <- drmodel$lambda numdir <- drmodel$numdir numdir0 <- numdir+1 dir <- drmodel$dir[,seq(numdir),drop=FALSE] mindir <- max(1, as.numeric(mindir), na.rm = TRUE) verbose <- as.numeric(verbose) ncycle <- 0 while(numdir < numdir0) { ncycle <- ncycle+1 if(verbose > 0) cat("\nCycle", ncycle, "...\n") out <- MclustDRsubsel1cycle(drmodel, G, modelNames, bic.stop = bic.stop, bic.cutoff = bic.cutoff, verbose = if(verbose > 1) TRUE else FALSE) if(verbose > 0) { cat("\n"); print(out$tab) } mod <- do.call("Mclust", list(data = dir[,out$subset,drop=FALSE], G = G, modelNames = if(length(out$subset) > 1) modelNames else c("E", "V"), verbose = FALSE, ...)) numdir0 <- numdir drmodel0 <- MclustDR(mod, lambda = lambda) if(drmodel0$numdir < mindir) break drmodel <- drmodel0 numdir <- drmodel$numdir dir <- drmodel$dir[,seq(numdir),drop=FALSE] } # format object using original data obj <- drmodel obj$basisx <- MclustDRrecoverdir(obj, data = object$x, std = FALSE) obj$std.basisx <- MclustDRrecoverdir(obj, data = object$x, std = TRUE) class(obj) <- c("MclustDRsubsel", class(obj)) return(obj) } MclustDRsubsel1cycle <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), bic.stop = 0, bic.cutoff = 0, verbose = interactive()) { # Single cycle of subset selection for GMMDR directions based on 
bayes factors. stopifnot("first argument must be an object of class 'MclustDR'" = inherits(object, "MclustDR")) d <- object$numdir dir <- object$dir[,seq(d),drop=FALSE] n <- nrow(dir) if(is.null(colnames(dir))) colnames(dir) = paste("[,", 1:d, "]", sep="") dir.names <- colnames(dir) BIC <- Model1 <- Model2 <- tab <- NULL; Model1$bic <- 0 bic.stop <- bic.stop + sqrt(.Machine$double.eps) bic.cutoff <- bic.cutoff + sqrt(.Machine$double.eps) inc <- NULL; excl <- seq(1,d) model1D <- if(any(grep("V", modelNames))) c("E", "V") else "E" # hskip <- paste(rep(" ",4),collapse="") if(verbose) cat("\n", hskip, "Start greedy search...\n", sep="") while(length(excl)>0) { if(verbose) { cat(hskip, rep("-",15), "\n", sep="") cat(paste(hskip, "Step", length(inc)+1, "\n")) } for(j in excl) { # Select simplest model with smallest num. of components # within bic.cutoff bic <- mclustBIC(dir[,sort(c(inc, j)),drop=FALSE], G = G, modelNames = if(length(inc)>0) modelNames else model1D, verbose = FALSE) bic.tab <- (as.matrix(max(bic, na.rm=TRUE) - bic) < bic.cutoff)*1 bestG <- which(rowSums(bic.tab, na.rm=TRUE) > 0)[1] bestmod <- which(bic.tab[bestG,,drop=FALSE] > 0)[1] out <- data.frame(Variable = dir.names[j], model = colnames(bic.tab)[bestmod], G = G[bestG], bic = c(bic[bestG,bestmod]), bic.diff = c(bic[bestG,bestmod] - Model1$bic - MclustDRBICreg(dir[,j], dir[,inc])) ) # Model2 <- rbind(Model2, out) } if(verbose) print(cbind(" " = hskip, Model2), row.names = FALSE) # stop if max BIC difference is < than cut-off bic.stop if(max(Model2$bic.diff) < bic.stop & length(inc) > 0) { break } # otherwise keep selecting i <- which.max(Model2$bic.diff) inc <- append(inc, excl[i]) excl <- setdiff(excl, excl[i]) tab <- rbind(tab, Model2[i,]) Model1 <- Model2[i,] Model2 <- NULL } rownames(tab) <- 1:nrow(tab) colnames(tab) <- c("Variable", "Model", "G", "BIC", "BIC.dif") subsets <- sapply(1:nrow(tab), function(x) list(inc[1:x])) # return(list(subset = subsets[[length(subsets)]], tab = tab)) } 
MclustDRsubsel_classif <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), ..., bic.stop = 0, bic.cutoff = 0, mindir = 1, verbose = interactive()) { drmodel <- object mclustType <- drmodel$type lambda <- drmodel$lambda numdir <- drmodel$numdir numdir0 <- numdir+1 dir <- drmodel$dir[,seq(numdir),drop=FALSE] mindir <- max(1, as.numeric(mindir), na.rm = TRUE) verbose <- as.numeric(verbose) ncycle <- 0 while(numdir < numdir0) { ncycle <- ncycle+1 if(verbose > 0) cat("\nCycle", ncycle, "...\n") out <- MclustDRCsubsel1cycle(drmodel, G, modelNames, bic.stop = bic.stop, bic.cutoff = bic.cutoff, verbose = if(verbose > 1) TRUE else FALSE) if(verbose > 0) { cat("\n"); print(out$tab) } mod <- do.call("MclustDA", list(data = dir[,out$subset,drop=FALSE], class = object$classification, G = G, modelNames = if(length(out$subset) > 1) modelNames else if(any(grep("V", modelNames))) c("E", "V") else "E", modelType = mclustType, verbose = FALSE, ...)) numdir0 <- numdir drmodel0 <- MclustDR(mod, lambda = lambda) if(drmodel0$numdir < mindir) break drmodel <- drmodel0 numdir <- drmodel$numdir dir <- drmodel$dir[,seq(numdir),drop=FALSE] } # format object using original data obj <- drmodel obj$basisx <- MclustDRrecoverdir(obj, data = object$x, std = FALSE) obj$std.basisx <- MclustDRrecoverdir(obj, data = object$x, std = TRUE) class(obj) <- c("MclustDRsubsel", class(obj)) return(obj) } MclustDRCsubsel1cycle <- function(object, G = 1:9, modelNames = mclust.options("emModelNames"), bic.stop = 0, bic.cutoff = 0, verbose = TRUE) { # Single cycle of subset selection for GMMDRC directions based on bayes factors. 
stopifnot("first argument must be an object of class 'MclustDR'" = inherits(object, "MclustDR")) d <- object$numdir dir <- object$dir[,seq(d),drop=FALSE] n <- nrow(dir) if(is.null(colnames(dir))) colnames(dir) = paste("[,", seq(d), "]", sep="") dir.names <- colnames(dir) BIC <- Model1 <- Model2 <- tab <- NULL; Model1$bic <- 0 bic.stop <- bic.stop + sqrt(.Machine$double.eps) bic.cutoff <- bic.cutoff + sqrt(.Machine$double.eps) inc <- NULL; excl <- seq(d) model1D <- if(any(grep("V", modelNames))) c("E", "V") else "E" # hskip <- paste(rep(" ",4),collapse="") if(verbose) cat("\n", hskip, "Start greedy search...\n", sep="") while(length(excl)>0) { if(verbose) { cat(hskip, rep("-",15), "\n", sep="") cat(paste(hskip, "Step", length(inc)+1, "\n")) } for(j in excl) { # Select simplest model with smallest num. of components # within bic.cutoff mod <- MclustDA(dir[,sort(c(inc, j)),drop=FALSE], class = object$classification, G = G, modelNames = if(length(inc)>0) modelNames else model1D, modelType = object$type, verbose = FALSE) out <- data.frame(Variable = dir.names[j], model = paste(sapply(mod$models, function(m) m$modelName),collapse="|"), G = paste(sapply(mod$models, function(m) m$G),collapse="|"), bic = mod$bic, bic.diff = c(mod$bic - # (Model1$bic + bic.reg(z2, z1)) Model1$bic - MclustDRBICreg(dir[,j], dir[,inc])) ) # Model2 <- rbind(Model2, out) } if(verbose) print(cbind(" " = hskip, Model2), row.names = FALSE) # stop if max BIC difference is < than cut-off bic.stop if(max(Model2$bic.dif) < bic.stop & length(inc) > 0) { break } # otherwise keep selecting i <- which.max(Model2$bic.dif) inc <- append(inc, excl[i]) excl <- setdiff(excl, excl[i]) tab <- rbind(tab, Model2[i,]) Model1 <- Model2[i,] Model2 <- NULL } rownames(tab) <- 1:nrow(tab) colnames(tab) <- c("Variable", "Model", "G", "BIC", "BIC.dif") subsets <- sapply(1:nrow(tab), function(x) list(inc[1:x])) # return(list(subset = subsets[[length(subsets)]], tab = tab)) } # BICreg <- function(y, x) # { n <- length(y) # 
mod <- lm.fit(cbind(rep(1,n), x), y) # rss <- sum(mod$residuals^2) # -n*log(2*pi) - n*log(rss/n) - n - (n - mod$df.residual + 1) * log(n) # } MclustDRBICreg <- function(y, x, stepwise = TRUE) { x <- as.matrix(x) y <- as.vector(y) n <- length(y) mod0 <- lm(y ~ 1) if(ncol(x) >= 1) { mod1 <- lm(y ~ 1+x) if(stepwise) { mod <- step(mod0, k = log(n), trace = 0, scope = list(lower = formula(mod0), upper = formula(mod1)), direction = "forward") } else mod <- mod1 } else mod <- mod0 rss <- sum(mod$residuals^2) p <- (n - mod$df.residual + 1) -n*log(2*pi) - n*log(rss/n) - n - p*log(n) } normalize <- function(x) { # Normalize the vector x to have unit length x <- as.vector(x) x <- x/sqrt(as.vector(crossprod(x))) return(x) } MclustDRrecoverdir <- function(object, data, normalized = TRUE, std = FALSE) { # Recover coefficients of the linear combination defining the MclustDR # directions. This is useful if the directions are obtained from other # directions stopifnot("first argument must be an object of class 'MclustDR'" = inherits(object, "MclustDR")) if(missing(data)) x <- object$x else x <- as.matrix(data) x <- scale(x, center=TRUE, scale=FALSE) numdir <- object$numdir dir <- object$dir[,seq(numdir),drop=FALSE] # B <- as.matrix(coef(lm(dir ~ x)))[-1,,drop=FALSE] # ok but old B <- qr.solve(x, dir) if(std) { sdx <- sd(x) B <- apply(B, 2, function(x) x*sdx) } if(normalized) { B <- as.matrix(apply(B, 2, normalize)) } rownames(B) <- colnames(x) return(B) } ## Define print and summary methods for showing basis coefs ## in the original scale of variables print.MclustDRsubsel <- function(x, ...) { x$basis <- x$basisx class(x) <- class(x)[2] NextMethod() } summary.MclustDRsubsel <- function(object, ...) 
{ object$basis <- object$basisx object$std.basis <- object$std.basisx class(object) <- class(object)[2] NextMethod() } mclust/R/mclustda.R0000644000176200001440000012044514476325645013747 0ustar liggesusersMclustDA <- function(data, class, G = NULL, modelNames = NULL, modelType = c("MclustDA", "EDDA"), prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), verbose = interactive(), ...) { call <- match.call() mc <- match.call(expand.dots = TRUE) # if(missing(data)) stop("no training data provided!") data <- data.matrix(data) n <- nrow(data) p <- ncol(data) oneD <- if(p==1) TRUE else FALSE # if(missing(class)) stop("class labels for training data must be provided!") class <- as.factor(class) classLabel <- levels(class) ncl <- nlevels(class) if(ncl == 1) G <- 1 prop <- as.vector(table(class))/n names(prop) <- classLabel # modelType <- match.arg(modelType, choices = eval(formals(MclustDA)$modelType), several.ok = FALSE) # if(is.null(G)) { G <- rep(list(1:5), ncl) } else if(is.list(G)) { G <- lapply(G, sort) } else { G <- rep(list(sort(G)), ncl) } if(any(unlist(G) <= 0)) stop("G must be positive") # if(is.null(modelNames)) { if(oneD) modelNames <- c("E", "V") else modelNames <- mclust.options("emModelNames") } if(n <= p) { m <- match(c("EEE","EEV","VEV","VVV"), mclust.options("emModelNames"), nomatch=0) modelNames <- modelNames[-m] } if(!is.list(modelNames)) { modelNames <- rep(list(modelNames), ncl) } # # hcUse <- mclust.options("hcUse") # mclust.options("hcUse" = "VARS") # on.exit(mclust.options("hcUse" = hcUse)) # if(modelType == "EDDA") { mc[[1]] <- as.name("mstep") mc$class <- mc$G <- mc$modelNames <- mc$modelType <- NULL mc$warn <- FALSE mc$z <- unmap(as.numeric(class)) G <- 1 modelNames <- unique(unlist(modelNames)) BIC <- rep(NA, length(modelNames)) Model <- NULL if(verbose) { cat("fitting ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = length(modelNames), style = 3) on.exit(close(pbar)) ipbar <- 0 } for(i in 
seq(modelNames)) { mc$modelName <- as.character(modelNames[i]) mStep <- eval(mc, parent.frame()) eStep <- do.call("estep", c(mStep, list(data = data, warn = FALSE))) BIC[i] <- do.call("bic", c(eStep, list(equalPro = TRUE))) if(!is.na(BIC[i]) && BIC[i] >= max(BIC, na.rm = TRUE)) Model <- eStep if(verbose) { ipbar <- ipbar+1; setTxtProgressBar(pbar, ipbar) } } if(all(is.na(BIC))) { warning("No model(s) can be estimated!!") return() } names(BIC) <- modelNames bic <- max(BIC, na.rm = TRUE) loglik <- Model$loglik df <- (2*loglik - bic)/log(Model$n) # there are (nclass-1) more df than really needed # equal to logLik(object) but faster Model <- c(Model, list("BIC" = BIC)) Models <- rep(list(Model), ncl) names(Models) <- classLabel for(l in 1:ncl) { I <- (class == classLabel[l]) Models[[l]]$n <- sum(I) Models[[l]]$G <- 1 Models[[l]]$bic <- Models[[l]]$loglik <- NULL par <- Models[[l]]$parameters par$pro <- 1 par$mean <- if(oneD) par$mean[l] else par$mean[,l,drop=FALSE] par$variance$G <- 1 if(oneD) { # par$variance$sigma <- par$variance$sigma[l] if(length(par$variance$sigmasq) > 1) par$variance$sigmasq <- par$variance$sigmasq[l] else par$variance$sigmasq <- par$variance$sigmasq } else { par$variance$sigma <- par$variance$sigma[,,l,drop=FALSE] if(length(par$variance$sigmasq) > 1) par$variance$sigmasq <- par$variance$sigmasq[l] if(length(par$variance$scale) > 1) par$variance$scale <- par$variance$scale[l] if(length(dim(par$variance$shape)) > 1) par$variance$shape <- par$variance$shape[,l] if(length(dim(par$variance$orientation)) > 2) # LS was > 1 par$variance$orientation <- par$variance$orientation[,,l,drop=FALSE] if(length(dim(par$variance$cholSigma)) > 2) par$variance$cholSigma <- par$variance$cholSigma[,,l,drop=FALSE] if(length(dim(par$variance$cholsigma)) > 2) par$variance$cholsigma <- par$variance$cholsigma[,,l,drop=FALSE] } Models[[l]]$parameters <- par Models[[l]]$z <- NULL # z[I,,drop=FALSE] Models[[l]]$classification <- rep(1, sum(I)) # apply(z[I,,drop=FALSE], 1, 
which.max) Models[[l]]$uncertainty <- NULL # 1 - apply(z[I,], 1, max) Models[[l]]$observations <- which(I) } } else { # modelType == "MclustDA" i.e. different covariance structures for each class Models <- rep(list(NULL), ncl) mc[[1]] <- as.name("mclustBIC") mc$class <- NULL # noise <- eval(mc$initialization$noise, parent.frame()) for(l in 1:ncl) { I <- (class == classLabel[l]) mc[[2]] <- data[I,] mc$G <- G[[l]] mc$modelNames <- as.character(modelNames[[l]]) # if(!is.null(noise)) # mc$initialization$noise <- noise[I] if(verbose) cat(paste0("Class ", classLabel[l], ": ")) BIC <- eval(mc, parent.frame()) # slightly adjust parameters if none of the models can be fitted while(all(is.na(BIC))) { if(length(mc$modelNames) == 1) { j <- which(mc$modelNames == mclust.options("emModelNames")) if(j == 1) mc$G <- mc$G - 1 else mc$modelNames <- mclust.options("emModelNames")[j-1] } else { mc$G <- mc$G - 1 } BIC <- eval(mc, parent.frame()) } SUMMARY <- summary(BIC, data[I,]) SUMMARY$bic <- BIC names(SUMMARY)[which(names(SUMMARY) == "bic")] <- "BIC" Models[[l]] <- c(SUMMARY, list(observations = which(I))) } bic <- loglik <- df <- NULL } names(Models) <- classLabel Models$Vinv <- NULL out <- list(call = call, data = data, class = class, type = modelType, n = n, d = p, prop = prop, models = Models, bic = bic, loglik = loglik, df = df) out <- structure(out, prior = prior, control = control, class = "MclustDA") if(modelType == "MclustDA") { l <- logLik.MclustDA(out, data) out$loglik <- as.numeric(l) out$df <- attr(l, "df") out$bic <- 2*out$loglik - log(n)*out$df } return(out) } print.MclustDA <- function(x, ...) 
{ cat("\'", class(x)[1], "\' model object:\n", sep = "") models <- x$models nclass <- length(models) n <- sapply(1:nclass, function(i) models[[i]]$n) M <- sapply(1:nclass, function(i) models[[i]]$modelName) G <- sapply(1:nclass, function(i) models[[i]]$G) out <- data.frame(n = n, Model = M, G = G) rownames(out) <- names(models) out <- as.matrix(out) names(dimnames(out)) <- c("Classes", "") print(out, quote = FALSE, right = TRUE) cat("\n") catwrap("\nAvailable components:\n") print(names(x)) # str(x, max.level = 2, give.attr = FALSE, strict.width = "wrap") invisible(x) } summary.MclustDA <- function(object, parameters = FALSE, newdata, newclass, ...) { # collect info models <- object$models nclass <- length(models) classes <- names(models) n <- sapply(1:nclass, function(i) models[[i]]$n) G <- sapply(1:nclass, function(i) models[[i]]$G) modelName <- sapply(1:nclass, function(i) models[[i]]$modelName) prior <- attr(object, "prior") printParameters <- parameters par <- getParameters.MclustDA(object) class <- object$class data <- object$data pred <- predict(object, newdata = data, ...) ce <- mean(class != pred$classification) brier <- BrierScore(pred$z, class) tab <- try(table(class, pred$classification)) if(inherits(tab, "try-error")) { ce <- tab <- NA } else { names(dimnames(tab)) <- c("Class", "Predicted") } tab.newdata <- ce.newdata <- brier.newdata <- NULL if(!missing(newdata) & !missing(newclass)) { pred.newdata <- predict(object, newdata = newdata, ...) 
if(missing(newclass)) { tab.newdata <- table(pred.newdata$classification) names(dimnames(tab.newdata)) <- "Predicted" } else { tab.newdata <- table(newclass, pred.newdata$classification) names(dimnames(tab.newdata)) <- c("Class", "Predicted") ce.newdata <- mean(newclass != pred.newdata$classification) brier.newdata <- BrierScore(pred.newdata$z, newclass) } } obj <- list(type = object$type, n = n, d = object$d, loglik = object$loglik, df = object$df, bic = object$bic, nclass = nclass, classes = classes, G = G, modelName = modelName, prop = object$prop, parameters = par, prior = prior, tab = tab, ce = ce, brier = brier, tab.newdata = tab.newdata, ce.newdata = ce.newdata, brier.newdata = brier.newdata, printParameters = printParameters) class(obj) <- "summary.MclustDA" return(obj) } print.summary.MclustDA <- function(x, digits = getOption("digits"), ...) { title <- paste("Gaussian finite mixture model for classification") txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "") catwrap(txt) catwrap(title) catwrap(txt) cat("\n") catwrap(paste(x$type, "model summary:")) cat("\n") # tab <- data.frame("log-likelihood" = x$loglik, "n" = sum(x$n), "df" = x$df, "BIC" = x$bic, row.names = "", check.names = FALSE) print(tab, digits = digits) tab <- data.frame("n" = x$n, "%" = round(x$n/sum(x$n)*100,2), "Model" = x$modelName, "G" = x$G, check.names = FALSE, row.names = x$classes) tab <- as.matrix(tab) names(dimnames(tab)) <- c("Classes", "") print(tab, digits = digits, quote = FALSE, right = TRUE) if(!is.null(x$prior)) { cat("\nPrior: ") cat(x$prior$functionName, "(", paste(names(x$prior[-1]), x$prior[-1], sep = " = ", collapse = ", "), ")", sep = "") cat("\n") } if(x$printParameters) { cat("\nClass prior probabilities:\n") print(x$prop, digits = digits) for(i in seq(x$nclass)) { cat("\nClass = ", x$class[i], "\n", sep = "") par <- x$parameters[[i]] if(x$type == "MclustDA") { cat("\nMixing probabilities: ") cat(round(par$pro, digits = digits), "\n") } 
cat("\nMeans:\n") print(par$mean, digits = digits) cat("\nVariances:\n") if(x$d > 1) { for(g in seq(x$G[i])) { cat("[,,", g, "]\n", sep = "") print(par$variance[,,g], digits = digits) } } else print(par$variance, digits = digits) } } cat("\nTraining confusion matrix:\n") print(x$tab) cat("Classification error =", round(x$ce, min(digits,4)), "\n") cat("Brier score =", round(x$brier, min(digits,4)), "\n") if(!is.null(x$tab.newdata)) { cat("\nTest confusion matrix:\n") print(x$tab.newdata) if(!is.null(x$ce.newdata)) { cat("Classification error =", round(x$ce.newdata, min(digits,4)), "\n") cat("Brier score =", round(x$brier.newdata, min(digits,4)), "\n") } } invisible(x) } getParameters.MclustDA <- function(object) { # collect info models <- object$models nclass <- length(models) classes <- names(models) n <- sapply(1:nclass, function(i) models[[i]]$n) G <- sapply(1:nclass, function(i) models[[i]]$G) modelName <- sapply(1:nclass, function(i) models[[i]]$modelName) # prior <- attr(object, "prior") par <- vector(mode = "list", length = nclass) for(i in seq(nclass)) { par[[i]] <- models[[i]]$parameters if(is.null(par[[i]]$pro)) par$pro <- 1 if(par[[i]]$variance$d < 2) { sigma <- rep(par[[i]]$variance$sigma, models[[i]]$G)[1:models[[i]]$G] names(sigma) <- names(par[[i]]$mean) par[[i]]$variance$sigma <- sigma } par[[i]]$variance <- par[[i]]$variance$sigma } return(par) } logLik.MclustDA <- function (object, data, ...) 
# Body of logLik.MclustDA (header on the preceding line): log-likelihood of
# the finite-mixture classification model, with "df" and "nobs" attributes
# set so AIC/BIC work via the standard stats generics.
{
  if(missing(data))
    data <- object$data
  n <- object$n
  d <- object$d
  par <- getParameters.MclustDA(object)
  nclass <- length(par)
  # empirical class frequencies act as class prior probabilities
  fclass <- sapply(object$models, function(m) m$n)/n
  logfclass <- log(fclass)
  G <- sapply(par, function(x) length(x$pro))
  if(object$type == "EDDA")
  {
    # EDDA: one component per class with a common covariance model, so the
    # variance parameter count is taken from a single model with G = nclass
    df <- d * nclass + nVarParams(object$models[[1]]$modelName,
                                  d = d, G = nclass)
  } else
  {
    # MclustDA: sum per-class parameter counts (mixing props + means + vars)
    df <- sum(sapply(object$models, function(mod)
      with(mod, (G - 1) + G * d + nVarParams(modelName, d = d, G = G))))
  }
  # ll <- sapply(object$models, function(mod)
  #        { do.call("dens", c(list(data = data, logarithm = FALSE), mod)) })
  # l <- sum(log(apply(ll, 1, function(l) sum(fclass*l))))
  # per-class log-densities, combined via log-sum-exp for numerical stability
  ll <- sapply(object$models, function(mod)
         { do.call("dens", c(list(data = data, logarithm = TRUE), mod)) })
  l <- sum(apply(ll, 1, function(l) logsumexp(logfclass+l)))
  attr(l, "nobs") <- n
  attr(l, "df") <- df
  class(l) <- "logLik"
  return(l)
}

# Predict method for "MclustDA" objects: returns the MAP classification and
# the matrix z of posterior class probabilities for 'newdata' (the training
# data when 'newdata' is missing). 'prop' allows overriding the class prior
# proportions; otherwise the training class frequencies are used.
predict.MclustDA <- function(object, newdata,
                             prop = object$prop, ...)
{
  if(!inherits(object, "MclustDA"))
    stop("object not of class 'MclustDA'")
  models <- object$models
  nclass <- length(models)
  classNames <- if(is.null(object$class))
                  names(models) else levels(object$class)
  n <- sapply(1:nclass, function(i) models[[i]]$n)
  if(missing(newdata))
    { newdata <- object$data }
  # if(object$d == 1) newdata <- as.vector(newdata)
  if(is.numeric(prop))
  {
    if(any(prop < 0)) stop("'prop' must be nonnegative")
    if(length(prop) != nclass) stop("'prop' is of incorrect length")
    # normalize user-supplied proportions to sum to one
    prop <- prop/sum(prop)
  } else
  {
    prop <- n/sum(n)
  }
  # class density computed on log scale
  densfun <- function(mod, data)
    { do.call("dens", c(list(data = data, logarithm = TRUE), mod)) }
  #
  z <- matrix(as.double(NA), nrow = NROW(newdata), ncol = nclass)
  for(j in 1:nclass)
    z[,j] <- densfun(models[[j]], data = newdata)
  # posterior ∝ prior * density, normalized rowwise on the log scale
  z <- sweep(z, MARGIN = 2, FUN = "+", STATS = log(prop))
  z <- sweep(z, MARGIN = 1, FUN = "-", STATS = apply(z, 1, logsumexp))
  z <- exp(z)
  colnames(z) <- classNames
  cl <- apply(z, 1, which.max)
  # statement continues on the next line of this file
  class <- factor(classNames[cl], levels
= classNames) # out <- list(classification = class, z = z) return(out) } plot.MclustDA <- function(x, what = c("scatterplot", "classification", "train&test", "error"), newdata, newclass, dimens = NULL, symbols, colors, main = NULL, ...) { object <- x # Argh. Really want to use object anyway if(!inherits(object, "MclustDA")) stop("object not of class 'MclustDA'") data <- object$data if(object$d > 1) dataNames <- colnames(data) else dataNames <- deparse(object$call$data) n <- nrow(data) p <- ncol(data) dimens <- if(is.null(dimens)) seq(p) else dimens[dimens <= p] d <- length(dimens) if(d == 0) { warning("dimens larger than data dimensionality...") return(invisible()) } if(missing(newdata)) { newdata <- matrix(as.double(NA), 0, p) } else { newdata <- as.matrix(newdata) } if(ncol(newdata) != p) stop("incompatible newdata dimensionality") if(missing(newclass)) { newclass <- vector(length = 0) } else { if(nrow(newdata) != length(newclass)) stop("incompatible newdata and newclass") } if(object$d > 1) newdataNames <- colnames(newdata) else newdataNames <- deparse(match.call()$newdata) what <- match.arg(what, several.ok = TRUE) models <- object$models M <- length(models) if(missing(dimens)) dimens <- seq_len(p) trainClass <- object$class nclass <- length(unique(trainClass)) Data <- rbind(data, newdata) predClass <- predict(object, Data)$classification if(missing(symbols)) { if(M <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols") } else if(M <= 26) { symbols <- LETTERS } } if(length(symbols) == 1) symbols <- rep(symbols,M) if(length(symbols) < M & !any(what == "train&test")) { warning("more symbols needed to show classification") symbols <- rep(16, M) } if(missing(colors)) { colors <- mclust.options("classPlotColors") } if(length(colors) == 1) colors <- rep(colors,M) if(length(colors) < M & !any(what == "train&test")) { warning("more colors needed to show classification") colors <- rep("black", M) } oldpar <- par(no.readonly = 
TRUE) plot.MclustDA.scatterplot <- function(...) { if(d == 1) { mclust1Dplot(data = if(nrow(newdata) == 0) data[,dimens[1],drop=FALSE] else newdata[,dimens[1],drop=FALSE], what = "classification", classification = if(nrow(newdata) == 0) trainClass else newclass, xlab = if(nrow(newdata) == 0) dataNames[dimens] else newdataNames[dimens], ylab = "Classes", main = NULL, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) if(nrow(newdata) == 0) "Training data" else "Test data" else NULL, cex.main = oldpar$cex.lab) } scatellipses <- function(data, dimens, nclass, symbols, colors, ...) { m <- lapply(models, function(m) { m$parameters$mean <- array(m$parameters$mean[dimens,], c(2,m$G)) m$parameters$variance$sigma <- array(m$parameters$variance$sigma[dimens,dimens,], c(2,2,m$G)) m }) plot(data[,dimens], type = "n", ...) for(l in 1:nclass) { I <- m[[l]]$observations points(data[I,dimens[1]], data[I,dimens[2]], pch = symbols[l], col = colors[l]) for(g in 1:(m[[l]]$G)) { mvn2plot(mu = m[[l]]$parameters$mean[,g], sigma = m[[l]]$parameters$variance$sigma[,,g], k = 15, fillEllipse = mclust.options("fillEllipses"), col = if(mclust.options("fillEllipses")) colors[l] else rep("grey30",3)) } } } if(d == 2) { scatellipses(if(nrow(newdata) == 0) data else newdata, dimens = dimens[1:2], nclass = nclass, symbols = symbols, colors = colors, xlab = if(nrow(newdata) == 0) dataNames[dimens[1]] else newdataNames[dimens[1]], ylab = if(nrow(newdata) == 0) dataNames[dimens[2]] else newdataNames[dimens[2]], ...) 
if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) if(nrow(newdata) == 0) "Training data" else "Test data" else NULL, cex.main = oldpar$cex.lab) } if(d > 2) { on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0.2/2,4), oma = rep(4,4)+c(0,0,1*(!is.null(main)),0)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(if(nrow(newdata) == 0) data[,dimens[c(i,j)]] else newdata[,dimens[c(i,j)]], type="n", xlab = "", ylab = "", axes=FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), labels = if(nrow(newdata) == 0) dataNames[dimens][i] else newdataNames[dimens][i], cex = 1.5, adj = 0.5) box() } else { scatellipses(if(nrow(newdata) == 0) data else newdata, dimens = dimens[c(j,i)], nclass = nclass, symbols = symbols, colors = colors, xaxt = "n", yaxt = "n") } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) if(nrow(newdata) == 0) "Training data" else "Test data" else NULL, cex.main = 1.2*oldpar$cex.main, outer = TRUE, line = 3) } } plot.MclustDA.classification <- function(...) { if(nrow(newdata) == 0 && d == 1) { mclust1Dplot(data = data[,dimens[1],drop=FALSE], what = "classification", classification = predClass[1:n], colors = colors[1:nclass], xlab = dataNames[dimens], main = FALSE, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training data classification" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) == 0 && d == 2) { coordProj(data = data[,dimens], what = "classification", classification = predClass[1:n], main = FALSE, colors = colors[1:nclass], symbols = symbols[1:nclass], ...) 
if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training data classification" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) == 0 && d > 2) { clPairs(data[,dimens], classification = predClass[1:n], colors = colors[1:nclass], symbols = symbols[1:nclass], cex.labels = 1.5, main = if(!is.null(main)) if(is.character(main)) main else if(as.logical(main)) "Training data classification" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) > 0 && d == 1) { mclust1Dplot(data = newdata[,dimens], what = "classification", classification = predClass[-(1:n)], xlab = newdataNames[dimens], main = FALSE, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Test data classification" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) > 0 && d == 2) { coordProj(data = newdata[,dimens], what ="classification", classification = predClass[-(1:n)], colors = colors[1:nclass], symbols = symbols[1:nclass], main = FALSE, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Test data classification" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) > 0 & length(dimens) > 2) { on.exit(par(oldpar)) # par(oma = c(0,0,10,0)) clPairs(data = newdata[,dimens], classification = predClass[-(1:n)], colors = colors[1:nclass], symbols = symbols[1:nclass], cex.labels = 1.5, main = if(!is.null(main)) if(is.character(main)) main else if(as.logical(main)) "Test data classification" else NULL, cex.main = oldpar$cex.lab) } } plot.MclustDA.traintest <- function(...) { cl <- factor(rep(c("Train","Test"), times = c(nrow(data), nrow(newdata))), levels = c("Train", "Test")) if(d == 1) { mclust1Dplot(data = Data[,dimens], what = "classification", classification = cl, xlab = dataNames[dimens], ylab = "", colors = c("grey20", "grey80"), main = FALSE, ...) 
if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training and Test data" else NULL, cex.main = oldpar$cex.lab) } if(d == 2) { coordProj(Data, dimens = dimens[1:2], what = "classification", classification = cl, cex = 0.8, symbols = c(19,3), colors = c("grey80", "grey20"), main = FALSE, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training (o) and Test (+) data" else NULL, cex.main = oldpar$cex.lab) } if(d > 2) { clPairs(Data[,dimens], classification = cl, symbols = c(19,3), colors = c("grey80", "grey20"), main = if(!is.null(main)) if(is.character(main)) main else if(as.logical(main)) "Training (o) and Test (+) data" else NULL, cex.main = oldpar$cex.lab) } } plot.MclustDA.error <- function(...) { if(nrow(newdata) == 0 && d == 1) { mclust1Dplot(data = data[,dimens], what = "error", classification = predClass[1:n], truth = trainClass, xlab = dataNames[dimens], main = FALSE, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training data error" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) == 0 && d == 2) { coordProj(data = data[,dimens[1:2]], what = "error", classification = predClass[1:n], truth = trainClass, main = FALSE, ...) 
if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training data error" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) == 0 && d > 2) { on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0.2/2,4), oma = rep(4,4)+c(0,0,1*(!is.null(main)),0)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(data[,dimens[c(i,j)]], type="n", xlab = "", ylab = "", axes=FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), dataNames[dimens][i], cex = 1.5, adj = 0.5) box() } else { coordProj(data = data[,dimens[c(j,i)]], what = "error", classification = predClass[1:n], truth = trainClass, main = FALSE, xaxt = "n", yaxt = "n") } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Training data error" else NULL, cex.main = 1.2*oldpar$cex.main, outer = TRUE, line = 3) } if(nrow(newdata) > 0 && d == 1) { mclust1Dplot(data = newdata[,dimens], what = "error", classification = predClass[-(1:n)], truth = newclass, xlab = newdataNames[dimens], main = FALSE, ...) if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Test data error" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) > 0 && d == 2) { coordProj(data = newdata[,dimens[1:2]], what = "error", classification = predClass[-(1:n)], truth = newclass, main = FALSE, ...) 
if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Test data error" else NULL, cex.main = oldpar$cex.lab) } if(nrow(newdata) > 0 && d > 2) { on.exit(par(oldpar)) par(mfrow = c(d, d), mar = rep(0.2/2,4), oma = rep(4,4)+c(0,0,1*(!is.null(main)),0)) for(i in seq(d)) { for(j in seq(d)) { if(i == j) { plot(newdata[,dimens[c(i,j)]], type="n", xlab = "", ylab = "", axes=FALSE) text(mean(par("usr")[1:2]), mean(par("usr")[3:4]), newdataNames[dimens][i], cex = 1.5, adj = 0.5) box() } else { coordProj(data = newdata[,dimens[c(j,i)]], what = "error", classification = predClass[-(1:n)], truth = newclass, main = FALSE, xaxt = "n", yaxt = "n") } if(i == 1 && (!(j%%2))) axis(3) if(i == d && (j%%2)) axis(1) if(j == 1 && (!(i%%2))) axis(2) if(j == d && (i%%2)) axis(4) } } if(!is.null(main)) title(if(is.character(main)) main else if(as.logical(main)) "Test data error" else NULL, cex.main = 1.2*oldpar$cex.main, outer = TRUE, line = 3) } } if(interactive() & length(what) > 1) { title <- "Model-based discriminant analysis plots:" # present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) while(choice != 0) { if(what[choice] == "scatterplot") plot.MclustDA.scatterplot(...) if(what[choice] == "classification") plot.MclustDA.classification(...) if(what[choice] == "train&test") plot.MclustDA.traintest(...) if(what[choice] == "error") plot.MclustDA.error(...) # re-present menu waiting user choice choice <- menu(what, graphics = FALSE, title = title) } } else { if(any(what == "scatterplot")) plot.MclustDA.scatterplot(...) if(any(what == "classification")) plot.MclustDA.classification(...) if(any(what == "train&test")) plot.MclustDA.traintest(...) if(any(what == "error")) plot.MclustDA.error(...) } invisible() } # TODO: old version to be deleted at a certain point # cvMclustDA <- function(object, nfold = 10, # metric = c("error", "brier"), # prop = object$prop, # verbose = interactive(), ...) 
# (commented-out old implementation of cvMclustDA, kept for reference;
#  superseded by the version below which returns both error and Brier score)
# {
# #
# call <- object$call
# nfold <- as.numeric(nfold)
# metric <- match.arg(metric,
#                     choices = eval(formals(cvMclustDA)$metric),
#                     several.ok = FALSE)
# #
# #
# data <- object$data
# class <- as.factor(object$class)
# n <- length(class)
# G <- lapply(object$models, function(mod) mod$G)
# modelName <- lapply(object$models, function(mod) mod$modelName)
# #
# #
# ce <- function(pred, class)
# {
#   1 - sum(class == pred, na.rm = TRUE)/length(class)
# }
# #
# #
# folds <- if(nfold == n) lapply(1:n, function(x) x)
#          else balancedFolds(class, nfolds = nfold)
# nfold <- length(folds)
# folds.size <- sapply(folds, length)
# #
# #
# cvmetric <- rep(NA, nfold)
# cvclass <- factor(rep(NA, n), levels = levels(class))
# cvprob <- matrix(as.double(NA), nrow = n, ncol = nlevels(class),
#                  dimnames = list(NULL, levels(class)))
# #
# if(verbose)
# {
#   cat("cross-validating ...\n")
#   flush.console()
#   pbar <- txtProgressBar(min = 0, max = nfold, style = 3)
#   on.exit(close(pbar))
# }
# #
# for(i in seq(nfold))
# {
#   x <- data[-folds[[i]],,drop=FALSE]
#   y <- class[-folds[[i]]]
#   call$data <- x
#   call$class <- y
#   call$G <- G
#   call$modelNames <- modelName
#   call$verbose <- FALSE
#   mod <- eval(call, parent.frame())
#   #
#   predTest <- predict(mod, data[folds[[i]],,drop=FALSE], prop = prop)
#   cvmetric[i] <- if(metric == "error")
#                    ce(predTest$classification, class[folds[[i]]])
#                  else
#                    BrierScore(predTest$z, class[folds[[i]]])
#   cvclass[folds[[i]]] <- predTest$classification
#   cvprob[folds[[i]],] <- predTest$z
#   #
#   if(verbose)
#     setTxtProgressBar(pbar, i)
# }
# #
# #
# cv <- sum(cvmetric*folds.size)/sum(folds.size)
# se <- sqrt(var(cvmetric)/nfold)
# #
# #
# out <- list(classification = cvclass,
#             z = cvprob,
#             error = if(metric == "error") cv else NA,
#             brier = if(metric == "brier") cv else NA,
#             se = se)
# return(out)
# }

# Cross-validated error rate and Brier score for a fitted MclustDA model.
# The model is refitted on each training fold by re-evaluating the original
# call (with the same per-class G and modelNames), then evaluated on the
# held-out fold. Folds are class-balanced via balancedFolds(), except that
# nfold = n gives leave-one-out CV.
cvMclustDA <- function(object, nfold = 10,
                       prop = object$prop,
                       verbose = interactive(), ...)
{
  # old 'metric' argument is no longer supported: both metrics are returned
  if(!is.null(match.call(expand.dots = TRUE)$metric))
    warning("'metric' argument is deprecated! 
Ignored.")
  #
  call <- object$call
  nfold <- as.numeric(nfold)
  data <- object$data
  class <- as.factor(object$class)
  n <- length(class)
  G <- lapply(object$models, function(mod) mod$G)
  modelName <- lapply(object$models, function(mod) mod$modelName)
  #
  # classification error helper (NAs count as misclassified)
  ce <- function(pred, class)
  {
    1 - sum(class == pred, na.rm = TRUE)/length(class)
  }
  #
  folds <- if(nfold == n) lapply(1:n, function(x) x)
           else balancedFolds(class, nfolds = nfold)
  nfold <- length(folds)
  folds.size <- sapply(folds, length)
  #
  # column 1 = classification error, column 2 = Brier score, per fold
  metric.cv <- matrix(as.double(NA), nrow = nfold, ncol = 2)
  class.cv <- factor(rep(NA, n), levels = levels(class))
  prob.cv <- matrix(as.double(NA), nrow = n, ncol = nlevels(class),
                    dimnames = list(NULL, levels(class)))
  if(verbose)
  {
    cat("cross-validating ...\n")
    flush.console()
    pbar <- txtProgressBar(min = 0, max = nfold, style = 3)
    on.exit(close(pbar))
  }
  for(i in seq(nfold))
  {
    x <- data[-folds[[i]],,drop=FALSE]
    y <- class[-folds[[i]]]
    # refit the model on the training fold by re-evaluating the stored call
    call$data <- x
    call$class <- y
    call$G <- G
    call$modelNames <- modelName
    call$verbose <- FALSE
    mod <- eval(call, parent.frame())
    #
    predTest <- predict(mod, data[folds[[i]],,drop=FALSE], prop = prop)
    metric.cv[i,1] <- ce(predTest$classification, class[folds[[i]]])
    metric.cv[i,2] <- BrierScore(predTest$z, class[folds[[i]]])
    class.cv[folds[[i]]] <- predTest$classification
    prob.cv[folds[[i]],] <- predTest$z
    #
    if(verbose)
      setTxtProgressBar(pbar, i)
  }
  #
  # fold-size-weighted cross-validated estimates of both metrics
  cv <- sapply(1:2, function(m)
    sum(metric.cv[,m]*folds.size)/sum(folds.size))
  #
  # se <- apply(metric.cv, 2, function(m) sqrt(var(m)/nfold))
  # weighted standard errors of the CV estimates
  se <- sapply(1:2, function(m)
    sqrt( ( sum( (metric.cv[,m] - cv[m])^2 * folds.size) /
              (sum(folds.size)*(nfold-1)/nfold)) / nfold))
  #
  out <- list(classification = class.cv,
              z = prob.cv,
              ce = cv[1], se.ce = se[1],
              brier = cv[2], se.brier = se[2])
  return(out)
}

balancedFolds <- function(y, nfolds = min(min(table(y)), 10))
{
# Create 'nfolds' balanced folds conditional on grouping variable 'y'.
# Function useful in evaluating a classifier by balanced cross-validation.
# Returns a list with 'nfolds' elements containing indexes of each fold.
#
# Based on balanced.folds() in package 'pamr' by T. Hastie, R. Tibshirani,
# Balasubramanian Narasimhan, Gil Chu.
  totals <- table(y)
  fmax <- max(totals)
  nfolds <- min(nfolds, fmax)  # ensure number of folds not larger than the max class size
  folds <- as.list(seq(nfolds))  # NOTE(review): never used below; dead assignment
  yids <- split(seq(y), y)  # get the ids in a list, split by class
  ## create a big matrix, with enough rows to get in all the folds per class
  bigmat <- matrix(as.double(NA),
                   nrow = ceiling(fmax/nfolds) * nfolds,
                   ncol = length(totals))
  for(i in seq(totals))
  {
    # a single observation cannot be sample()d (it would be treated as 1:n)
    bigmat[seq(totals[i]), i] <- if (totals[i]==1) yids[[i]]
                                 else sample(yids[[i]])
  }
  smallmat <- matrix(bigmat, nrow = nfolds)  # reshape the matrix
  ## clever sort to mix up the NAs
  smallmat <- permuteRows(t(smallmat))
  res <-vector("list", nfolds)
  for(j in 1:nfolds)
  {
    jj <- !is.na(smallmat[, j])
    res[[j]] <- smallmat[jj, j]
  }
  return(res)
}

# Randomly permute the entries within each row of matrix 'x'
# (rows keep their position; only within-row order is shuffled).
permuteRows <- function(x)
{
  dd <- dim(x)
  n <- dd[1]
  p <- dd[2]
  # offset of 10*row index keeps entries of different rows from interleaving
  mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n))
  matrix(t(x)[order(mm)], n, p, byrow = TRUE)
}

# Deprecated functions

# Leave-one-out cross-validated error rate for each model name
# (deprecated: use cvMclustDA instead).
cv1EMtrain <- function(data, labels, modelNames=NULL)
{
  .Deprecated("cvMclustDA", package = "mclust")
  z <- unmap(as.numeric(labels))
  G <- ncol(z)
  dimDataset <- dim(data)
  oneD <- is.null(dimDataset) || length(dimDataset[dimDataset > 1]) == 1
  if (oneD || length(dimDataset) != 2)
  {
    # one-dimensional data: only equal/unequal variance models apply
    if (is.null(modelNames)) modelNames <- c("E", "V")
    if (any(!match(modelNames, c("E", "V"), nomatch = 0)))
      stop("modelNames E or V for one-dimensional data")
    n <- length(data)
    cv <- matrix(1, nrow = n, ncol = length(modelNames))
    dimnames(cv) <- list(NULL, modelNames)
    for (m in modelNames)
    {
      for (i in 1:n)
      {
        # fit on all-but-i, then classify observation i
        mStep <- mstep(modelName = m, data = data[-i],
                       z = z[-i,], warn = FALSE)
        eStep <- do.call("estep", c(mStep, list(data = data[i],
                                                warn = FALSE)))
        if (is.null(attr(eStep, "warn")))
        {
          k <- (1:G)[eStep$z == max(eStep$z)]
          l <- (1:G)[z[i,] == max(z[i,])]
          cv[i, m] <- as.numeric(!any(k == l))
        }
      }
    }
  } else
  {
    # multivariate case: try all (or the requested) covariance models
    if (is.null(modelNames))
      modelNames <- mclust.options("emModelNames")
    n <- nrow(data)
    cv <- matrix(1, nrow = n, ncol = length(modelNames))
    dimnames(cv) <- list(NULL, modelNames)
    for (m in modelNames)
    {
      for (i in 1:n)
      {
        # fit on all-but-i, then classify observation i
        mStep <- mstep(modelName = m, data = data[-i,],
                       z = z[-i,], warn = FALSE)
        eStep <- do.call("estep", c(mStep, list(data = data[i, , drop = FALSE],
                                                warn = FALSE)))
        if (is.null(attr(eStep, "warn")))
        {
          k <- (1:G)[eStep$z == max(eStep$z)]
          l <- (1:G)[z[i,] == max(z[i,])]
          cv[i, m] <- as.numeric(!any(k == l))
        }
      }
    }
  }
  # proportion of leave-one-out misclassifications per model name
  errorRate <- apply(cv, 2, sum)
  errorRate/n
}

# BIC of a single EM step started from the class labels, for each model name
# (deprecated: use MclustDA instead).
bicEMtrain <- function(data, labels, modelNames=NULL)
{
  .Deprecated("MclustDA", package = "mclust")
  z <- unmap(as.numeric(labels))
  G <- ncol(z)
  dimData <- dim(data)
  oneD <- is.null(dimData) || length(dimData[dimData > 1]) == 1
  if (oneD || length(dimData) != 2)
  {
    # one-dimensional data: only equal/unequal variance models apply
    if (is.null(modelNames)) modelNames <- c("E", "V")
    if (any(!match(modelNames, c("E", "V"), nomatch = 0)))
      stop("modelNames E or V for one-dimensional data")
  } else
  {
    if (is.null(modelNames))
      modelNames <- mclust.options("emModelNames")
  }
  BIC <- rep(NA, length(modelNames))
  names(BIC) <- modelNames
  for (m in modelNames)
  {
    mStep <- mstep(modelName = m, data = data, z = z, warn = FALSE)
    eStep <- do.call("estep", c(mStep, list(data=data, warn=FALSE)))
    if (is.null(attr(eStep, "warn")))
      BIC[m] <- do.call("bic", eStep)
  }
  BIC
}

# Renamed wrapper (deprecated: use cvMclustDA); body closes on the next line
cv.MclustDA <- function(...)
{
  .Deprecated("cvMclustDA", package = "mclust")
  cvMclustDA(...)
}

# "[.mclustDAtest" <- function (x, i, j, drop = FALSE)
# {
#   clx <- oldClass(x)
#   oldClass(x) <- NULL
#   NextMethod("[")
# }

# Estimate class prior probabilities for 'newdata' by an EM-like fixed-point
# iteration: starting from the average posterior probabilities, repeatedly
# reweight the posteriors by p/prop, renormalize rowwise, and take column
# means, until convergence (relative tolerance 'eps') or 'itmax' iterations.
classPriorProbs <- function(object, newdata = object$data,
                            itmax = 1e3, eps = sqrt(.Machine$double.eps))
{
  if(!inherits(object, "MclustDA"))
    stop("object not of class 'MclustDA'")
  z <- predict(object, newdata = newdata)$z
  prop <- object$prop
  p <- colMeans(z)
  p0 <- p+1
  it <- 0
  # while(max(abs(p-p0)/abs(p)) > eps & it < itmax)
  while(any(abs(p-p0) > eps*(1+abs(p))) & it < itmax)
  {
    it <- it+1
    p0 <- p
    # z_upd <- t(apply(z, 1, function(z) { z <- z*p/prop; z/sum(z) }))
    # vectorized update of the posterior probabilities
    z_upd <- sweep(z, 2, FUN = "*", STATS = p/prop)
    z_upd <- sweep(z_upd, MARGIN = 1, FUN = "/", STATS = rowSums(z_upd))
    p <- colMeans(z_upd)
  }
  return(p)
}
mclust/R/graphics.R0000644000176200001440000014073614317553720013727 0ustar liggesusers
# One-dimensional model-based clustering plots: classification, density,
# error, or uncertainty. Body continues on the following lines of this file.
mclust1Dplot <- function(data, parameters = NULL, z = NULL,
                         classification = NULL, truth = NULL,
                         uncertainty = NULL,
                         what = c("classification", "density",
                                  "error", "uncertainty"),
                         symbols = NULL, colors = NULL,
                         ngrid = length(data),
                         xlab = NULL, ylab = NULL,
                         xlim = NULL, ylim = NULL,
                         cex = 1, main = FALSE, ...)
{ p <- ncol(as.matrix(data)) if (p != 1) stop("for one-dimensional data only") data <- as.vector(data) n <- length(data) if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if (!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigmasq <- parameters$variance$sigmasq haveParams <- !is.null(mu) && !is.null(sigmasq) && !any(is.na(mu)) && !any(is.na(sigmasq)) } else haveParams <- FALSE if (is.null(xlim)) xlim <- range(data) if (haveParams) { G <- length(mu) if ((l <- length(sigmasq)) == 1) { sigmasq <- rep(sigmasq, G) } else if (l != G) { params <- FALSE warning("mu and sigma are incompatible") } } if (!is.null(truth)) { if (is.null(classification)) { classification <- truth truth <- NULL } else { if (length(unique(truth)) != length(unique(classification))) truth <- NULL else truth <- as.character(truth) } } if(!is.null(classification)) { classification <- as.character(classification) U <- sort(unique(classification)) L <- length(U) if(is.null(symbols)) { symbols <- rep("|", L) } else if(length(symbols) == 1) { symbols <- rep(symbols, L) } else if(length(symbols) < L) { warning("more symbols needed to show classification") symbols <- rep("|", L) } if(is.null(colors)) { colors <- mclust.options("classPlotColors")[1:L] } else if(length(colors) == 1) { colors <- rep(colors, L) } else if(length(colors) < L) { warning("more colors needed to show classification") colors <- rep("black", L) } } main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) what <- match.arg(what, choices = eval(formals(mclust1Dplot)$what)) bad <- what == "classification" && is.null(classification) bad <- bad || (what == "uncertainty" && is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null(truth))) if(bad) stop("insufficient input for specified plot") M <- L switch(what, "classification" = { plot(data, seq(from = 0, to = M, length = n), type = 
"n", xlab = if(is.null(xlab)) "" else xlab, ylab = if(is.null(ylab)) "Classification" else ylab, xlim = xlim, ylim = if(is.null(ylim)) grDevices::extendrange(r = c(0,M), f = 0.1) else ylim, yaxt = "n", main = "", ...) axis(side = 2, at = 0:M, labels = c("", sort(unique(classification)))) if(main) title("Classification") for(k in 1:L) { I <- classification == U[k] if(symbols[k] == "|") { vpoints(data[I], rep(0, length(data[I])), cex = cex) vpoints(data[I], rep(k, length(data[I])), col = colors[k], cex = cex) } else { points(data[I], rep(0, length(data[I])), pch = symbols[k], cex = cex) points(data[I], rep(k, length(data[I])), pch = symbols[k], col = colors[k], cex = cex) } } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(data, seq(from = 0, to = M, length = n), type = "n", xlab = xlab, ylab = if(is.null(ylab)) "Class errors" else ylab, xlim = xlim, ylim = if(is.null(ylim)) grDevices::extendrange(r = c(0,M), f = 0.1) else ylim, yaxt = "n", ...) axis(side = 2, at = 0:M, labels = c("", unique(classification))) if(main) title("Classification error") good <- rep(TRUE, length(classification)) good[ERRORS] <- FALSE sym <- "|" for(k in 1:L) { K <- classification == U[k] I <- (K & good) if(any(I)) { if(FALSE) { sym <- if (L > 4) 1 else if (k == 4) 5 else k - 1 } l <- sum(as.numeric(I)) if(sym == "|") vpoints(data[I], rep(0, l), col = colors[k], cex = cex) else points(data[I], rep(0, l), pch = sym, col = colors[k], cex = cex) } I <- K & !good if(any(I)) { if(FALSE) { sym <- if (L > 5) 16 else k + 14 } l <- sum(as.numeric(I)) if(sym == "|") vpoints(data[I], rep(k, l), col = colors[k], cex = cex) else points(data[I], rep(k, l), pch = sym, col = colors[k], cex = cex) } } }, "uncertainty" = { u <- (uncertainty - min(uncertainty))/ (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps)) b <- bubble(u, cex = cex*c(0.3, 2), alpha = c(0.3, 1)) if(is.null(classification)) { classification <- rep(1, length(u)) U <- 1 } if(is.null(colors)) colors 
<- palette()[1] cl <- sapply(classification, function(cl) which(cl == U)) plot(data, uncertainty, type = "h", xlab = xlab, ylab = if(is.null(ylab)) "Uncertainty" else ylab, xlim = xlim, ylim = if(is.null(ylim)) c(0,1) else ylim, col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha), ...) rug(data, lwd = 1, col = adjustcolor(par("fg"), alpha.f = 0.8)) if(main) title("Uncertainty") }, "density" = { if(is.null(parameters$pro) && parameters$variance$G != 1) stop("mixing proportions missing") x <- grid1(n = ngrid, range = xlim, edge = TRUE) plot(x, dens(data = x, modelName = "V", parameters = parameters), xlab = xlab, ylab = if(is.null(ylab)) "Density" else ylab, xlim = xlim, type = "l", main = "", ...) if(main) title("Density") }, { plot(data, rep(0, n), type = "n", xlab = "", ylab = "", xlim = xlim, main = "", ...) vpoints(data, rep(0, n), cex = cex) if(main) title("Point Plot") } ) invisible() } mclust2Dplot <- function(data, parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "uncertainty", "error"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, scale = FALSE, cex = 1, PCH = ".", main = FALSE, swapAxes = FALSE, ...) { if(dim(data)[2] != 2) stop("data must be two dimensional") if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if(!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigma <- parameters$variance$sigma haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any(is.na(sigma)) } else haveParams <- FALSE main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) if(is.null(xlim)) xlim <- range(data[, 1]) if(is.null(ylim)) ylim <- range(data[, 2]) if(scale) { par(pty = "s") d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2.) 
} else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } dnames <- dimnames(data)[[2]] if(is.null(xlab)) { xlab <- if(is.null(dnames)) "" else dnames[1] } if(is.null(ylab)) { ylab <- if(is.null(dnames)) "" else dnames[2] } if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } mu <- array(mu, c(2, G)) sigma <- array(sigma, c(2, 2, G)) } if(swapAxes) { if(haveParams) { mu <- mu[2:1,] sigma <- sigma[2:1, 2:1,] } data <- data[, 2:1] } if(!is.null(truth)) { if(is.null(classification)) { classification <- truth truth <- NULL } else { if(length(unique(truth)) != length(unique(classification))) truth <- NULL else truth <- as.character(truth) } } if(charmatch("classification", what, nomatch = 0) && is.null(classification) && !is.null(z)) { classification <- map(z) } if(!is.null(classification)) { classification <- as.character(classification) U <- sort(unique(classification)) L <- length(U) noise <- (U[1] == "0") if(is.null(symbols)) { if(L <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols")[1:L] if(noise) { symbols <- c(16,symbols)[1:L] } } else if(L <= 9) { symbols <- as.character(1:9) } else if(L <= 26) { symbols <- LETTERS } } if(is.null(colors)) { if(L <= length(mclust.options("classPlotColors"))) { colors <- mclust.options("classPlotColors")[1:L] if(noise) { colors <- unique(c("black", colors))[1:L] } } } else if(length(colors) == 1) colors <- rep(colors, L) if(length(symbols) < L) { warning("more symbols needed to show classification ") symbols <- rep(16,L) } if(length(colors) < L) { warning("more colors needed to show classification ") colors <- rep("black",L) } } what <- match.arg(what, choices = eval(formals(mclust2Dplot)$what)) bad <- what == "classification" && is.null(classification) bad <- bad || (what == "uncertainty" && 
is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null(truth))) if(bad) stop("insufficient input for specified plot") switch(EXPR = what, "classification" = { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) title("Classification") for(k in 1:L) { I <- classification == U[k] points(data[I, 1], data[I, 2], pch = symbols[k], col = colors[k], cex = if(U[k] == "0") cex/2 else cex) } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) title("Classification Errors") CLASSES <- unique(as.character(truth)) symOpen <- c(2, 0, 1, 5) symFill <- c(17, 15, 16, 18) good <- rep(TRUE,length(classification)) good[ERRORS] <- FALSE if(L > 4) { points(data[good, 1], data[good, 2], pch = 1, col = colors, cex = cex) points(data[!good, 1], data[!good, 2], pch = 16, cex = cex) } else { for(k in 1:L) { K <- truth == CLASSES[k] points(data[K, 1], data[K, 2], pch = symOpen[k], col = colors[k], cex = cex) if(any(I <- (K & !good))) { points(data[I, 1], data[I, 2], pch = symFill[k], cex = cex) } } } }, "uncertainty" = { u <- (uncertainty - min(uncertainty))/ (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps)) b <- bubble(u, cex = cex*c(0.3, 2), alpha = c(0.3, 0.9)) cl <- sapply(classification, function(cl) which(cl == U)) plot(data[, 1], data[, 2], pch = 19, xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", cex = b$cex, col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha), ...) if(main) title("Uncertainty") fillEllipses <- FALSE }, { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) 
if(main) title("Point Plot") points(data[, 1], data[, 2], pch = PCH, cex = cex) } ) if(haveParams && addEllipses) { ## plot ellipsoids for(g in 1:G) mvn2plot(mu = mu[,g], sigma = sigma[,,g], k = 15, fillEllipse = fillEllipses, col = if(fillEllipses) colors[g] else rep("grey30",3)) } invisible() } # old version mvn2plot <- function(mu, sigma, k = 15, alone = FALSE, col = rep("grey30",3), pch = 8, lty = c(1,2), lwd = c(1,1)) { p <- length(mu) if (p != 2) stop("only two-dimensional case is available") if (any(unique(dim(sigma)) != p)) stop("mu and sigma are incompatible") ev <- eigen(sigma, symmetric = TRUE) s <- sqrt(rev(sort(ev$values))) V <- t(ev$vectors[, rev(order(ev$values))]) theta <- (0:k) * (pi/(2 * k)) x <- s[1] * cos(theta) y <- s[2] * sin(theta) xy <- cbind(c(x, -x, -x, x), c(y, y, -y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") if(alone) { xymin <- apply(xy, 2, FUN = "min") xymax <- apply(xy, 2, FUN = "max") r <- ceiling(max(xymax - xymin)/2) xymid <- (xymin + xymax)/2 plot(xy[, 1], xy[, 2], type = "n", xlab = "x", ylab = "y", xlim = c(-r, r) + xymid[1], ylim = c(-r, r) + xymid[2]) } l <- length(x) i <- 1:l for(k in 1:4) { lines(xy[i,], col = col[1], lty = lty[1], lwd = lwd[1]) i <- i + l } x <- s[1] y <- s[2] xy <- cbind(c(x, -x, 0, 0), c(0, 0, y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") lines(xy[1:2,], col = col[2], lty = lty[2], lwd = lwd[2]) lines(xy[3:4,], col = col[2], lty = lty[2], lwd = lwd[2]) points(mu[1], mu[2], col = col[3], pch = pch) invisible() } mvn2plot <- function(mu, sigma, k = 15, alone = FALSE, fillEllipse = FALSE, alpha = 0.3, col = rep("grey30", 3), pch = 8, lty = c(1,2), lwd = c(1,1), ...) 
{ p <- length(mu) if(p != 2) stop("only two-dimensional case is available") if(any(unique(dim(sigma)) != p)) stop("mu and sigma are incompatible") ev <- eigen(sigma, symmetric = TRUE) s <- sqrt(rev(sort(ev$values))) V <- t(ev$vectors[, rev(order(ev$values))]) theta <- (0:k) * (pi/(2 * k)) x <- s[1] * cos(theta) y <- s[2] * sin(theta) xy <- cbind(c(x, -x, -x, x), c(y, y, -y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") # if(alone) { xymin <- apply(xy, 2, FUN = "min") xymax <- apply(xy, 2, FUN = "max") r <- ceiling(max(xymax - xymin)/2) xymid <- (xymin + xymax)/2 plot(xy[, 1], xy[, 2], type = "n", xlab = "x", ylab = "y", xlim = c(-r, r) + xymid[1], ylim = c(-r, r) + xymid[2]) } # draw ellipses if(fillEllipse) { col <- rep(col, 3) polygon(xy[chull(xy),], border = NA, col = adjustcolor(col[1], alpha.f = alpha)) } else { l <- length(x) i <- 1:l for(k in 1:4) { lines(xy[i,], col = col[1], lty = lty[1], lwd = lwd[1]) i <- i + l } } # draw principal axes and centroid x <- s[1] y <- s[2] xy <- cbind(c(x, -x, 0, 0), c(0, 0, y, -y)) xy <- xy %*% V xy <- sweep(xy, MARGIN = 2, STATS = mu, FUN = "+") lines(xy[1:2,], col = col[2], lty = lty[2], lwd = lwd[2]) lines(xy[3:4,], col = col[2], lty = lty[2], lwd = lwd[2]) points(mu[1], mu[2], col = col[3], pch = pch) # invisible() } clPairs <- function (data, classification, symbols = NULL, colors = NULL, cex = NULL, labels = dimnames(data)[[2]], cex.labels = 1.5, gap = 0.2, grid = FALSE, ...) { data <- as.matrix(data) n <- nrow(data) d <- ncol(data) if(missing(classification)) classification <- rep(1, n) if(!is.factor(classification)) classification <- as.factor(classification) l <- length(levels(classification)) if(length(classification) != n) stop("classification variable must have the same length as nrows of data!") if(is.null(symbols)) { if(l == 1) { symbols <- "." 
} if(l <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols") } else { if(l <= 9) { symbols <- as.character(1:9) } else if(l <= 26) { symbols <- LETTERS[1:l] } else symbols <- rep(16,l) } } if(length(symbols) == 1) symbols <- rep(symbols, l) if(length(symbols) < l) { symbols <- rep(16, l) warning("more symbols needed") } if(is.null(colors)) { if(l <= length(mclust.options("classPlotColors"))) colors <- mclust.options("classPlotColors")[1:l] } if(length(colors) == 1) colors <- rep(colors, l) if(length(colors) < l) { colors <- rep( "black", l) warning("more colors needed") } if(is.null(cex)) cex <- 1 if(length(cex) == 1) cex <- rep(cex, l) cex <- cex[1:l] grid <- isTRUE(as.logical(grid)) if(d > 2) { pairs(x = data, labels = labels, panel = function(...) { if(grid) grid() points(...) }, pch = symbols[classification], col = colors[classification], cex = cex[classification], gap = gap, cex.labels = cex.labels, ...) } else if(d == 2) { plot(data, cex = cex[classification], pch = symbols[classification], col = colors[classification], panel.first = if(grid) grid(), ...) } invisible(list(d = d, class = levels(classification), col = colors, pch = symbols[seq(l)], cex = cex)) } clPairsLegend <- function(x, y, class, col, pch, cex, box = TRUE, ...) { usr <- par("usr") if(box & all(usr == c(0,1,0,1))) { oldpar <- par(mar = rep(0.2, 4), no.readonly = TRUE) on.exit(par(oldpar)) box(which = "plot", lwd = 0.8) } if(!all(usr == c(0,1,0,1))) { x <- x*(usr[2]-usr[1])+usr[1] y <- y*(usr[4]-usr[3])+usr[3] } dots <- list(...) 
dots$x <- x dots$y <- y dots$legend <- class dots$text.width <- max(strwidth(dots$title, units = "user"), strwidth(dots$legend, units = "user")) dots$col <- if(missing(col)) 1 else col dots$text.col <- if(missing(col)) 1 else col dots$pch <- if(missing(pch)) 1 else pch dots$cex <- if(missing(cex)) 1 else cex dots$title.col <- par("fg") dots$title.adj <- 0.1 dots$xpd <- NA do.call("legend", dots) } coordProj <- function(data, dimens = c(1,2), parameters = NULL, z = NULL, classification = NULL, truth = NULL, uncertainty = NULL, what = c("classification", "error", "uncertainty"), addEllipses = TRUE, fillEllipses = mclust.options("fillEllipses"), symbols = NULL, colors = NULL, scale = FALSE, xlim = NULL, ylim = NULL, cex = 1, PCH = ".", main = FALSE, ...) { if(is.null(dimens)) dimens <- c(1, 2) if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if(!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigma <- parameters$variance$sigma haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any( is.na(sigma)) } else haveParams <- FALSE data <- data[, dimens, drop = FALSE] if(dim(data)[2] != 2) stop("need two dimensions") if(is.null(xlim)) xlim <- range(data[, 1]) if(is.null(ylim)) ylim <- range(data[, 2]) if(scale) { oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) par(pty = "s") d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2.) 
} else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } if(is.null(dnames <- dimnames(data)[[2]])) xlab <- ylab <- "" else { xlab <- dnames[1] ylab <- dnames[2] } main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } mu <- array(mu[dimens, ], c(2, G)) sigma <- array(sigma[dimens, dimens, ], c(2, 2, G)) } if(!is.null(truth)) { truth <- as.factor(truth) if(is.null(classification)) { classification <- truth truth <- NULL } } if(!is.null(classification)) { classification <- as.factor(classification) U <- levels(classification) L <- nlevels(classification) noise <- (U[1] == "0") if(is.null(symbols)) { if(L <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols")[1:L] if(noise) { symbols <- c(16,symbols)[1:L] } } else if(L <= 9) { symbols <- as.character(1:9) } else if(L <= 26) { symbols <- LETTERS } } else if(length(symbols) == 1) symbols <- rep(symbols, L) if(is.null(colors)) { if(L <= length(mclust.options("classPlotColors"))) { colors <- mclust.options("classPlotColors")[1:L] if(noise) { colors <- unique(c("black", colors))[1:L] } } } else if(length(colors) == 1) colors <- rep(colors, L) if(length(symbols) < L) { warning("more symbols needed to show classification ") symbols <- rep(16,L) } if(length(colors) < L) { warning("more colors needed to show classification ") colors <- rep("black",L) } } if(length(what) > 1) what <- what[1] choices <- c("classification", "error", "uncertainty") m <- charmatch(what, choices, nomatch = 0) if(m) { what <- choices[m] bad <- what == "classification" && is.null(classification) bad <- bad || (what == "uncertainty" && is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null( truth))) if(bad) 
warning("insufficient input for specified plot") badClass <- (what == "error" && (length(unique(classification)) != length( unique(truth)))) if(badClass && !bad) warning("classification and truth differ in number of groups") bad <- bad && badClass } else { bad <- !m warning("what improperly specified") } if(bad) what <- "bad" switch(EXPR = what, "classification" = { plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Classification") title(main = TITLE) } for(k in 1:L) { I <- classification == U[k] points(data[I, 1], data[I, 2], pch = symbols[k], col = colors[k], cex = if(U[k] == "0") cex/3 else cex) } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Errors") title(main = TITLE) } CLASSES <- levels(truth) symOpen <- symb2open(mclust.options("classPlotSymbols")) symFill <- symb2fill(mclust.options("classPlotSymbols")) good <- rep(TRUE, length(classification)) good[ERRORS] <- FALSE if(L > length(symOpen)) { points(data[good, 1], data[good, 2], pch = 1, col = colors, cex = cex) points(data[!good, 1], data[!good, 2], pch = 16, cex = cex) } else { for(k in 1:L) { K <- truth == CLASSES[k] if(any(I <- (K & good))) { points(data[I, 1], data[I, 2], pch = symOpen[k], col = colors[k], cex = cex) } if(any(I <- (K & !good))) { points(data[I, 1], data[I, 2], cex = cex, pch = symFill[k], col = "black", bg = "black") } } } }, "uncertainty" = { u <- (uncertainty - min(uncertainty)) / (max(uncertainty) - min(uncertainty) + sqrt(.Machine$double.eps)) b <- bubble(u, cex = cex * c(0.3, 2), alpha = c(0.3, 0.9)) cl <- sapply(classification, function(cl) which(cl == U)) plot(data[, 1], data[, 2], pch = 19, main = "", xlab = xlab, 
ylab = ylab, xlim = xlim, ylim = ylim,
              cex = b$cex,
              col = mapply(adjustcolor, col = colors[cl], alpha.f = b$alpha),
              ...)
         if(main)
         { TITLE <- paste(paste(dimens, collapse = ","),
                          "Coordinate Projection showing Uncertainty")
           title(main = TITLE) }
         # bubble size/alpha already encode uncertainty, so ellipses are
         # never filled in this panel
         fillEllipses <- FALSE
       },
       { # default panel: plain point plot of the projected data
         plot(data[, 1], data[, 2], type = "n",
              xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim,
              main = "", ...)
         if(main)
         { TITLE <- paste(paste(dimens, collapse = ","),
                          "Coordinate Projection")
           title(main = TITLE) }
         points(data[, 1], data[, 2], pch = PCH, cex = cex)
       }
)
if(haveParams && addEllipses)
{ ## plot ellipsoids
  for(g in 1:G)
     mvn2plot(mu = mu[,g], sigma = sigma[,,g], k = 15,
              fillEllipse = fillEllipses,
              col = if(fillEllipses) colors[g] else rep("grey30",3))
}
invisible()
}

# Map plotting symbol codes (0:18) to "open" (unfilled) counterparts;
# used in error plots to draw correctly classified points.
symb2open <- function(x)
{
  symb <- 0:18
  open <- c(0:14,0,1,2,5)
  open[sapply(x, function(x) which(symb == x))]
}

# Map plotting symbol codes (0:18) to "filled" counterparts;
# used in error plots to highlight misclassified points.
symb2fill <- function(x)
{
  symb <- 0:18
  fill <- c(15:17, 3:4, 23, 25, 7:9, 20, 11:18)
  fill[sapply(x, function(x) which(symb == x))]
}

# Plot data (and optionally classification, errors, or uncertainty)
# projected onto random 2D orthogonal bases, one projection per seed.
randProj <- function(data, seeds = NULL, parameters = NULL, z = NULL,
                     classification = NULL, truth = NULL, uncertainty = NULL,
                     what = c("classification", "error", "uncertainty"),
                     quantiles = c(0.75, 0.95),
                     addEllipses = TRUE,
                     fillEllipses = mclust.options("fillEllipses"),
                     symbols = NULL, colors = NULL, scale = FALSE,
                     xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL,
                     cex = 1, PCH = ".", main = FALSE, ...)
{ if(is.null(classification) && !is.null(z)) classification <- map(z) if(is.null(uncertainty) && !is.null(z)) uncertainty <- 1 - apply(z, 1, max) if(!is.null(parameters)) { mu <- parameters$mean L <- ncol(mu) sigma <- parameters$variance$sigma haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && !any(is.na(sigma)) } else haveParams <- FALSE d <- ncol(data) if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } cho <- array(apply(sigma, 3, chol), c(d, d, G)) } if(!is.null(truth)) { truth <- as.factor(truth) if(is.null(classification)) { classification <- truth truth <- NULL } else { if(length(unique(truth)) != length(unique(classification))) truth <- NULL else truth <- as.character(truth) } } if(!is.null(classification)) { classification <- as.factor(classification) U <- levels(classification) L <- nlevels(classification) noise <- (U[1] == "0") if(is.null(symbols)) { if(L <= length(mclust.options("classPlotSymbols"))) { symbols <- mclust.options("classPlotSymbols")[1:L] if(noise) { symbols <- c(16,symbols)[1:L] } } else if(L <= 9) { symbols <- as.character(1:9) } else if(L <= 26) { symbols <- LETTERS } } else if(length(symbols) == 1) symbols <- rep(symbols, L) if(is.null(colors)) { if(L <= length(mclust.options("classPlotColors"))) { colors <- mclust.options("classPlotColors")[1:L] if(noise) colors <- unique(c("black", colors))[1:L] } } else if(length(colors) == 1) colors <- rep(colors, L) if(length(symbols) < L) { warning("more symbols needed to show classification ") symbols <- rep(16,L) } if (length(colors) < L) { warning("more colors needed to show classification ") colors <- rep("black",L) } } if(is.null(xlab)) xlab <- "randProj1" if(is.null(ylab)) ylab <- "randProj2" what <- match.arg(what, choices = eval(formals(randProj)$what)) bad <- what == "classification" 
&& is.null(classification) bad <- bad || (what == "uncertainty" && is.null(uncertainty)) bad <- bad || (what == "error" && (is.null(classification) || is.null(truth))) if(bad) stop("insufficient input for specified plot") main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) nullXlim <- is.null(xlim) nullYlim <- is.null(ylim) if(scale || length(seeds) > 1) { oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) if(scale) par(pty = "s") if(length(seeds) > 1) par(ask = TRUE) } # if not provided get a seed at random if(length(seeds) == 0) { seeds <- as.numeric(Sys.time()) seeds <- (seeds - floor(seeds))*1e8 } for(seed in seeds) { set.seed(seed) # B <- orth2(d) B <- randomOrthogonalMatrix(d, 2) dataProj <- as.matrix(data) %*% B if(dim(dataProj)[2] != 2) stop("need two dimensions") if(nullXlim) xlim <- range(dataProj[,1]) if(nullYlim) ylim <- range(dataProj[,2]) if(scale) { d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2.) } else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } switch(what, "classification" = { plot(dataProj[,1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...) for(k in 1:L) { I <- classification == U[k] points(dataProj[I,1:2], pch = symbols[k], col = colors[k], cex = cex) } if(main) { TITLE <- paste("Random Projection showing Classification: seed = ", seed) title(TITLE) } }, "error" = { ERRORS <- classError(classification, truth)$misclassified plot(dataProj[, 1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...) 
if(main) { TITLE <- paste("Random Projection showing Errors: seed = ", seed) title(TITLE) } CLASSES <- unique(as.character(truth)) symOpen <- c(2, 0, 1, 5) symFill <- c(17, 15, 16, 18) good <- !ERRORS if(L > 4) { points(dataProj[good, 1:2], pch = 1, col = colors, cex = cex) points(dataProj[!good, 1:2], pch = 16, cex = cex) } else { for(k in 1:L) { K <- which(truth == CLASSES[k]) points(dataProj[K, 1:2], pch = symOpen[k], col = colors[k], cex = cex) if(any(I <- intersect(K, ERRORS))) points(dataProj[I,1:2], pch = symFill[k], cex = cex) } } }, "uncertainty" = { plot(dataProj[, 1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = "", ...) if(main) { TITLE <- paste("Random Projection showing Uncertainty: seed = ", seed) title(TITLE) } breaks <- quantile(uncertainty, probs = sort(quantiles)) I <- uncertainty <= breaks[1] points(dataProj[I, 1:2], pch = 16, col = "gray75", cex = 0.5 * cex) I <- uncertainty <= breaks[2] & !I points(dataProj[I, 1:2], pch = 16, col = "gray50", cex = 1 * cex) I <- uncertainty > breaks[2] & !I points(dataProj[I, 1:2], pch = 16, col = "black", cex = 1.5 * cex) fillEllipses <- FALSE }, { plot(dataProj[, 1:2], type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, ...) 
if(main) { TITLE <- paste("Random Projection: seed = ", seed) title(TITLE) } points(dataProj[, 1:2], pch = PCH, cex = cex) } ) muProj <- crossprod(B, mu) sigmaProj <- array(apply(cho, 3, function(R) crossprod(R %*% B)), c(2, 2, G)) if(haveParams && addEllipses) { ## plot ellipsoids for(g in 1:G) mvn2plot(mu = muProj[,g], sigma = sigmaProj[,,g], k = 15, fillEllipse = fillEllipses, col = if(fillEllipses) colors[g] else rep("grey30",3)) } } invisible(list(basis = B, data = dataProj, mu = muProj, sigma = sigmaProj)) } surfacePlot <- function(data, parameters, what = c("density", "uncertainty"), type = c("contour", "hdr", "image", "persp"), transformation = c("none", "log", "sqrt"), grid = 200, nlevels = 11, levels = NULL, prob = c(0.25, 0.5, 0.75), col = gray(0.5), col.palette = function(...) hcl.colors(..., "blues", rev = TRUE), hdr.palette = blue2grey.colors, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, main = FALSE, scale = FALSE, swapAxes = FALSE, verbose = FALSE, ...) { data <- as.matrix(data) if(dim(data)[2] != 2) stop("data must be two dimensional") if(any(type == "level")) type[type == "level"] <- "hdr" # TODO: to be removed at certain point type <- match.arg(type, choices = eval(formals(surfacePlot)$type)) what <- match.arg(what, choices = eval(formals(surfacePlot)$what)) transformation <- match.arg(transformation, choices = eval(formals(surfacePlot)$transformation)) # densNuncer <- function(modelName, data, parameters) { if(is.null(parameters$variance$cholsigma)) { parameters$variance$cholsigma <- parameters$variance$sigma G <- dim(parameters$variance$sigma)[3] for(k in 1:G) parameters$variance$cholsigma[,,k] <- chol(parameters$variance$sigma[,,k]) } cden <- cdensVVV(data = data, parameters = parameters, logarithm = TRUE) pro <- parameters$pro if(!is.null(parameters$Vinv)) pro <- pro[-length(pro)] z <- sweep(cden, MARGIN = 2, FUN = "+", STATS = log(pro)) logden <- apply(z, 1, logsumexp) z <- sweep(z, MARGIN = 1, FUN = "-", STATS = logden) z <- exp(z) 
data.frame(density = exp(logden), uncertainty = 1 - apply(z, 1, max)) } pro <- parameters$pro mu <- parameters$mean sigma <- parameters$variance$sigma haveParams <- (!is.null(mu) && !is.null(sigma) && !is.null(pro) && !any(is.na(mu)) && !any(is.na(sigma)) && !(any(is.na(pro)))) if(haveParams) { G <- ncol(mu) dimpar <- dim(sigma) if(length(dimpar) != 3) { haveParams <- FALSE warning("covariance must be a 3D matrix") } if(G != dimpar[3]) { haveParams <- FALSE warning("means and variance parameters are incompatible") } mu <- array(mu, c(2, G)) sigma <- array(sigma, c(2, 2, G)) } else stop("need parameters to compute density") if(swapAxes) { if(haveParams) { parameters$pro <- pro[2:1] parameters$mean <- mu[2:1,] parameters$variance$sigma <- sigma[2:1, 2:1,] } data <- data[, 2:1] } main <- if(is.null(main) || is.character(main)) FALSE else as.logical(main) if(is.null(xlim)) xlim <- range(data[, 1]) if(is.null(ylim)) ylim <- range(data[, 2]) if(scale) { par(pty = "s") d <- diff(xlim) - diff(ylim) if(d > 0) { ylim <- c(ylim[1] - d/2, ylim[2] + d/2) } else { xlim <- c(xlim[1] + d/2, xlim[2] - d/2) } } dnames <- dimnames(data)[[2]] if(is.null(xlab)) { xlab <- if(is.null(dnames)) "" else dnames[1] } if(is.null(ylab)) { ylab <- if(is.null(dnames)) "" else dnames[2] } if(length(grid) == 1) grid <- c(grid, grid) x <- grid1(n = grid[1], range = xlim, edge = TRUE) y <- grid1(n = grid[2], range = ylim, edge = TRUE) xy <- grid2(x, y) if(verbose) message("computing density and uncertainty over grid ...") Z <- densNuncer(modelName = "VVV", data = xy, parameters = parameters) lx <- length(x) ly <- length(y) # switch(what, "density" = { zz <- matrix(Z$density, lx, ly) title2 <- "Density" }, "uncertainty" = { zz <- matrix(Z$uncertainty, lx, ly) title2 <- "Uncertainty" }, stop("what improperly specified")) # switch(transformation, "none" = { title1 <- "" }, "log" = { zz <- log(zz) title1 <- "log" }, "sqrt" = { zz <- sqrt(zz) title1 <- "sqrt" }, stop("transformation improperly 
specified")) # switch(type, "contour" = { title3 <- "Contour" if(is.null(levels)) levels <- pretty(zz, nlevels) contour(x = x, y = y, z = zz, levels = levels, xlab = xlab, ylab = ylab, col = col, main = "", ...) }, "hdr" = { title3 <- "HDR level" z <- densNuncer(modelName = "VVV", data = data, parameters = parameters)$density levels <- c(sort(hdrlevels(z, prob)), 1.1*max(z)) plot(x, y, type = "n", xlab = xlab, ylab = ylab, ...) fargs <- formals(".filled.contour") dargs <- c(list(x = x, y = y, z = zz, levels = levels, col = hdr.palette(length(levels))), args) dargs <- dargs[names(dargs) %in% names(fargs)] fargs[names(dargs)] <- dargs do.call(".filled.contour", fargs) }, "image" = { title3 <- "Image" col <- col.palette(nlevels) if(length(col) == 1) { if(!is.null(levels)) nlevels <- length(levels) col <- mapply(adjustcolor, col = col, alpha.f = seq(0.1, 1, length = nlevels)) } image(x = x, y = y, z = zz, xlab = xlab, ylab = ylab, col = col, main = "", ...) }, "persp" = { title3 <- "Perspective" dots <- list(...) if(is.null(dots$theta)) dots$theta <- -30 if(is.null(dots$phi)) dots$phi <- 20 if(is.null(dots$expand)) dots$expand <- 0.6 p3d <- do.call("persp", c(list(x = x, y = y, z = zz, border = NA, xlab = xlab, ylab = ylab, col = adjustcolor(col, alpha.f = 0.5), zlab = "Density", main = ""), dots)) ii <- floor(seq(1, length(y), length.out = 2*nlevels)) for(i in ii[-c(1,length(ii))]) lines(trans3d(x, y[i], zz[,i], pmat = p3d)) ii <- floor(seq(1, length(x), length.out = 2*nlevels)) for(i in ii[-c(1,length(ii))]) lines(trans3d(x[i], y, zz[i,], pmat = p3d)) } ) if(main) { TITLE <- paste(c(title1, title2, title3, "Plot"), collapse = " ") title(TITLE) } invisible(list(x = x, y = y, z = zz)) } uncerPlot <- function (z, truth=NULL, ...) 
{
  # uncerPlot body: plot observation-specific uncertainties (1 - max
  # posterior probability) in increasing order; if `truth` is supplied,
  # misclassified observations are marked with vertical lines.
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  par(pty = "m")
  uncer <- 1 - apply(z, 1, max)
  ord <- order(uncer)
  M <- max(uncer)
  plot(uncer[ord], type = "n", xaxt = "n",
       ylim = c(-(M/32), M), ylab = "uncertainty",
       xlab = "observations in order of increasing uncertainty")
  points(uncer[ord], pch = 15, cex = 0.5)
  lines(uncer[ord])
  abline(h = c(0, 0), lty = 3)
  if (!is.null(truth))
  {
    truth <- as.numeric(as.factor(truth))
    n <- length(truth)
    result <- map(z)
    bad <- classError(result, truth)$misclassified
    if(length(bad))
    {
      for(i in bad)
      {
        # position of misclassified observation i in the ordering
        x <- (1:n)[ord == i]
        lines(c(x, x), c(-(0.5/32), uncer[i]), lty = 1)
      }
    }
  }
  invisible()
}

# Sequential blue-to-grey palette of n colors, interpolated in Lab space.
blue2grey.colors <- function(n)
{
  # manually selected
  basecol <- c("#E6E6E6", "#bcc9d1", "#6c7f97", "#3e5264")
  # selected using colorspace::sequential_hcl(5, palette = "blues2")
  # basecol <- c("#023FA5", "#6A76B2", "#A1A6C8", "#CBCDD9", "#E2E2E2")
  palette <- grDevices::colorRampPalette(basecol, space = "Lab")
  palette(n)
}

# Rescale x into point sizes (cex) and transparencies (alpha) for bubble
# plots: bubble *area* (via sqrt of x) maps to the cex range, the raw
# (rescaled) value maps to the alpha range.
bubble <- function(x, cex = c(0.2, 3), alpha = c(0.1, 1))
{
  x <- as.vector(x)
  cex <- cex[!is.na(cex)]
  alpha <- alpha[!is.na(alpha)]
  # rescale to [0,1]; epsilon guards the degenerate constant-x case
  x <- (x - min(x))/(max(x) - min(x) + sqrt(.Machine$double.eps))
  n <- length(x)
  r <- sqrt(x/pi)
  r <- (r - min(r, na.rm = TRUE))/
       (max(r, na.rm = TRUE) - min(r, na.rm = TRUE) + sqrt(.Machine$double.eps))
  cex <- r * diff(range(cex)) + min(cex)
  alpha <- x * diff(range(alpha)) + min(alpha)
  return(list(cex = cex, alpha = alpha))
}

# Grid of n equally-spaced points over `range`; if edge = TRUE endpoints
# are included, otherwise points are midpoints of n equal-width bins.
grid1 <- function(n, range = c(0, 1), edge = TRUE)
{
  if(any(n < 0 | round(n) != n))
    # fixed: message previously said "nonpositive", contradicting the
    # check above which rejects negative or non-integer n
    stop("n must be nonnegative and integer")
  if(edge)
  {
    G <- seq(from = min(range), to = max(range),
             by = abs(diff(range))/(n-1))
  }
  else
  {
    lj <- abs(diff(range))
    incr <- lj/(2 * n)
    G <- seq(from = min(range) + incr, to = max(range) - incr,
             by = 2 * incr)
  }
  return(G)
}

# All (x[i], y[j]) pairs as rows of a 2-column matrix, x varying fastest.
grid2 <- function(x, y)
{
  lx <- length(x)
  ly <- length(y)
  xy <- matrix(0, nrow = lx * ly, ncol = 2)
  l <- 0
  for(j in 1:ly)
  {
    for(i in 1:lx)
    {
      l <- l + 1
      xy[l,] <- c(x[i], y[j])
    }
  }
  return(xy)
}

vpoints <- function(x,
y, col, cex = 1, ...)
{
  # Draw rectangle symbols at (x, y); size scales with cex, color
  # defaults to the current plotting color.
  xy <- xy.coords(x, y)
  symbols(xy$x, xy$y, add = TRUE, inches = 0.2*cex,
          fg = if(missing(col)) par("col") else col,
          rectangles = matrix(c(0,1), nrow = length(xy$x), ncol = 2, byrow = TRUE),
          ...)
}

# Discriminant coordinates / crimcoords -------------------------------

# Compute discriminant coordinates (crimcoords): directions maximizing
# the between-groups covariance relative to the pooled within-groups
# covariance, via a generalized eigendecomposition.
crimcoords <- function(data, classification,
                       numdir = NULL, unbiased = FALSE, ...)
{
  X <- as.matrix(data)
  n <- nrow(X)
  p <- ncol(X)
  classification <- as.vector(classification)
  stopifnot(length(classification) == n)
  Z <- unmap(classification)   # (n x G) 0/1 class-indicator matrix
  G <- ncol(Z)
  nk <- colSums(Z)             # group sizes
  # overall mean
  M <- matrix(apply(X,2,mean), n, p, byrow=TRUE)
  # within-group means
  Mk <- sweep(crossprod(Z, X), 1, FUN = "/", STATS = nk)
  ZMk <- Z %*% Mk
  # pooled within-groups covar
  W <- crossprod(X - ZMk)
  W <- if(unbiased) W/(n-G) else W/n
  # between-groups covar
  B <- crossprod(ZMk - M)
  B <- if(unbiased) B/(G-1) else B/G
  # manova identity: (n-1)*var(X) =
  # if(unbiased) (n-G)*W+(G-1)*B else n*W+G*B
  # generalized eigendecomposition of B with respect to W
  SVD <- eigen.decomp(B, W)
  l <- SVD$l; l <- (l+abs(l))/2   # clamp small negative eigenvalues to 0
  if(is.null(numdir)) numdir <- sum(l > sqrt(.Machine$double.eps))
  numdir <- min(p, numdir)
  basis <- as.matrix(SVD$v)[,seq(numdir),drop=FALSE]
  dimnames(basis) <- list(colnames(X),
                          paste("crimcoords", 1:numdir, sep=""))
  proj <- X %*% basis
  # flip signs so each projected coordinate has a positive median
  s <- sign(apply(proj,2,median))
  proj <- sweep(proj, 2, FUN = "*", STATS = s)
  basis <- sweep(basis, 2, FUN = "*", STATS = s)
  obj <- list(means = Mk, B = B, W = W,
              evalues = l, basis = basis,
              projection = proj,
              classification = classification)
  class(obj) <- "crimcoords"
  invisible(obj)
}

# Compact structure printout of a 'crimcoords' object.
print.crimcoords <- function(x, digits = getOption("digits"), ...)
{
  cat("\'", class(x)[1], "\' object:\n", sep = "")
  str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap")
  invisible()
}

# Plot a 'crimcoords' object: scatterplot matrix when two or more
# directions are available, overlaid per-class histograms otherwise.
plot.crimcoords <- function(x, ...)
{
  object <- x # Argh.  Really want to use object anyway
  numdir <- ncol(object$projection)
  G <- length(unique(object$classification))
  if(numdir >= 2)
  {
    clPairs(object$projection, object$classification, ...)
  } else
  {
    args <- list(...)
    # accept either `colors` or `col` from ..., else use package default
    cols <- if(!is.null(args$colors)) args$colors
            else if(!is.null(args$col)) args$col
            else mclust.options("classPlotColors")
    cols <- adjustcolor(cols, alpha.f = 0.5)[1:G]
    br <- pretty(object$projection,
                 n = nclass.Sturges(object$projection))
    x <- split(object$projection[,1], object$classification)
    hist(x[[1]], breaks = br, probability = TRUE,
         xlim = extendrange(br), main = NULL,
         xlab = colnames(object$projection),
         border = FALSE, col = cols[1])
    rug(x[[1]], col = cols[1])
    for(k in seq_len(G-1)+1)
    {
      lines(hist(x[[k]], breaks = br, plot = FALSE),
            freq = FALSE, border = FALSE, col = cols[k], ann = FALSE)
      rug(x[[k]], col = cols[k])
    }
    box()
  }
  invisible()
}

# Summarize basis vectors, eigenvalues, and cumulative percentages of a
# 'crimcoords' object, keeping the leading `numdir` directions.
summary.crimcoords <- function(object, numdir, ...)
{
  if(missing(numdir))
    numdir <- sum(object$evalues > sqrt(.Machine$double.eps))
  dim <- seq(numdir)
  obj <- list(basis = object$basis[,seq(dim),drop=FALSE],
              evalues = object$evalues[seq(dim)],
              evalues.cumperc = with(object,
                                     { evalues <- evalues[seq(numdir)]
                                       cumsum(evalues)/sum(evalues)*100 }) )
  class(obj) <- "summary.crimcoords"
  return(obj)
}

print.summary.crimcoords <- function(x, digits = max(5, getOption("digits") - 3), ...)
{
  title <- paste("Discriminant coordinates (crimcoords)")
  txt <- paste(rep("-", min(nchar(title), getOption("width"))), collapse = "")
  catwrap(txt)
  catwrap(title)
  catwrap(txt)
  cat("\n")
  catwrap("Estimated basis vectors:")
  print(x$basis, digits = digits)
  cat("\n")
  evalues <- rbind("Eigenvalues" = x$evalues,
                   "Cum. %" = x$evalues.cumperc)
  colnames(evalues) <- colnames(x$basis)
  print(evalues, digits = digits)
  invisible()
}
mclust/R/icl.R0000644000176200001440000000625013656733162012673 0ustar liggesusers##
## Integrated Complete-data Likelihood (ICL) Criterion
##

# Generic for the Integrated Complete-data Likelihood criterion.
icl <- function(object, ...) UseMethod("icl")

icl.Mclust <- function(object, ...)
{
  # ICL for a 'Mclust' fit: BIC plus twice the summed log posterior
  # probability of each observation's MAP class (an entropy-type penalty;
  # the added term is non-positive, so ICL <= BIC).
  n <- object$n
  # G <- object$G + ifelse(is.na(object$hypvol),0,1)
  z <- object$z
  if(is.null(z))
    z <- matrix(1, nrow = n, ncol = 1)
  # C = hard 0/1 classification matrix: a single 1 per row at the MAP class
  C <- matrix(0, n, ncol(z))
  for(i in 1:n)
    C[i, which.max(z[i,])] <- 1
  object$bic + 2*sum(C * ifelse(z > 0, log(z), 0))
}

# ICL for a 'MclustDA' fit, using posterior probabilities from predict().
icl.MclustDA <- function(object, ...)
{
  n <- object$n
  z <- predict(object)$z
  # (removed an unused local `df <- object$df` -- never referenced below)
  if(is.null(z))
    z <- matrix(1, nrow = n, ncol = 1)
  C <- matrix(0, n, ncol(z))
  for(i in 1:n)
    C[i, which.max(z[i,])] <- 1
  object$bic + 2*sum(C * ifelse(z > 0, log(z), 0))
}

# Fit mclustBIC() over the requested models/components, then convert each
# fitted model's BIC to ICL.  `x` may supply a previous result whose
# already-computed cells are reused rather than refitted.
mclustICL <- function(data, G = NULL, modelNames = NULL,
                      initialization = list(hcPairs=NULL, subset=NULL, noise=NULL),
                      x = NULL, ...)
{
  call <- match.call()
  data <- data.matrix(data)
  n <- nrow(data)
  d <- ncol(data)
  # delegate the model fitting to mclustBIC() with the same arguments
  mc <- match.call(expand.dots = TRUE)
  mc[[1]] <- as.name("mclustBIC")
  mc[[2]] <- data
  BIC <- eval(mc, parent.frame())
  class(BIC) <- "mclustBIC"
  G <- attr(BIC, "G")
  modelNames <- attr(BIC, "modelNames")
  ICL <- matrix(NA, nrow = length(G), ncol = length(modelNames))
  mostattributes(ICL) <- attributes(BIC)
  if(!is.null(x))
  {
    r <- match(as.character(G), rownames(x), nomatch = 0)
    c <- match(modelNames, colnames(x), nomatch = 0)
    # NOTE(review): this copies BIC values into ICL cells so they are
    # skipped by the loop below; reusing the previously computed ICL
    # values from `x` (i.e. x[r,c]) looks like the intent here -- verify
    # against the reference implementation before changing.
    ICL[r,c] <- BIC[r,c]
  }
  for(i in 1:nrow(ICL))
  {
    for(j in 1:ncol(ICL))
    {
      if(is.na(BIC[i,j]))
        next()  # not fitted
      if(!is.na(ICL[i,j]))
        next()  # already available
      Sumry <- summary(BIC, data, G = G[i], modelNames = modelNames[j])
      ICL[i,j] <- icl.Mclust(Sumry)
    }
  }
  class(ICL) <- "mclustICL"
  attr(ICL, "criterion") <- "ICL"
  return(ICL)
}

print.mclustICL <- function (x, pick = 3, ...)
{ subset <- !is.null(attr(x, "subset")) oldClass(x) <- attr(x, "args") <- NULL attr(x, "criterion") <- NULL attr(x, "control") <- attr(x, "initialization") <- NULL attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL ret <- attr(x, "returnCodes") == -3 n <- attr(x, "n") d <- attr(x, "d") attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL oldClass(x) <- attr(x, "args") <- attr(x, "criterion") <- NULL catwrap("Integrated Complete-data Likelihood (ICL) criterion:") print(x, ...) cat("\n") catwrap(paste("Top", pick, "models based on the ICL criterion:")) print(pickBIC(x, pick), ...) invisible() } summary.mclustICL <- function(object, G, modelNames, ...) { if(!missing(G)) object <- object[rownames(object) %in% G,,drop=FALSE] if(!missing(modelNames)) object <- object[,colnames(object) %in% modelNames,drop=FALSE] structure(pickBIC(object, ...), class = "summary.mclustICL") } print.summary.mclustICL <- function(x, digits = getOption("digits"), ...) { cat("Best ICL values:\n") x <- drop(as.matrix(x)) x <- rbind(ICL = x, "ICL diff" = x - max(x)) print(x, digits = digits) invisible() } plot.mclustICL <- function(x, ylab = "ICL", ...) { plot.mclustBIC(x, ylab = ylab, ...) } mclust/R/mbahc.R0000644000176200001440000005622014524764147013202 0ustar liggesusers## ## Model-based Agglomerative Hierarchical Clustering (MBAHC) ## # MBAHC used for EM initialization for d-dim data ---- hc <- function(data, modelName = "VVV", use = "VARS", partition = dupPartition(data), minclus = 1, ...) { if(!any(modelName == c("E", "V", "EII", "VII", "EEE", "VVV"))) stop("invalid 'modelName' argument for model-based hierarchical clustering. See help(mclust.options)") if(!any(use == c("VARS", "STD", "SPH", "PCS", "PCR", "SVD", "RND"))) stop("invalid 'use' argument for model-based hierarchical clustering. 
See help(mclust.options)")
  funcName <- paste("hc", modelName, sep = "")
  mc <- match.call(expand.dots = TRUE)
  mc$use <- mc$modelName <- NULL
  data <- data.matrix(data)
  dropCols <- function(x)
  {
    # select only those columns of matrix x with all finite numerical values
    x[,apply(x, 2, function(x) all(is.finite(x))), drop = FALSE]
  }
  use <- toupper(use[1])
  # transform the data into Z according to `use` before clustering
  switch(use,
         "VARS" = {
           # raw variables, untransformed
           Z <- data
         },
         "STD" = {
           # variables standardized to zero mean / unit variance
           Z <- scale(data, center = TRUE, scale = TRUE)
           Z <- dropCols(Z)
         },
         "PCR" = {
           # principal components of the standardized data
           data <- scale(data, center = TRUE, scale = TRUE)
           data <- dropCols(data)
           SVD <- svd(data, nu=0)
           # evalues <- sqrt(SVD$d^2/(nrow(data)-1))
           Z <- data %*% SVD$v
         },
         "PCS" = {
           # principal components of the centered (unscaled) data
           data <- scale(data, center = TRUE, scale = FALSE)
           SVD <- svd(data, nu=0)
           # evalues <- sqrt(SVD$d^2/(nrow(data)-1))
           Z <- data %*% SVD$v
           Z <- dropCols(Z)
         },
         "SPH" = {
           # sphered data: rotate and rescale by inverse sqrt eigenvalues
           # of the ML covariance estimate
           data <- scale(data, center = TRUE, scale = FALSE)
           n <- nrow(data); p <- ncol(data)
           Sigma <- var(data) * (n - 1)/n
           SVD <- svd(Sigma, nu = 0)
           Z <- data %*% SVD$v %*% diag(1/sqrt(SVD$d), p, p)
           Z <- dropCols(Z)
         },
         "SVD" = {
           # SVD projection of standardized data, scaled by inverse sqrt
           # singular values
           data <- scale(data, center = TRUE, scale = TRUE)
           data <- dropCols(data)
           p <- min(dim(data))
           SVD <- svd(data, nu=0)
           Z <- data %*% SVD$v %*% diag(1/sqrt(SVD$d), p, p)
         },
         "RND" = {
           # random pairing: no transformation, returns immediately
           out <- hcRandomPairs(data, ...)
           attr(out, "dimensions") <- dim(data)
           attr(out, "use") <- use
           attr(out, "call") <- match.call()
           class(out) <- "hc"
           return(out)
         }
  )
  # call the proper hc function (hcE, hcVVV, ...) on the transformed data
  mc$data <- Z
  mc[[1]] <- as.name(funcName)
  out <- eval(mc, parent.frame())
  attr(out, "use") <- use
  attr(out, "call") <- match.call()
  attr(out, "data") <- mc$data
  class(out) <- "hc"
  return(out)
}

print.hc <- function(x, ...)
{ if(!is.null(attr(x, "call"))) { cat("Call:\n") catwrap(paste0(deparse(attr(x, "call")))) cat("\n") } catwrap("Model-Based Agglomerative Hierarchical Clustering") if(!is.null(attr(x, "modelName"))) cat(paste("Model name =", attr(x, "modelName"), "\n")) if(!is.null(attr(x, "use"))) cat(paste("Use =", attr(x, "use"), "\n")) if(!is.null(attr(x, "dimensions"))) cat(paste("Number of objects =", attr(x, "dimensions")[1], "\n")) invisible(x) } randomPairs <- function(...) { .Deprecated(old = "randomPairs", new = "hcRandomPairs", package = "mclust") hcRandomPairs(...) } hcRandomPairs <- function(data, seed = NULL, ...) { if(!is.null(seed)) set.seed(seed) data <- as.matrix(data) n <- nrow(data) m <- if(n%%2 == 1) n-1 else n tree <- matrix(sample(1:n, m, replace = FALSE), nrow = 2, ncol = ceiling(m/2)) tree <- apply(tree, 2, sort) ind <- unique(tree[1,]) while(ncol(tree) < (m-1)) { addtree <- sort(sample(ind, size = 2, replace = FALSE)) ind <- setdiff(ind, addtree[2]) tree <- cbind(tree, addtree) } dimnames(tree) <- NULL structure(tree, initialPartition = 1:n, dimensions = c(n,2)) } dupPartition <- function(data) { dup <- duplicated(data) if (is.null(dim(data))) { data <- as.numeric(data) if (!any(dup)) return(1:length(data)) kmeans(data, centers = data[!dup])$cluster } else { data <- data.matrix(data) if (!any(dup)) return(1:nrow(data)) kmeans(data, centers = data[!dup,])$cluster } } hclass <- function(hcPairs, G) { initial <- attributes(hcPairs)$initialPartition n <- length(initial) k <- length(unique(initial)) G <- if(missing(G)) k:2 else rev(sort(unique(G))) select <- k - G if(length(select) == 1 && !select) return(matrix(initial, ncol = 1, dimnames = list(NULL, as.character(G)))) bad <- select < 0 | select >= k if(all(bad)) stop("No classification with the specified number of clusters") if(any(bad) & mclust.options("warn")) { warning("Some selected classifications are inconsistent with mclust object") } L <- length(select) cl <- matrix(as.double(NA), nrow = n, ncol = 
L, dimnames = list(NULL, as.character(G))) if(select[1]) m <- 1 else { cl[, 1] <- initial m <- 2 } for(l in 1:max(select)) { ij <- hcPairs[, l] i <- min(ij) j <- max(ij) initial[initial == j] <- i if(select[m] == l) { cl[, m] <- initial m <- m + 1 } } apply(cl[, L:1, drop = FALSE], 2, partconv, consec = TRUE) } hcEII <- function(data, partition = NULL, minclus = 1, ...) { if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #==================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) { stop("initial number of clusters is not greater than minclus") } if(n <= p & mclust.options("warn")) { warning("# of observations <= data dimension") } #============================================================= storage.mode(data) <- "double" ld <- max(c((l * (l - 1))/2, 3 * m)) temp <- .Fortran("hceii", data, as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), double(p), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 9)] temp[[1]] <- temp[[1]][1:m, 1:2, drop = FALSE] temp[[2]] <- temp[[2]][1:m] structure(t(temp[[1]]), initialPartition = partition, dimensions = dimdat, modelName = "EII", call = match.call()) } hcEEE <- function(data, partition = NULL, minclus = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #===================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(n <= p & mclust.options("warn")) warning("# of observations <= data dimension") if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ## R 2.12.0: 32 bit Windows build fails due to compiler bug ## workaround: removal (hopefully temporary) of hc functionality for EEE # Luca: commented the next line and uncommented below # stop("hc for EEE model is not currently supported") temp <- .Fortran("hceee", data, as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), if(p < 3) integer(m) else integer(1), if(p < 4) integer(m) else integer(1), double(p), double(p * p), double(p * p), double(p * p), PACKAGE = "mclust")[c(1, 7:10)] # # currently temp[[5]] is not output temp[[4]] <- temp[[4]][1:2] temp[[5]] <- temp[[5]][1:2] names(temp[[5]]) <- c("determinant", "trace") temp[[1]] <- temp[[1]][1:(m + 1), ] if(p < 3) tree <- rbind(temp[[2]], temp[[3]]) else if(p < 4) tree <- rbind(temp[[1]][-1, 3], temp[[3]]) else tree <- t(temp[[1]][-1, 3:4, drop = FALSE]) determinant <- temp[[1]][, 1] attr(determinant, "breakpoints") <- temp[[4]] trace <- temp[[1]][, 2] structure(tree, initialPartition = partition, dimensions = dimdat, modelName = "EEE", call = match.call()) } hcVII <- function(data, partition = NULL, minclus = 1, alpha = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #===================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(n <= p & mclust.options("warn")) warning("# of observations <= data dimension") if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ll <- (l * (l - 1))/2 ld <- max(n, ll, 3 * m) alpha <- alpha * traceW(data/sqrt(n * p)) alpha <- max(alpha, .Machine$double.eps) temp <- .Fortran("hcvii", data, as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), as.double(alpha), double(p), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 10)] temp[[1]] <- temp[[1]][1:m, 1:2, drop = FALSE] temp[[2]] <- temp[[2]][1:m] structure(t(temp[[1]]), initialPartition = partition, dimensions = dimdat, modelName = "VII", call = match.call()) } hcVVV <- function(data, partition = NULL, minclus = 1, alpha = 1, beta = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) #if(oneD || length(dimdat) > 2) # stop("data should in the form of a matrix") data <- as.matrix(data) dimnames(data) <- NULL n <- nrow(data) p <- ncol(data) if(n <= p & mclust.options("warn")) warning("# of observations <= data dimension") if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ll <- (l * (l - 1))/2 # dp <- duplicated(partition) #x[c((1:n)[!dp],(1:n)[dp]),], #as.integer(c(partition[!dp], partition[dp])), ld <- max(n, ll + 1, 3 * m) alpha <- alpha * traceW(data/sqrt(n * p)) alpha <- max(alpha, .Machine$double.eps) temp <- .Fortran("hcvvv", cbind(data, 0.), as.integer(n), as.integer(p), as.integer(partition), as.integer(l), as.integer(m), as.double(alpha), as.double(beta), double(p), double(p * p), double(p * p), double(p * p), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 14)] temp[[1]] <- temp[[1]][1:m, 1:2, drop = FALSE] temp[[2]] <- temp[[2]][1:m] structure(t(temp[[1]]), initialPartition = partition, dimensions = dimdat, modelName = "VVV", call = match.call()) } ## ## Plot method (dendrogram) for model-based hierarchical agglomeration ---- ## plot.hc <- function(x, what = c("loglik", "merge"), maxG = NULL, labels = FALSE, hang = 0, ...) 
{ stopifnot(inherits(x, "hc")) what <- what[1] hier <- as.hclust(x, what = what, maxG = maxG, labels = labels) switch(what, "loglik" = { ylab <- paste("Classification log-likelihood", paste("(", hier$method, sep = ""), "model)") cloglik <- attr(hier,"cloglik") attr(hier,"cloglik") <- NULL plot( as.dendrogram(hier, hang=hang), axes=FALSE, ylab=ylab) r <- range(cloglik, na.rm=TRUE) par.usr <- par("usr") ybot <- max(r)-par.usr[3] ytop <- min(r)+par.usr[3] }, "merge" = { ylab <- paste("Number of Clusters", paste("(", hier$method, sep = ""), "model)") nclus <- attr(hier,"nclus") attr(hier,"nclus") <- NULL plot( as.dendrogram(hier, hang=hang), axes=FALSE, ylab=ylab) par.usr <- par("usr") ybot<- max(nclus)-par.usr[3] ytop <- 1+par.usr[3] }, stop("unrecognized what option")) par(usr=c(par("usr")[1:2],ybot,ytop)) at <- pretty(seq(from=ybot,to=ytop,length=100), min = 5, max = 10) axis(2, at=at) invisible(hier) } as.hclust.hc <- function(x, ...) { stopifnot(inherits(x, "hc")) hc2hclust(x, ...) } hc2hclust <- function(object, what = c("loglik", "merge"), maxG = NULL, labels = FALSE) { stopifnot(inherits(object, "hc")) if (!is.null(maxG) && maxG < 2) stop("maxG < 2") what <- what[1] switch(what, "loglik" = { obj <- ldend(object, maxG=maxG, labels) obj <- c(obj, list(dist.method = NULL)) attr(obj,"cloglik") <- as.vector(obj$cloglik) obj$cloglik <- NULL class(obj) <- "hclust" obj }, "merge" = { obj <- mdend(object, maxG=maxG, labels) obj <- c(obj, list(dist.method = NULL)) attr(obj,"nclus") <- as.vector(obj$nclus) obj$nclus <- NULL class(obj) <- "hclust" obj }, stop("unrecognized what option") ) } ldend <- function (hcObj, maxG = NULL, labels = FALSE) { # classification log-likelihood dendrogram setup for MBAHC stopifnot(inherits(hcObj,"hc")) if(!is.null(maxG) && maxG < 2) stop("maxG < 2") n <- ncol(hcObj) + 1 cLoglik <- CLL <- cloglik.hc(hcObj) maxG <- if (is.null(maxG)) length(CLL) else min(maxG,length(CLL)) na <- is.na(CLL) m <- length(CLL) d <- diff(CLL) if (any(neg <- 
d[!is.na(d)] < 0)) { m <- which(neg)[1] CLLmax <- CLL[min(maxG,m)] CLL[-(1:min(maxG,m))] <- CLLmax } else if (any(na)) { m <- which(na)[1] - 1 CLLmax <- CLL[min(maxG,m)] CLL[-(1:min(maxG,m))] <- CLLmax } else { CLLmax <- max(CLL[1:maxG]) CLL[-(1:maxG)] <- CLLmax } height <- CLL height <- height[-length(height)] height <- rev(-height+max(height)) mo <- mergeOrder(hcObj) nam <- rownames(as.matrix(attr(hcObj,"data"))) leafLabels <- if (labels) nam else character(length(nam)) obj <- structure(list(merge = mo$merge, height = height, order = mo$order, labels = leafLabels, cloglik = cLoglik, method = attr(hcObj, "model"), call = attr(hcObj, "call"))) return(obj) } mdend <- function (hcObj, maxG = NULL, labels = FALSE) { # uniform height dendrgram setup for MBAHC stopifnot(inherits(hcObj,"hc")) if(!is.null(maxG) && maxG < 2) stop("maxG < 2") ni <- length(unique(attr(hcObj,"initialPartition"))) maxG <- if (!is.null(maxG)) min(maxG, ni) else ni mo <- mergeOrder(hcObj) j <- ni - maxG n <- ncol(hcObj) height <- c(rep(0,j),1:(n-j)) nclus <- maxG:1 nam <- rownames(as.matrix(attr(hcObj,"data"))) leafLabels <- if (labels) nam else character(length(nam)) obj <- structure(list(merge = mo$merge, order = mo$order, height = height, labels = leafLabels, nclus = nclus, method = attr(hcObj, "model"), call = attr(hcObj, "call"))) return(obj) } mergeOrder <- function(hcObj) { # converts the hc representation of merges to conform with hclust # and computes the corresponding dendrogram leaf order # CF: inner code written by Luca Scrucca HC <- matrix(as.vector(hcObj), ncol(hcObj), nrow(hcObj), byrow = TRUE) HCm <- matrix(NA, nrow(HC), ncol(HC)) merged <- list(as.vector(HC[1, ])) HCm[1, ] <- -HC[1, ] for (i in 2:nrow(HC)) { lmerged <- lapply(merged, function(m) HC[i, ] %in% m) lm <- which(sapply(lmerged, function(lm) any(lm))) if (length(lm) == 0) { merged <- append(merged, list(HC[i, ])) HCm[i, ] <- sort(-HC[i, ]) } else if (length(lm) == 1) { merged <- append(merged, list(c(merged[[lm]], 
HC[i, !lmerged[[lm]]]))) merged[[lm]] <- list() HCm[i, ] <- sort(c(-HC[i, !lmerged[[lm]]], lm)) } else { merged <- append(merged, list(unlist(merged[lm]))) merged[[lm[1]]] <- merged[[lm[2]]] <- list() HCm[i, ] <- lm } } list(merge = HCm, order = merged[[length(merged)]]) } cloglik.hc <- function(hcObj, maxG = NULL) { n <- ncol(hcObj) + 1 if (is.null(maxG)) maxG <- n cl <- hclass(hcObj) cl <- cbind( "1" = 1, cl) modelName <- attr(hcObj,"modelName") LL <- rep(list(NA),maxG) for (j in 1:maxG) { ll <- NULL for (k in unique(cl[,j])) { i <- which(cl[,j] == k) # compute loglik term here llnew <- mvn( modelName, attr(hcObj,"data")[i,,drop=FALSE])$loglik if (substr(modelName,2,2) != "I") { llvii <- mvn( "VII", attr(hcObj,"data")[i,,drop=FALSE])$loglik if (substr(modelName,3,3) != "I") { llvvi <- mvn( "VVI", attr(hcObj,"data")[i,,drop=FALSE])$loglik llall <- c("VVV"=llnew,"VVI"=llvvi,"VII"=llvii) } else { llall <- c("VVI"=llnew,"VII"=llvii) } if (!all(nall <- is.na(llall))) { llnew <- llall[!nall][which.max(llall[!nall])] } } if (is.na(llnew)) break ll <- c(ll, llnew) } if (is.na(llnew)) break LL[[j]] <- ll } CLL <- sapply(LL,sum) for (i in seq(along = CLL)) { if (is.na(CLL[i])) LL[[i]] <- NA } attr(CLL,"terms") <- LL return(CLL) } ## Initialization for 1-dim data ---- qclass <- function (x, k) { x <- as.vector(x) # eps <- sqrt(.Machine$double.eps) # numerical accuracy problem if scale of x is large, so make tolerance # scale dependent eps <- sd(x)*sqrt(.Machine$double.eps) q <- NA n <- k while(length(q) < (k+1)) { n <- n + 1 q <- unique(quantile(x, seq(from = 0, to = 1, length = n))) } if(length(q) > (k+1)) { dq <- diff(q) nr <- length(q)-k-1 q <- q[-order(dq)[1:nr]] } q[1] <- min(x) - eps q[length(q)] <- max(x) + eps cl <- rep(0, length(x)) for(i in 1:k) { cl[ x >= q[i] & x < q[i+1] ] <- i } return(cl) } hcE <- function(data, partition = NULL, minclus = 1, ...) 
{ if(minclus < 1) stop("minclus must be positive") if(any(is.na(data))) stop("missing values not allowed in data") #==================================================================== dimdat <- dim(data) oneD <- (is.null(dimdat) || NCOL(data) == 1) if(!oneD) stop("data must be one-dimensional") data <- as.vector(data) n <- length(data) if(is.null(partition)) partition <- 1:n else if(length(partition) != n) stop("partition must assign a class to each observation") partition <- partconv(partition, consec = TRUE) l <- length(unique(partition)) attr(partition, "unique") <- l m <- l - minclus if(m <= 0) stop("initial number of clusters is not greater than minclus") storage.mode(data) <- "double" ld <- max(c((l * (l - 1))/2, 3 * m)) temp <- .Fortran("hc1e", data, as.integer(n), as.integer(partition), as.integer(l), as.integer(m), as.integer(ld), double(ld), PACKAGE = "mclust")[c(1, 3, 7)] temp[[1]] <- temp[[1]][1:m] temp[[2]] <- temp[[2]][1:m] temp[[3]] <- temp[[3]][1:m] structure(rbind(temp[[1]], temp[[2]]), initialPartition = partition, dimensions = n, modelName = "E", call = match.call()) } hcV <- function(data, partition = NULL, minclus = 1, alpha = 1, ...) 
{
  # Model-based hierarchical agglomeration for the one-dimensional
  # unequal-variance Gaussian model ("V").
  # data      : one-dimensional numeric data (vector or n x 1)
  # partition : optional initial classification; defaults to singletons
  # minclus   : number of clusters at which agglomeration stops
  # alpha     : scale of the variance regularization term
  if(minclus < 1) stop("minclus must be positive")
  if(any(is.na(data))) stop("missing values not allowed in data")
  dimdat <- dim(data)
  oneD <- (is.null(dimdat) || NCOL(data) == 1)
  if(!oneD) stop("data must be one-dimensional")
  data <- as.vector(data)
  n <- length(data)
  if(is.null(partition))
    partition <- 1:n
  else if(length(partition) != n)
    stop("partition must assign a class to each observation")
  partition <- partconv(partition, consec = TRUE)
  l <- length(unique(partition))
  attr(partition, "unique") <- l
  # m = number of merge steps to perform
  m <- l - minclus
  if(m <= 0)
    stop("initial number of clusters is not greater than minclus")
  storage.mode(data) <- "double"
  alpha <- alpha * (vecnorm(data - mean(data))^2/n)
  # BUG FIX: ensure alpha is at least machine epsilon. The original used
  # min(), which clamped the regularization DOWN to ~2.2e-16 whenever the
  # data had any spread, effectively disabling it; the multivariate
  # counterparts hcVII and hcVVV use max() here.
  alpha <- max(alpha, .Machine$double.eps)
  # workspace length required by the Fortran routine
  ld <- max(c((l * (l - 1))/2, 3 * m))
  temp <- .Fortran("hc1v",
                   data,
                   as.integer(n),
                   as.integer(partition),
                   as.integer(l),
                   as.integer(m),
                   as.double(alpha),
                   as.integer(ld),
                   double(ld),
                   PACKAGE = "mclust")[c(1, 3, 8)]
  temp[[1]] <- temp[[1]][1:m]
  temp[[2]] <- temp[[2]][1:m]
  temp[[3]] <- temp[[3]][1:m]
  # 2 x m matrix of merge pairs, with attributes used by hclass() et al.
  structure(rbind(temp[[1]], temp[[2]]),
            initialPartition = partition,
            dimensions = n,
            modelName = "V",
            call = match.call())
}
mclust/R/bootstrap.R0000644000176200001440000004277314241626165014136 0ustar liggesusers##
## Resampling methods
##

#
# Bootstrap Likelihood Ratio Test
#

mclustBootstrapLRT <- function(data, modelName = NULL, nboot = 999, level = 0.05,
                               maxG = NULL, verbose = interactive(), ...)
{
  if(is.null(modelName))
    stop("A 'modelName' must be provided. Please see help(mclustModelNames) which describes the available models.")
  modelName <- modelName[1]
  checkModelName(modelName)
  if(grepl("X", modelName))
    stop("Specified 'modelName' is only valid for one-component mixture.")
  # candidate numbers of components: 1..9 by default, else 1..(maxG+1)
  if(is.null(maxG))
    { G <- seq.int(1, 9) }
  else
    { maxG <- as.numeric(maxG)
      G <- seq.int(1, maxG+1) }
  BIC <- mclustBIC(data, G = G, modelNames = modelName,
                   warn = FALSE, verbose = FALSE, ...)
if(!(modelName %in% attr(BIC, "modelNames")))
  stop("'modelName' not compatible with data. Please see help(mclustModelNames) which describes the available models.")
# select only sequential models that can be fit
bic <- BIC[, attr(BIC, "modelNames") == modelName]
# BUG FIX: subset G by the NA pattern of the extracted column 'bic'
# (the original tested is.na(BIC), i.e. the whole BIC matrix; the same
# vector 'bic' is what the is.na() check a few lines below relies on)
G <- G[!is.na(bic)]
if(length(G) == 0)
  stop(paste("no model", modelName, "can be fitted."))
if(all(G == 1))
  { warning("only 1-component model could be fitted. No LRT is performed!")
    return() }
if(sum(is.na(bic)) > 0)
  warning("some model(s) could not be fitted!")
if(verbose)
  { flush.console()
    cat("bootstrapping LRTS ...\n")
    pbar <- txtProgressBar(min = 0, max = (max(G)-1)*nboot, style = 3)
    on.exit(close(pbar)) }
# obsLRTS[g] = observed LRT statistic for g vs g+1 components;
# bootLRTS[b,g] = its b-th bootstrap replicate
obsLRTS <- p.value <- vector("numeric", length = max(G)-1)
bootLRTS <- matrix(as.double(NA), nrow = nboot, ncol = max(G)-1)
g <- 0; continue <- TRUE
while(g < (max(G)-1) & continue)
{ g <- g + 1
  # fit model under H0
  Mod0 <- summary(BIC, data, G = g, modelNames = modelName)
  # fit model under H1
  Mod1 <- summary(BIC, data, G = g+1, modelNames = modelName)
  # observed LRTS
  obsLRTS[g] <- 2*(Mod1$loglik - Mod0$loglik)
  # bootstrap
  b <- 0
  while(b < nboot)
  { b <- b + 1
    # generate 'parametric' bootstrap sample under H0
    bootSample <- sim(Mod0$modelName, Mod0$parameters, n = Mod0$n)
    # fit model under H0
    bootMod0 <- em(data = bootSample[,-1], modelName = Mod0$modelName,
                   parameters = Mod0$parameters, warn = FALSE, ...)
    # fit model under H1
    bootMod1 <- em(data = bootSample[,-1], modelName = Mod1$modelName,
                   parameters = Mod1$parameters, warn = FALSE, ...)
# compute bootstrap LRT LRTS <- 2*(bootMod1$loglik - bootMod0$loglik) if(is.na(LRTS)) { b <- b - 1; next() } bootLRTS[b,g] <- LRTS if(verbose) setTxtProgressBar(pbar, (g-1)*nboot+b) } p.value[g] <- (1 + sum(bootLRTS[,g] >= obsLRTS[g]))/(nboot+1) # check if not-significant when no maxG is provided if(is.null(maxG) & p.value[g] > level) { continue <- FALSE if(verbose) setTxtProgressBar(pbar, (max(G)-1)*nboot) } } out <- list(G = 1:g, modelName = modelName, obs = obsLRTS[1:g], boot = bootLRTS[,1:g,drop=FALSE], p.value = p.value[1:g]) class(out) <- "mclustBootstrapLRT" return(out) } print.mclustBootstrapLRT <- function(x, ...) { txt <- paste(rep("-", min(61, getOption("width"))), collapse = "") catwrap(txt) catwrap("Bootstrap sequential LRT for the number of mixture components") catwrap(txt) cat(formatC("Model", flag = "-", width = 12), "=", x$modelName, "\n") cat(formatC("Replications", flag = "-", width = 12), "=", nrow(x$boot), "\n") df <- data.frame(x$obs, x$p.value) colnames(df) <- c("LRTS", "bootstrap p-value") rownames(df) <- formatC(paste(x$G, "vs", x$G+1), flag = "-", width = 8) print(df, ...) } plot.mclustBootstrapLRT <- function(x, G = 1, hist.col = "grey", hist.border = "lightgrey", breaks = "Scott", col = "forestgreen", lwd = 2, lty = 3, main = NULL, ...) 
{ if(!any(G == x$G)) { warning(paste("bootstrap LRT not available for G =", G)) return() } G <- as.numeric(G)[1] h <- hist(x$boot[,G], breaks = breaks, plot = FALSE) xlim <- range(h$breaks, x$boot[,G], x$obs[G], na.rm = TRUE) xlim <- extendrange(xlim, f = 0.05) plot(h, xlab = "LRTS", freq = FALSE, xlim = xlim, col = hist.col, border = hist.border, main = NULL) box() abline(v = x$obs[G], lty = lty, lwd = lwd, col = col) if(is.null(main) | is.character(main)) { if(is.null(main)) main <- paste("Bootstrap LRT for model", x$modelName, "with", G, "vs", G+1, "components") title(main = main, cex.main = 1) } invisible() } # # Bootstrap inference (standard errors and percentile confidence intervals) # MclustBootstrap <- function(object, nboot = 999, type = c("bs", "wlbs", "pb", "jk"), max.nonfit = 10*nboot, verbose = interactive(), ...) { stopifnot("object must be of class 'Mclust' or 'densityMclust'" = inherits(object, c("Mclust", "densityMclust"))) if(any(type %in% c("nonpara", "wlb"))) { type <- gsub("nonpara", "bs", type) type <- gsub("wlb", "wlbs", type) warning("resampling type converted to \"", type, "\"") } type <- match.arg(type, choices = eval(formals(MclustBootstrap)$type)) # data <- object$data n <- object$n d <- object$d G <- object$G if(type == "jk") nboot <- n varnames <- rownames(object$parameters$mean) # model parameters par <- summary(object)[c("pro", "mean", "variance")] if(d == 1) { par$mean <- array(par$mean, dim = c(d, G)) par$variance <- array(par$variance, dim = c(d, d, G)) } # bootstrapped parameters pro.boot <- array(NA, c(nboot,G), dimnames = list(NULL, seq.int(G))) mean.boot <- array(NA, c(nboot,d,G), dimnames = list(NULL, varnames, seq.int(G))) var.boot <- array(NA, c(nboot,d,d,G), dimnames = list(NULL, varnames, varnames, seq.int(G))) if(verbose) { cat("resampling ...\n") flush.console() pbar <- txtProgressBar(min = 0, max = nboot, style = 3) on.exit(close(pbar)) } b <- nonfit <- 0 while(b < nboot & nonfit < max.nonfit) { b <- b + 1 obj <- 
object switch(type, "bs" = { idx <- sample(seq_len(n), size = n, replace = TRUE) obj$data <- object$data[idx,] obj$z <- object$z[idx,] obj$warn <- FALSE mod.boot <- try(do.call("me", obj), silent = TRUE) }, "wlbs" = { w <- rexp(n) # w <- w/mean(w) w <- w/max(w) mod.boot <- try(do.call("me.weighted", c(list(weights = w, warn = FALSE), obj)), silent = TRUE) }, "pb" = { obj$data <- do.call("sim", object)[,-1,drop=FALSE] obj$z <- predict(obj)$z obj$warn <- FALSE mod.boot <- try(do.call("me", obj), silent = TRUE) }, "jk" = { idx <- seq_len(n)[-b] obj$data <- object$data[idx,] obj$z <- object$z[idx,] obj$warn <- FALSE mod.boot <- try(do.call("me", obj), silent = TRUE) } ) # check model convergence if(inherits(mod.boot, "try-error")) { if(type != "jk") b <- b - 1 nonfit <- nonfit + 1 next() } if(is.na(mod.boot$loglik)) { if(type != "jk") b <- b - 1 nonfit <- nonfit + 1 next() } if(type == "jk") { # pseudovalues ... # pro.boot[b,] <- n*par$pro - (n-1)*mod.boot$parameters$pro # mean.boot[b,,] <- n*par$mean - (n-1)*mod.boot$parameters$mean # var.boot[b,,,] <- n*par$variance - (n-1)*mod.boot$parameters$variance$sigma pro.boot[b,] <- mod.boot$parameters$pro mean.boot[b,,] <- mod.boot$parameters$mean var.boot[b,,,] <- mod.boot$parameters$variance$sigma } else { # bootstrap values pro.boot[b,] <- mod.boot$parameters$pro mean.boot[b,,] <- mod.boot$parameters$mean var.boot[b,,,] <- mod.boot$parameters$variance$sigma } if(verbose) setTxtProgressBar(pbar, b) } out <- list(G = G, modelName = object$modelName, parameters = par, nboot = nboot, type = type, nonfit = nonfit, pro = pro.boot, mean = mean.boot, variance = var.boot) class(out) <- "MclustBootstrap" return(out) } print.MclustBootstrap <- function(x, digits = getOption("digits"), ...) { cat("\'", class(x)[1], "\' object:\n", sep = "") str(x, max.level = 1, give.attr = FALSE, strict.width = "wrap") invisible() } summary.MclustBootstrap <- function(object, what = c("se", "ci", "ave"), conf.level = 0.95, ...) 
{ what <- match.arg(what, choices = eval(formals(summary.MclustBootstrap)$what)) dims <- dim(object$mean) # varnames <- dimnames(object$mean)[[2]] nboot <- dims[1] d <- dims[2] G <- dims[3] switch(what, "se" = { out <- list(pro = apply(object$pro, 2, sd, na.rm=TRUE), mean = apply(object$mean, c(2,3), sd, na.rm=TRUE), variance = apply(object$variance, c(2,3,4), sd, na.rm=TRUE)) if(object$type == "jk") out <- lapply(out, function(x) sqrt(x^2*(nboot-object$nonfit-1)^2/(nboot-object$nonfit))) }, "ave" = { out <- list(pro = apply(object$pro, 2, mean, na.rm=TRUE), mean = apply(object$mean, c(2,3), mean, na.rm=TRUE), variance = apply(object$variance, c(2,3,4), mean, na.rm=TRUE)) }, "ci" = { levels <- c((1-conf.level)/2, (1 + conf.level)/2) if(object$type == "jk") { # bias-corrected ci based on normal-approximation ave <- list(pro = apply(object$pro, 2, mean, na.rm=TRUE), mean = apply(object$mean, c(2,3), mean, na.rm=TRUE), variance = t(sapply(seq.int(d), function(j) apply(object$variance[,j,j,], 2, mean, na.rm=TRUE), simplify = "array"))) se <- list(pro = apply(object$pro, 2, sd, na.rm=TRUE), mean = apply(object$mean, c(2,3), sd, na.rm=TRUE), variance = t(sapply(seq.int(d), function(j) apply(object$variance[,j,j,], 2, sd, na.rm=TRUE), simplify = "array"))) se <- lapply(se, function(x) sqrt(x^2*(nboot-object$nonfit-1)^2/(nboot-object$nonfit))) zq <- qnorm(max(levels)) lnames <- paste0(formatC(levels * 100, format = "fg", width = 1, digits = getOption("digits")), "%") # the code above mimic stats:::format_perc(levels) which can't be used # because format_perc is not exported from stats out <- list(pro = array(as.double(NA), c(2,G), dimnames = list(lnames, 1:G)), mean = array(as.double(NA), dim = c(2,d,G), dimnames = list(lnames, 1:d, 1:G)), variance = array(as.double(NA), dim = c(2,d,G), dimnames = list(lnames, 1:d, 1:G))) out$pro[1,] <- ave$pro - zq*se$pro out$pro[2,] <- ave$pro + zq*se$pro out$mean[1,,] <- ave$mean - zq*se$mean out$mean[2,,] <- ave$mean + zq*se$mean 
out$variance[1,,] <- ave$variance - zq*se$variance out$variance[2,,] <- ave$variance + zq*se$variance } else { # percentile-based ci out <- list(pro = apply(object$pro, 2, quantile, probs = levels, na.rm=TRUE), mean = apply(object$mean, c(2,3), quantile, probs = levels, na.rm=TRUE)) v <- array(as.double(NA), dim = c(2,d,G), dimnames = dimnames(out$mean)) for(j in seq.int(d)) v[,j,] <- apply(object$variance[,j,j,], 2, quantile, probs = levels, na.rm=TRUE) out$variance <- v } } ) obj <- append(object[c("modelName", "G", "nboot", "type")], list(d = d, what = what)) if(what == "ci") obj$conf.level <- conf.level obj <- append(obj, out) class(obj) <- "summary.MclustBootstrap" return(obj) } print.summary.MclustBootstrap <- function(x, digits = getOption("digits"), ...) { txt <- paste(rep("-", min(58, getOption("width"))), collapse = "") catwrap(txt) catwrap(paste("Resampling", switch(x$what, "se" = "standard errors", "ave" = "averages", "ci" = "confidence intervals"))) catwrap(txt) # cat(formatC("Model", flag = "-", width = 26), "=", x$modelName, "\n") cat(formatC("Num. 
of mixture components", flag = "-", width = 26), "=", x$G, "\n") cat(formatC("Replications", flag = "-", width = 26), "=", x$nboot, "\n") cat(formatC("Type", flag = "-", width = 26), "=", switch(x$type, "bs" = "nonparametric bootstrap", "wlbs" = "weighted likelihood bootstrap", "pb" = "parametric bootstrap", "jk" = "jackknife"), "\n") if(x$what == "ci") cat(formatC("Confidence level", flag = "-", width = 26), "=", x$conf.level, "\n") # cat("\nMixing probabilities:\n") print(x$pro, digits = digits) # cat("\nMeans:\n") if(x$d == 1) { if(x$what == "se" | x$what == "ave") print(x$mean[1,], digits = digits) else print(x$mean[,1,], digits = digits) } else if(x$what == "se" | x$what == "ave") print(x$mean, digits = digits) else { for(g in seq.int(x$G)) { cat("[,,", g, "]\n", sep = "") print(x$mean[,,g], digits = digits) } } # cat("\nVariances:\n") if(x$d == 1) { print(x$variance[,1,], digits = digits) } else { for(g in seq.int(x$G)) { cat("[,,", g, "]\n", sep = "") print(x$variance[,,g], digits = digits) } } invisible(x) } plot.MclustBootstrap <- function(x, what = c("pro", "mean", "var"), show.parest = TRUE, show.confint = TRUE, hist.col = "grey", hist.border = "lightgrey", breaks = "Sturges", col = "forestgreen", lwd = 2, lty = 3, xlab = NULL, xlim = NULL, ylim = NULL, ...) { object <- x # Argh. Really want to use object anyway what <- match.arg(what, choices = eval(formals(plot.MclustBootstrap)$what)) par <- object$parameters d <- dim(object$mean)[2] varnames <- rownames(par$mean) if(show.confint) { ci <- summary(object, what = "ci", ...) ave <- summary(object, what = "ave", ...) } histBoot <- function(boot, stat, ci, ave, breaks, xlim, ylim, xlab, ...) 
{ hist(boot, breaks = breaks, xlim = xlim, ylim = ylim, main = "", xlab = xlab, ylab = "", border = hist.border, col = hist.col) box() if(show.parest) abline(v = stat, col = col, lwd = lwd, lty = lty) if(show.confint) { lines(ci, rep(par("usr")[3]/2,2), lwd = lwd, col = col) points(ave, par("usr")[3]/2, pch = 15, col = col) } } switch(what, "pro" = { xlim <- range(if(is.null(xlim)) pretty(object$pro) else xlim) for(k in 1:object$G) histBoot(object$pro[,k], breaks = breaks, stat = par$pro[k], ci = ci$pro[,k], ave = ave$pro[k], xlim = xlim, ylim = ylim, xlab = ifelse(is.null(xlab), paste("Mix. prop. for comp.",k), xlab)) }, "mean" = { isNull_xlim <- is.null(xlim) for(j in 1:d) { xlim <- range(if(isNull_xlim) pretty(object$mean[,j,]) else xlim) for(k in 1:object$G) histBoot(object$mean[,j,k], breaks = breaks, stat = par$mean[j,k], ci = ci$mean[,j,k], ave = ave$mean[j,k], xlim = xlim, ylim = ylim, xlab = ifelse(is.null(xlab), paste(varnames[j], "mean for comp.",k), xlab)) } }, "var" = { isNull_xlim <- is.null(xlim) for(j in 1:d) { xlim <- range(if(isNull_xlim) pretty(object$variance[,j,j,]) else xlim) for(k in 1:object$G) histBoot(object$variance[,j,j,k], breaks = breaks, stat = par$variance[j,j,k], ci = ci$variance[,j,k], ave = ave$variance[j,k], xlim = xlim, ylim = ylim, xlab = ifelse(is.null(xlab), paste(varnames[j], "var. for comp.",k), xlab)) } } ) invisible() } mclust/NEWS.md0000644000176200001440000004356314515764650012707 0ustar liggesusers# mclust 6.0.1 - Changed initialization in `MclustSSC()` for components of unlabeled data via k-means. - Corrected output of `summary.MclustSCC()` for components of unlabeled data. - Updated citation info with reference to book published by Chapman & Hall/CRC # mclust 6.0.0 - Major release of mclust accompanying the upcoming book by Chapman & Hall/CRC. # mclust 5.4.11 (NOT ON CRAN) - Added `summary.crimcoords()` method and removed argument `plot` from `crimcoords()` function call. # mclust 5.4.10 - Updated banner on startup. 
- Updated info on man page for datasets `diabetes`, `wdbc`, and `thyroid`. - Std. error for cross-validation in `cvMclustDA()` uses formula for the weighted standard deviation with weights given by folds size. - Fix .Rd files. # mclust 5.4.9 - Added `crimcoords()` to compute discriminant coordinates or crimcoords. - Fixed man page for `cvMclustDA()`. # mclust 5.4.8 - `densityMclust()` by default draw a graph of the density estimate. - Fixed a bug in computing mixture density if the noise component is present. - Changed default behaviour of `hc()` when called to perform agglomerative hierarchical clustering instead of using for EM initialization. - The default `mclust.options("hcModelName")` now returns only the model to be used. - Changed default `partition` argument of `hc()` function by adding `dupPartion()` to remove data duplicates. - Added checks to `mclustBootstrapLRT()` to stop if an invalid `modelName` is provided or a one-component mixture model is provided. - Extended the functionality of `cvMclustDA()` by including as cross-validated metrics both the classification error and the Brier score. - Updated info on dataset man pages. # mclust 5.4.7 - Updated plot method (dendrogram) for hierarchical clustering --- now based on classification likelihood. - Added `MclustSSC()` function (and related `print`, `summary`, `plot`, and `predict`, methods) for semi-supervised classification. - Exchanged order of models VEE and EVE to account for increasing complexity of EVE. - Added `cex` argument to `clPairs()` to control character expansion used in plotting symbols. - `em()` and `me()` have now `data` as first argument. # mclust 5.4.6 - Fixed issues with source Fortran code with gfortran 10 as reported by CRAN. - Clean code of `hcCriterion()`. - Replaced `CEX` argument in functions with standard base graph `cex` argument. - Removed `ylim` argument in function; it can be passed via `...`. 
- MclustDA models use the default SVD transformation of the data for initialisation of the EM algorithm. - Added `icl` criterion to object returned by `Mclust()`. - Fixed number of pages for the RJ reference. - quantileMclust() uses bisection line search method for numerically computing quantiles. # mclust 5.4.5 - Fixed warnings in Fortran calls raised by CRAN. # mclust 5.4.4 - Added `classPriorProbs()` to estimate prior class probabilities. - Added `BrierScore()` to compute the Brier score for assessing the accuracy of probabilistic predictions. - Added `randomOrthogonalMatrix()` to generate random orthogonal basis matrices. - Partial rewriting of `summary.MclustDA()` internals to provide both the classification error and the Brier score for training and/or test data. - Partial rewriting of `plot.MclustDA()` internals. - Added `dmvnorm()` for computing the density of a general multivariate Gaussian distribution via efficient Fortran code. - Added Wisconsin diagnostic breast cancer (WDBC) data. - Added EuroUnemployment data. - Fixed mismatches in Fortran calls. - Bugs fix. # mclust 5.4.3 - Added website site and update DESCRIPTION with URL. - Fixed a bug when checking for univariate data with a single observation in several instances. Using `NCOL()` works both for n-values vector or nx1 matrix. - Fixed a bug when `hcPairs` are provided in the `initialization` argument of `mclustBIC()` (and relatives) and the number of observations exceed the threshold for subsetting. - Fixed bugs on axes for some manual pairs plots. - Renamed `type = "level"` to `type = "hdr"`, and `level.prob` to `prob`, in `surfacePlot()` for getting HDRs graphs - Fixed a bug in `type = "hdr"` plot on `surfacePlot()`. - Fixed a bug in `as.Mclust()`. - Small changes to `summary.MclustDA()` when `modelType = "EDDA"` and in general for a more compact output. # mclust 5.4.2 - Added `mclustBICupdate()` to merge the best values from two BIC results as returned by `mclustBIC()`. 
- Added `mclustLoglik()` to compute the maximal log-likelihood values from BIC results as returned by `mclustBIC()`. - Added option `type = "level"` to `plot.densityMclust()` and `surfacePlot()` to draw highest density regions. - Added `meXXI()` and `meXXX()` to exported functions. - Updated vignette. # mclust 5.4.1 - Added parametric bootstrap option (`type = "pb"`) in `MclustBootstrap()`. - Added the options to get averages of resampling distributions in `summary.MclustBootstrap()` and to plot resampling-based confidence intervals in `plot.MclustBootstrap()`. - Added function `catwrap()` for wrapping printed lines at `getOption("width")` when using `cat()`. - `mclust.options()` now modify the variable `.mclust` in the namespace of the package, so it should work even inside an mclust-function call. - Fixed a bug in `covw()` when `normalize = TRUE`. - Fixed a bug in `estepVEV()` and `estepVEE()` when parameters contains `Vinv`. - Fixed a bug in `plotDensityMclustd()` when drawing marginal axes. - Fixed a bug in `summary.MclustDA()` when computing classification error in the extreme case of a minor class of assignment. - Fixed a bug in the initialisation of `mclustBIC()` when a noise component is present for 1-dimensional data. - Fixed bugs in some examples documenting `clustCombi()` and related functions. # mclust 5.4 - Model-based hierarchical clustering used to start the EM-algorithm is now based on the scaled SVD transformation proposed by Scrucca and Raftery (2016). This change is not backward compatible. However, previous results can be easily obtained by issuing the command: `mclust.options(hcUse = "VARS") For more details see help("mclust.options")`. - Added `subset` parameter in `mclust.options()` to control the maximal sample size to be used in the initial model-based hierarchical phase. - `predict.densityMclust()` can optionally returns the density on a logarithm scale. - Removed normalization of mixing proportions for new models in single mstep. 
- Internal rewrite of code used by `packageStartupMessage()`. - Fixed a small bug in `MclustBootstrap()` in the univariate data case. - Fixed bugs when both the noise and subset are provided for initialization. - Vignette updated to include references, startup message, css style, etc. - Various bug fixes in plotting methods when noise is present. - Updated references in `citation()` and man pages. # mclust 5.3 (2017-05) - Added `gmmhd()` function and relative methods. - Added `MclustDRsubsel()` function and relative methods. - Added option to use subset in the hierarchical initialization step when a noise component is present. - `plot.clustCombi()` presents a menu in interactive sessions, no more need of data for classification plots but extract the data from the `clustCombi` object. - Added `combiTree()` plot for `clustCombi` objects. - `clPairs()` now produces a single scatterplot in the bivariate case. - Fixed a bug in `imputeData()` when seed is provided. Now if a seed is provided the data matrix is reproducible. - in `imputeData()` and `imputePairs()` some name of arguments have been modified to be coherent with the rest of the package. - Added functions `matchCluster()` and `majorityVote()`. - Rewrite of print and summary methods for `clustCombi` class objects. - Added `clustCombiOptim()`. - Fixed a bug in `randomPairs()` when nrow of input data is odd. - Fixed a bug in `plotDensityMclust2()`, `plotDensityMclustd()` and `surfacePlot()` when a noise component is present. # mclust 5.2.3 (2017-03) - Added native routine registration for Fortran code. - Fixed lowercase argument PACKAGE in `.Fortran()` calls. # mclust 5.2.2 (2017-01) - Fixed a bug in rare case when performing an extra M step at the end of EM algorithm. # mclust 5.2.1 (2017-01) - Replaced `structure(NULL, *)` with `structure(list(), *)` # mclust 5.2 (2016-03) - Added argument `x` to `Mclust()` to use BIC values from previous computations to avoid recomputing for the same models. 
The same argument and functionality was already available in `mclustBIC()`. - Added argument `x` to `mclustICL()` to use ICL values from previous computations to avoid recomputing for the same models. - Fixed a bug on `plot.MclustBootstrap()` for the `"mean"` and `"var"` in the univariate case. - Fixed uncertainty plots. - Added functions `as.Mclust()` and `as.densityMclust()` to convert object to specific mclust classes. - Solved a numerical accuracy problem in `qclass()` when the scale of `x` is (very) large by making the tolerance eps scale dependent. - Use transpose subroutine instead of non-Fortran 77 TRANSPOSE function in `mclustaddson.f`. - Fixed `predict.Mclust()` and `predict.MclustDR()` by implementing a more efficient and accurate algorithm for computing the densities. # mclust 5.1 (2015-10) - Fixed slow convergence for VVE and EVE models. - Fixed a bug in orientation for model VEE. - Added an extra M-step and parameters update in `Mclust()` call via `summaryMclustBIC()`. # mclust 5.0.2 (2015-07) - Added option to `MclustBootstrap()` for using weighted likelihood bootstrap. - Added a plot method for `MclustBootstrap` objects. - Added `errorBars()` function. - Added `clPairsLegend()` function. - Added `covw()` function. - Fixed rescaling of mixing probabilities in new models. - bug fixes. # mclust 5.0.1 (2015-04) - Fixed bugs. - Added print method for `hc` objects. # mclust 5.0.0 (2015-03) - Added the four missing models (EVV, VEE, EVE, VVE) to the mclust family. A noise component is allowed, but no prior is available. - Added `mclustBootstrapLRT()` function (and corresponding print and plot methods) for selecting the number of mixture components based on the sequential bootstrap likelihood ratio test. - Added `MclustBootstrap()` function (and corresponding print and summary methods) for performing bootstrap inference. This provides standard errors for parameters and confidence intervals. 
- Added `"A quick tour of mclust"` vignette as html generated using rmarkdown and knitr. Older vignettes are included as other documentation for the package. - Modified arguments to `mvn2plot()` to control colour, lty, lwd, and pch of ellipses and mean point. - Added functions `emX()`, `emXII()`, `emXXI()`, `emXXX()`, `cdensX()`, `cdensXII()`, `cdensXXI()`, and `cdensXXX()`, to deal with single-component cases, so calling the em function works even if `G = 1`. - Small changes to `icl()`, now it is a generic method, with specialized methods for `Mclust` and `MclustDA` objects. - Fixed bug for transformations in the initialization step when some variables are constant (i.e. the variance is zero) or a one-dimensional data is provided. - Changed the order of arguments in `hc()` (and all the functions calling it). - Small modification to `CITATION` file upon request of CRAN maintainers. - Various bug fixes. # mclust 4.4 (2014-09) - Added option for using transformation of variables in the hierarchical initialization step. - Added `quantileMclust()` for computing the quantiles from a univariate Gaussian mixture distribution. - Fixed bugs on `summaryMclustBIC()`, `summaryMclustBICn()`, `Mclust()` to return a matrix of 1s on a single column for `z` even in the case of `G = 1`. This is to avoid error on some plots. - Moved pdf files (previously included as vignettes) to `inst/doc` with corresponding `index.html`. # mclust 4.3 (2014-03) - Fixed bug for `logLik.MclustDA()` in the univariate case. - Added argument `"what"` to `predict.densityMclust()` function for choosing what to retrieve, the mixture density or component density. - `hc()` function has an additional parameter to control if the original variables or a transformation of them should be used for hierarchical clustering. - Added `"hcUse"` argument in `mclust.options()` to be passed as default to `hc()`. 
- Added the storing of original data (and class for classification models) in the object returned by the main functions. - Added component `hypvol` to `Mclust` object which provide the hypervolume of the noise component when required, otherwise is set to NA. - Added a warning when prior is used and BIC returns NAs. - Fixed bugs in `summary.Mclust()`, `print.summary.Mclust()`, `plot.Mclust()` and `icl()` in the case of presence of a noise component. - Fixed bug on some plots in `plot.MclustDR()` which requires `plot.new()` before calling `plot.window()`. - Fixed a bug in `MclustDR()` for the one-dimensional case. - Corrections to `Mclust` man page. - Various small bug fixes. # mclust 4.2 (2013-07) - Fixed bug in `sim*()` functions when no obs are assigned to a component. - `MclustDA()` allows to fit a single class model. - Fixex bug in `summary.Mclust()` when a subset is used for initialization. - Fixed a bug in the function `qclass()` when ties are present in quantiles, so it always return the required number of classes. - Various small bug fixes. # mclust 4.1 (2013-04) - Added `icl()` function for computing the integrated complete-data likelihood. - Added `mclustICL()` function with associated print and plot methods. - `print.mclustBIC()` shows also the top models based on BIC. - Modified `summary.Mclust()` to return also the icl. - Rewrite of `adjustedRandIndex()` function. This version is more efficient for large vectors. - Updated help for `adjustedRandIndex()`. - Modifications to `MclustDR()` and its summary method. - Changed behavior of `plot.MclustDR(..., what = "contour")`. - Improved plot of uncertainty for `plot.MclustDR(..., what = "boundaries")`. - Corrected a bug for malformed GvHD data. - Corrected version of qclass for selecting initial values in case of 1D data when successive quantiles coincide. - Corrected version of plot BIC values when only a single G component models are fitted. - Various bug fixes. 
# mclust 4.0 (2012-08) - Added new summary and print methods for `Mclust()`. - Added new summary and print methods for `densityMclust()`. - Included `MclustDA()` function and methods. - Included `MclustDR()` function and methods. - Included `me.weighted()` function. - Restored hierarchical clustering capability for the EEE model (hcEEE). - Included vignettes for mclust version 4 from Technical Report No. 597 and for using weights in mclust. - Adoption of GPL (>= 2) license. # mclust 3.5 (2012-07) - Added `summary.Mclust()`. - New functions for plotting and summarizing density estimation. - Various bug fixes. - Added `clustCombi()` and related functions (code and doc provided by Jean-Patrick Baudry). - Bug fix: variable names lost when G = 1. # mclust 3.4.11 (2012-01) - Added `NAMESPACE`. # mclust 3.4.10 (2011-05) - Removed intrinsic gamma- # mclust 3.4.9 (2011-05) - Fixed `hypvol()` function to avoid overflow. - Fixed `hypvol()` help file value description. - Removed unused variables and tabs from source code. - Switched to intrinsic gamma in source code. - Fixed default warning in estepVEV and mstepVEV. # mclust 3.4.8 (2010-12) - Fixed output when G = 1 (it had NA for the missing `z` component). # mclust 3.4.7 (2010-10) - Removed hierarchical clustering capability for the `EEE` model (hcEEE). - The R 2.12.0 build failed due to a 32-bit Windows compiler error, forcing removal of the underlying Fortran code for hcEEE from the package, which does not contain errors and compiles on other platforms. # mclust 3.4.6 (2010-08) - Added description of parameters output component to `Mclust` and `summary.mclustBIC` help files. # mclust 3.4.5 (2010-07) - Added `densityMclust()` function. # mclust 3.4.4 (2010-04) - Fixed bug in covariance matrix output for EEV and VEV models. # mclust 3.4.3 (2010-02) - Bug fixes. # mclust 3.4.2 (2010-02) - Moved CITATION to inst and used standard format - BibTex entries are in inst/cite. 
- Fixed bug in handling missing classes in `mclustBIC()`. - Clarified license wording. # mclust 3.4.1 (2010-01) - Corrected output description in `mclustModel` help file. - Updated mclust manual reference to show revision. # mclust 3.4 (2009-12) - Updated `defaultPrior` help file. - Added utility functions for imputing missing data with the mix package. - Changed default max to number of mixture components in each class from 9 to 3. # mclust 3.3.2 (2009-10) - Fixed problems with \cr in `mclustOptions` help file # mclust 3.3.1 (2009-06) - Fixed `plot.mclustBIC()` and `plot.Mclust()` to handle `modelNames`. - Changed "orientation" for VEV, VVV models to be consistent with R `eigen()` and the literature - Fixed some problems including doc for the noise option. - Updated the `unmap()` function to optionally include missing groups. # mclust 3.3 (2009-06) - Fixed bug in the `"errors"` option for `randProj()`. - Fixed boundary cases for the `"noise"` option. # mclust 3.2 (2009-04) - Added permission for CRAN distribution to LICENSE. - Fixed problems with help files found by new parser. - Changed PKG_LIBS order in src/Makevars. - Fixed `Mclust()` to handle sampling in data expression in call. # mclust 3.1.10 (2008-11) - Added `EXPR = to` all switch functions that didn't already have it. # mclust 3.1.9 (2008-10) - Added `pro` component to parameters in `dens()` help file. - Fixed some problems with the noise option. # mclust 3.1.1 (2007-03) - Default seed changed in `sim*()` functions. - Added model name check to various functions. - Otherwise backward compatible with version 3.0 # mclust 3.1 (2007-01) - Most plotting functions changed to use color. - `Mclust()` and `mclustBIC()` fixed to work with G=1 - Otherwise backward compatible with version 3.0. # mclust 3.0 (2006-10) - New functionality added, including conjugate priors for Bayesian regularization. 
- Backward compatibility is not guaranteed since the implementation of some functions has changed to make them easier to use or maintain. mclust/MD50000644000176200001440000002052114525113245012073 0ustar liggesusers41f023ef6e055f5036f4328d179cdb42 *DESCRIPTION 89163f1985e431c28e6a5d558a623f77 *NAMESPACE 70806dac0dd692456b18889693724c98 *NEWS.md 6c7b363cb8374e4df827c1e6394faa5f *R/bootstrap.R eeb0e042ca8bcc1eb2d32da9ef594baf *R/clustCombi.R dcad5ed0da37959e13faa123a2f6092d *R/densityMclust.R 3b11a9490c65c6e2f3d107a1d9abf3e2 *R/gmmhd.R 96cea560cb21abd9531b0c22d5cd4337 *R/graphics.R ba84097ee321529784cd9c6db5143e77 *R/icl.R 4b9155a000a45bb1f37fd1507c15f172 *R/impute.R abf9c56a0394f45738ed9552d1e8ea6a *R/mbahc.R 03f54145598d39ea1542e50ec42873e3 *R/mclust.R 1c0ad0629bdef87f46a9811aa87011ea *R/mclustaddson.R c270a7737514f87fc47a7380a783072e *R/mclustda.R c5a0f7f8aa99fd7f8bb589ead06084a1 *R/mclustdr.R a1046e73c9e0774d26ab06c23622aa12 *R/mclustssc.R 2c029b4f12b6c96982c034098d881bfe *R/options.R 781ff8bea2efa10350791cd781eb4a2d *R/toremove.R ba0e509dd6446b85a79e6c24e45d59c8 *R/util.R ce4d61899bfc151f01017ae2a753baee *R/weights.R dce1aa8eed39b0a39cb74e3459ca5a87 *R/zzz.R d5aefacf6bf650dc1d6471e05035542c *build/vignette.rds c8a065ae65f4c145cc82390a921d0d76 *data/Baudry_etal_2010_JCGS_examples.rda 235b19c9c990569f08327ff110680265 *data/EuroUnemployment.rda 566dc2f50e863e9b32c445f9d5afd26a *data/GvHD.rda 0f196d10ed5157a20f07538cf1cef0f3 *data/acidity.rda dcf0404be80a56cd040ed8cb8da1b428 *data/banknote.txt.gz 5da504b35928e78152ebd8ab33ff876e *data/chevron.rda bb390dfc16851f05f7012dd4d0727c5f *data/cross.rda 9ebc6f757130b83bce7597fb3f56cdad *data/diabetes.rda 6f4df3e12c20ef222516056f91b21733 *data/thyroid.rda b2a529e265721bcd2cb632d42ec6cc11 *data/wdbc.txt.gz 0a7b904c7ede38f1934dbada290835e5 *data/wreath.rda 2cc43a299aa4f4cd0ffea3f0373bb678 *inst/CITATION e1dfcd76432de653be6954f26e6e9f8b *inst/doc/mclust.R 4e6a81668ad2d1318ddbcf42d1871766 *inst/doc/mclust.Rmd 
aa5f29b30acea5caa9a59cd818bf2626 *inst/doc/mclust.html 7799d6093880e94a3bed07e6900f7249 *man/Baudry_etal_2010_JCGS_examples.Rd 226b01ed5c258d406de2af5cfade2ba1 *man/BrierScore.Rd 49a3fb4baf46c0ce393d0141bd926bc0 *man/EuroUnemployment.Rd 0e1b6313cb53d6e937acd55cdded818e *man/GvHD.Rd 19e300bf60a607cdf1c5b2e74911b336 *man/Mclust.Rd ea3ad560361c7274e200e317aca08df1 *man/MclustBootstrap.Rd 91388cf988ce8683cf21cac53d02fc2a *man/MclustDA.Rd a6a45328b7d5eaf35c375008599fe18f *man/MclustDR.Rd b2326163d0cd9784455a55a7c4044c8d *man/MclustDRsubsel.Rd b8d666ee6dceb9fbad3be67dea8e5c35 *man/MclustSSC.Rd 9d6a30f7faee827688630d29d0e4c581 *man/acidity.Rd 6c924f5dc94fb10416016d5d027932e3 *man/adjustedRandIndex.Rd 40bf7f2db2b9677c5b9bf7733da4aeac *man/banknote.Rd 840501df1b562d5ba9df4d31c33672c6 *man/bic.Rd 9458a1636740fbd8443308670f153d68 *man/cdens.Rd 00ce09bbbe3b468350de0f9ee5bcbfae *man/cdensE.Rd 3844722391ea73731d1353ecbd726867 *man/cdfMclust.Rd b0cfe540c4eb91f99f49c2127562cd49 *man/chevron.Rd 342f855aaedba7fe96aaf6a425a77519 *man/clPairs.Rd 63d108fe436518c400eebb43430a5958 *man/classError.Rd a794c30cb8d2365ce1b5845338e933cf *man/classPriorProbs.Rd fd20be5ef505890b5fb82337022b9f0a *man/clustCombi-internals.Rd f6385cd6c121cb424cc1dc5c172a278b *man/clustCombi.Rd bdb14988bc5193739060f58cca67fd5f *man/clustCombiOptim.Rd fd9027722a416f5955196a5cdc985c53 *man/combMat.Rd e1914a2b193b93b0344c25945495c851 *man/combiPlot.Rd 39620282beb115c8f76e50c01dfd08d9 *man/combiTree.Rd 831d195e9c3513946f54daac01b81db5 *man/coordProj.Rd 0ad10b7d97e45bebe7a10f69a7f919b9 *man/covw.Rd 284aacd524b82799e8fac8ea47e3f9e4 *man/crimcoords.Rd c18ca42f0c7ebdce593067d063d278ca *man/cross.Rd d8037dbf01147532dabd4fabbce22626 *man/cvMclustDA.Rd 9f86088d9d21126e7cc1ac28d4e5fa6a *man/decomp2sigma.Rd 4175c1b284dbb6b7c5a5da13f32d037e *man/defaultPrior.Rd 2676ec2d5d2b5b82dccc0cbd3c11bc13 *man/dens.Rd 131970319ce1e07e4318d849c325407b *man/densityMclust.Rd 21a247e8b819f6d1cdede859a84ffd68 *man/densityMclust.diagnostic.Rd 
ffa2e5d8d84e46efb64589a77dd134a2 *man/diabetes.Rd 92ea0d6609afc260770a66d2ad957f04 *man/dmvnorm.Rd d8e808a964b5deb680007ae1ee1b5997 *man/dupPartition.Rd e0d38101ddddf5fd476ec651f2527af5 *man/em.Rd df3e1e471eccd851260c5c668f4a33e4 *man/emControl.Rd 4cc7c95403c93c80dbc47f05c6b70a52 *man/emE.Rd 7ec8253676ce2d0d911a74b74ceee450 *man/entPlot.Rd 6e7e4d7ec91567f07c07d95891720b0b *man/errorBars.Rd cbd0f5744473f1e9939cfa60e4608b12 *man/estep.Rd 262848b35d252a53470aad0d956e48ae *man/estepE.Rd 246f72baa397cd928f97c9cb6c3ff940 *man/figures/logo.png d592f984a4d0741fe7282e03e630acb4 *man/gmmhd.Rd 92b1557f0c9c56340e8402c7a2bd1396 *man/hc.Rd b4d9712d43b640bf6e90642b59d33af3 *man/hcE.Rd ed3b16d80e13872b6cf8ce7e80d0041c *man/hcRandomPairs.Rd dcaccecf8c2189f5a3138173a258f591 *man/hclass.Rd 2ddfa6fedfaf2a8b469024689cad3e32 *man/hdrlevels.Rd f5c538311fa12c3298e7524d9eb62817 *man/hypvol.Rd 918939d5858154bd5aa2a789e8adda3a *man/icl.Rd a91d9fd699250bc9a4e3757ae60b6247 *man/imputeData.Rd 327ef4d86112a911b2648b7a47b2e60e *man/imputePairs.Rd cfb07fb48ca73468e3283687cdd54d97 *man/logLik.Mclust.Rd 525b30e85bd1a3ea08b30745b3cdadd3 *man/logLik.MclustDA.Rd 699915f3a4cf8bfd6718f71f2a754c48 *man/majorityVote.Rd 7d8989594ce3a00a8633781ff84658f0 *man/map.Rd 4e3901ea67e0c125d8e5ac4330df2e38 *man/mapClass.Rd f1edaf4e08180fa4edad91f6023b58c3 *man/mclust-deprecated.Rd c2686ac817e8bbb871363f12f143cc1d *man/mclust-internal.Rd f3dffc6f5583c98679efd2d77dce2c5c *man/mclust-package.Rd dbdb7e3336c3760b23504cb5361bb265 *man/mclust.options.Rd 06f681ededb7169ee1e4dbe256233f4a *man/mclust1Dplot.Rd 0d32e83fa5e8a19883b7189e597e2d08 *man/mclust2Dplot.Rd 355c6c6116ce20866b1cdee2251bd9d4 *man/mclustBIC.Rd 238e7aa5fdbff7f89e2444d3022fddfb *man/mclustBICupdate.Rd 38a76cbae6cbf9afdbe7e516e33c6c56 *man/mclustBootstrapLRT.Rd 9193ecabab69de75262cc9ea251c479c *man/mclustICL.Rd 26b1e6ddb4c2016044aaef2315e66fb8 *man/mclustLoglik.Rd 9d69c87d5861e6de08a57854ed4bf324 *man/mclustModel.Rd caf6238f3fe60beaaf874d6c1c98065b 
*man/mclustModelNames.Rd 996f7ebfb72da1eee161ea7b23855d15 *man/mclustVariance.Rd 1400e1fe29cf0dbeb443d6e89a0c3706 *man/me.Rd 2c269df04f607068c021e07414bae71a *man/me.weighted.Rd ac659d4036d5e464339507dd9d76ec3f *man/meE.Rd d46877118126083ae9bff0f5f72208d0 *man/mstep.Rd 8ba56642820275d19834149d7d552327 *man/mstepE.Rd 16c6ef6baa11a8e7f6dc3f515aa9f9ed *man/mvn.Rd fff5c19cf5ffeae6fe2fc54f35c34da3 *man/mvnX.Rd c3796dae5e43a2c26a9ee9b73fcf2276 *man/nMclustParams.Rd 39229fadd09ef4e5370f060fd6357149 *man/nVarParams.Rd 36355172607046a0c37f10dee1197ed5 *man/partconv.Rd b0354eb07d5c849fc6c0be463c30b31b *man/partuniq.Rd d84812bf9da9c9e76465ae24ae554882 *man/plot.Mclust.Rd 25b5261ddbced669fefaba45a72e4f04 *man/plot.MclustBoostrap.Rd 01f1cd87231cbb640308280faf53d535 *man/plot.MclustDA.Rd cfb781f6b9fe5bf1d2c8cdf49e1a8c04 *man/plot.MclustDR.Rd 3fd0d79ca9549c14b822845673cad5c1 *man/plot.MclustSSC.Rd 458f2b4c3940e714a015e03e553d364b *man/plot.clustCombi.Rd 78a23338f16271f0ae9bbb5b7a935c95 *man/plot.densityMclust.Rd 339efd96c69d386b0e4a34094084a486 *man/plot.hc.Rd 67acd4e69e1861bebf51d1f649e775a7 *man/plot.mclustBIC.Rd 640264c2366087d920e4518af50281a7 *man/plot.mclustICL.Rd f640c08bd9098247a97a44f30c89a4cf *man/predict.Mclust.Rd 79be9849129ebfbdf9c0a6cdfac3d328 *man/predict.MclustDA.Rd e84b696c5b8eff056814c3cba07bee9f *man/predict.MclustDR.Rd 66b20d05c52e7dc62fcb4971f1b2287b *man/predict.MclustSSC.Rd 691003fbb627468671f5a26fc0759d29 *man/predict.densityMclust.Rd 8384a865dade63acabf451b136217287 *man/priorControl.Rd 1a32c267ea6deb106ae19b6cec847115 *man/randProj.Rd d9477d3e3d801b783e42d10e37271e05 *man/randomOrthogonalMatrix.Rd e43a5fead9ef355e3d522ec94e224f66 *man/sigma2decomp.Rd 1ed03f440760a36b9c66bf6c5259f1cb *man/sim.Rd 77797cb68e57027ea316e1b068b9ed63 *man/simE.Rd 83eb12323099cbbf0f3e075d10f0be77 *man/summary.Mclust.Rd 91268ae73944bd6d5845911c42142e58 *man/summary.MclustBootstrap.Rd 4a8c675b46da86ca7075999c22e635f4 *man/summary.MclustDA.Rd 103fff818063adef9f3262398342fb0a 
*man/summary.MclustDR.Rd fe2f8df16cc881677e427811d2d8a0d9 *man/summary.MclustSSC.Rd 986ecadd060b62810d73fa37ec72dc19 *man/summary.mclustBIC.Rd dda5b13da10532da7d8d3752d554c71e *man/surfacePlot.Rd a584277d937587f96f984cbcf4d8a494 *man/thyroid.Rd 16f301b425aebb1ac6b6f0175427dabc *man/uncerPlot.Rd b76a9e2d21683188dc8bce832e2ec9d1 *man/unmap.Rd c48dce8f9817012850ce6d6ae81bd136 *man/wdbc.Rd c1b81d23059192faf2b0fdb40b0bc0d2 *man/wreath.Rd 2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars d784799104d2c2350f9350b268761a2b *src/dmvnorm.f 394a717e565a6647678c758462d11eb2 *src/init.c 5ddcdc7e9d5c82abda7bcb62fb594cb3 *src/mclust.f 015387e2f8c8f04d9e8900f82002a925 *src/mclustaddson.f 4e6a81668ad2d1318ddbcf42d1871766 *vignettes/mclust.Rmd 71148153fcf7f0388c1ae6f5da2e9f06 *vignettes/vignette.css mclust/inst/0000755000176200001440000000000014525075361012546 5ustar liggesusersmclust/inst/doc/0000755000176200001440000000000014525075361013313 5ustar liggesusersmclust/inst/doc/mclust.Rmd0000644000176200001440000001773414516406623015301 0ustar liggesusers--- title: "A quick tour of mclust" author: "Luca Scrucca" date: "`r format(Sys.time(), '%d %b %Y')`" output: rmarkdown::html_vignette: toc: true number_sections: false css: "vignette.css" vignette: > %\VignetteIndexEntry{A quick tour of mclust} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} library(knitr) opts_chunk$set(fig.align = "center", out.width = "90%", fig.width = 6, fig.height = 5, dev.args = list(pointsize=10), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & output code in chunks warning = FALSE) knit_hooks$set(par = function(before, options, envir) { if(before && options$fig.show != "none") par(family = "sans", mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5) }) set.seed(1) # for exact reproducibility ``` # Introduction **mclust** is a contributed R package for model-based clustering, classification, and density estimation based on finite normal mixture 
modelling. It provides functions for parameter estimation via the EM algorithm for normal mixture models with a variety of covariance structures, and functions for simulation from these models. Also included are functions that combine model-based hierarchical clustering, EM for mixture estimation and the Bayesian Information Criterion (BIC) in comprehensive strategies for clustering, density estimation and discriminant analysis. Additional functionalities are available for displaying and visualizing fitted models along with clustering, classification, and density estimation results. This document gives a quick tour of **mclust** (version `r packageVersion("mclust")`) functionalities. It was written in R Markdown, using the [knitr](https://cran.r-project.org/package=knitr) package for production. See `help(package="mclust")` for further details and references provided by `citation("mclust")`. ```{r, message = FALSE, echo=-2} library(mclust) cat(mclust:::mclustStartupMessage(), sep="") ``` # Clustering ```{r} data(diabetes) class <- diabetes$class table(class) X <- diabetes[,-1] head(X) clPairs(X, class) BIC <- mclustBIC(X) plot(BIC) summary(BIC) mod1 <- Mclust(X, x = BIC) summary(mod1, parameters = TRUE) plot(mod1, what = "classification") table(class, mod1$classification) plot(mod1, what = "uncertainty") ICL <- mclustICL(X) summary(ICL) plot(ICL) LRT <- mclustBootstrapLRT(X, modelName = "VVV") LRT ``` ## Initialisation EM algorithm is used by **mclust** for maximum likelihood estimation. Initialisation of EM is performed using the partitions obtained from agglomerative hierarchical clustering. For details see `help(mclustBIC)` or `help(Mclust)`, and `help(hc)`. 
```{r} (hc1 <- hc(X, modelName = "VVV", use = "SVD")) BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default summary(BIC1) (hc2 <- hc(X, modelName = "VVV", use = "VARS")) BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2)) summary(BIC2) (hc3 <- hc(X, modelName = "EEE", use = "SVD")) BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3)) summary(BIC3) ``` Update BIC by merging the best results: ```{r} BIC <- mclustBICupdate(BIC1, BIC2, BIC3) summary(BIC) plot(BIC) ``` Univariate fit using random starting points obtained by creating random agglomerations (see `help(hcRandomPairs)`) and merging best results: ```{r, echo=-1} set.seed(20181116) data(galaxies, package = "MASS") galaxies <- galaxies / 1000 BIC <- NULL for(j in 1:20) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } summary(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) ``` # Classification ## EDDA ```{r} data(iris) class <- iris$Species table(class) X <- iris[,1:4] head(X) mod2 <- MclustDA(X, class, modelType = "EDDA") summary(mod2) plot(mod2, what = "scatterplot") plot(mod2, what = "classification") ``` ## MclustDA ```{r} data(banknote) class <- banknote$Status table(class) X <- banknote[,-1] head(X) mod3 <- MclustDA(X, class) summary(mod3) plot(mod3, what = "scatterplot") plot(mod3, what = "classification") ``` ## Cross-validation error ```{r} cv <- cvMclustDA(mod2, nfold = 10) str(cv) unlist(cv[3:6]) cv <- cvMclustDA(mod3, nfold = 10) str(cv) unlist(cv[3:6]) ``` # Density estimation ## Univariate ```{r} data(acidity) mod4 <- densityMclust(acidity) summary(mod4) plot(mod4, what = "BIC") plot(mod4, what = "density", data = acidity, breaks = 15) plot(mod4, what = "diagnostic", type = "cdf") plot(mod4, what = "diagnostic", type = "qq") ``` ## Multivariate ```{r} data(faithful) mod5 <- densityMclust(faithful) summary(mod5) plot(mod5, what = "BIC") plot(mod5, what = "density", 
type = "hdr", data = faithful, points.cex = 0.5) plot(mod5, what = "density", type = "persp") ``` # Bootstrap inference ```{r} boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs") summary(boot1, what = "se") summary(boot1, what = "ci") ``` ```{r, echo=-1, fig.width=6, fig.height=7} par(mfrow=c(4,3)) plot(boot1, what = "pro") plot(boot1, what = "mean") ``` ```{r} boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs") summary(boot4, what = "se") summary(boot4, what = "ci") ``` ```{r, echo=-1} par(mfrow=c(2,2)) plot(boot4, what = "pro") plot(boot4, what = "mean") ``` # Dimension reduction ## Clustering ```{r} mod1dr <- MclustDR(mod1) summary(mod1dr) plot(mod1dr, what = "pairs") plot(mod1dr, what = "boundaries", ngrid = 200) mod1dr <- MclustDR(mod1, lambda = 1) summary(mod1dr) plot(mod1dr, what = "scatterplot") plot(mod1dr, what = "boundaries", ngrid = 200) ``` ## Classification ```{r} mod2dr <- MclustDR(mod2) summary(mod2dr) plot(mod2dr, what = "scatterplot") plot(mod2dr, what = "boundaries", ngrid = 200) mod3dr <- MclustDR(mod3) summary(mod3dr) plot(mod3dr, what = "scatterplot") plot(mod3dr, what = "boundaries", ngrid = 200) ```
# Using colorblind-friendly palettes Most of the graphs produced by **mclust** use colors that by default are defined in the following options: ```{r} mclust.options("bicPlotColors") mclust.options("classPlotColors") ``` The first option controls colors used for plotting BIC, ICL, etc. curves, whereas the second option is used to assign colors for indicating clusters or classes when plotting data. Starting with R version 4.0, the function \code{palette.colors()} can be used for retrieving colors from some pre-defined palettes. For instance ```{r, eval=FALSE} palette.colors(palette = "Okabe-Ito") ``` returns a color-blind-friendly palette for individuals suffering from protanopia or deuteranopia, the two most common forms of inherited color blindness. For earlier versions of R such palette can be defined as: ```{r} cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") ``` and then assigned to the **mclust** options as follows: ```{r} bicPlotColors <- mclust.options("bicPlotColors") bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5]) mclust.options("bicPlotColors" = bicPlotColors) mclust.options("classPlotColors" = cbPalette[-1]) clPairs(iris[,-5], iris$Species) mod <- Mclust(iris[,-5]) plot(mod, what = "BIC") plot(mod, what = "classification") ``` If needed, users can easily define their own palettes following the same procedure outlined above.

# References Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) *Model-Based Clustering, Classification, and Density Estimation Using mclust in R*. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, *The R Journal*, 8/1, pp. 205-233. https://journal.r-project.org/archive/2016/RJ-2016-021/RJ-2016-021.pdf Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, *Journal of the American Statistical Association*, 97/458, pp. 611-631. ---- ```{r} sessionInfo() ```mclust/inst/doc/mclust.R0000644000176200001440000001353214525075357014756 0ustar liggesusers## ----setup, include=FALSE----------------------------------------------------- library(knitr) opts_chunk$set(fig.align = "center", out.width = "90%", fig.width = 6, fig.height = 5, dev.args = list(pointsize=10), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & output code in chunks warning = FALSE) knit_hooks$set(par = function(before, options, envir) { if(before && options$fig.show != "none") par(family = "sans", mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5) }) set.seed(1) # for exact reproducibility ## ----message = FALSE, echo=-2------------------------------------------------- library(mclust) cat(mclust:::mclustStartupMessage(), sep="") ## ----------------------------------------------------------------------------- data(diabetes) class <- diabetes$class table(class) X <- diabetes[,-1] head(X) clPairs(X, class) BIC <- mclustBIC(X) plot(BIC) summary(BIC) mod1 <- Mclust(X, x = BIC) summary(mod1, parameters = TRUE) plot(mod1, what = "classification") table(class, mod1$classification) plot(mod1, what = "uncertainty") ICL <- mclustICL(X) summary(ICL) plot(ICL) LRT <- mclustBootstrapLRT(X, modelName = "VVV") LRT ## 
----------------------------------------------------------------------------- (hc1 <- hc(X, modelName = "VVV", use = "SVD")) BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default summary(BIC1) (hc2 <- hc(X, modelName = "VVV", use = "VARS")) BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2)) summary(BIC2) (hc3 <- hc(X, modelName = "EEE", use = "SVD")) BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3)) summary(BIC3) ## ----------------------------------------------------------------------------- BIC <- mclustBICupdate(BIC1, BIC2, BIC3) summary(BIC) plot(BIC) ## ----echo=-1------------------------------------------------------------------ set.seed(20181116) data(galaxies, package = "MASS") galaxies <- galaxies / 1000 BIC <- NULL for(j in 1:20) { rBIC <- mclustBIC(galaxies, verbose = FALSE, initialization = list(hcPairs = hcRandomPairs(galaxies))) BIC <- mclustBICupdate(BIC, rBIC) } summary(BIC) plot(BIC) mod <- Mclust(galaxies, x = BIC) summary(mod) ## ----------------------------------------------------------------------------- data(iris) class <- iris$Species table(class) X <- iris[,1:4] head(X) mod2 <- MclustDA(X, class, modelType = "EDDA") summary(mod2) plot(mod2, what = "scatterplot") plot(mod2, what = "classification") ## ----------------------------------------------------------------------------- data(banknote) class <- banknote$Status table(class) X <- banknote[,-1] head(X) mod3 <- MclustDA(X, class) summary(mod3) plot(mod3, what = "scatterplot") plot(mod3, what = "classification") ## ----------------------------------------------------------------------------- cv <- cvMclustDA(mod2, nfold = 10) str(cv) unlist(cv[3:6]) cv <- cvMclustDA(mod3, nfold = 10) str(cv) unlist(cv[3:6]) ## ----------------------------------------------------------------------------- data(acidity) mod4 <- densityMclust(acidity) summary(mod4) plot(mod4, what = "BIC") plot(mod4, what = "density", data = acidity, breaks = 15) plot(mod4, what = "diagnostic", 
type = "cdf") plot(mod4, what = "diagnostic", type = "qq") ## ----------------------------------------------------------------------------- data(faithful) mod5 <- densityMclust(faithful) summary(mod5) plot(mod5, what = "BIC") plot(mod5, what = "density", type = "hdr", data = faithful, points.cex = 0.5) plot(mod5, what = "density", type = "persp") ## ----------------------------------------------------------------------------- boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs") summary(boot1, what = "se") summary(boot1, what = "ci") ## ----echo=-1, fig.width=6, fig.height=7--------------------------------------- par(mfrow=c(4,3)) plot(boot1, what = "pro") plot(boot1, what = "mean") ## ----------------------------------------------------------------------------- boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs") summary(boot4, what = "se") summary(boot4, what = "ci") ## ----echo=-1------------------------------------------------------------------ par(mfrow=c(2,2)) plot(boot4, what = "pro") plot(boot4, what = "mean") ## ----------------------------------------------------------------------------- mod1dr <- MclustDR(mod1) summary(mod1dr) plot(mod1dr, what = "pairs") plot(mod1dr, what = "boundaries", ngrid = 200) mod1dr <- MclustDR(mod1, lambda = 1) summary(mod1dr) plot(mod1dr, what = "scatterplot") plot(mod1dr, what = "boundaries", ngrid = 200) ## ----------------------------------------------------------------------------- mod2dr <- MclustDR(mod2) summary(mod2dr) plot(mod2dr, what = "scatterplot") plot(mod2dr, what = "boundaries", ngrid = 200) mod3dr <- MclustDR(mod3) summary(mod3dr) plot(mod3dr, what = "scatterplot") plot(mod3dr, what = "boundaries", ngrid = 200) ## ----------------------------------------------------------------------------- mclust.options("bicPlotColors") mclust.options("classPlotColors") ## ----eval=FALSE--------------------------------------------------------------- # palette.colors(palette = "Okabe-Ito") ## 
----------------------------------------------------------------------------- cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") ## ----------------------------------------------------------------------------- bicPlotColors <- mclust.options("bicPlotColors") bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5]) mclust.options("bicPlotColors" = bicPlotColors) mclust.options("classPlotColors" = cbPalette[-1]) clPairs(iris[,-5], iris$Species) mod <- Mclust(iris[,-5]) plot(mod, what = "BIC") plot(mod, what = "classification") ## ----------------------------------------------------------------------------- sessionInfo() mclust/inst/doc/mclust.html0000644000176200001440001332415614525075360015526 0ustar liggesusers A quick tour of mclust

A quick tour of mclust

Luca Scrucca

15 Nov 2023

Introduction

mclust is a contributed R package for model-based clustering, classification, and density estimation based on finite normal mixture modelling. It provides functions for parameter estimation via the EM algorithm for normal mixture models with a variety of covariance structures, and functions for simulation from these models. Also included are functions that combine model-based hierarchical clustering, EM for mixture estimation and the Bayesian Information Criterion (BIC) in comprehensive strategies for clustering, density estimation and discriminant analysis. Additional functionalities are available for displaying and visualizing fitted models along with clustering, classification, and density estimation results.

This document gives a quick tour of mclust (version 6.0.1) functionalities. It was written in R Markdown, using the knitr package for production. See help(package="mclust") for further details and references provided by citation("mclust").

library(mclust)
##                    __           __ 
##    ____ ___  _____/ /_  _______/ /_
##   / __ `__ \/ ___/ / / / / ___/ __/
##  / / / / / / /__/ / /_/ (__  ) /_  
## /_/ /_/ /_/\___/_/\__,_/____/\__/   version 6.0.1
## Type 'citation("mclust")' for citing this R package in publications.

Clustering

data(diabetes)
class <- diabetes$class
table(class)
## class
## Chemical   Normal    Overt 
##       36       76       33
X <- diabetes[,-1]
head(X)
##   glucose insulin sspg
## 1      80     356  124
## 2      97     289  117
## 3     105     319  143
## 4      90     356  199
## 5      90     323  240
## 6      86     381  157
clPairs(X, class)


BIC <- mclustBIC(X)
plot(BIC)

summary(BIC)
## Best BIC values:
##              VVV,3       VVV,4       EVE,6
## BIC      -4751.316 -4784.32213 -4785.24591
## BIC diff     0.000   -33.00573   -33.92951

mod1 <- Mclust(X, x = BIC)
summary(mod1, parameters = TRUE)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVV (ellipsoidal, varying volume, shape, and orientation) model with 3
## components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -2303.496 145 29 -4751.316 -4770.169
## 
## Clustering table:
##  1  2  3 
## 81 36 28 
## 
## Mixing probabilities:
##         1         2         3 
## 0.5368974 0.2650129 0.1980897 
## 
## Means:
##              [,1]     [,2]       [,3]
## glucose  90.96239 104.5335  229.42136
## insulin 357.79083 494.8259 1098.25990
## sspg    163.74858 309.5583   81.60001
## 
## Variances:
## [,,1]
##          glucose    insulin       sspg
## glucose 57.18044   75.83206   14.73199
## insulin 75.83206 2101.76553  322.82294
## sspg    14.73199  322.82294 2416.99074
## [,,2]
##           glucose   insulin       sspg
## glucose  185.0290  1282.340  -509.7313
## insulin 1282.3398 14039.283 -2559.0251
## sspg    -509.7313 -2559.025 23835.7278
## [,,3]
##           glucose   insulin       sspg
## glucose  5529.250  20389.09  -2486.208
## insulin 20389.088  83132.48 -10393.004
## sspg    -2486.208 -10393.00   2217.533

plot(mod1, what = "classification")

table(class, mod1$classification)
##           
## class       1  2  3
##   Chemical  9 26  1
##   Normal   72  4  0
##   Overt     0  6 27

plot(mod1, what = "uncertainty")


ICL <- mclustICL(X)
summary(ICL)
## Best ICL values:
##              VVV,3       EVE,6       EVE,7
## ICL      -4770.169 -4797.38232 -4797.50566
## ICL diff     0.000   -27.21342   -27.33677
plot(ICL)


LRT <- mclustBootstrapLRT(X, modelName = "VVV")
LRT
## ------------------------------------------------------------- 
## Bootstrap sequential LRT for the number of mixture components 
## ------------------------------------------------------------- 
## Model        = VVV 
## Replications = 999 
##               LRTS bootstrap p-value
## 1 vs 2   361.16739             0.001
## 2 vs 3   123.49685             0.001
## 3 vs 4    16.76161             0.498

Initialisation

EM algorithm is used by mclust for maximum likelihood estimation. Initialisation of EM is performed using the partitions obtained from agglomerative hierarchical clustering. For details see help(mclustBIC) or help(Mclust), and help(hc).

(hc1 <- hc(X, modelName = "VVV", use = "SVD"))
## Call:
## hc(data = X, modelName = "VVV", use = "SVD") 
## 
## Model-Based Agglomerative Hierarchical Clustering 
## Model name        = VVV 
## Use               = SVD 
## Number of objects = 145
BIC1 <- mclustBIC(X, initialization = list(hcPairs = hc1)) # default 
summary(BIC1)
## Best BIC values:
##              VVV,3       VVV,4       EVE,6
## BIC      -4751.316 -4784.32213 -4785.24591
## BIC diff     0.000   -33.00573   -33.92951

(hc2 <- hc(X, modelName = "VVV", use = "VARS"))
## Call:
## hc(data = X, modelName = "VVV", use = "VARS") 
## 
## Model-Based Agglomerative Hierarchical Clustering 
## Model name        = VVV 
## Use               = VARS 
## Number of objects = 145
BIC2 <- mclustBIC(X, initialization = list(hcPairs = hc2))
summary(BIC2)
## Best BIC values:
##              VVV,3       VVE,3       EVE,4
## BIC      -4760.091 -4775.53693 -4793.26143
## BIC diff     0.000   -15.44628   -33.17079

(hc3 <- hc(X, modelName = "EEE", use = "SVD"))
## Call:
## hc(data = X, modelName = "EEE", use = "SVD") 
## 
## Model-Based Agglomerative Hierarchical Clustering 
## Model name        = EEE 
## Use               = SVD 
## Number of objects = 145
BIC3 <- mclustBIC(X, initialization = list(hcPairs = hc3))
summary(BIC3)
## Best BIC values:
##              VVV,3        VVE,4       VVE,3
## BIC      -4751.354 -4757.091572 -4775.69587
## BIC diff     0.000    -5.737822   -24.34212

Update BIC by merging the best results:

BIC <- mclustBICupdate(BIC1, BIC2, BIC3)
summary(BIC)
## Best BIC values:
##              VVV,3        VVE,4       VVE,3
## BIC      -4751.316 -4757.091572 -4775.53693
## BIC diff     0.000    -5.775172   -24.22053
plot(BIC)

Univariate fit using random starting points obtained by creating random agglomerations (see help(hcRandomPairs)) and merging best results:

data(galaxies, package = "MASS") 
galaxies <- galaxies / 1000
BIC <- NULL
for(j in 1:20)
{
  rBIC <- mclustBIC(galaxies, verbose = FALSE,
                    initialization = list(hcPairs = hcRandomPairs(galaxies)))
  BIC <- mclustBICupdate(BIC, rBIC)
}
summary(BIC)
## Best BIC values:
##                V,3         V,4        V,5
## BIC      -441.6122 -443.399746 -446.34966
## BIC diff    0.0000   -1.787536   -4.73745
plot(BIC)

mod <- Mclust(galaxies, x = BIC)
summary(mod)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust V (univariate, unequal variance) model with 3 components: 
## 
##  log-likelihood  n df       BIC       ICL
##       -203.1792 82  8 -441.6122 -441.6126
## 
## Clustering table:
##  1  2  3 
##  3  7 72

Classification

EDDA

data(iris)
class <- iris$Species
table(class)
## class
##     setosa versicolor  virginica 
##         50         50         50
X <- iris[,1:4]
head(X)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
mod2 <- MclustDA(X, class, modelType = "EDDA")
summary(mod2)
## ------------------------------------------------ 
## Gaussian finite mixture model for classification 
## ------------------------------------------------ 
## 
## EDDA model summary: 
## 
##  log-likelihood   n df       BIC
##       -187.7097 150 36 -555.8024
##             
## Classes       n     % Model G
##   setosa     50 33.33   VEV 1
##   versicolor 50 33.33   VEV 1
##   virginica  50 33.33   VEV 1
## 
## Training confusion matrix:
##             Predicted
## Class        setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         47         3
##   virginica       0          0        50
## Classification error = 0.02 
## Brier score          = 0.0127
plot(mod2, what = "scatterplot")

plot(mod2, what = "classification")

MclustDA

data(banknote)
class <- banknote$Status
table(class)
## class
## counterfeit     genuine 
##         100         100
X <- banknote[,-1]
head(X)
##   Length  Left Right Bottom  Top Diagonal
## 1  214.8 131.0 131.1    9.0  9.7    141.0
## 2  214.6 129.7 129.7    8.1  9.5    141.7
## 3  214.8 129.7 129.7    8.7  9.6    142.2
## 4  214.8 129.7 129.6    7.5 10.4    142.0
## 5  215.0 129.6 129.7   10.4  7.7    141.8
## 6  215.7 130.8 130.5    9.0 10.1    141.4
mod3 <- MclustDA(X, class)
summary(mod3)
## ------------------------------------------------ 
## Gaussian finite mixture model for classification 
## ------------------------------------------------ 
## 
## MclustDA model summary: 
## 
##  log-likelihood   n df       BIC
##       -646.0801 200 66 -1641.849
##              
## Classes         n  % Model G
##   counterfeit 100 50   EVE 2
##   genuine     100 50   XXX 1
## 
## Training confusion matrix:
##              Predicted
## Class         counterfeit genuine
##   counterfeit         100       0
##   genuine               0     100
## Classification error = 0 
## Brier score          = 0
plot(mod3, what = "scatterplot")

plot(mod3, what = "classification")

Cross-validation error

cv <- cvMclustDA(mod2, nfold = 10)
str(cv)
## List of 6
##  $ classification: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ z             : num [1:150, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "setosa" "versicolor" "virginica"
##  $ ce            : num 0.0267
##  $ se.ce         : num 0.0109
##  $ brier         : num 0.0208
##  $ se.brier      : num 0.00738
unlist(cv[3:6])
##          ce       se.ce       brier    se.brier 
## 0.026666667 0.010886621 0.020795887 0.007383247
cv <- cvMclustDA(mod3, nfold = 10)
str(cv)
## List of 6
##  $ classification: Factor w/ 2 levels "counterfeit",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ z             : num [1:200, 1:2] 1.56e-06 3.50e-19 5.41e-28 3.33e-20 2.42e-29 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:2] "counterfeit" "genuine"
##  $ ce            : num 0.005
##  $ se.ce         : num 0.005
##  $ brier         : num 0.00514
##  $ se.brier      : num 0.00498
unlist(cv[3:6])
##          ce       se.ce       brier    se.brier 
## 0.005000000 0.005000000 0.005135796 0.004980123

Density estimation

Univariate

data(acidity)
mod4 <- densityMclust(acidity)

summary(mod4)
## ------------------------------------------------------- 
## Density estimation via Gaussian finite mixture modeling 
## ------------------------------------------------------- 
## 
## Mclust E (univariate, equal variance) model with 2 components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -185.9493 155  4 -392.0723 -398.5554
plot(mod4, what = "BIC")

plot(mod4, what = "density", data = acidity, breaks = 15)

plot(mod4, what = "diagnostic", type = "cdf")

plot(mod4, what = "diagnostic", type = "qq")

Multivariate

data(faithful)
mod5 <- densityMclust(faithful)

summary(mod5)
## ------------------------------------------------------- 
## Density estimation via Gaussian finite mixture modeling 
## ------------------------------------------------------- 
## 
## Mclust EEE (ellipsoidal, equal volume, shape and orientation) model with 3
## components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -1126.326 272 11 -2314.316 -2357.824
plot(mod5, what = "BIC")

plot(mod5, what = "density", type = "hdr", data = faithful, points.cex = 0.5)

plot(mod5, what = "density", type = "persp")

Bootstrap inference

boot1 <- MclustBootstrap(mod1, nboot = 999, type = "bs")
summary(boot1, what = "se")
## ---------------------------------------------------------- 
## Resampling standard errors 
## ---------------------------------------------------------- 
## Model                      = VVV 
## Num. of mixture components = 3 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## 
## Mixing probabilities:
##          1          2          3 
## 0.05185780 0.05058160 0.03559685 
## 
## Means:
##                1         2         3
## glucose 1.042239  3.444948 16.340816
## insulin 7.554105 29.047203 63.483315
## sspg    7.669033 31.684647  9.926121
## 
## Variances:
## [,,1]
##          glucose   insulin      sspg
## glucose 10.78177  51.28084  51.61617
## insulin 51.28084 529.62298 416.38176
## sspg    51.61617 416.38176 623.81098
## [,,2]
##           glucose   insulin      sspg
## glucose  65.66172  616.6785  442.0993
## insulin 616.67852 7279.0671 3240.3558
## sspg    442.09927 3240.3558 7070.4152
## [,,3]
##           glucose   insulin      sspg
## glucose 1045.6542  4178.685  667.2709
## insulin 4178.6846 18873.253 2495.0278
## sspg     667.2709  2495.028  506.8173
summary(boot1, what = "ci")
## ---------------------------------------------------------- 
## Resampling confidence intervals 
## ---------------------------------------------------------- 
## Model                      = VVV 
## Num. of mixture components = 3 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## Confidence level           = 0.95 
## 
## Mixing probabilities:
##               1         2         3
## 2.5%  0.4490043 0.1510533 0.1324862
## 97.5% 0.6518326 0.3548749 0.2688038
## 
## Means:
## [,,1]
##        glucose  insulin     sspg
## 2.5%  89.13950 344.9890 150.8405
## 97.5% 93.16603 374.7221 181.8322
## [,,2]
##         glucose  insulin     sspg
## 2.5%   98.82567 447.4121 257.9011
## 97.5% 112.28459 561.3273 374.6194
## [,,3]
##        glucose   insulin      sspg
## 2.5%  198.5986  969.6231  63.22103
## 97.5% 263.2932 1226.2654 101.09078
## 
## Variances:
## [,,1]
##        glucose  insulin     sspg
## 2.5%  38.65508 1234.198 1514.416
## 97.5% 79.43401 3287.722 4146.024
## [,,2]
##         glucose   insulin     sspg
## 2.5%   88.35268  3514.662 12583.92
## 97.5% 358.15175 31416.557 39228.47
## [,,3]
##        glucose   insulin     sspg
## 2.5%  3377.773  47477.74 1317.041
## 97.5% 7379.344 120297.75 3229.747
plot(boot1, what = "pro")
plot(boot1, what = "mean")

boot4 <- MclustBootstrap(mod4, nboot = 999, type = "bs")
summary(boot4, what = "se")
## ---------------------------------------------------------- 
## Resampling standard errors 
## ---------------------------------------------------------- 
## Model                      = E 
## Num. of mixture components = 2 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## 
## Mixing probabilities:
##          1          2 
## 0.04130937 0.04130937 
## 
## Means:
##          1          2 
## 0.04669993 0.06719883 
## 
## Variances:
##          1          2 
## 0.02376885 0.02376885
summary(boot4, what = "ci")
## ---------------------------------------------------------- 
## Resampling confidence intervals 
## ---------------------------------------------------------- 
## Model                      = E 
## Num. of mixture components = 2 
## Replications               = 999 
## Type                       = nonparametric bootstrap 
## Confidence level           = 0.95 
## 
## Mixing probabilities:
##               1         2
## 2.5%  0.5364895 0.3004131
## 97.5% 0.6995869 0.4635105
## 
## Means:
##              1        2
## 2.5%  4.279055 6.184439
## 97.5% 4.461108 6.449465
## 
## Variances:
##               1         2
## 2.5%  0.1395796 0.1395796
## 97.5% 0.2317769 0.2317769
plot(boot4, what = "pro")
plot(boot4, what = "mean")

Dimension reduction

Clustering

mod1dr <- MclustDR(mod1)
summary(mod1dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: Mclust (VVV, 3) 
##         
## Clusters  n
##        1 81
##        2 36
##        3 28
## 
## Estimated basis vectors: 
##              Dir1     Dir2
## glucose  0.764699  0.86359
## insulin -0.643961 -0.22219
## sspg     0.023438 -0.45260
## 
##                Dir1      Dir2
## Eigenvalues  1.2629   0.35218
## Cum. %      78.1939 100.00000
plot(mod1dr, what = "pairs")

plot(mod1dr, what = "boundaries", ngrid = 200)


mod1dr <- MclustDR(mod1, lambda = 1)
summary(mod1dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: Mclust (VVV, 3) 
##         
## Clusters  n
##        1 81
##        2 36
##        3 28
## 
## Estimated basis vectors: 
##              Dir1     Dir2
## glucose  0.764699  0.86359
## insulin -0.643961 -0.22219
## sspg     0.023438 -0.45260
## 
##                Dir1      Dir2
## Eigenvalues  1.2629   0.35218
## Cum. %      78.1939 100.00000
plot(mod1dr, what = "scatterplot")

plot(mod1dr, what = "boundaries", ngrid = 200)

Classification

mod2dr <- MclustDR(mod2)
summary(mod2dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: EDDA 
##             
## Classes       n Model G
##   setosa     50   VEV 1
##   versicolor 50   VEV 1
##   virginica  50   VEV 1
## 
## Estimated basis vectors: 
##                  Dir1      Dir2
## Sepal.Length  0.20874 -0.006532
## Sepal.Width   0.38620 -0.586611
## Petal.Length -0.55401  0.252562
## Petal.Width  -0.70735 -0.769453
## 
##                Dir1       Dir2
## Eigenvalues  1.8813   0.098592
## Cum. %      95.0204 100.000000
plot(mod2dr, what = "scatterplot")

plot(mod2dr, what = "boundaries", ngrid = 200)


mod3dr <- MclustDR(mod3)
summary(mod3dr)
## ----------------------------------------------------------------- 
## Dimension reduction for model-based clustering and classification 
## ----------------------------------------------------------------- 
## 
## Mixture model type: MclustDA 
##              
## Classes         n Model G
##   counterfeit 100   EVE 2
##   genuine     100   XXX 1
## 
## Estimated basis vectors: 
##              Dir1     Dir2
## Length   -0.07016 -0.25690
## Left     -0.36888 -0.19963
## Right     0.29525 -0.10111
## Bottom    0.54683  0.46254
## Top       0.55720  0.41370
## Diagonal -0.40290  0.70628
## 
##                Dir1     Dir2
## Eigenvalues  1.7188   1.0607
## Cum. %      61.8373 100.0000
plot(mod3dr, what = "scatterplot")

plot(mod3dr, what = "boundaries", ngrid = 200)


Using colorblind-friendly palettes

Most of the graphs produced by mclust use colors that by default are defined in the following options:

mclust.options("bicPlotColors")
##       EII       VII       EEI       EVI       VEI       VVI       EEE       VEE 
##    "gray"   "black" "#218B21" "#41884F" "#508476" "#58819C" "#597DC3" "#5178EA" 
##       EVE       VVE       EEV       VEV       EVV       VVV         E         V 
## "#716EE7" "#9B60B8" "#B2508B" "#C03F60" "#C82A36" "#CC0000"    "gray"   "black"
mclust.options("classPlotColors")
##  [1] "dodgerblue2"    "red3"           "green3"         "slateblue"     
##  [5] "darkorange"     "skyblue1"       "violetred4"     "forestgreen"   
##  [9] "steelblue4"     "slategrey"      "brown"          "black"         
## [13] "darkseagreen"   "darkgoldenrod3" "olivedrab"      "royalblue"     
## [17] "tomato4"        "cyan2"          "springgreen2"

The first option controls colors used for plotting BIC, ICL, etc. curves, whereas the second option is used to assign colors for indicating clusters or classes when plotting data.

Starting with R version 4.0, the function can be used for retrieving colors from some pre-defined palettes. For instance

palette.colors(palette = "Okabe-Ito")

returns a color-blind-friendly palette for individuals suffering from protanopia or deuteranopia, the two most common forms of inherited color blindness. For earlier versions of R such palette can be defined as:

cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", 
"#D55E00", "#CC79A7", "#999999")

and then assigned to the mclust options as follows:

bicPlotColors <- mclust.options("bicPlotColors")
bicPlotColors[1:14] <- c(cbPalette, cbPalette[1:5])
mclust.options("bicPlotColors" = bicPlotColors)
mclust.options("classPlotColors" = cbPalette[-1])

clPairs(iris[,-5], iris$Species)

mod <- Mclust(iris[,-5])
plot(mod, what = "BIC")

plot(mod, what = "classification")

If needed, users can easily define their own palettes following the same procedure outlined above.



References

Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) Model-Based Clustering, Classification, and Density Estimation Using mclust in R. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/

Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, The R Journal, 8/1, pp. 205-233. https://journal.r-project.org/archive/2016/RJ-2016-021/RJ-2016-021.pdf

Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, Journal of the American Statistical Association, 97/458, pp. 611-631.


sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Rome
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] mclust_6.0.1 knitr_1.44  
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.33     R6_2.5.1          fastmap_1.1.1     xfun_0.40        
##  [5] cachem_1.0.8      htmltools_0.5.6   rmarkdown_2.22    cli_3.6.1        
##  [9] sass_0.4.6        jquerylib_0.1.4   compiler_4.3.0    rstudioapi_0.15.0
## [13] tools_4.3.0       evaluate_0.22     bslib_0.4.2       yaml_2.3.7       
## [17] rlang_1.1.1       jsonlite_1.8.7
mclust/inst/CITATION0000644000176200001440000000275414516413151013704 0ustar liggesuserscitHeader("To cite 'mclust' R package in publications, please use:") bibentry(bibtype = "Book", title = "Model-Based Clustering, Classification, and Density Estimation Using {mclust} in {R}", author = c(person(given="Luca", family="Scrucca"), person(given="Chris", family="Fraley"), person(given=c("T.", "Brendan"), family="Murphy"), person(given=c("Adrian", "E."), family="Raftery")), publisher = "Chapman and Hall/CRC", isbn = "978-1032234953", doi = "10.1201/9781003277965", year = "2023", url = "https://mclust-org.github.io/book/") # OLD # citEntry(entry = "Article", # title = "{mclust} 5: clustering, classification and density estimation using {G}aussian finite mixture models", # author = personList(person(given="Luca", family="Scrucca"), # person(given="Michael", family="Fop"), # person(given=c("T.", "Brendan"), family="Murphy"), # person(given=c("Adrian", "E."), family="Raftery")), # journal = "The {R} Journal", # year = "2016", # volume = "8", # number = "1", # pages = "289--317", # url="https://doi.org/10.32614/RJ-2016-021", # # # textVersion = # paste("Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016)", # "mclust 5: clustering, classification and density estimation using Gaussian finite mixture models", # "The R Journal", # "8/1, pp. 289-317"))