eco/NAMESPACE

# Generated by roxygen2: do not edit by hand

S3method(coef,eco)
S3method(coef,ecoNP)
S3method(predict,eco)
S3method(predict,ecoNP)
S3method(predict,ecoNPX)
S3method(predict,ecoX)
S3method(print,eco)
S3method(print,ecoBD)
S3method(print,ecoML)
S3method(print,summary.eco)
S3method(print,summary.ecoML)
S3method(print,summary.ecoNP)
S3method(print,summary.predict.eco)
S3method(summary,eco)
S3method(summary,ecoML)
S3method(summary,ecoNP)
S3method(summary,predict.eco)
S3method(varcov,eco)
S3method(varcov,ecoNP)
export(Qfun)
export(eco)
export(ecoBD)
export(ecoML)
export(ecoNP)
export(varcov)
importFrom(MASS,mvrnorm)
importFrom(stats,as.formula)
importFrom(stats,coef)
importFrom(stats,model.frame)
importFrom(stats,model.matrix)
importFrom(stats,model.response)
importFrom(stats,predict)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(stats,terms)
importFrom(stats,weighted.mean)
importFrom(utils,packageDescription)
useDynLib(eco, .registration = TRUE)

eco/ChangeLog

4.0-3 02.28.20 minor warnings fixed
4.0-2 02.25.18 testthat added, minor fixes
4.0-1 05.10.17 Roxygen2 compliant, C functions registered
3.1-7 03.04.15 minor fixes
3.1-6 06.12.12 minor fixes
3.1-5 05.29.12 minor fixes
3.1-4 07.12.09 minor fixes
3.1-3 07.05.09 minor fixes
3.1-2 01.29.09 minor documentation fixes
3.1-1 06.27.07 some minor improvements; final version for JSS publication
3.0-2 01.11.07 made it compatible with Windows; a bug fix in summary.ecoML()
3.0-1 12.27.06 a major revision; added ML estimation, calculation of fraction of missing information, stable release for R-2.4.1
2.2-2 09.23.06 changed due to updates in R
2.2-1 09.28.05 nonparametric model with contextual effects added
2.1-1 07.06.05 a major revision; added bounds and prediction; added/updated other functionalities
1.1-1 06.15.05 added the Metropolis algorithm to sample W
1.0-1 12.21.04 first official version; submitted to CRAN
0.9-1 09.07.04 first beta version

eco/data/

[Binary archive entries omitted: eco/data/housep88.txt.gz, eco/data/forgnlit30.txt.gz, eco/data/forgnlit30c.txt.gz, and eco/data/wallace.txt.gz are gzip-compressed data files whose contents cannot be rendered as text.]
"$$S Z??=~ o.j& \OE%-Gб6u oT*|nVKC0u0뱵>_,ObżtTDŽYt֧=_§?.B+Qʯ5ߺ(q4_ Np ?{^@DÕĢ:ps2I:YE#S+ҚKIk+b+W%*+l^:'2P~r+;7fCO5AĂ*R44\ycf~f J+xJMm^ۅ4HHutjZء7ILV>Gûk (Xj~7_%hIR}OK~Wz*&p4c_?Cꩋ\¥}COG(kHAd+<*IN=\Oc@DHNejaP(!FzˤD%7ҋi29ywk$6=`n=1>IaPm((JGnV0?ao:BxE=C ˍpU"/ũ\3"o Eu[;=~ wYN0I#8+ /#1Hde)|eSc/C A#XgE1`@؏F]hG}5?(@tl%[qXaEDŠtXAalM@V:M*h8&NCU O{8h>)BE@6Yk oa:n$ t"E*u'ℓ ҭ5OA$f7$\F>zgZo`*O~.Z5Lj"3\3*Wf2 S3s7S{ A"3Ofᴿ:Dۣ6`}y bo9S4N) xZ#S3qc'}n:OlwOЭok]E0'{ UqPhDy r ոt+Zh-#!+خMKi}S kn=B8Z&{w6nh %](2 |.0@!026yc{narD0Qwy='fНzqelkXOV93SLHm523*^_gs +kC%i@l'pis"1$"Z8,/f^<@ؑ@붩Rr_^#ީk5mi{'=hdK^ "e^j $(.-S<2`oWo|3{q' ͈z I+M+R+#&￾Gysa`b=]HcK7a#tdlQ#rpḧ吒Jąە~-/K#(_INTFRk"mX(#w:}}N {O;sg8^~]_A; pgGnUTpY gw&҂=j{#DCۢvj~t:HzIU5|'H=Dg J3S @.}(TQXP%m;e>R4=e%u3:@x;cMˀz߻P ;sČ y25S ިg6eh w.f&/qWD-x$4 ]ק׾^ 4hRQ1]8 1<mOG4Y*+0[X&#wN|{S r;ZBF:t2^GVQLWhbTi^],=xL Vt aaLz#~pHrX3z`eF8ׯLt\=zsH {7uE>MXfWH8"*Oqev7<~kr #Zs/F;[Z aH)-@o} j;)f~cuyG|2 e+)8w|\ZZVf^|nDU7%^7,X1Z Z{Q(13*A9Od<7dD"=q `Uc 3Au]3R==<[y-QiW{?#I ԊOaiTFS9~IUL| A~ߍtN`vrhǛL?OhtVIrsĢzH@nQ.(«H<yB2@ֵ>{+W|?‚CE6۪IUڰz+CR40=%?ashG:ҤV^_&\G ɗL0|>1̥8kHxa&2Ͷ F~]|C?@y|UN1i |эjb{//~i|ܑ"s͐tWQmc6G5#FszK- 6sǝJ~ΛNU1) HW pr3֋igxLeco/data/forgnlit30.txt.gz0000644000176200001440000000214514330613547015110 0ustar liggesusers5V9d7 :@ؑ3ƾE\b~/dmο痜Ky=gSGڳY<2c^kq;*4EUHM^~$%tKh+9hU ǽ$wbSmAu{X:s[x{*ĬOl }cO$[޺3[-[ltHCT}TA?YW;i5vdnSFGU]ܻsɯ$ܱ,Zvp(RӦ}Yc1 /*|/er)*Xs@5n%#C"c=wq;Q~/1q!: p P$%8m ;o+Tǀ n'Оdb]j,@kk.]eu=k;A@fYv&Mak}wm7Ih&nLc9@}\P-,qG?1*`arw -%*{"U Xщ}!]sĶ4<~?낪`l p0yXKNyGc 3s\iw-] 9V Lx*L-wO!e*pF D<*xL,N^ ^8_ ސK>:\ɵr -)!0)K)5߾{}W 0=BM*[`֠·EqY͈!@1 QE%LI8߾1G|>si^e\AJ"ޖAA͉%2 > 8H"]{B&}Pr `1U Q~gcd&D=;2>|q܁hw:rG| w:i ʵB!",U|/Nߑ)eTdeI ?OOi??k?{1l\zZ ܛ{[izغd4`ncE\KMsl.lQz,iԞߒ{]mDNfcx f{YϢFߟHڶ)cĜs%giJg\_b Pc;LAVҖq].\UZi4K+5ri8]ڮmMALjߖ:OysCfMYaZIJ)\3ͤsZRW5$0(B>.۪n3xfama7BX֩ &󏍵loK]+ǒ?K?2R'^_ax(m,׻OecOL7sjmv -@UǬ`~+36[~#Ado0[yAɠeNY16@/4TO{.1(0w'+6ɗ+6pޚO̰4mSa?nx`.xrbf|eTFO5&7a*;7c;< {pG/].c=ï_ht\to[5Vp˂u]Rd۸}} A q77.xf"8vgrǍ!/e-*#-G&@AWgfcb2v󌺖8{X*10S$Z"V2jqx uhG^~}p Ew+fv%=p`QbtQhNƚ{rˬnA4Sźr,yX|5# j† qF'Y.c,6"ߺGZ&佤als(eV[Mhse&, TQxegAc"|Uy0v&umX&$G9QTL_Ym"9TI '֙$ob ^jP/Ƣi"-*58#(/Tz'BۗAM's@=ǗO԰{[co'"2{k~.\#Cݾ)5`A2R0yX7 pNaf%rRLݡ'Hħٔ]jSv"?}DT;Oe.P7xI%/uPMϾvޘ֑9{ȮjogAgxVH}mf!`ړ=IV-}š^Wu6}4~ ̈́k铫D5F,wǁ 읫򆭻 keNXQӈnENHoz*g/C\5j .mR0S 񣔋e򡷸C.٤nv#P,\Uu-e]ܵdMٮe nQ3PW5OP_J~+YJUy3*t^1f#FU}eXJڑօus>~Kp gn %-LɿC|^v}[*/]'xb1eY&Ǯ`է=i굦:Y7Cj e,Sp"X,3ߌv"GdHҨWoQ &j(qg[iOl³dOv|ؽ.<ZO9?􁄉$Y4b^DXNC8F?@ݦ7+ JpAЉ}W=Wثmi87=Wnz*Sj,F[VFSjL0]mu\%yŬ\okⷕsJoE[M 0B}t'N-U;L`V<䌏.k!IAMoXKND$GJ.0V)Q~ROMɎP2-*t:AkB6 z>:Ǖ|kcDlҴr6-擹Ơ &58F3jG<]΢AA =?SAD:??.IbLonaO0ǰF'`Ix\?`K$2dnĂ[)6< Z{.6TR죴/܁N +MVW܈#~Tg6Jܧ6%r;<5\]9\yin*MGz2f?iv9[c &<5couYX Ӓh-ٔxѸ _KxW&EIkS!6̒,3vrn8 =pr/obr.3'ɘ%mR/oXVY>5|D3lCxdi|=E!7f'()+j:r]16ҰecWa.3ޫ9\rOeTys8 [-=-`V*e5kF@->y!TFj->? ΊhYd_َ~S#Jnmm~KtTkY4BfJxbRǨuzlj{Mjd3T=Y#xUkĪ=NR/eco/data/forgnlit30c.txt.gz0000644000176200001440000012675714330613547015273 0ustar liggesusersmK-9eճ9P$EV2,>}Lj??O3=>3\}8_>6ӿ/h/~>~m3WGg~}3=k=;&?ƭuf۷Oǧ~/^=65~;ӯů_<\qt>[?ok{w<ŇfOSh]v{7x:;=_\qQj3'_|s8O;U~⍼߿vo{J?m%xN7z||s/}C|ݛ{n+?8ZZ,>8ooܔ"VɌOw{vO/^rx|}^9^{k,?{~Gm(o{žr7m{j@ur6]OxGcMZ;VWyc Q|q-m?6yqyO^K4ӨX+py~ճ"Z\JgGr/6T"6Y<!A,so/s:{{Ǜ2ݾɛ}ݫzQ_[wlDb_z c!6֗y.^>y-_z(#b֌!ƎuKfĉ-9b+V8xS},ڈ璗~/5?5 :8ch\zW\|x7E8B/'ĈxBCXo|q@X/7O?ŹFX[yǥ??q8>CײWa.2օ"7j~Zc(2ɶ"M.rņ>/{N,{^)"R@=> pF=2/Mؽ#(Xaz^aqQr$_cF3VihDx0O%G_K"|mq]gȫbD\yE"x{{c'ңM}wv,wKI6X±IQEt|VoIE"cc5BX̓s%Rh"Kε{MRE9<-`x_.O/& ;?^=󌘖I_'ID ë}qv:uqk:7P3UFF}{*.v\"bfSyDYyd1(:uXy>ÙGLG`8/ۯa~[FRoD~EXձcLD["JO?'4Iɩ\G'9bRINO?W+ϨvWu~ۛmOᴮgۼȳjwWEc|*a.֌7/1ioXqGkfXG30ȏr,"p. 
/-/XO=u|qW738ӉN,se)4ETp&Og:/zOrxXrFyL6{pA?}ƆoSƯH-1t̻[YCxSDkƨrȞV$z:4Y {|(uQY 8EgEyyo##Fq~fY^jeYY#*cG hQVcc'eOp%4-Ku8@b=mI[M"ULjxw;ҬXi<:x->мJ^SգB[M5jgس]8Qj>c5㗒O -GDN릕w_Q">5h5 {=DW6l~pȏ93?ZESvvl|Xq٪A$DAvt' To|߽n/F ܵ?q?'_SLZ`:3xjѢeu?J׿?G3(bЮ2 NaSm7w \k#Z$u6ǒu֯; {+4tl܊o@/k2A=#wrŮ< @ҟK2_QueDmڬx4edV[+5"a?YJ{3\DHdSx/f3}d=jGPxҸS+b:[aGU'pnY>qJ!2[döܲJ~$/dqU)9O!XyFnI2,, ) (ʧP M>U4k+Wg2TKZEXfaġ;sZ6D>Q-_G.lM j*I8nM8.7߷8v6n:˯ƕnް6tFw Q3Er~ԪUn3XM*諨v7z&9aX/pjG(jxzU6 gfSڸ'qk^ݧd!N&w;O&ڢ[3bG}23t.(7x*[cE%ot,,;3\3k ISźqK#y:2MK9묧 sgG\_\"e}USיII5n̥]wtQƯ^_sF_] ֮D}Zq%sY`Z$q|Fl=8"s"ZTANݴ*gY;e߂ƢC*qf1J=fo%<#T 5)c1\D$>vGejpV_&G2ȭMU^l??՜\;1R^?~/C4v~QhGTσ [)c[:qbBOR~0yxhIɓ~ADyM,8_ z0",˜Nyڹx<T/졩t Pٴ@ƿ̇Ԟgݝa`!ϗrvMl.R%<"~2a\6"P+ݵ 4}aXclOȤOp Xꮇ5w#4H\UlpXkժz_|n}?RުGZ1?ء{괬Qz0.Qkwx_o4*Иlrn:Bkn1OȺ$.9҂M0/T#q`1@}1%C2TJ;s3`*]~;9-MvBqBOsS3T=O!dQv6Ц%N8b0.tItʏZ4<1X<eվS'e|,Q^ 0X%hN<å;h]+Ğ 33wm&P'pfXrs/~(.LMYH\ƀ.ygg5Jqנ23Iz7te: [R)"]^2ѽfpqm3 ә#1^GMD/.&-_"Ab{ <QG[k7y5ViY XN>6[e˯J';Jw63r&hprT=Ѝ1[gĸ&L&ഁDBX ܁M#S ޡ>Dn RE%!M8Y*0 —6!]Gm8wڙ4 @EvƬX'E౸!Uп"E8 mEScXSҖ/@;,> *<T{'^qo+gPON..L!*5FR8^z/'~[\ȲbxDQ\=6=~{s6qzo0'յ.|Zuz~j _)zT#(a,#xJvBD:smٚO qtƔ߻e9TR'䲯S 9UEo*igbq4ySe\mBc`5K4-\bгxh׎_0jWOTO~`e o2JL% X@rX#F37tqpxS[rHhe9Ac(wL&my?I[H_{`:1:|jwCrx͙ 3O)y'o8;c8i>=NAӽHG~Eڪ!8 O=A]D/ Xd?ȝޠc9WjoB1Q, Dͩo"87rܣ}5zcQIZExoM)ur29gR ogڊ`Q9}Y1:ŅJ_dz;8nTIiM]xO yT^HStXfOrć5uߥSa2+ X?|q䄷f}1;q̮@<5:`_d$_Wr89Ϡka[t(.3),_~RXsƥwIc~^wiͯ_Y6Ma u [q"_I~m0cDpnfoOŜ .2Ĩȹ1v 4jKZRFrmű"|{OU>O;e[Ɇ&bnNo-j]8ҎTvQ[WFwN7:_K0|bfjwtu @\|th|H߈G㭂t06q<mM=7fcCX2O=( o_˵zgasx3 $ʊl&FD8DӠQ 0yݫ@myC2bd@Pbǧp%Q.|CVYz"M8ad}aZz2/ \qBDdψ04];ObG"䈳63ko\L.\.xN-[' d2XMS~-Nn)*ըM $f @*<~6] F5Y1 |dFF_$!g^6!q9~ K>G-3,OB+ j Kq58։Q%]W|x/MfW"5&bWO朚\ P$d:'o,> F^3دl ՛~ 1 @9M 2VDqB}"Ve~Rô],؋3맿WJ 𼳗4 a%9,lH&C5_ؕ $רi^Ԕ#D) 0 [^jQHM`j ?^8ۃ44kO^f^|3tļuE] a_7~ZŐLq۩"VOL\ZǷϷN9&>\4ᣮ#[$L횐'v.@\Q##Ά΀|>qh\őV83xXi Ś7%uO|Qu2 A,/QNU"ύ_\3ML:c 1U"߶~:Ж] SlqwLzu[eYFIG|m_LGk.0Stv~Yŋcp(>Q+zज़n ߪ^Eh>z8Á {)KdAHC]Ӷa+_!wD]'&hlfx`[KkZ#"md5}?Xa1ƠQXyP82 oim1?p.§p- ITxwqnJe)SHg EЇk?b3oEV٨1όNqm#hnIJ 6z5:;dH~jU0W?* 1M*f+ȝXe[IKԽcLc)sCjN M?q"+JDIqL?`>O忢Pp=tɴzɌUj hn8/b׸6Qr]ptSPXÒU #+EK xo R%+T(k$m4_3N.1ņK:p hzvS+`* R Y_ b&\Qj,+APeFEAt&K4DF ,/GCpFGgt?qSJkOֱs̞2K'm \,0rNeII 0:՘j[P1JnM@.z!~wS 66_Q:eygd*RvB/j3>FsoS܂~՟f.N0!i 26n%;9) ELTWu=剀~ v-? %wx]=Hui#Xt`QitՇ~-"gF5(ZQd@Qu\$Z']E!"IR5g<5idOa-gt:UR@:ՈU!WȉF9w<ߋlݝ+>kc/8f|7H*ܩ A[*Xr'C> FrDk4<vY%7Xe=72P&(Ao#⒒D]k`T@UO5??1}?}>\W+BO\#mZu&/1/tYk_8 32EWvo^آPV1`s$&Y*> @礔^/ @ l6!ѧ83V &fϙM'[ T) /ڟf&6o6g "1/]nͼfm.i y2 \6 -^lV%uf|G[?",tA}dD`rx Km%-a?76oE ,g|Y\=]DAɭ$7c6$;8U(.ڬ2s ?G]X?BV?ZeZ?\B^2MʎR{|ԕ!,[\?IoV$hŠ fЗݚFT "Iohr>(I5}[ lzG)WأWqYѹ.)p,%434(ق&%A'ORa:j(w̓VLڤ(Щ=#lcCf)U#."n oMC&pW~`_EGjIPbn5󷎴/Uz$$7~ASƧh-'OmhDrYMU>8=$&Ag=;ӯQ6zYTU474v`U `q~,>oY8q{ײ01h{0g"^dIK1bGͭq*Kl l]@;)_/ ٕ)ϖRWI`gB8}/*QxӃ*[c}#M$r߿tjزyq̻>f?_:^^U3Z=m<:"=,qAz(6z1Wِ4ݘ9G%Fd-D3ڽv鴚,yc|}d,[m\mtBt()",(ꕡS74,S:N_cK 6W![} Dq(3`, sb] #ej`DZ~yz!I~n-a#W?NE*itn{p7 CS8 쟏אVPTҘ\)Z$̱jQ5?yA\+4S6C\:]7QVƤۘ7_>@К&H(rD5;s@-0PmI}lxpG"n#4<>PuZ QG'"eX#%yQ "d^:b$=Z 逶ire|dXsTlK9PU"v]b덤'^ܖ:}q+{9MԆG2K6Y>PR*n z|F 3m +3- 6Ʃt,"VJ6$;gvB9~zc՜eҭձ{u#/x:'@g\Pjky8EZt @VÓ_H+=YyjnXE៵+J VY\o=v+~2 3/ `-BCm㿹Ј+%VnѹrhfG YHދ,3ӂN<#s(lL YHu"}8Q0ÁvgJHmٙ MO\;sX6)C?ꅩ*%# ue8s N㈅vsbϏ')z[Ӻ*.6thOCHiV ml]Xd,7"=4 ~%ͣӎZŃ?khBCVg:ڟh‰p1ױpdԐۡk^j$g`ꐂ22ǫ =ꂵ<5GQRWRΒ%uKLYnB˗JCpkQ3^x{;X,R>Ek1kl-Jj<'Da)خVLg=I &Ӯ |*i9Qk2jWPI~#Q5sUyFSߌ)t: sRb$JuYi9C+ 3ts <6q?z[oD2(ݹ/4N%'jR;yvmR<4X 'G瘐=%:Gbp2Ѳˊ5?Uq\> jh,-:5PQpI-Ԃan2IkIjjǐ姒!6>ULB#my)̬K1ʔ+gTqE ~w)'QbU-/oaЗOy!8\:+l `iLt)t|α+N#t+[. 
WξoqPxʹ}3E:C(W@Ch ,H[4F oT<@xX*}@5tg{LpRBjupn3cmWKd|m$C}YMΨx)g`ş?}[,8z b8`_=%N1ƴNY^F NDy}s^q$[~w^d T@E8'lo΃fMP~h"H0@[oz_D՘D4j i?*BxAM הÐ|NЦ-Hkm՛?ٸ:_eFǦH\xT)4x5Fr'Яah+{T -ֶS uf :"΀:9ތZ*"yy,vQ0.0[.3CZx|*@WҠQF3F:9P0Rֆ?ilXwU gcL؊D$ViJ`?d|JKeugS'ƚ%x6)F *$Di!{V>Anz&|3t3)?؊`EKFq温(tiEUBA'hH2O mm r Q|zUAAF*bJ4,3\w⾟+fQ @wuXjc&).ij3|O2|$r\FY`ifRT?C.;KPgBogQK9g!QԳt9i)VME91 pZ-dJ ҦՈQ8*2xG9dDg &ʻ-ȍl<&#Xjic%єMC:-Eʥ^7wRU$0BL?? .2Rio$$ّ.ȴ95&E\SV0)L!ENiP6k=&$~) (b 1_pm0s9)R^B)23?b`!|)@iDo[<I=t8% w]fTM2g@Տh3~~ ;= U.R`_.;;wOe$Ȫ)3сp4; 6ڂ9}9*ʉbh .ySڷ׌d,:e`lgV&*BB53%|q:TyZS--w|bP)єo| 94NWӊ Zȥ< hKTWF{U |Ty9R^g#"nqetRu}K0Q²**Y'!x0j]J~ m nS'yEZdzӑI<wݎ9 iJ}׌vpVۡi D4c$$1<]J%O5JmYxT9rys& sHJvrr96`4PNf73Bfk=UMB6b\Sl> ޳|$}e[RXs1d py(Z7va[r Zͷ)ej:~7aI(}?FoaVN^Zb?ESyApjx$]:ml{BwZ m^. ׊z3rXF#;"$_Weͭ"jGpX'\`cS\򈇤>5lX 3䫠3XSl::!8p&r9Kztc佧=4)&WeW GPq|w?;^S; ꤡb;G͡fHcr )0Ub^H٫:@dl"aY&Ӏ?V(bXB_-%.|@#G"l!_$<^Cnkx%ı?i{_PL!թUhI١Ѱ*qgFr9 kԌ!OֿaRkcyZeڇ'W{CAPN14<}`kh%#þy[jУ%ӆ6xa}\HųUoz}~Zr$^Q`)?Yƻ bLfF5+K:K;.W[Uh7ky.Ʀ_/ê# TVԂ*.tOp%4fi ͂/$FwЊx۲6'ԙg\[f]/A$OޗHUB!;f(3ۣoRH;Pr"LQxHZ1d Qfg̎:~r!=~Kcjm1e,kv~"ן:>| G\M:*/O|kI2śMe&]ZcgdtTw>&1iO9շJAƠ:~Kg#tBR$[2_hcc_=<*GLҖzr9 ɑc 伹Ao PͶl9鏒ӞtOo~=5FrܗsC2Uq“ OX^—HpCgL';t^3ǑQ۝N:TJaXșwiN%'H k>u>N$8jҶ9H@)Qכd sE:j%R%~<<>y,*"J]'?'5<˷""yQߒn6%vƓ$t Des4A\kOֆXyUKyo->pF`XDDU?Fe"|q?kP-ϑjuVC\Kk?`@ĖK}i(OLՕ'h3挋{P"lu=NҹF4~֕l=\a?Jcߎ-$4Fz'I:K{59,\6оpnzf%S.B삍x 5o6I2}')o)F ax|5ڂfgǧ4 +yW+?CL g,){uU(mPjn#֚Cshk-翓ZiSynp> 'uڧ0@NDKcV :20cCa\tssI 'SYjbdq+y/v^`{1SD* hl͓e\sMIwW/ޙkR(IJa/9~Oh$npMǞIJ87&dm;'Ui6[D t{[5`N>zN _N^v˃ZU+Y6jW(BNڕ_ Ӧ^ {GDuݡ̏ϫ,{]0R(_%x#aK^6D8%{`)Z$R"ZEwM1DNŁ[TOp\z+ðl8]]W\+7ڪ=jȻFn`-vq:%ay@"v'@Ro#P}2%9~V.eNʥ"70Ҿe`1RX` M&&#@WdX.2VPE="ݸ3gtb$m^e+/i+(߂JJ^jq:AJSZ2wwh>2 qFېٰg7@'[$t͠8PvBaMF2Ovj¤&.M*,ma=PHwa2* d("f @7?4KcJx]ESi`V޺`-1eJNv=d7 <^aھ5wC #|F=:= (/:>مNi_<>%#yZg%}'P>Xdҗ.TUo-߶i k~%b—-)9:'}JfXsw~} /".˞Ou&/ǝpuAN<Vy/leQ,qXC3jbpʉU+ ibS~Nb"pM*x`Xf7LJjh%e[}un9+ב ed_߸T7mY=]3ݧIE*f i].l͢o0o\89 쥦ȹjE/G[Wc Da mo\)ץEY`K"kdl?(eD0Eur׍F'+MdeNcT4P #9gUώ̹CW$Ny T(T[IǡS꺨roL]g_ b}ZH{!KL@\Js!(E!BRԷgVΘ3>QgpH=اfhxXCf5FH]&lBOL6F>[M',5nWY0>ŻJNQڥڏ@~OG)įrN/LNVo5~o>rª^Փ9Sx˲.;HmH'nE^#K.^#@ָ%˿uW tmEI,nvt-oAfTS(%]Q|_idXu:C4A<JNPr6Z^X2u*ӓhsyI~YcL5d $<VS/]A[d{fȴ>yDQhG>KSȩJRi:ڍ: KH XETl[w exU.E0]}Bxg,^$|Ցمݐ$ðoJ7W']ÿMyC7abl]TJ#LL)W &] \3 `??|zʬpm0@z/;喚6Da \ncQͦKȜZaj婼Pm~Cz(qo'TA9w۽=o ]|Eذ\+a*H)4O?]~T"Khvb0k18Rz͒&S)*l褙6`8` S-it?xiY0PFP*ZqblfTH!5e.czur&'ͨU=Q[cۡrNbۯ8u){QѱYZR}>F:)%/>QLgiJkq(/~bI@`lzBؤ.(la_m~d <2*}$ZvhiaEB0")DŽƲ8Mscajn99bHO)%{"/ *4uTFU+!`*vf3!SJ$>+Оhʵlt;psqLw{܏G+!Nuy!c(CzP+PLRdQWmx俢2ԑ# vr ~՘˰26<{Hņ![9gLYbN;.UkGQ_KS&|NtGXb՞P˷-3|6žJ[\b>Nΐj9M[yjo@#ۃ7y~lѭ5ϷS5K$_6Z8%OX3u54~ӗc%Hͅ<9ll9q~jGrP\ j+(!؎v  }{(ͣYJ@@r,v-K>1Vc,u^c?6N=ɝKJ8W9~LB8Rh:X>ŚKSUJ,@UY>ӊA:`}Ѡ3v X[M~CLͷU!GKS<6G8]b3O8JC͏_c5>1 Ss f1oLub'>J)yHK p"q-1U2pCTHRgk @lD\G Gq W*/p~Y($9RXy{w񌆫݇ A? -Peߛ#7!o%J<w %wө~JC> [s=OD#j/Q\ͻsJ3aH]߅Zg Eiq_ W2J+4l/G0'`~>mGW}NK*W=)MYH] @gN&:@g47LpZ#=[F6V>ڦQLR ^Nt8kNUs>/_;kPCt1VH Z[B}82us7 =Na-6kZ! ]]cGRDd,0+n[,7 [="N qH}z S6SޤK,.fj /QVmmʉ-=Isb<2E3\9w^d,!ti)vm,[S"1_ܟJngXȹZf4ߧ=34 ɕ8]zΣ?e=-).$q@QD+xvkKӼ !@ͤmF`{A[?Ǩ݊_˥^ܝ_|ol4%T䫠YeYbxB &%, mrEa8҃!~/˗`&SejlEz{E|q 3+waHs{@ b}YkP5b= Y?Fps)صMq,)4lf$0mG<=5po ^ilO7#E%UImj{OS\=/JҜs̒?@ˏa{PBܸXwQ&os,MUNUC^$5kT<3u?,2Cb]ӭYV>%j?xB70!^pSBNU+j(|^ػK3ϴ %I',q?|#xGm jyȡ RO>Z\W|:?ls45RσdY5\kW\hvs%jiZ~ʂw~e/.|eEH߷ѴFf8.2+ [ur+d:tFKS@ހ韽s^PUkӴ[.߭NBt$i_]2:\?VH >w#E&P3c@Dlݗх2̪֯2D<'IXC N,;6Ɨ; Kh'\*6?`497@eM$rFh'P9 ~<߱O16e {zЙ-!Ew;FW1ՃC㱼Fu]8FG*-"16U,eͶmK8 *W 9]g[4M5ԼB [apЏ'X?(Kq3+ƨ2qϋUZU)BA/ik)ʪO]?Ēu#y, UF fmnL.2@oYZdyGxȉ1tS5vNi 0fyҨݏlMMڣ`T%DG Vo:"$JZB,T>? 
F?c 6IO .FM:|% Dey7P#&sw/$ϏuH`gnwc=r`po ~<@9N'|*:N&<<+!o O~=:g{ʿ'#:ٵ#pǓ,[F 5gf4ƒeI;RtvnBѐw)B[JqPjI񤒩hQ&>/1/ o&Wd-o{[,nUc/:ۺmzܛ^4=BY`2~J光l!roM٬Tp@|KN>cy6g%iyyl̆wVy'ow^Z8ҢgH?ݝDJGQ#5 5 J|&dG2~awy*1ffE,w[e"[<s{p o o;E.x+`֐$H'w)$d.O@5T˛qN)q˩|WB9H~grH/rWqgn fH<##쾳5p 7.a'yroV9s3SI|l{R U,Kݯ{d h[$DvJoXTVc:BmV+P  ]7sk6P:z nr Р+Ge։8u b`P`(Og4 N]k)(6}oC~rQ=J7dU٫_V1JzosR ?ܼ\;řyt|tT^J8uϏMhoڻzw(\r_H52c'q[x 2xaГ޴!P0 6{EY?Fw`wBv4Ky ~qΘQXVڧ0%KW|W@iBL'Z&Eo>qJ=<~eB&L{9%1eFmyu6X_Ԭ&@zb6B>[+䰘* CFv3~2-z) mS8r- :pj7p 'DFI#rŠ(9DtxU WMbe%II LLjP[ 7E[pX9*(fAy r7&$& cO+H^O@=ӰoCA9rh(eIn֍5=Y/[YMS˧US ?yMlqV[ΌRhqM=_?36Ns_f ^]))%s.SMP)9- KK#8XW&rHFȒ[cp vuF KBY.1Ox 2׹eFy;pK3aUoVm_X_ $u,氭jQ=v,cƔT^:U.͘#gM*+gWʏ P6gR)(s{mlRRqRF =Ӆ'XzFڶ%q"u/s"ZFRB~v6.% 9m#0ӫiv )úƧ 1}JЏU!`hn|O 8N1 >iwF#|Qf C/% 3`w:K5Lc:8_hlUO>=PiRt y{wXԗR0dddOܣ  _,鐉cF*$/ ) qĘQ"2-G6D\ \Z{ؘ5#ԧfJ͇znUܒl8ӆ6^GbTCͪ,ZĔC>R[ꂕ;SKz$[]̂|4JVˬHTa?Ŧt(+#8v1*ᕕnﺘxB2mf3`VB+ٴeUeHV;dW7 fʳrE_eyL%LLVN: SqouD!i2rGN1 z%SjޏoQ ڌNxEzVr| lIՄfLwř0Ly ?ǔ1?#nF'W%( ԁe?+ ;|&J9YCT:nGR7 UhI[ ,brOsOe:rOIՑVN͍9 /JM ;2#?q%7ר6CB*`o-wW4 )N%TW9s;Loӈмl㋵mys @S<9?Q DyʺD[= 0}JjPWSC0m GƭhLDg+OFl9'܊Z0s( _>A_B>'GM7;u~Q``途:4I$Ĵ/hFwp%[Fϑt7F}η4Gu!o;>Ǫ54jGNH[`)Vc%@C|+*JV2!}ّ]ָ3#S}_QRC ܽqWEvP:?m"g\ %[v m6_Gn$ɱr͊"OkC*3c@WA b5?NH,%7FД8Sd"Nw,[wr@*to<*WԵA$AF3ZQHHA*gYȎ3w<{=Ap59S!5Ѓhϻ)u-Z1 ,]"8%,vDlK*MC+\7N&kqpB]*$飞5.D%船{&X`(>>էoNc攔``Wa%6µ#A~r_4~pCXJy݊(P_ @ H)rn%5W`*7٧Xk&Iz\\RUgYIff '?/ Sl->ki{<^Mw%HUZWT绨4j)cc (n`szW~OgZ\@>qQJ^C)x{7llQxUJY3^3i3\NRgg:+gx.O %'Yjj}hhm8mTmnQAg4zp,$ :pm$,WՅȁIKb;:; 0Oٕ4*%Xfs5Zh)8I=*{^I5{Sqy}0-+ԝ J획b V660ӶVvݯ#c-m-L_#+(VpO ~41MnNmѷ"i0W4{CJ@Ћ 2i&q^%{Usnd`Dgx'EA3;Bmv$PJkD8]Z,7-nU";Ý"3 R.얒.64EyZ#rPS (W3=5U+q}K6`>uhNE m)z:&@1%dbuOk<pI?&|:h'#gdT="){<%3pT#^!]㹡|;yi  f [QQsmxauܜ7:1e60 퓸ӗ8tW; xYLYGz¸ ?A-pRêQz8jhj_T`7֕ -q5(d3>SUE3iP0ɞ Z ͫO775W3;uJ>y2%zOu#]hi3_&L.̎Cwm0YYè$p@.,{cdw,lemtQ(yѬWoR}.W{#%]o/``A|ealdfS~24E4V{1-Uhƫ0k7W픾pVɶ hݓLloj^9] QUQ&->Xm./6g"+`Qfh_B#"wɨȫnV)>517RèrD,[Ѹ(,z=ȓSϰ:ϼQ F2 r : *LM}@hFQ|E DK!#5- o%oX [Lk"YWx/%J"#=52sSet# \VuW_fa-Iwab$P`ڪd gP8G?]5rHJnc/ [.y60>6H:?}AgE7`+k5CNJvJvY}qG(YT=q G|)y#Q9_ic:2t[Կ>RQ=)\JJ} |sԢ~tU2B!#UdqK&8Fm4s5Ѻq R\FbmF6}#f1w ~&NS{1SY8Mh'lpk5mRV !+n [x) ?B@ =@ڶMQ֯i; ݬ,RZ: V4h=:_t9IcZ7Qҷ @Rf59ƹ5E4~q.ifhAc֜7Lfwl"i".S7k3 =&v@?~TXWTc&BѥvF@#R G1;(Jui|s!VBU-H6kWd/6W;B+@8kn8Fi[ 껆!n[yոBXQ]GgUmc8F,MOa4yN9ܓĈr 7y!F/[@nIc "mK6U?>tx\'nD<5:g T%ꑢ&9u痦-mY2= }Ie(U"E#AT~vO4%bOh?ʲeYY_ܨ-?~fPe>T$-VUynS*y~5!eKm_BUdMww#qa{Vvl삲tÙ5rP/h4t*R䮴w"X7,%a dia6$V@Z",}}`p-+7* |~=YCzw:dL~ݵ"lG *oUz=dmq<=hʍpyqUft e>91NdkI8!+ ^ZۏK/HYS"ꐚOǿOvw'D'"*ٝTtiP(Y` 52rr7+V'@zIhJgw|+xũv;`})RTkB[ZcҷS $^ >6GP8"c͢L;VObHYuRT^ffS-2>k"[ "Skk ['y.Ud; v;֓ٔG>Lm?fs8le}(M` l@3Ey.col``GȆݲÝRB' - ) +.Q&Tս>Ywy2?ΚMomC%#Vk:t҂w#sjR ?*}J35YP40zMtm_P|\Ņƹ'mxCibp$8ګ*B 1 7O['rj#2 ZDD˜#UK\2,ٷB^;6a\7l(ysXi!LkDAamU6nËi*0o5~X缼z3tU/$H9MBY\| 'sz%ڮ3" d]*?ẚHDW. (oI|?wH j'0h̶k#8s_u}h(<:AL3gb;;%F!{ۘtq}& O[uf~}>5%.ZsO^? 
hI]@ڔ^~e (-zD6u~N%_#2Xs/uAddB*C ,}f6UNh\1J"] ADAlR[CcCDjǏb2%Wriao:ED5u҅t~00%$Z92>ȕxv*U.wPN{e- iNh⠕8\3,H֑1|Gď}l~ફZ J0MF|]mlfviȞC)³1+`zRQc\}+#+Fx$uw!5sgrbN2&hnh@5zLn(ҠށUXLR GxoƎq9dɬmA8$9`bHc*tztM'Xz/WȔgƤ޾=Q(#ν>*0EFU 5\5;ފқVMf)YE;\6nRX YG'i`tU Z8!C'ʗ8.( /Pd}[S{ؗs_1HZkW" GI<:pYD~HN we>o6A:7U]]Q;Iah:K>TXh샛~TX|ڪD"PY* _"9s 66a:x-1Z!&Iqс<.$@Yc|ֳUTF]sQ9}.v+j4le3mMk@m2mCJ#xވ} a Vz]S5KnPGC%iFDKŐ]vlޭÚ%[f sug7-8ϧ;!>M0Cz/c/x`=%Ul+Eb PEћ~+ >Q$0?O8D)ĐY&+}BkeY00~-mIVL{IMr;=n̸j2-mHx~8Y#}yvWc#NXa6>Fs@xARA.0(n/]#5}#MA8J!ZYDk&΋9-нEreق7Ra#D5%(]3 |g4B]uZ"'`$Gk#@Fm]wJVr)$Td`"B)0elQ ZNɪyՖ)r݁#iD.G'Gb;5(0ŔSܰp4MdW'Jlp+FcA3FǨri\Rigs$!39gLp-dJvCɐ.6L33 85":xeK{O,#*^igbrw%܃s> | |JmV8 yJ,h'If}.>X&[| ʿfq<.qigH6Gc m]61âhĠe~ѮY.><-g숱|9+fG^#?W@>=8-f+w9}pARY&A [lDY?eE,WDSɝmյz7^HS0vZx[M:x$A<Й]grqjkfi hΐQyxW0pghy̨:)Pi [4|X6'$ 3"#ןلB/k0R)2K_0{<"!!Iٯ>$@a|пK5ߑ:q;ųĒ(w ٍ (VӗZ,=6>{=>rj{#H",ϼe":AP[ | ʹsKieK$DB=%E"GEتB$ߪb2F_xի aÁ#Egނx:9OPxESL]uMR?b21d.٠pWZ"wٛ ;`l 4DnJe$6eɋάTKDdvrsg %:*5Hڂ7tq.=<+]{ڬK Pց!c?‘0*Fy]n#tiW^7ˉ rftj_.YX5BWt;9je6_R a*=ZSN>&Bϧ@s:>_xc ! &.qzRH+&=l fȳ5:SBdMp "(ټɝR4n=TQ5=9) !._&;[0:GU*QzK9v_^סq[L^ ^@)qS"HW'; ff4mJJtfkOQ\E.ΦK4PuSvQ!O+ԾY()i-ٲמ%3l0lFETYG!MbMGoM021ϿyuW3G]̗l,(m( Wv;7WMo`dTS4ꢋ'Lz폊C~ S  jM1qo84]@=@J#ر"yLlne\5nu]3v-% rc?[A2P;Ri08f;\ `u bCiK#XuTk !%ԕT;0ؔ93d{J~}+y pP "2/N v'ޙ oZv< eiSbÖ v=i$L08Fc!dR R1x8<3O$ps 0ͳ]M zA4Ԑ͕h_?R3okla*/%eugƬ ޤkLz Nw7U{j,"]1ƪhn}͈Ͷ"u|etƴ9"<U2^ڕJ3#~Ց(K_ zZhg:ܵME:E,ؽ 1ԷUz*nEn={j {3I:}9ӅSgU:~SSW`*TOjR`bI;KU{wLAdO^F'Ddԩ=$n6# HOٸt.c՞j,h%W7eԨ#ϪTIGM\i>9r a)\MwE%ia/䪬:)0fs''JWL`1Smjy2?="<]Z?nD! ~%Dj]DKiKM;DN8[m)qB&1_ѧ6\W58R촆yx&5\%~TJ*Ө螒ss~'{`ϭB7RhgG^wׂoxnj[q+X7C"zDt# JVwbLPlzI7B]fjx*nP(ȲoK!!X$פ(y+E-l R$lY&0-\^=!FI2J&hkȡm.wڻ(bW#.Ie\PaJ]z;C=([H:/vO8X5R̯?]̎l̓9iVPyAJU+ 0kzTq-K2ER'`H{# }'?(b&CjVOT>Vs!Mid^.S9F t@ű EE(CY+T]R?h CdN:{MP F-T:M4Zr*"dtZ^uiFӝYrѪ4Zj7wuJ0 3a)e͇CN3q;5n7u #ܼrӱPkFbhtMWLJ rA˰M51 a;X3`]tq툇*ʥ4uarw1%I257UALyd\>3=aEpK=eLE"ɥahh6RDBL;_JġECB+%ɃE 5֨t1eiE߼i8J @Fʜ]-X&=mW.53/%kP~*JY*~(Ҥ蘗Bd*`yoH𔄹88INXz"?lH/g |x{)'WnBr1 C۽zi5Jկ}e@?tzAfI7@c4ΓbR?qb$(fȩӸuQk5PѾhs^&jgEYK(hْfj_ħ4 :e{Zڥс2q<`.jNI njast3ĮsWa`i@1Ƽ*Lm1)#`ț@2>q׮!@ڹ@<ȪFjls]a yqrT,-)m!5H [] EEUv`nn]Yd`5 մGAZ8vUOd[#r㱪 &-jZ&'Knh?IF`yF؂"L%ǃ%t9IMݙI {EfIㆎ#( fk17o[ShI&␒m̱NsGDW Oz`e75,3"Qӆ=X~ n+ tD"hJ)% -b?S) 8,qZآ ћ]T]O Gr#jW$4KBd} X'&fO]0BQ}TAsTYh45j3I;Ģr)JhG/]|US!|*=0V%p< 0[yJmQvUKGM/PtDɎsȂ'zèUzj+:~せ׊ u&fa4%nh8~zݙ*4J{s }Che)Tِ4+{HlvU5>UdTLstjJsHf[=~+YhIV|{ 2UDs/љ"Gb/x칰aB]D};66jvTО)8}Q b`MɥUb?d;4XDb#We'8ĥHZSu 耞wߛ#ڝ_5, {YϕA NJx(TL<(KtN5ӥ)]̒rrƒ4\֓K d?鞦<ׇH_F}4:=D)8 N[mΥkF˿n&)B~yV$/qrD(%'si!FyolWVh0"ehud=o 91HPOw(d|( jŃ1uѷdmfi;A3w!Q%!0$2<]D FniJ0tI'-GL'[}a,FpkhRb#40̇l|ЯGXu?hGϼF2Yo^ @KF,\E3" v8OJGᾠ q,HUxyۀ*8tboas<5*1m[uNޗW &4dQDpIG2aiy#NUCidJd%Vg$J0lypi6 mMU2R53mvcUuEFs8 EҢ 8CSk8Pͫ/SQfeZ)ۭCM"9tIBS˄%[_y ;S 9+gRD:]ܛ[ź)5IS!i 69-q\S8KO<`?c L0^#&鎝H.u)XKIRC;[Ooxb]4Co@D>{ Y*YXvLeM#mZ /Ltf7Z=KepT 9d#XLi{;hW;R崐4&rǷh푹^6*!ƚaj鋫ʨO C??9 L!:*8&CDCUf94~c8t݇m/ #y6 3 Bt@[V&Mϕ ?9ګxOkLQdxKl~:|zXK4Efor{K0ߜRD2I'jO`儝P}.OݎηI|Ei?> IB'!Սn|`̲ @ve<֪}=hm Zd1rk1 QQLƒHϰE\U r `"#ij=e6!SAxhMVUsy 6n)=Qk&yڢk֋?EpI?&LF 9WeJiPȳF)JM ^ w/oѳ0|tvЭDzfsV;'7 ^uHfnæ)T (:X?zp{sIEc) yen"q~$bQ]·q6N#ݫFەےfvuڠHQ*5DmhBBt|/SD.] O6Lp _VClS.VkOMb!NI}mVCA~}bݒ8. 
ٓL>$`jÁK4T*5;ѯ/@F>3o=HI>]5UqD_ #@evwu^_vy[J-ڍ+Cqu==Y;FmSz^#vs%nۯhzVƉWE/2c|{Tۺy9nRԆiE69&=֖ͼcEcuʿ |νSs\-qכ=S>I[79{c]̦wk=Ϯ?ݴxKj lĻMI[eł]w]Cȧz0fLGKRNuxZٯLkYq9V_%@^1ԢӰ-8q}]OyXkOӫ,n*EgKrZcd{U1㰰vZmi1Jizג1`_{Lmt{ m-NJZG/Kxr耶[sӦyNMN l%q8Ȅe׉;(}(ͣJ^jڈ[X&zzRƱclCnZ7I1l]b.G_==Dl8+kkVh^!cZ,k%o/+p}ߊ6=yz.vrzq}* kl=UX#qW+]$\?}^ HJy 9Mo&#޸.0eAVm#7n>VK\Β>Ӿ'~:N\B뮗 *.4ѻzzCp݊I7vܩKͮߚ 8uR;E{#]o\P?Mp{a,43t"X-;]p|PF$CF40Fy=.@Ż=ip26k houV,Mڠy[Ǜ^ŻMҿG!\"şǙ-v)b,Ξ_5wPΊ=/qS.y=a&_3A~3@nJu"'$d~7-}Ĩ K>7\n*rYa accݭ2'P'&/ O$<~ qX#c\KzƇݻSXfw}Q0|?vswqFu5C?.pa! Fcg ݊}y6t KO+{l6hjnx.BeC!&Dd+Կq6Ңc`5=VC舮"3Dgs).G0u H{Y8"pxskS 5S1x|;@W{{?=]wq+e'3Q$Ct 'IA5B kcySiUvvN0-"!lTgIW8`y6ǐ@ XNJ>քkv^s3r ^AsWq,ӽ m: X@3۪$3)3i&4A,jS+66~q,C`f*:oCSMk!Mrܽ`F2-dmԁ|y<—+YgڅkZؚ: <0q)'3 oYfh96YGPuq'r6M ~ҫc*Cݺlc,m0gcN$UfJ1vm,t?# q^8x^k-XbSiE.[0,VWS6.oȎ z958A/ϨȜpC]_2#l¾h#2&9^s9%t+NA|HgB'i"h^n;)V@M>z\K9:Iӛ!4u?Y۳(΀Vu6pjΞbdف|IM@ܼ8 %lsM$~6Ν N4 N&M!*8^21%2@Cm $SM!87 2698U9ƨE#%a =hN"J$ 0iw K*f^8@*Z L[T {VOf ?~r~-V-rJ?TJ@  A2UU'wCl0PcX N]ォISﻵבWΉwBg'8zVi]|h/7EDa^d/pVvdT8707vv^UKx9exw.Y!P 5cD~)q+lV@jcJO0[zC^=Th*^(.-&ד^44^3R#1zԏG3f;fmБ[h>D@ E(ϒ*=̬U-LK"XeVsD,lpu-OQc4RgCl#'EE݋ NSqyf^RmeZrp"T˹ޣ8>izx=>;P5]2[¨3̲$#u"J "hks6d1Mrpwd {.\ę1-i$?&y՝C0)iI.%Exvr2,lxCGB4.矊mLy9yaGK`dQwt"\,&~G#b7tc7/YtVpT\y/ҋk0xfks1$]s N` TťwӹұhtrfHZ7}Æ DЀ)J&N\&qMQNAc:]Ԉw{1 Y$ZVE*crcs\H~5`NKE2~ow_,F肮?,(%9=FTpgWYAt{:NN'aINPlid}2ŰfvOgrzI*ֶ&T >r <,qKae\(пE 2!omJ}\Hsu&FDh Q | ¶]Wrb0-p r2H3sdb繱F'h%X@)uKsX82zpw [P \eN2pvNQg5I*"H+)?ȒW5I(-Nc<L ^2]? ɺQ̄"RIKP^ӥak Nz?\*I!]YOv2IZE gS; x=A]@\g gp9YfvM,HO+ %TcYL @MZTp%9#?YEd18U4ELz`.c3>\|n9a$٦Eõk™?pEAQZQ!F%rNApa/b@Cג9+̺K蕒|q)o[SYkF^3 Evˊ&H9du Ŕ8}P %[g$5R;PQ@2UXY #`~SRThV.J,f ojerj`ՠ"4 Ѡf ּ'Or,!K@L srLLij{O7`v(ğS ;7IQG ~H(7q.m*_!; +T Aj~ *uWbYR>'Lhh,Dųz[Dz"C\D;$='*l] 7C_݆GD7-ەz(d"5R\+Nt9Xl69h𭐃ۤw!f9cS.8bҴ>RŞvXzzƂ&8=G:+%#ExW lnJ)X * .@Ў;{S–,'?ǧnD98D 9eP&\_}(ݡ,Y~w+N p+DkUUVbV4Q']cay֪%=O?aE9;lX8acɁřmЎ)3Zqr c\R>,Ehrn"8HC R ź%Yh{SE3eVW$^hg,H?>z*=[rt-,hmBKPaQ?ME湅3`Q"RH(!>xK>"eOFΒe +#zF !x܂CW'J4/Dn^*ԏ^(zg&\IA n|сFb ⧔_lS\>Gga-Jڀn`֨)9~N"-ʎ-H3'ps8 h8YMEih){*HJe;脣X-ql5xHd&$y^9N6!3Or4_Owi f)_&P(IJ5˟Ӥnd +dI݌>* TA%f5vd_Պ@ঝO[tx=]TkMٞ;`OP\&k)1bgzX 7)$i:.m!Z^7/jj,HCx>n4[_2ͱ9P{Sɩ8 $8d@%UL V.?v1K{6$C *ZBҺx/.23Ff9,#%Vj[H BH1eqvS<%VSHqBX8Hr&bh "/YJDXFw*왶QQrQ{B"ClA8?;W|ZL_2*i lQXcbJ_kzx"F!1`&`/p|s,0#;=] W;PT X;:"2b9n U&˛.%I1z(~tP%HOմu"xI <&EEXyNHv)p˺a}ݙ0.R9IMoយaSA 8u.鼕a6.]2?YcQ}]KUDfqP T<7)O" #oM>{1iM7I?xci^RI'?Ω.:,&A&3iu,7Whf'#5)#m>Oay q`pV@6Tw53:%uӼ-s&K&q7o sԸD!r;ؖ!*VuaA3^%Y .; zǰn+*M06gdp:cPequgJQa5q *kQWB/@^UP*EݾR=J{;jZ¢c 76mٍia?Ev!N$'Y?}Wwe;s݈A♊ 6oB, VLsk";#:ǥ\Gl.V"u<jtƙij$jhmbi)^A&W(U{gEog/sCTW0uuթ,{vlF1R k!=)R9.*r$..?BMȣ.Bޒ%V*wXT\ t̼j:{$[D*?|s3\q, G uNrDqX*e}0i MY1'^8=砶z=nG7A3Bqh{ pjEߋjt6sԧ]_7Ty=lǚiqp]?$*OW0}B~e:ȯ7Ҋ4@pHrO.~rg#k3ΰ̯H>)@8j쯩,Tku)ےA}%ٓw< >闻6cҥ{85GQvڀ&JsiVζ#8]*B&Srz3[a_\ޏ ;J\Qzn! 
+[3!qL.nQ /eYI@wWRoLUQ x6E} ccN+8E~R/ ]- Dt'n|H=a S,e~AT\)*Álh3k^PY9z%zv̛i 'w.[T%Ae&Б ^`X"gJ2,D$ "OR&$f' [29Ex(O6:M:gWĪ x$?d>ݥ38Ei $mtP8=O3.5MnŅjZRpvNg~>uEkY)!Oͨ{KG:+eHorߤYJxS xW9)VI(['tnG;\Ǣ4OC]AqqαWwIyzpZA#d5C:,E=xNl7tбݦS: DZ 70ܺlg@x^{kcrLҢ+k=ώIE40HnjWs Qc:NKj,(Q$)Q,<Yyf C9.P 2H{, r~+'=dKK0 ivRoKEdtQUHV1 ͚[~E-7[8N6RH/F fEsqosLe# 2n & nq}7;SHu+AQH,sh^ZF_.92ݖz m MAB?˴wXVaTxǴ-4xF{0;rV*&؞~f`a Nu&5\k⒀[ b$zLw-soe5T6j&WGz㙝"j7A0ŅvC'4c#[;O"-=ylDF#=SE ʀ~{GBzl-'Ն!!ΖX-P&;ivÁAlIv܈ẗ́JHJ4ay $)&% 4Ns;ḘFЋCOpǃs Gw{ݝ/M+[3p[uZ]?Cۯ.r`aB0_[9mS qU ))7ļ2;Y5<Ğ, XLJY/{R`!:aT!=ͫ  8/%ˏbdpz#e<=ںe{VGB,45 V쾪l;0h٥[ݴ0i8 ڜ4~Q,e}< ;oą w|xm<3hݽ͐&z,9$/Zdh>eVʰY|W_/:M=6ZxlL3EC, M$r:6v/p׬|_v,s9`?g {hH ^&Z5],UQT S1"1,a [djfi-ӄ-Hw1K#m5+r+T]ծٽƖnErg.<Hf=#_̗`'Ơ>w҈bTq )S˞8,<дҗ,Y  )'=닻xl29DΎP7Vi|M}i7j>C)%ؓ G0⌬Sne챃XHRg2Vb虘hdZm6@n*5tÇ_,IK(> @pTD\Kti w]ɒD-2PISyPjW UL>y:gY y*ڍ8e`a IE7P>ԟJ}=a~gU//M˨`}a{/R[6W12BK[WKxI.;RphBx{K֥f.n (#KOmXet~dnZZ,gдYhL/1ox"Ɍe2W3x*+_r`B`~%{H Cv'-spX~^RVc!DcZ:mFA؈뙇t:kt GZ067L(3X6RK}@09Α%ȋu_q卵/!$@iĞ^1P#K7pFFoAcf2fp异Xe&=ȳN ,9Vf+\\Ά@%lg /Y@$ iv?閱pH|lt4$iPJ8Ne({xnNS4iT~fU|_=yFcԚx jFn1)q-4$ǭǺp%]_\a7]7uM¾ۺ7PmE7Vug>JwdYaOZpjsF{zdK4索ʟݱ)vM  Nvk'ns2lbVs2F֝ݢ:}?] lpGDvp8x; 56N|#7tS.l7 ƒDc3`V KQkrrb_W2ӿi׽RB[ h۩7_-p =x ϼ;=dka\&"\׬`|2c d>17^ng,M3{oUc%x͹9.0D>otSׁפLxscԬHtJGI8#E6 t;f$Gj]])}M=3`6R_աH:,i?\1FZl}ô$~L,?b׷[9"T%tH9Nv(C8OI!ό.x1N=4*Ē/$yĽ?n7` ƲKQhyTS'Z7Kz®aS4~d^S_w vs3Si05šL> cyHb{8tl]wա1ǟU_!8CNX'vk\Bo#KKgɤ-VO[I{Bqad 2{3TOtV>Dbt\]4  -V>L}8@Z?"Z82w:iļx>ΰU K(KJ/~,gTd2L>m j35Hg46k'O@ ,Z{2O@lo< z´1-=SImZƎ#17OEv~OL-[z/R:vذ۹\-gSI~k5p"bIs-W~Ò3)+8Ѵ-CciuT/{vM9VLZ)S1{CjC.wMU_dU=6V5$r^h.̿q'$|e $!/{VϘ,Nl= zK,{@ u!Ur?) :+1IVSWL]p) >J\eco/data/wallace.txt.gz0000644000176200001440000001117514330613547014534 0ustar liggesuserse[K:*rK~hgԤzi3 :/qHI?i??p8\_&V7o/lNoDȦC.u脶9)N\lf 8r8ZB3F LN8,Hl StO8qM2oy\3f9We4ޜ f1uy{0\n4Q}00!GL6h.nC¶?~Z#4 g@f趄)_NAbN='\86pݵxGpsjykr^/`ϡB)0!"ƹ49t]8 県pЙF}3~A oL:gN~ to2Ǫx>XFϛw4&w* 7ڋLt8Ŀ.v2aT6}GmްەA':"Ph.<"fLWM>̅Fɧki'rԖ ԃ؆b"ދ2e?*@̃8M#wM_̽A_ͨ7d0 c+CyͰnF(a7L>Vp qBt8\;$e0gu(?~"x~(lH$wӅu+E8c/8SP ? HW[g,{TyS(/L>(> ?T/^Sc=t,Mܙ{ O\ 䦸aYvQJepL@¶ZozUu+p`/bM|@?r< o my=lgM V^壆]z@[Br[* =Bx{@^?]{!0%v_4o9)E^- 楑eZj2T䥞ndR7#Apaљ9(Z[`q]C#FXК, ~29bP`sq btu!ērF'#9#,K:䛚&:qB{P*t{PG[/YUsDK|2%03St`JHӏM E"]M1pq?]:\cD}r~X P_u{C֕u@Xy 504*R&N;r3fat9b16#),9s;?)ۻs~nԘثL 4r[FQ^0i02jWaj60˲!hf \f+ׯ̰Q Cm2[iuct _1fzLct@ P,j>{0T:]yNhc)RMsdh[9{jnru`V;rV;Vwi7+酧@mWK ^TQ[} s#lk)<\W 7MyV]V4w5t`Ul<%r#_ŋ,3RMױ7t?,%^I8e퐥Ԍ9O5xVT!qNC!ouK)wA[4-?$ du߀Q~|OL

eco/man/

eco/man/print.summary.ecoNP.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.summary.ecoNP.R
\name{print.summary.ecoNP}
\alias{print.summary.ecoNP}
\title{Print the Summary of the Results for the Bayesian Nonparametric Model
for Ecological Inference in 2x2 Tables}
\usage{
\method{print}{summary.ecoNP}(x, digits = max(3, getOption("digits") - 3), ...)
}
\arguments{
\item{x}{An object of class \code{summary.ecoNP}.}

\item{digits}{the number of significant digits to use when printing.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
containing the following elements:
\item{call}{The call from \code{ecoNP}.}
\item{n.obs}{The number of units.}
\item{n.draws}{The number of Monte Carlo samples.}
\item{agg.table}{Aggregate posterior estimates of the marginal means of
\eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}

If \code{param = TRUE}, the following elements are also included:
\item{param.table}{Posterior estimates of model parameters: population mean
estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a
subset of the population parameters are included.}

If \code{units = TRUE}, the following elements are also included:
\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}

This object can be printed by \code{print.summary.ecoNP}
}
\description{
\code{print} method for objects of class \code{summary.ecoNP}.
}
\seealso{
\code{ecoNP}, \code{predict.eco}
}
\keyword{methods}
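% Illustrative usage sketch (not an official package example). It assumes
% that ecoNP() shares the formula interface documented for eco() and that
% the example registration data `reg` ships with the package.
\examples{
\dontrun{
data(reg)
res <- ecoNP(Y ~ X, data = reg)
## print the summary of the nonparametric fit with two significant digits
print(summary(res), digits = 2)
}
}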
eco/man/summary.eco.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summary.eco.R
\name{summary.eco}
\alias{summary.eco}
\alias{print.eco}
\title{Summarizing the Results for the Bayesian Parametric Model for
Ecological Inference in 2x2 Tables}
\usage{
\method{summary}{eco}(
  object,
  CI = c(2.5, 97.5),
  param = TRUE,
  units = FALSE,
  subset = NULL,
  ...
)
}
\arguments{
\item{object}{An output object from \code{eco}.}

\item{CI}{A vector of lower and upper bounds for the Bayesian credible
intervals used to summarize the results. The default is the equal tail 95
percent credible interval.}

\item{param}{Logical. If \code{TRUE}, the posterior estimates of the
population parameters will be provided. The default value is \code{TRUE}.}

\item{units}{Logical. If \code{TRUE}, the in-sample predictions for each
unit or for a subset of units will be provided. The default value is
\code{FALSE}.}

\item{subset}{A numeric vector indicating the subset of the units whose
in-sample predictions are to be provided when \code{units} is \code{TRUE}.
The default value is \code{NULL} where the in-sample predictions for each
unit will be provided.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{summary.eco} yields an object of class \code{summary.eco} containing
the following elements:
\item{call}{The call from \code{eco}.}
\item{n.obs}{The number of units.}
\item{n.draws}{The number of Monte Carlo samples.}
\item{agg.table}{Aggregate posterior estimates of the marginal means of
\eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}

If \code{param = TRUE}, the following elements are also included:
\item{param.table}{Posterior estimates of model parameters: population mean
estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.}

If \code{units = TRUE}, the following elements are also included:
\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}

This object can be printed by \code{print.summary.eco}
}
\description{
\code{summary} method for class \code{eco}.
}
\seealso{
\code{eco}, \code{predict.eco}
}
\keyword{methods}
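% Illustrative usage sketch (not an official package example). The eco()
% call and the `reg` dataset follow the examples in eco.Rd.
\examples{
\dontrun{
data(reg)
res <- eco(Y ~ X, data = reg)
## 90 percent equal-tail credible intervals, with in-sample
## predictions reported for the first five units only
summary(res, CI = c(5, 95), units = TRUE, subset = 1:5)
}
}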
eco/man/census.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/census.R
\docType{data}
\name{census}
\alias{census}
\title{Black Illiteracy Rates in 1910 US Census}
\format{
A data frame containing 5 variables and 1040 observations
\tabular{lll}{
X \tab numeric \tab the proportion of Black residents in each county\cr
Y \tab numeric \tab the overall literacy rate in each county\cr
N \tab numeric \tab the total number of residents in each county \cr
W1 \tab numeric \tab the actual Black literacy rate \cr
W2 \tab numeric \tab the actual White literacy rate
}
}
\description{
This data set contains the proportion of the residents who are Black, the
proportion of those who can read, and the total population, as well as the
actual Black literacy rate and White literacy rate, for 1040 counties in
the US. The dataset was originally analyzed by Robinson (1950) at the state
level. King (1997) recoded the 1910 census at the county level. The data
set only includes those who are older than 10 years of age.
}
\references{
Robinson, W.S. (1950). ``Ecological Correlations and the Behavior of
Individuals.'' \emph{American Sociological Review}, vol. 15, pp. 351-357.
\cr \cr
King, G. (1997). \dQuote{A Solution to the Ecological Inference Problem:
Reconstructing Individual Behavior from Aggregate Data}. Princeton
University Press, Princeton, NJ.
}
\keyword{datasets}

eco/man/predict.ecoNPX.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.ecoNPX.R
\name{predict.ecoNPX}
\alias{predict.ecoNPX}
\title{Out-of-Sample Posterior Prediction under the Nonparametric Bayesian
Model for Ecological Inference in 2x2 Tables}
\usage{
\method{predict}{ecoNPX}(
  object,
  newdraw = NULL,
  subset = NULL,
  obs = NULL,
  cond = FALSE,
  verbose = FALSE,
  ...
)
}
\arguments{
\item{object}{An output object from \code{ecoNP}.}

\item{newdraw}{An optional list containing two matrices (or three
dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
and \eqn{\Sigma}. Those elements should be named as \code{mu} and
\code{Sigma}, respectively. The default is the original MCMC draws stored
in \code{object}.}

\item{subset}{A scalar or numerical vector specifying the row number(s) of
\code{mu} and \code{Sigma} in the output object from \code{eco}. If
specified, the posterior draws of parameters for those rows are used for
posterior prediction. The default is \code{NULL} where all the posterior
draws are used.}

\item{obs}{An integer or vector of integers specifying the observation
number(s) whose posterior draws will be used for predictions. The default
is \code{NULL} where all the observations in the data set are selected.}

\item{cond}{logical. If \code{TRUE}, then the conditional prediction will
be made for the nonparametric model with contextual effects. The default is
\code{FALSE}.}

\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
progress report on the Monte Carlo sampling from the posterior predictive
distributions are printed on the screen. The default is \code{FALSE}.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{predict.eco} yields a matrix of class \code{predict.eco} containing
the Monte Carlo sample from the posterior predictive distribution of inner
cells of ecological tables. \code{summary.predict.eco} will summarize the
output, and \code{print.summary.predict.eco} will print the summary.
}
\description{
Obtains out-of-sample posterior predictions under the fitted nonparametric
Bayesian model for ecological inference. \code{predict} method for class
\code{ecoNP} and \code{ecoNPX}.
}
\details{
The posterior predictive values are computed using the Monte Carlo sample
stored in the \code{eco} or \code{ecoNP} output (or other sample if
\code{newdraw} is specified). Given each Monte Carlo sample of the
parameters, we sample the vector-valued latent variable from the
appropriate multivariate Normal distribution. Then, we apply the inverse
logit transformation to obtain the predictive values of proportions,
\eqn{W}. The computation may be slow (especially for the nonparametric
model) if a large Monte Carlo sample of the model parameters is used. In
either case, setting \code{verbose = TRUE} may be helpful in monitoring the
progress of the code.
}
\seealso{
\code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP}
}
\keyword{methods}

eco/man/predict.ecoX.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.ecoX.R
\name{predict.ecoX}
\alias{predict.ecoX}
\title{Out-of-Sample Posterior Prediction under the Parametric Bayesian
Model for Ecological Inference in 2x2 Tables}
\usage{
\method{predict}{ecoX}(
  object,
  newdraw = NULL,
  subset = NULL,
  newdata = NULL,
  cond = FALSE,
  verbose = FALSE,
  ...
)
}
\arguments{
\item{object}{An output object from \code{eco} or \code{ecoNP}.}

\item{newdraw}{An optional list containing two matrices (or three
dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
and \eqn{\Sigma}. Those elements should be named as \code{mu} and
\code{Sigma}, respectively. The default is the original MCMC draws stored
in \code{object}.}

\item{subset}{A scalar or numerical vector specifying the row number(s) of
\code{mu} and \code{Sigma} in the output object from \code{eco}. If
specified, the posterior draws of parameters for those rows are used for
posterior prediction. The default is \code{NULL} where all the posterior
draws are used.}

\item{newdata}{An optional data frame containing a new data set for which
posterior predictions will be made.
The new data set must have the same
variable names as those in the original data.}

\item{cond}{logical. If \code{TRUE}, then the conditional prediction will
be made for the parametric model with contextual effects. The default is
\code{FALSE}.}

\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
progress report on the Monte Carlo sampling from the posterior predictive
distributions are printed on the screen. The default is \code{FALSE}.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{predict.eco} yields a matrix of class \code{predict.eco} containing
the Monte Carlo sample from the posterior predictive distribution of inner
cells of ecological tables. \code{summary.predict.eco} will summarize the
output, and \code{print.summary.predict.eco} will print the summary.
}
\description{
Obtains out-of-sample posterior predictions under the fitted parametric
Bayesian model for ecological inference. \code{predict} method for class
\code{eco} and \code{ecoX}.
}
\details{
The posterior predictive values are computed using the Monte Carlo sample
stored in the \code{eco} output (or other sample if \code{newdraw} is
specified). Given each Monte Carlo sample of the parameters, we sample the
vector-valued latent variable from the appropriate multivariate Normal
distribution. Then, we apply the inverse logit transformation to obtain the
predictive values of proportions, \eqn{W}. The computation may be slow
(especially for the nonparametric model) if a large Monte Carlo sample of
the model parameters is used. In either case, setting \code{verbose = TRUE}
may be helpful in monitoring the progress of the code.
}
\seealso{
\code{eco}, \code{predict.ecoNP}
}
\keyword{methods}

eco/man/varcov.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/varcov.R
\name{varcov}
\alias{varcov}
\title{Calculate the variance or covariance of the object}
\usage{
varcov(object, ...)
}
\arguments{
\item{object}{An object}

\item{...}{The rest of the input parameters, if any.}
}
\description{
\code{varcov} returns the variance or covariance of the object.
}
\keyword{methods}
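% Illustrative usage sketch (not an official package example). varcov() is
% a generic with methods for eco and ecoNP fits (see NAMESPACE); the eco()
% call below follows the examples in eco.Rd, and the shape of the returned
% object is not asserted here.
\examples{
\dontrun{
data(reg)
res <- eco(Y ~ X, data = reg)
## extract the posterior draws of the variance-covariance parameter, Sigma
vc <- varcov(res)
dim(vc)
}
}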
eco/man/summary.ecoNP.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summary.ecoNP.R
\name{summary.ecoNP}
\alias{summary.ecoNP}
\title{Summarizing the Results for the Bayesian Nonparametric Model for
Ecological Inference in 2x2 Tables}
\usage{
\method{summary}{ecoNP}(
  object,
  CI = c(2.5, 97.5),
  param = FALSE,
  units = FALSE,
  subset = NULL,
  ...
)
}
\arguments{
\item{object}{An output object from \code{ecoNP}.}

\item{CI}{A vector of lower and upper bounds for the Bayesian credible
intervals used to summarize the results. The default is the equal tail 95
percent credible interval.}

\item{param}{Logical. If \code{TRUE}, the posterior estimates of the
population parameters will be provided. The default value is \code{FALSE}.}

\item{units}{Logical. If \code{TRUE}, the in-sample predictions for each
unit or for a subset of units will be provided. The default value is
\code{FALSE}.}

\item{subset}{A numeric vector indicating the subset of the units whose
in-sample predictions are to be provided when \code{units} is \code{TRUE}.
The default value is \code{NULL} where the in-sample predictions for each
unit will be provided.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{summary.ecoNP} yields an object of class \code{summary.ecoNP}
containing the following elements:
\item{call}{The call from \code{ecoNP}.}
\item{n.obs}{The number of units.}
\item{n.draws}{The number of Monte Carlo samples.}
\item{agg.table}{Aggregate posterior estimates of the marginal means of
\eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}

If \code{param = TRUE}, the following elements are also included:
\item{param.table}{Posterior estimates of model parameters: population mean
estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a
subset of the population parameters are included.}

If \code{units = TRUE}, the following elements are also included:
\item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.}
\item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.}

This object can be printed by \code{print.summary.ecoNP}
}
\description{
\code{summary} method for class \code{ecoNP}.
}
\seealso{
\code{ecoNP}, \code{predict.eco}
}
\keyword{methods}
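% Illustrative usage sketch (not an official package example). It assumes
% that ecoNP() shares the formula interface documented for eco(). Note
% that, unlike summary.eco, `param` defaults to FALSE here, so the
% population parameters must be requested explicitly.
\examples{
\dontrun{
data(reg)
res <- ecoNP(Y ~ X, data = reg)
## population-parameter and unit-level summaries for the first three units
summary(res, param = TRUE, units = TRUE, subset = 1:3)
}
}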
eco/man/predict.eco.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.eco.R
\name{predict.eco}
\alias{predict.eco}
\title{Out-of-Sample Posterior Prediction under the Parametric Bayesian
Model for Ecological Inference in 2x2 Tables}
\usage{
\method{predict}{eco}(object, newdraw = NULL, subset = NULL, verbose = FALSE, ...)
}
\arguments{
\item{object}{An output object from \code{eco} or \code{ecoNP}.}

\item{newdraw}{An optional list containing two matrices (or three
dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
and \eqn{\Sigma}. Those elements should be named as \code{mu} and
\code{Sigma}, respectively. The default is the original MCMC draws stored
in \code{object}.}

\item{subset}{A scalar or numerical vector specifying the row number(s) of
\code{mu} and \code{Sigma} in the output object from \code{eco}. If
specified, the posterior draws of parameters for those rows are used for
posterior prediction. The default is \code{NULL} where all the posterior
draws are used.}

\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
progress report on the Monte Carlo sampling from the posterior predictive
distributions are printed on the screen. The default is \code{FALSE}.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{predict.eco} yields a matrix of class \code{predict.eco} containing
the Monte Carlo sample from the posterior predictive distribution of inner
cells of ecological tables. \code{summary.predict.eco} will summarize the
output, and \code{print.summary.predict.eco} will print the summary.
}
\description{
Obtains out-of-sample posterior predictions under the fitted parametric
Bayesian model for ecological inference. \code{predict} method for class
\code{eco} and \code{ecoX}.
}
\details{
The posterior predictive values are computed using the Monte Carlo sample
stored in the \code{eco} output (or other sample if \code{newdraw} is
specified). Given each Monte Carlo sample of the parameters, we sample the
vector-valued latent variable from the appropriate multivariate Normal
distribution. Then, we apply the inverse logit transformation to obtain the
predictive values of proportions, \eqn{W}. The computation may be slow
(especially for the nonparametric model) if a large Monte Carlo sample of
the model parameters is used. In either case, setting \code{verbose = TRUE}
may be helpful in monitoring the progress of the code.
}
\seealso{
\code{eco}, \code{predict.ecoNP}
}
\keyword{methods}
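% Illustrative usage sketch (not an official package example). It follows
% the out-of-sample prediction example in eco.Rd; the `subset` range below
% assumes the default of 5000 stored MCMC draws.
\examples{
\dontrun{
data(reg)
res <- eco(Y ~ X, data = reg)
## posterior predictive draws based on the second half of the MCMC sample
out <- predict(res, subset = 2501:5000, verbose = TRUE)
summary(out)
}
}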
eco/man/housep88.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/housep88.R
\docType{data}
\name{housep88}
\alias{housep88}
\title{Electoral Results for the House and Presidential Races in 1988}
\format{
A data frame containing 5 variables and 424 observations
\tabular{lll}{
X \tab numeric \tab proportion voting for the Democrat in the presidential
race \cr
Y \tab numeric \tab proportion voting for the Democrat in the House race \cr
N \tab numeric \tab number of major party voters in the presidential
contest \cr
HPCT \tab numeric \tab House election turnout divided by presidential
election turnout (set to 1 if House turnout exceeds presidential turnout) \cr
DIST \tab numeric \tab 4-digit ICPSR state and district code: first 2
digits for the state code, last two digits for the district number (e.g.,
2106=IL 6th)
}
}
\description{
This data set contains, on a House district level, the percentage of the
vote for the Democratic House candidate, the percentage of the vote for the
Democratic presidential candidate (Dukakis), the number of voters who voted
for a major party candidate in the presidential race, and the ratio of
voters in the House race versus the number who cast a ballot for President.
Eleven (11) uncontested races are not included. Dataset compiled and
analyzed by Burden and Kimball (1998). Complete dataset and documentation
available at ICPSR study number 1140.
}
\references{
Burden, Barry C. and David C. Kimball (1998). ``A New Approach To
Ticket-Splitting.'' The American Political Science Review, vol. 92, no. 3,
pp. 533-544.
}
\keyword{datasets}

eco/man/eco.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/eco.R
\name{eco}
\alias{eco}
\title{Fitting the Parametric Bayesian Model of Ecological Inference in 2x2
Tables}
\usage{
eco(
  formula,
  data = parent.frame(),
  N = NULL,
  supplement = NULL,
  context = FALSE,
  mu0 = 0,
  tau0 = 2,
  nu0 = 4,
  S0 = 10,
  mu.start = 0,
  Sigma.start = 10,
  parameter = TRUE,
  grid = FALSE,
  n.draws = 5000,
  burnin = 0,
  thin = 0,
  verbose = FALSE
)
}
\arguments{
\item{formula}{A symbolic description of the model to be fit, specifying
the column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~
X} specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as
the row margin (e.g., percent African-American). Details and specific
examples are given below.}

\item{data}{An optional data frame in which to interpret the variables in
\code{formula}. The default is the environment in which \code{eco} is
called.}

\item{N}{An optional variable representing the size of the unit; e.g., the
total number of voters. \code{N} needs to be a vector of same length as
\code{Y} and \code{X} or a scalar.}

\item{supplement}{An optional matrix of supplemental data. The matrix has
two columns, which contain additional individual-level data such as survey
data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
additional individual-level data are included in the model. The default is
\code{NULL}.}

\item{context}{Logical. If \code{TRUE}, the contextual effect is also
modeled; that is, the row margin \eqn{X} and the unknown \eqn{W_1} and
\eqn{W_2} are assumed to be correlated. See Imai, Lu and Strauss (2008,
2011) for details. The default is \code{FALSE}.}

\item{mu0}{A scalar or a numeric vector that specifies the prior mean for
the mean parameter \eqn{\mu} for \eqn{(W_1,W_2)} (or for \eqn{(W_1, W_2,
X)} if \code{context=TRUE}). When the input of \code{mu0} is a scalar, its
value will be repeated to yield a vector of the length of \eqn{\mu};
otherwise, it needs to be a vector of same length as \eqn{\mu}. When
\code{context=TRUE}, the length of \eqn{\mu} is 3, otherwise it is 2. The
default is \code{0}.}

\item{tau0}{A positive integer representing the scale parameter of the
Normal-Inverse Wishart prior for the mean and variance parameter
\eqn{(\mu, \Sigma)}. The default is \code{2}.}

\item{nu0}{A positive integer representing the prior degrees of freedom of
the Normal-Inverse Wishart prior for the mean and variance parameter
\eqn{(\mu, \Sigma)}. The default is \code{4}.}

\item{S0}{A positive scalar or a positive definite matrix that specifies
the prior scale matrix of the Normal-Inverse Wishart prior for the mean and
variance parameter \eqn{(\mu, \Sigma)}. If it is a scalar, then the prior
scale matrix will be a diagonal matrix with the same dimensions as
\eqn{\Sigma} and the diagonal elements all take value of \code{S0};
otherwise, \code{S0} needs to have the same dimensions as \eqn{\Sigma}.
When \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix;
otherwise, it is \eqn{2 \times 2}. The default is \code{10}.}

\item{mu.start}{A scalar or a numeric vector that specifies the starting
values of the mean parameter \eqn{\mu}. If it is a scalar, then its value
will be repeated to yield a vector of the length of \eqn{\mu}; otherwise,
it needs to be a vector of same length as \eqn{\mu}. When
\code{context=FALSE}, the length of \eqn{\mu} is 2, otherwise it is 3. The
default is \code{0}.}

\item{Sigma.start}{A scalar or a positive definite matrix that specifies
the starting value of the variance matrix \eqn{\Sigma}. If it is a scalar,
then the starting value will be a diagonal matrix with the same dimensions
as \eqn{\Sigma} and the diagonal elements all take value of
\code{Sigma.start}; otherwise, \code{Sigma.start} needs to have the same
dimensions as \eqn{\Sigma}. When \code{context=TRUE}, \eqn{\Sigma} is a
\eqn{3 \times 3} matrix; otherwise, it is \eqn{2 \times 2}. The default is
\code{10}.}

\item{parameter}{Logical. If \code{TRUE}, the Gibbs draws of the population
parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the
in-sample predictions of the missing internal cells, \eqn{W}. The default
is \code{TRUE}.}

\item{grid}{Logical. If \code{TRUE}, the grid method is used to sample
\eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is
used, where candidate draws are sampled from the uniform distribution on
the tomography line for each unit. Note that the grid method is
significantly slower than the Metropolis algorithm. The default is
\code{FALSE}.}

\item{n.draws}{A positive integer. The number of MCMC draws. The default is
\code{5000}.}

\item{burnin}{A positive integer. The burnin interval for the Markov chain;
i.e., the number of initial draws that should not be stored. The default is
\code{0}.}

\item{thin}{A positive integer. The thinning interval for the Markov chain;
i.e., the number of Gibbs draws between the recorded values that are
skipped. The default is \code{0}.}

\item{verbose}{Logical. If \code{TRUE}, the progress of the Gibbs sampler
is printed to the screen. The default is \code{FALSE}.}
}
\value{
An object of class \code{eco} containing the following elements:
\item{call}{The matched call.}
\item{X}{The row margin, \eqn{X}.}
\item{Y}{The column margin, \eqn{Y}.}
\item{N}{The size of each table, \eqn{N}.}
\item{burnin}{The number of initial burnin draws.}
\item{thin}{The thinning interval.}
\item{nu0}{The prior degrees of freedom.}
\item{tau0}{The prior scale parameter.}
\item{mu0}{The prior mean.}
\item{S0}{The prior scale matrix.}
\item{W}{A three dimensional array storing the posterior in-sample
predictions of \eqn{W}. The first dimension indexes the Monte Carlo draws,
the second dimension indexes the columns of the table, and the third
dimension represents the observations.}
\item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
\item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.}

The following additional elements are included in the output when
\code{parameter = TRUE}.
\item{mu}{The posterior draws of the population mean parameter, \eqn{\mu}.}
\item{Sigma}{The posterior draws of the population variance matrix,
\eqn{\Sigma}.}
}
\description{
\code{eco} is used to fit the parametric Bayesian model (based on a
Normal/Inverse-Wishart prior) for ecological inference in \eqn{2 \times 2}
tables via Markov chain Monte Carlo. It gives the in-sample predictions as
well as the estimates of the model parameters. The model and algorithm are
described in Imai, Lu and Strauss (2008, 2011).
}
\details{
An example of a \eqn{2 \times 2} ecological table for racial voting is
given below:
\tabular{llccc}{
\tab \tab black voters \tab white voters \tab \cr
\tab vote \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr
\tab not vote \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr
\tab \tab \eqn{X_i} \tab \eqn{1-X_i} \tab
}
where \eqn{Y_i} and \eqn{X_i} represent the observed margins, and \eqn{W_1}
and \eqn{W_2} are unknown variables. In this example, \eqn{Y_i} is the
turnout rate in the i-th precinct, and \eqn{X_i} is the proportion of
African Americans in the i-th precinct. The unknowns \eqn{W_{1i}} and
\eqn{W_{2i}} are the black and white turnout rates, respectively. All
variables are proportions and hence bounded between 0 and 1. For each
\eqn{i}, the following deterministic relationship holds:
\eqn{Y_i=X_i W_{1i}+(1-X_i)W_{2i}}.
}
\examples{
## load the registration data
\dontrun{data(reg)

## NOTE: convergence has not been properly assessed for the following
## examples. See Imai, Lu and Strauss (2008, 2011) for more
## complete analyses.

## fit the parametric model with the default prior specification
res <- eco(Y ~ X, data = reg, verbose = TRUE)
## summarize the results
summary(res)

## obtain out-of-sample prediction
out <- predict(res, verbose = TRUE)
## summarize the results
summary(out)

## load the Robinson's census data
data(census)

## fit the parametric model with contextual effects and N
## using the default prior specification
res1 <- eco(Y ~ X, N = N, context = TRUE, data = census, verbose = TRUE)

## summarize the results
summary(res1)

## obtain out-of-sample prediction
out1 <- predict(res1, verbose = TRUE)

## summarize the results
summary(out1)
}
}
\references{
Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R Package for
Ecological Inference in 2x2 Tables} Journal of Statistical Software,
Vol. 42, No. 5, pp. 1-23.

Imai, Kosuke, Ying Lu and Aaron Strauss. (2008).
\dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological Tables: An
Incomplete Data Approach} Political Analysis, Vol. 16, No. 1 (Winter),
pp. 41-69.
}
\seealso{
\code{ecoML}, \code{ecoNP}, \code{predict.eco}, \code{summary.eco}
}
\keyword{models}

eco/man/summary.ecoML.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summary.ecoML.R
\name{summary.ecoML}
\alias{summary.ecoML}
\title{Summarizing the Results for the Maximum Likelihood Parametric Model
for Ecological Inference in 2x2 Tables}
\usage{
\method{summary}{ecoML}(
  object,
  CI = c(2.5, 97.5),
  param = TRUE,
  units = FALSE,
  subset = NULL,
  ...
)
}
\arguments{
\item{object}{An output object from \code{ecoML}.}

\item{CI}{A vector of lower and upper bounds for the Bayesian credible
intervals used to summarize the results. The default is the equal tail 95
percent credible interval.}

\item{param}{Ignored.}

\item{units}{Logical. If \code{TRUE}, the in-sample predictions for each
unit or for a subset of units will be provided. The default value is
\code{FALSE}.}

\item{subset}{A numeric vector indicating the subset of the units whose
in-sample predictions are to be provided when \code{units} is \code{TRUE}.
The default value is \code{NULL} where the in-sample predictions for each
unit will be provided.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{summary.ecoML} yields an object of class \code{summary.ecoML}
containing the following elements:
\item{call}{The call from \code{ecoML}.}
\item{sem}{Whether the SEM algorithm was executed, as specified by the user
upon calling \code{ecoML}.}
\item{fix.rho}{Whether the correlation parameter was fixed or allowed to
vary, as specified by the user upon calling \code{ecoML}.}
\item{epsilon}{The convergence threshold specified by the user upon calling
\code{ecoML}.}
\item{n.obs}{The number of units.}
\item{iters.em}{The number of iterations the EM algorithm cycled through
before convergence or reaching the maximum number of iterations allowed.}
\item{iters.sem}{The number of iterations the SEM algorithm cycled through
before convergence or reaching the maximum number of iterations allowed.}
\item{loglik}{The final observed log-likelihood.}
\item{rho}{A matrix of \code{iters.em} rows specifying the correlation
parameters at each iteration of the EM algorithm. The number of columns
depends on how many correlation parameters exist in the model. Column order
is the same as the order of the parameters in \code{param.table}.}
\item{param.table}{Final estimates of the parameter values for the model.
Excludes parameters fixed by the user upon calling \code{ecoML}. See the
\code{ecoML} documentation for the order of parameters.}
\item{agg.table}{Aggregate estimates of the marginal means of \eqn{W_1} and
\eqn{W_2}.}
\item{agg.wtable}{Aggregate estimates of the marginal means of \eqn{W_1}
and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}

If \code{units = TRUE}, the following element is also included:
\item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}

This object can be printed by \code{print.summary.ecoML}
}
\description{
\code{summary} method for class \code{ecoML}.
}
\seealso{
\code{ecoML}
}
\keyword{methods}
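% Illustrative usage sketch (not an official package example). The ecoML()
% arguments and the census data follow the documentation in ecoML.Rd and
% census.Rd.
\examples{
\dontrun{
data(census)
res <- ecoML(Y ~ X, data = census, verbose = TRUE)
## parameter estimates plus unit-level estimates for the first ten counties
summary(res, units = TRUE, subset = 1:10)
}
}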
eco/man/print.summary.ecoML.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.summary.ecoML.R
\name{print.summary.ecoML}
\alias{print.summary.ecoML}
\title{Print the Summary of the Results for the Maximum Likelihood
Parametric Model for Ecological Inference in 2x2 Tables}
\usage{
\method{print}{summary.ecoML}(x, digits = max(3, getOption("digits") - 3), ...)
}
\arguments{
\item{x}{An object of class \code{summary.ecoML}.}

\item{digits}{the number of significant digits to use when printing.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{summary.ecoML} yields an object of class \code{summary.ecoML}
containing the following elements:
\item{call}{The call from \code{ecoML}.}
\item{sem}{Whether the SEM algorithm was executed, as specified by the user
upon calling \code{ecoML}.}
\item{fix.rho}{Whether the correlation parameter was fixed or allowed to
vary, as specified by the user upon calling \code{ecoML}.}
\item{epsilon}{The convergence threshold specified by the user upon calling
\code{ecoML}.}
\item{n.obs}{The number of units.}
\item{iters.em}{The number of iterations the EM algorithm cycled through
before convergence or reaching the maximum number of iterations allowed.}
\item{iters.sem}{The number of iterations the SEM algorithm cycled through
before convergence or reaching the maximum number of iterations allowed.}
\item{loglik}{The final observed log-likelihood.}
\item{rho}{A matrix of \code{iters.em} rows specifying the correlation
parameters at each iteration of the EM algorithm. The number of columns
depends on how many correlation parameters exist in the model. Column order
is the same as the order of the parameters in \code{param.table}.}
\item{param.table}{Final estimates of the parameter values for the model.
Excludes parameters fixed by the user upon calling \code{ecoML}. See the
\code{ecoML} documentation for the order of parameters.}
\item{agg.table}{Aggregate estimates of the marginal means of \eqn{W_1} and
\eqn{W_2}.}
\item{agg.wtable}{Aggregate estimates of the marginal means of \eqn{W_1}
and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.}

If \code{units = TRUE}, the following element is also included:
\item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.}

This object can be printed by \code{print.summary.ecoML}
}
\description{
\code{print} method for objects of class \code{summary.ecoML}.
}
\seealso{
\code{ecoML}
}
\keyword{methods}

eco/man/predict.ecoNP.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.ecoNP.R
\name{predict.ecoNP}
\alias{predict.ecoNP}
\title{Out-of-Sample Posterior Prediction under the Nonparametric Bayesian
Model for Ecological Inference in 2x2 Tables}
\usage{
\method{predict}{ecoNP}(
  object,
  newdraw = NULL,
  subset = NULL,
  obs = NULL,
  verbose = FALSE,
  ...
)
}
\arguments{
\item{object}{An output object from \code{ecoNP}.}

\item{newdraw}{An optional list containing two matrices (or three
dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu}
and \eqn{\Sigma}. Those elements should be named as \code{mu} and
\code{Sigma}, respectively. The default is the original MCMC draws stored
in \code{object}.}

\item{subset}{A scalar or numerical vector specifying the row number(s) of
\code{mu} and \code{Sigma} in the output object from \code{eco}.
If specified, the posterior draws of parameters for those rows are used for
posterior prediction. The default is \code{NULL}, in which case all the
posterior draws are used.}

\item{obs}{An integer or vector of integers specifying the observation
number(s) whose posterior draws will be used for predictions. The default
is \code{NULL}, in which case all the observations in the data set are
selected.}

\item{verbose}{logical. If \code{TRUE}, helpful messages along with a
progress report on the Monte Carlo sampling from the posterior predictive
distributions are printed on the screen. The default is \code{FALSE}.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
\code{predict.eco} yields a matrix of class \code{predict.eco} containing
the Monte Carlo sample from the posterior predictive distribution of inner
cells of ecological tables. \code{summary.predict.eco} will summarize the
output, and \code{print.summary.predict.eco} will print the summary.
}
\description{
Obtains out-of-sample posterior predictions under the fitted nonparametric
Bayesian model for ecological inference. \code{predict} method for class
\code{ecoNP} and \code{ecoNPX}.
}
\details{
The posterior predictive values are computed using the Monte Carlo sample
stored in the \code{eco} or \code{ecoNP} output (or another sample if
\code{newdraw} is specified). Given each Monte Carlo sample of the
parameters, we sample the vector-valued latent variable from the
appropriate multivariate Normal distribution. Then, we apply the inverse
logit transformation to obtain the predictive values of proportions,
\eqn{W}. The computation may be slow (especially for the nonparametric
model) if a large Monte Carlo sample of the model parameters is used. In
either case, setting \code{verbose = TRUE} may be helpful in monitoring the
progress of the code.
}
\seealso{
\code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP}
}
\keyword{methods}
eco/man/wallace.Rd0000644000176200001440000000234314330337373013502 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wallace.R
\docType{data}
\name{wallace}
\alias{wallace}
\title{Black voting rates for Wallace for President, 1968}
\format{
A data frame containing 3 variables and 1009 observations
\tabular{lll}{ X \tab numeric \tab proportion of the population that is
Black \cr Y \tab numeric \tab proportion of presidential votes cast for
Wallace \cr FIPS \tab numeric \tab the FIPS county code }
}
\description{
This data set contains, on a county level, the proportion of county
residents who are Black and the proportion of presidential votes cast for
Wallace. Demographic data is based on the 1960 census. Presidential returns
are from ICPSR study 13. County data from 10 southern states (Alabama,
Arkansas, Georgia, Florida, Louisiana, Mississippi, North Carolina, South
Carolina, Tennessee, Texas) are included. (Virginia is excluded due to the
difficulty of matching counties between the datasets.) These data are
analyzed in Wasserman and Segal (1973).
}
\references{
Wasserman, Ira M. and David R. Segal (1973). ``Aggregation Effects in the
Ecological Study of Presidential Voting.'' American Journal of Political
Science, vol. 17, pp. 177-81.
}
\keyword{datasets}
eco/man/ecoML.Rd0000644000176200001440000002620414330337373013073 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/emeco.R
\name{ecoML}
\alias{ecoML}
\title{Fitting Parametric Models and Quantifying Missing Information for
Ecological Inference in 2x2 Tables}
\usage{
ecoML(
  formula,
  data = parent.frame(),
  N = NULL,
  supplement = NULL,
  theta.start = c(0, 0, 1, 1, 0),
  fix.rho = FALSE,
  context = FALSE,
  sem = TRUE,
  epsilon = 10^(-6),
  maxit = 1000,
  loglik = TRUE,
  hyptest = FALSE,
  verbose = FALSE
)
}
\arguments{
\item{formula}{A symbolic description of the model to be fit, specifying
the column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~
X} specifies \code{Y} as the column margin (e.g., turnout) and \code{X}
(e.g., percent African-American) as the row margin. Details and specific
examples are given below.}

\item{data}{An optional data frame in which to interpret the variables in
\code{formula}. The default is the environment in which \code{ecoML} is
called.}

\item{N}{An optional variable representing the size of the unit; e.g., the
total number of voters. \code{N} needs to be a vector of the same length as
\code{Y} and \code{X} or a scalar.}

\item{supplement}{An optional matrix of supplemental data. The matrix has
two columns, which contain additional individual-level data such as survey
data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
additional individual-level data are included in the model. The default is
\code{NULL}.}

\item{theta.start}{A numeric vector that specifies the starting values for
the mean, variance, and covariance. When \code{context = FALSE}, the
elements of \code{theta.start} correspond to (\eqn{E(W_1)}, \eqn{E(W_2)},
\eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{cor(W_1,W_2)}). When \code{context =
TRUE}, the elements of \code{theta.start} correspond to (\eqn{E(W_1)},
\eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{corr(W_1, X)},
\eqn{corr(W_2, X)}, \eqn{corr(W_1,W_2)}). Moreover, when
\code{fix.rho=TRUE}, \eqn{corr(W_1,W_2)} is set to be the correlation
between \eqn{W_1} and \eqn{W_2} when \code{context = FALSE}, and the
partial correlation between \eqn{W_1} and \eqn{W_2} given \eqn{X} when
\code{context = TRUE}. The default is \code{c(0,0,1,1,0)}.}

\item{fix.rho}{Logical. If \code{TRUE}, the correlation (when
\code{context=FALSE}) or the partial correlation given \eqn{X} (when
\code{context=TRUE}) between \eqn{W_1} and \eqn{W_2} is fixed throughout
the estimation. For details, see Imai, Lu and Strauss (2006). The default
is \code{FALSE}.}

\item{context}{Logical. If \code{TRUE}, the contextual effect is also
modeled. In this case, the row margin (i.e., \eqn{X}) and the
individual-level rates (i.e., \eqn{W_1} and \eqn{W_2}) are assumed to be
distributed tri-variate normally (after logit transformations). See Imai,
Lu and Strauss (2006) for details. The default is \code{FALSE}.}

\item{sem}{Logical. If \code{TRUE}, the standard errors of parameter
estimates are estimated via the SEM algorithm, as well as the fraction of
missing data. The default is \code{TRUE}.}

\item{epsilon}{A positive number that specifies the convergence criterion
for the EM algorithm. The square root of \code{epsilon} is the convergence
criterion for the SEM algorithm. The default is \code{10^(-6)}.}

\item{maxit}{A positive integer specifying the maximum number of iterations
before the convergence criterion is met. The default is \code{1000}.}

\item{loglik}{Logical.
If \code{TRUE}, the value of the log-likelihood function at each iteration
of EM is saved. The default is \code{TRUE}.}

\item{hyptest}{Logical. If \code{TRUE}, the model is estimated under the
null hypothesis that the means of \eqn{W_1} and \eqn{W_2} are the same. The
default is \code{FALSE}.}

\item{verbose}{Logical. If \code{TRUE}, the progress of the EM and SEM
algorithms is printed to the screen. The default is \code{FALSE}.}
}
\value{
An object of class \code{ecoML} containing the following elements:
\item{call}{The matched call.}
\item{X}{The row margin, \eqn{X}.}
\item{Y}{The column margin, \eqn{Y}.}
\item{N}{The size of each table, \eqn{N}.}
\item{context}{The assumption under which the model is estimated. If
\code{context = FALSE}, the CAR assumption is adopted and no contextual
effect is modeled. If \code{context = TRUE}, the NCAR assumption is
adopted, and the contextual effect is modeled.}
\item{sem}{Whether the SEM algorithm is used to estimate the standard
errors and observed information matrix for the parameter estimates.}
\item{fix.rho}{Whether the correlation or the partial correlation between
\eqn{W_1} and \eqn{W_2} is fixed in the estimation.}
\item{r12}{If \code{fix.rho = TRUE}, the value that \eqn{corr(W_1, W_2)} is
fixed to.}
\item{epsilon}{The precision criterion for EM convergence.
\eqn{\sqrt{\epsilon}} is the precision criterion for SEM convergence.}
\item{theta.sem}{The ML estimates of \eqn{E(W_1)}, \eqn{E(W_2)},
\eqn{var(W_1)}, \eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. If \code{context =
TRUE}, \eqn{E(X)}, \eqn{cov(W_1,X)}, and \eqn{cov(W_2,X)} are also
reported.}
\item{W}{In-sample estimation of \eqn{W_1} and \eqn{W_2}.}
\item{suff.stat}{The sufficient statistics for \code{theta.em}.}
\item{iters.em}{Number of EM iterations before convergence is achieved.}
\item{iters.sem}{Number of SEM iterations before convergence is achieved.}
\item{loglik}{The log-likelihood of the model when convergence is
achieved.}
\item{loglik.log.em}{A vector saving the value of the log-likelihood
function at each iteration of the EM algorithm.}
\item{mu.log.em}{A matrix saving the unweighted mean estimation of the
logit-transformed individual-level proportions (i.e., \eqn{W_1} and
\eqn{W_2}) at each iteration of the EM process.}
\item{Sigma.log.em}{A matrix saving the log of the variance estimation of
the logit-transformed individual-level proportions (i.e., \eqn{W_1} and
\eqn{W_2}) at each iteration of the EM process. Note, non-transformed
variances are displayed on the screen (when \code{verbose = TRUE}).}
\item{rho.fisher.em}{A matrix saving the Fisher transformation of the
estimation of the correlations between the logit-transformed
individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each
iteration of the EM process. Note, non-transformed correlations are
displayed on the screen (when \code{verbose = TRUE}).}
Moreover, when \code{sem=TRUE}, \code{ecoML} also outputs the following
values:
\item{DM}{The matrix characterizing the rates of convergence of the EM
algorithms. Such information is also used to calculate the observed-data
information matrix.}
\item{Icom}{The (expected) complete-data information matrix estimated via
the SEM algorithm. When \code{context=FALSE, fix.rho=TRUE}, \code{Icom} is
4 by 4. When \code{context=FALSE, fix.rho=FALSE}, \code{Icom} is 5 by 5.
When \code{context=TRUE}, \code{Icom} is 9 by 9.}
\item{Iobs}{The observed information matrix. The dimension of \code{Iobs}
is the same as that of \code{Icom}.}
\item{Imiss}{The difference between \code{Icom} and \code{Iobs}.
The dimension of \code{Imiss} is the same as that of \code{Icom}.}
\item{Vobs}{The (symmetrized) variance-covariance matrix of the ML
parameter estimates. The dimension of \code{Vobs} is the same as that of
\code{Icom}.}
\item{Vcom}{The (expected) complete-data variance-covariance matrix. The
dimension of \code{Vcom} is the same as that of \code{Icom}.}
\item{Vobs.original}{The estimated variance-covariance matrix of the ML
parameter estimates. The dimension of \code{Vobs.original} is the same as
that of \code{Icom}.}
\item{Fmis}{The fraction of missing information associated with each
parameter estimate.}
\item{VFmis}{The proportion of increased variance associated with each
parameter estimate due to observed data.}
\item{Ieigen}{The largest eigenvalue of \code{Imiss}.}
\item{Icom.trans}{The complete-data information matrix for the
Fisher-transformed parameters.}
\item{Iobs.trans}{The observed-data information matrix for the
Fisher-transformed parameters.}
\item{Fmis.trans}{The fractions of missing information associated with the
Fisher-transformed parameters.}
}
\description{
\code{ecoML} is used to fit parametric models for ecological inference in
\eqn{2 \times 2} tables via Expectation Maximization (EM) algorithms. The
data are specified in proportions. At its most basic setting, the algorithm
assumes that the individual-level proportions (i.e., \eqn{W_1} and
\eqn{W_2}) are distributed bivariate normally (after logit
transformations). The function calculates point estimates of the parameters
for models based on different assumptions. The standard errors of the point
estimates are also computed via the Supplemented EM algorithm. Moreover,
\code{ecoML} quantifies the amount of missing information associated with
each parameter and allows researchers to examine the impact of missing
information on parameter estimation in ecological inference. The models and
algorithms are described in Imai, Lu and Strauss (2008, 2011).
}
\details{
When \code{sem} is \code{TRUE}, \code{ecoML} computes the observed-data
information matrix for the parameters of interest based on the Supplemented
EM algorithm. The inverse of the observed-data information matrix can be
used to estimate the variance-covariance matrix for the parameters
estimated from the EM algorithm. In addition, it also computes the expected
complete-data information matrix. Based on these two measures, one can
further calculate the fraction of missing information associated with each
parameter. See Imai, Lu and Strauss (2006) for more details about the
fraction of missing information.

Moreover, when \code{hyptest=TRUE}, \code{ecoML} allows one to estimate the
parametric model under the null hypothesis that \code{mu_1=mu_2}. One can
then construct a likelihood ratio test to assess the hypothesis of equal
means. The associated fraction of missing information for the test
statistic can also be calculated. See Imai, Lu and Strauss (2006) for
details.
}
\examples{

## load the census data
data(census)

## NOTE: convergence has not been properly assessed for the following
## examples. See Imai, Lu and Strauss (2006) for more complete analyses.
## In the first example below, in the interest of time, only part of the
## data set is analyzed and the convergence requirement is less stringent
## than the default setting.
## In the second example, the program is arbitrarily halted 100 iterations
## into the simulation, before convergence.
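
## In addition to the two examples below, here is a hedged sketch (not part
## of the original examples) of holding the correlation parameter fixed:
## with fix.rho = TRUE, the last element of theta.start supplies the fixed
## value of cor(W_1,W_2), in the order (E(W_1), E(W_2), var(W_1), var(W_2),
## cor(W_1,W_2)) described above. The object name res.fix is hypothetical.
\dontrun{res.fix <- ecoML(Y ~ X, data = census[1:100,], N = census[1:100,3],
                          theta.start = c(0, 0, 1, 1, 0.5), fix.rho = TRUE,
                          epsilon = 10^(-6), verbose = TRUE)}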
## load Robinson's census data
data(census)

## fit the parametric model with the default model specifications
\dontrun{res <- ecoML(Y ~ X, data = census[1:100,], N=census[1:100,3],
                      epsilon=10^(-6), verbose = TRUE)}
## summarize the results
\dontrun{summary(res)}

## fit the parametric model with some individual
## level data using the default prior specification
surv <- 1:600
\dontrun{res1 <- ecoML(Y ~ X, context = TRUE, data = census[-surv,],
                       supplement = census[surv,c(4:5,1)], maxit=100, verbose = TRUE)}
## summarize the results
\dontrun{summary(res1)}

}
\references{
Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R Package for
Ecological Inference in 2x2 Tables} Journal of Statistical Software, Vol.
42, No. 5, pp. 1-23.

Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69.
}
\seealso{
\code{eco}, \code{ecoNP}, \code{summary.ecoML}
}
\keyword{models}
eco/man/reg.Rd0000644000176200001440000000211214330337373012641 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/reg.R
\docType{data}
\name{reg}
\alias{reg}
\title{Voter Registration in US Southern States}
\format{
A data frame containing 5 variables and 275 observations
\tabular{lll}{ X \tab numeric \tab the fraction of Black voters \cr Y \tab
numeric \tab the fraction of voters who registered themselves \cr N \tab
numeric \tab the total number of voters in each county \cr W1 \tab numeric
\tab the actual fraction of Black voters who registered themselves \cr W2
\tab numeric \tab the actual fraction of White voters who registered
themselves }
}
\description{
This data set contains the racial composition, the registration rate, the
number of eligible voters as well as the actual observed racial
registration rates for every county in four US southern states: Florida,
Louisiana, North Carolina, and South Carolina.
}
\references{
King, G. (1997). \dQuote{A Solution to the Ecological Inference Problem:
Reconstructing Individual Behavior from Aggregate Data}. Princeton
University Press, Princeton, NJ.
}
\keyword{datasets}
eco/man/Qfun.Rd0000644000176200001440000000226414330337373013005 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Qfun.R
\name{Qfun}
\alias{Qfun}
\title{Fitting the Parametric Bayesian Model of Ecological Inference in 2x2
Tables}
\usage{
Qfun(theta, suff.stat, n)
}
\arguments{
\item{theta}{A vector that contains the MLE \eqn{E(W_1)}, \eqn{E(W_2)},
\eqn{var(W_1)}, \eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. Typically it is the
element \code{theta.em} of an object of class \code{ecoML}.}

\item{suff.stat}{A vector of sufficient statistics of \eqn{E(W_1)},
\eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}.}

\item{n}{An integer representing the sample size.}
}
\description{
\code{Qfun} returns the complete log-likelihood that is used to calculate
the fraction of missing information.
}
\references{
Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R Package for
Ecological Inference in 2x2 Tables} Journal of Statistical Software, Vol.
42, No. 5, pp. 1-23.

Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69.
} \seealso{ \code{ecoML} } \keyword{models} eco/man/print.summary.eco.Rd0000644000176200001440000000273714330337373015476 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/print.summary.eco.R \name{print.summary.eco} \alias{print.summary.eco} \title{Print the Summary of the Results for the Bayesian Parametric Model for Ecological Inference in 2x2 Tables} \usage{ \method{print}{summary.eco}(x, digits = max(3, getOption("digits") - 3), ...) } \arguments{ \item{x}{An object of class \code{summary.eco}.} \item{digits}{the number of significant digits to use when printing.} \item{...}{further arguments passed to or from other methods.} } \value{ \code{summary.eco} yields an object of class \code{summary.eco} containing the following elements: \item{call}{The call from \code{eco}.} \item{n.obs}{The number of units.} \item{n.draws}{The number of Monte Carlo samples.} \item{agg.table}{Aggregate posterior estimates of the marginal means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If \code{param = TRUE}, the following elements are also included: \item{param.table}{Posterior estimates of model parameters: population mean estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If \code{units = TRUE}, the following elements are also included: \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.} \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.} This object can be printed by \code{print.summary.eco} } \description{ \code{summary} method for class \code{eco}. } \seealso{ \code{eco}, \code{predict.eco} } \keyword{methods} eco/man/forgnlit30.Rd0000644000176200001440000000225114330337373014057 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/forgnlit30.R \docType{data} \name{forgnlit30} \alias{forgnlit30} \title{Foreign-born literacy in 1930} \format{ A data frame containing 5 variables and 48 observations \tabular{lll}{ X \tab numeric \tab proportion of the white population at least 10 years of age that is foreign born \cr Y \tab numeric \tab proportion of the white population at least 10 years of age that is illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white population at least 10 years of age that is illiterate \cr W2 \tab numeric \tab proportion of the native-born white population at least 10 years of age that is illiterate \cr ICPSR \tab numeric \tab the ICPSR state code } } \description{ This data set contains, on a state level, the proportion of white residents ten years and older who are foreign born, and the proportion of those residents who are literate. Data come from the 1930 census and were first analyzed by Robinson (1950). } \references{ Robinson, W.S. (1950). ``Ecological Correlations and the Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15, pp.351-357. 
}
\keyword{datasets}
eco/man/ecoNP.Rd0000644000176200001440000002063014330337373013075 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ecoNP.R
\name{ecoNP}
\alias{ecoNP}
\title{Fitting the Nonparametric Bayesian Models of Ecological Inference in
2x2 Tables}
\usage{
ecoNP(
  formula,
  data = parent.frame(),
  N = NULL,
  supplement = NULL,
  context = FALSE,
  mu0 = 0,
  tau0 = 2,
  nu0 = 4,
  S0 = 10,
  alpha = NULL,
  a0 = 1,
  b0 = 0.1,
  parameter = FALSE,
  grid = FALSE,
  n.draws = 5000,
  burnin = 0,
  thin = 0,
  verbose = FALSE
)
}
\arguments{
\item{formula}{A symbolic description of the model to be fit, specifying
the column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~
X} specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as
the row margin (e.g., percent African-American). Details and specific
examples are given below.}

\item{data}{An optional data frame in which to interpret the variables in
\code{formula}. The default is the environment in which \code{ecoNP} is
called.}

\item{N}{An optional variable representing the size of the unit; e.g., the
total number of voters. \code{N} needs to be a vector of the same length as
\code{Y} and \code{X} or a scalar.}

\item{supplement}{An optional matrix of supplemental data. The matrix has
two columns, which contain additional individual-level data such as survey
data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
additional individual-level data are included in the model. The default is
\code{NULL}.}

\item{context}{Logical. If \code{TRUE}, the contextual effect is also
modeled; that is, the row margin \eqn{X} and the unknown \eqn{W_1} and
\eqn{W_2} are assumed to be correlated. See Imai, Lu and Strauss (2008,
2011) for details. The default is \code{FALSE}.}

\item{mu0}{A scalar or a numeric vector that specifies the prior mean for
the mean parameter \eqn{\mu} of the base prior distribution \eqn{G_0} (see
Imai, Lu and Strauss (2008, 2011) for detailed descriptions of the
Dirichlet prior and the normal base prior distribution). If it is a scalar,
then its value will be repeated to yield a vector of the length of
\eqn{\mu}; otherwise, it needs to be a vector of the same length as
\eqn{\mu}. When \code{context=TRUE}, the length of \eqn{\mu} is 3;
otherwise it is 2. The default is \code{0}.}

\item{tau0}{A positive integer representing the scale parameter of the
Normal-Inverse Wishart prior for the mean and variance parameter
\eqn{(\mu_i, \Sigma_i)} of each observation. The default is \code{2}.}

\item{nu0}{A positive integer representing the prior degrees of freedom of
the variance matrix \eqn{\Sigma_i}. The default is \code{4}.}

\item{S0}{A positive scalar or a positive definite matrix that specifies
the prior scale matrix for the variance matrix \eqn{\Sigma_i}. If it is a
scalar, then the prior scale matrix will be a diagonal matrix with the same
dimensions as \eqn{\Sigma_i} and the diagonal elements all take the value
of \code{S0}; otherwise, \code{S0} needs to have the same dimensions as
\eqn{\Sigma_i}. When \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times
3} matrix; otherwise, it is \eqn{2 \times 2}. The default is \code{10}.}

\item{alpha}{A positive scalar representing a user-specified fixed value of
the concentration parameter, \eqn{\alpha}. If \code{NULL}, \eqn{\alpha}
will be updated at each Gibbs draw, and its prior parameters \code{a0} and
\code{b0} need to be specified.
The default is \code{NULL}.} \item{a0}{A positive integer representing the value of shape parameter of the gamma prior distribution for \eqn{\alpha}. The default is \code{1}.} \item{b0}{A positive integer representing the value of the scale parameter of the gamma prior distribution for \eqn{\alpha}. The default is \code{0.1}.} \item{parameter}{Logical. If \code{TRUE}, the Gibbs draws of the population parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the in-sample predictions of the missing internal cells, \eqn{W}. The default is \code{FALSE}. This needs to be set to \code{TRUE} if one wishes to make population inferences through \code{predict.eco}. See an example below.} \item{grid}{Logical. If \code{TRUE}, the grid method is used to sample \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is used where candidate draws are sampled from the uniform distribution on the tomography line for each unit. Note that the grid method is significantly slower than the Metropolis algorithm.} \item{n.draws}{A positive integer. The number of MCMC draws. The default is \code{5000}.} \item{burnin}{A positive integer. The burnin interval for the Markov chain; i.e. the number of initial draws that should not be stored. The default is \code{0}.} \item{thin}{A positive integer. The thinning interval for the Markov chain; i.e. the number of Gibbs draws between the recorded values that are skipped. The default is \code{0}.} \item{verbose}{Logical. If \code{TRUE}, the progress of the Gibbs sampler is printed to the screen. The default is \code{FALSE}.} } \value{ An object of class \code{ecoNP} containing the following elements: \item{call}{The matched call.} \item{X}{The row margin, \eqn{X}.} \item{Y}{The column margin, \eqn{Y}.} \item{burnin}{The number of initial burnin draws.} \item{thin}{The thinning interval.} \item{nu0}{The prior degrees of freedom.} \item{tau0}{The prior scale parameter.} \item{mu0}{The prior mean.} \item{S0}{The prior scale matrix.} \item{a0}{The prior shape parameter.} \item{b0}{The prior scale parameter.} \item{W}{A three dimensional array storing the posterior in-sample predictions of \eqn{W}. The first dimension indexes the Monte Carlo draws, the second dimension indexes the columns of the table, and the third dimension represents the observations.} \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.} \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.} The following additional elements are included in the output when \code{parameter = TRUE}. \item{mu}{A three dimensional array storing the posterior draws of the population mean parameter, \eqn{\mu}. The first dimension indexes the Monte Carlo draws, the second dimension indexes the columns of the table, and the third dimension represents the observations.} \item{Sigma}{A three dimensional array storing the posterior draws of the population variance matrix, \eqn{\Sigma}. The first dimension indexes the Monte Carlo draws, the second dimension indexes the parameters, and the third dimension represents the observations. } \item{alpha}{The posterior draws of \eqn{\alpha}.} \item{nstar}{The number of clusters at each Gibbs draw.} } \description{ \code{ecoNP} is used to fit the nonparametric Bayesian model (based on a Dirichlet process prior) for ecological inference in \eqn{2 \times 2} tables via Markov chain Monte Carlo. It gives the in-sample predictions as well as out-of-sample predictions for population inference. 
The models and algorithms are described in Imai, Lu and Strauss (2008, 2011). } \examples{ ## load the registration data data(reg) ## NOTE: We set the number of MCMC draws to be a very small number in ## the following examples; i.e., convergence has not been properly ## assessed. See Imai, Lu and Strauss (2006) for more complete examples. ## fit the nonparametric model to give in-sample predictions ## store the parameters to make population inference later \dontrun{res <- ecoNP(Y ~ X, data = reg, n.draws = 50, param = TRUE, verbose = TRUE) ##summarize the results summary(res) ## obtain out-of-sample prediction out <- predict(res, verbose = TRUE) ## summarize the results summary(out) ## density plots of the out-of-sample predictions par(mfrow=c(2,1)) plot(density(out[,1]), main = "W1") plot(density(out[,2]), main = "W2") ## load the Robinson's census data data(census) ## fit the parametric model with contextual effects and N ## using the default prior specification res1 <- ecoNP(Y ~ X, N = N, context = TRUE, param = TRUE, data = census, n.draws = 25, verbose = TRUE) ## summarize the results summary(res1) ## out-of sample prediction pres1 <- predict(res1) summary(pres1)} } \references{ Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R Package for Ecological Inference in 2x2 Tables} Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23. Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. } \seealso{ \code{eco}, \code{ecoML}, \code{predict.eco}, \code{summary.ecoNP} } \keyword{models} eco/man/ecoBD.Rd0000644000176200001440000001234414330337373013050 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ecoBD.R \name{ecoBD} \alias{ecoBD} \title{Calculating the Bounds for Ecological Inference in RxC Tables} \usage{ ecoBD(formula, data = parent.frame(), N = NULL) } \arguments{ \item{formula}{A symbolic description of ecological table to be used, specifying the column and row margins of \eqn{R \times C} ecological tables. Details and specific examples are given below.} \item{data}{An optional data frame in which to interpret the variables in \code{formula}. The default is the environment in which \code{ecoBD} is called.} \item{N}{An optional variable representing the size of the unit; e.g., the total number of voters. If \code{formula} is entered as counts and the last row and/or column is omitted, this input is necessary.} } \value{ An object of class \code{ecoBD} containing the following elements (When three dimensional arrays are used, the first dimension indexes the observations, the second dimension indexes the row numbers, and the third dimension indexes the column numbers): \item{call}{The matched call.} \item{X}{A matrix of the observed row margin, \eqn{X}.} \item{Y}{A matrix of the observed column margin, \eqn{Y}.} \item{N}{A vector of the size of ecological tables, \eqn{N}.} \item{aggWmin}{A three dimensional array of aggregate lower bounds for proportions.} \item{aggWmax}{A three dimensional array of aggregate upper bounds for proportions.} \item{Wmin}{A three dimensional array of lower bounds for proportions.} \item{Wmax}{A three dimensional array of upper bounds for proportions.} \item{Nmin}{A three dimensional array of lower bounds for counts.} \item{Nmax}{A three dimensional array of upper bounds for counts.} The object can be printed through \code{print.ecoBD}. 
}
\description{
\code{ecoBD} is used to calculate the bounds for missing internal cells of
\eqn{R \times C} ecological tables. The data can be entered either in the
form of counts or proportions.
}
\details{
The data may be entered either in the form of counts or proportions. If
proportions are used, \code{formula} may omit the last row and/or column of
tables, which can be calculated from the remaining margins. For example,
\code{Y ~ X} specifies \code{Y} as the first column margin and \code{X} as
the first row margin in \eqn{2 \times 2} tables. If counts are used,
\code{formula} may omit the last row and/or column margin of the table only
if \code{N} is supplied. In this example, the columns will be labeled as
\code{X} and \code{not X}, and the rows will be labeled as \code{Y} and
\code{not Y}.

For larger tables, one can use \code{cbind()} and \code{+}. For example,
\code{cbind(Y1, Y2, Y3) ~ X1 + X2 + X3 + X4} specifies \eqn{3 \times 4}
tables.

An \eqn{R \times C} ecological table in the form of counts:
\tabular{lcccc}{ \eqn{n_{i11}} \tab \eqn{n_{i12}} \tab \dots{} \tab
\eqn{n_{i1C}} \tab \eqn{n_{i1.}} \cr \eqn{n_{i21}} \tab \eqn{n_{i22}} \tab
\dots{} \tab \eqn{n_{i2C}} \tab \eqn{n_{i2.}} \cr \dots{} \tab \dots{} \tab
\dots{} \tab \dots{} \tab \dots{} \cr \eqn{n_{iR1}} \tab \eqn{n_{iR2}} \tab
\dots{} \tab \eqn{n_{iRC}} \tab \eqn{n_{iR.}} \cr \eqn{n_{i.1}} \tab
\eqn{n_{i.2}} \tab \dots{} \tab \eqn{n_{i.C}} \tab \eqn{N_i} }
where \eqn{n_{ir.}} and \eqn{n_{i.c}} represent the observed margins,
\eqn{N_i} represents the size of the table, and \eqn{n_{irc}} are unknown
variables. Note that for each \eqn{i}, the following deterministic
relationships hold: \eqn{n_{ir.} = \sum_{c=1}^C n_{irc}} for
\eqn{r=1,\dots,R}, and \eqn{n_{i.c}=\sum_{r=1}^R n_{irc}} for
\eqn{c=1,\dots,C}. Then, each of the unknown inner cells can be bounded in
the following manner,
\deqn{\max(0, n_{ir.}+n_{i.c}-N_i) \le n_{irc} \le \min(n_{ir.}, n_{i.c}).}
If proportions rather than counts are entered, the size of the tables,
\code{N}, may also be provided, in which case the bounds on the counts are
computed as well.

An \eqn{R \times C} ecological table in the form of proportions:
\tabular{lcccc}{ \eqn{W_{i11}} \tab \eqn{W_{i12}} \tab \dots{} \tab
\eqn{W_{i1C}} \tab \eqn{Y_{i1}} \cr \eqn{W_{i21}} \tab \eqn{W_{i22}} \tab
\dots{} \tab \eqn{W_{i2C}} \tab \eqn{Y_{i2}} \cr \dots{} \tab \dots{} \tab
\dots{} \tab \dots{} \tab \dots{} \cr \eqn{W_{iR1}} \tab \eqn{W_{iR2}} \tab
\dots{} \tab \eqn{W_{iRC}} \tab \eqn{Y_{iR}} \cr \eqn{X_{i1}} \tab
\eqn{X_{i2}} \tab \dots{} \tab \eqn{X_{iC}} \tab }
where \eqn{Y_{ir}} and \eqn{X_{ic}} represent the observed margins, and
\eqn{W_{irc}} are unknown variables. Note that for each \eqn{i}, the
following deterministic relationships hold: \eqn{Y_{ir} = \sum_{c=1}^C
X_{ic} W_{irc}} for \eqn{r=1,\dots,R}, and \eqn{\sum_{r=1}^R W_{irc}=1} for
\eqn{c=1,\dots,C}. Then, each of the inner cells of the table can be
bounded in the following manner,
\deqn{\max(0, (X_{ic} + Y_{ir}-1)/X_{ic}) \le W_{irc} \le \min(1,
Y_{ir}/X_{ic}).}
}
\examples{

## load the registration data
data(reg)

## calculate the bounds
res <- ecoBD(Y ~ X, N = N, data = reg)
## print the results
print(res)

}
\references{
Imai, Kosuke, Ying Lu and Aaron Strauss. (2011) \dQuote{eco: R Package for
Ecological Inference in 2x2 Tables} Journal of Statistical Software, Vol.
42, No. 5, pp. 1-23.

Imai, Kosuke, Ying Lu and Aaron Strauss. (2008) \dQuote{Bayesian and
Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
Approach} Political Analysis, Vol. 16, No. 1, (Winter), pp. 41-69.
} \seealso{ \code{eco}, \code{ecoNP} } \keyword{models} eco/man/forgnlit30c.Rd0000644000176200001440000000250014330337373014217 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/forgnlit30c.R \docType{data} \name{forgnlit30c} \alias{forgnlit30c} \title{Foreign-born literacy in 1930, County Level} \format{ A data frame containing 6 variables and 1976 observations \tabular{lll}{ X \tab numeric \tab proportion of the white population at least 10 years of age that is foreign born \cr Y \tab numeric \tab proportion of the white population at least 10 years of age that is illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white population at least 10 years of age that is illiterate \cr W2 \tab numeric \tab proportion of the native-born white population at least 10 years of age that is illiterate \cr state \tab numeric \tab the ICPSR state code \cr county \tab numeric \tab the ICPSR (within state) county code } } \description{ This data set contains, on a county level, the proportion of white residents ten years and older who are foreign born, and the proportion of those residents who are literate. Data come from the 1930 census and were first analyzed by Robinson (1950). Counties with fewer than 100 foreign born residents are dropped. } \references{ Robinson, W.S. (1950). ``Ecological Correlations and the Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15, pp.351-357. } \keyword{datasets} eco/DESCRIPTION0000644000176200001440000000325214331034742012531 0ustar liggesusersPackage: eco Version: 4.0-3 Date: 2022-11-01 Title: Ecological Inference in 2x2 Tables Authors@R: c( person("Kosuke", "Imai", , "imai@Harvard.Edu", c("aut")), person("Ying", "Lu", , "ying.lu@nyu.edu", c("aut", "cre")), person("Aaron", "Strauss", , "aaronbstrauss@gmail.com", c("aut")), person("Hubert", "Jin", , "hubertj@princeton.edu", c("ctb")) ) Maintainer: Ying Lu Depends: R (>= 2.0), MASS, utils Suggests: testthat Description: Implements the Bayesian and likelihood methods proposed in Imai, Lu, and Strauss (2008 ) and (2011 ) for ecological inference in 2 by 2 tables as well as the method of bounds introduced by Duncan and Davis (1953). The package fits both parametric and nonparametric models using either the Expectation-Maximization algorithms (for likelihood models) or the Markov chain Monte Carlo algorithms (for Bayesian models). For all models, the individual-level data can be directly incorporated into the estimation whenever such data are available. Along with in-sample and out-of-sample predictions, the package also provides a functionality which allows one to quantify the effect of data aggregation on parameter estimation and hypothesis testing under the parametric likelihood models. 
LazyLoad: yes LazyData: yes License: GPL (>= 2) URL: https://github.com/kosukeimai/eco BugReports: https://github.com/kosukeimai/eco/issues RoxygenNote: 7.1.1 NeedsCompilation: yes Packaged: 2022-11-03 01:20:39 UTC; kosukeimai Author: Kosuke Imai [aut], Ying Lu [aut, cre], Aaron Strauss [aut], Hubert Jin [ctb] Repository: CRAN Date/Publication: 2022-11-03 22:00:02 UTC eco/tests/0000755000176200001440000000000014330337373012170 5ustar liggesuserseco/tests/testthat/0000755000176200001440000000000014331034742014023 5ustar liggesuserseco/tests/testthat/test-all.R0000644000176200001440000001417714330337373015712 0ustar liggesusersrm(list=ls()) library(eco) library(testthat) context("tests eco") accuracy1 <- ifelse(capabilities("long.double"), 0.002, 0.005) accuracy2 <- ifelse(capabilities("long.double"), 0.2, 0.5) # set random seed set.seed(12345) # for the tests that may take a long time to finish, skip them donotrun = 1 test_that("tests eco on registration data", { ## load the data data(reg) # fit the parametric model with the default prior specification res <- eco(Y ~ X, data = reg, verbose = TRUE) # summarize the results x <- summary(res) expect_that(length(x), is_equivalent_to(8)) expect_true("W2.table" %in% names(x)) expect_equal(x$param.table[2,1], 2.976173, tolerance = accuracy1) expect_equal(x$param.table[3,4], 8.238363, tolerance = accuracy1) # obtain out-of-sample prediction out <- predict(res, verbose = TRUE) # summarize the results x <- summary(out) expect_that(length(x), is_equivalent_to(2)) expect_true("n.draws" %in% names(x)) expect_equal(x$W.table[1,1], 0.4896912, tolerance = accuracy1) expect_equal(x$W.table[2,3], 0.3071461, tolerance = accuracy1) }) if (!donotrun) test_that("tests eco on Robinson census", { # load the Robinson's census data data(census) # fit the parametric model with contextual effects and N using the default prior specification res1 <- eco(Y ~ X, N = N, context = TRUE, data = census, verbose = TRUE) # summarize the results x <- summary(res1) expect_that(length(x), is_equivalent_to(8)) expect_true("W2.table" %in% names(x)) expect_equal(x$param.table[2,3], 2.068228, tolerance = accuracy1) expect_equal(x$agg.wtable[1,3], 0.6631372, tolerance = accuracy1) # obtain out-of-sample prediction out1 <- predict(res1, verbose = TRUE) # summarize the results x <- summary(out1) expect_that(length(x), is_equivalent_to(2)) expect_true("n.draws" %in% names(x)) expect_equal(x$W.table[1,3], 0.4499757, tolerance = accuracy1) expect_equal(x$W.table[3,1], 0.3400277, tolerance = accuracy1) }) # ecoBD test_that("tests ecoBD on registration data", { # load the registration data data(reg) # calculate the bounds x <- ecoBD(Y ~ X, N = N, data = reg) expect_that(length(x), is_equivalent_to(12)) expect_true("aggWmin" %in% names(x)) expect_equal(x$aggWmin[1,1], 0.216785, tolerance = accuracy1) expect_that(x$aggNmax[2,2], is_equivalent_to(2046800)) }) # ecoML if (!donotrun) test_that("tests ecoML on census data", { # load the census data data(census) # fit the parametric model with the default model specifications res <- ecoML(Y ~ X, data = census[1:100,], N=census[1:100,3], epsilon=10^(-6), verbose = TRUE) # summarize the results x <- summary(res) expect_that(length(x), is_equivalent_to(13)) expect_true("iters.sem" %in% names(x)) expect_equal(x$loglik, -70.80674, tolerance = accuracy2) expect_equal(x$param.table[2,3], 0.07192878, tolerance = accuracy1) ##################################################################################### # NOTE: this example does not work! 
# There is no predict.ecoML defined in the package.
  # # obtain out-of-sample prediction
  # out <- predict(res, verbose = TRUE)
  # summarize the results
  # summary(out)
  #####################################################################################

  # fit the parametric model with some individual
  # level data using the default prior specification
  surv <- 1:600
  res1 <- ecoML(Y ~ X, context = TRUE, data = census[-surv,],
                supplement = census[surv,c(4:5,1)], maxit=100, verbose = TRUE)

  # summarize the results
  x <- summary(res1)
  expect_that(length(x), is_equivalent_to(13))
  expect_true("iters.sem" %in% names(x))
  expect_equal(x$loglik, -3481.877, tolerance = accuracy2)
  expect_equal(x$param.table[2,3], 0.006055498, tolerance = accuracy1)
  expect_true(is.na(x$param.table[2,2]))
  expect_true(is.na(x$param.table[2,5]))
  expect_false(is.na(x$param.table[2,6]))
})

# set random seed
set.seed(12345)

# ecoNP
test_that("tests ecoNP on registration data", {
  # load the registration data
  data(reg)

  # NOTE: We set the number of MCMC draws to be a very small number in
  # the following examples; i.e., convergence has not been properly
  # assessed. See Imai, Lu and Strauss (2006) for more complete examples.

  # fit the nonparametric model to give in-sample predictions
  # store the parameters to make population inference later
  res <- ecoNP(Y ~ X, data = reg, n.draws = 50, param = TRUE, verbose = TRUE)

  # summarize the results
  x <- summary(res)
  expect_that(length(x), is_equivalent_to(8))
  expect_true(is.null(x$agg.wtable))
  expect_equal(x$agg.table[1,2], 0.04059766, tolerance = accuracy1)
  expect_equal(x$agg.table[2,3], 0.8129786, tolerance = accuracy1)

  # obtain out-of-sample prediction
  out <- predict(res, verbose = TRUE)

  # summarize the results
  x <- summary(out)
  expect_that(length(x), is_equivalent_to(2))
  expect_true("n.draws" %in% names(x))
  expect_equal(x$W.table[1,3], 0.02617743, tolerance = accuracy1)
  expect_equal(x$W.table[2,1], 0.8137116, tolerance = accuracy1)

  # density plots of the out-of-sample predictions
  # par(mfrow=c(2,1))
  # plot(density(out[,1]), main = "W1")
  # plot(density(out[,2]), main = "W2")
})

if (!donotrun) test_that("tests ecoNP on Robinson census data", {
  # load Robinson's census data
  data(census)

  # fit the parametric model with contextual effects and N
  # using the default prior specification
  res1 <- ecoNP(Y ~ X, N = N, context = TRUE, param = TRUE, data = census,
                n.draws = 25, verbose = TRUE)

  # summarize the results
  x <- summary(res1)
  expect_that(length(x), is_equivalent_to(8))
  expect_false(is.null(x$agg.wtable))
  expect_equal(x$agg.table[1,2], 0.009952511, tolerance = accuracy1)
  expect_equal(x$agg.table[2,3], 0.8690776, tolerance = accuracy1)
  expect_equal(x$agg.wtable[1,2], 0.009508983, tolerance = accuracy1)
  expect_equal(x$agg.wtable[2,3], 0.9005222, tolerance = accuracy1)

  # out-of-sample prediction
  pres1 <- predict(res1)
  x <- summary(pres1)
  expect_that(length(x), is_equivalent_to(2))
  expect_true("n.draws" %in% names(x))
  expect_equal(x$W.table[1,3], 0.1333375, tolerance = accuracy1)
  expect_equal(x$W.table[2,1], 0.8434944, tolerance = accuracy1)
})
eco/tests/testthat.R0000644000176200001440000000006214330337373014151 0ustar liggesuserslibrary(testthat)
library(eco)
test_check("eco")
eco/src/0000755000176200001440000000000014330610075011606 5ustar liggesuserseco/src/gibbsEM.c0000644000176200001440000015560114330337373013275 0ustar liggesusers#include <stddef.h>
#include <stdio.h>
#include <math.h>
#include <Rmath.h>
#include <R_ext/Utils.h>
#include <R.h>
#include "vector.h"
#include "subroutines.h"
#include "rand.h"
#include "sample.h"
#include "bayes.h"
"macros.h" #include "fintegrate.h" void readData(Param* params, int n_dim, double* pdX, double* sur_W, double* x1_W1, double* x0_W2, int n_samp, int s_samp, int x1_samp, int x0_samp); void ecoSEM(double* optTheta, double* pdTheta, Param* params, double Rmat_old[7][7], double Rmat[7][7]); void ecoEStep(Param* params, double* suff); void ecoMStep(double* Suff, double* pdTheta, Param* params); void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params); void ecoMStepCCAR(double* pdTheta, Param* params); void MStepHypTest(Param* params, double* pdTheta); void initTheta(double* pdTheta_in,Param* params, double* pdTheta); void initNCAR(Param* params, double* pdTheta); void setHistory(double* t_pdTheta, double loglik, int iter,setParam* setP,double history_full[][10]); int closeEnough(double* pdTheta, double* pdTheta_old, int len, double maxerr); int semDoneCheck(setParam* setP); void gridEStep(Param* params, int n_samp, int s_samp, int x1_samp, int x0_samp, double* suff, int verbose, double minW1, double maxW1); void transformTheta(double* pdTheta, double* t_pdTheta, int len, setParam* setP); void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP); void ncarFixedRhoTransform(double* pdTheta); void ncarFixedRhoUnTransform(double* pdTheta); void printColumnHeader(int main_loop, int iteration_max, setParam* setP, int finalTheta); /** * Main function. * Important mutations (i.e., outputs): pdTheta, Suff, DMmatrix, history * See internal comments for details. */ void cEMeco( /*data input */ double *pdX, /* data (X, Y) */ double *pdTheta_in, /* Theta^ t CAR: mu1, mu2, var1, var2, rho NCAR: mu1, mu2, var1, var2, p13,p13,p12*/ int *pin_samp, /* sample size */ /* loop vairables */ int *iteration_max, /* number of maximum iterations */ double *convergence, /* abs value limit before stopping */ /*incorporating survey data */ int *survey, /*1 if survey data available(W_1, W_2) 0 not*/ int *sur_samp, /*sample size of survey data*/ double *sur_W, /*set of known W_1, W_2 */ /*incorporating homeogenous areas */ int *x1, /* 1 if X=1 type areas available W_1 known, W_2 unknown */ int *sampx1, /* number X=1 type areas */ double *x1_W1, /* values of W_1 for X1 type areas */ int *x0, /* 1 if X=0 type areas available W_2 known, W_1 unknown */ int *sampx0, /* number X=0 type areas */ double *x0_W2, /* values of W_2 for X0 type areas */ /* bounds of W1 */ double *minW1, double *maxW1, /* options */ int *flag, /*0th (rightmost) bit: 1 = NCAR, 0=normal; 1st bit: 1 = fixed rho, 0 = not fixed rho*/ int *verbosiosity, /*How much to print out, 0=silent, 1=cycle, 2=data*/ int *calcLoglik, /*How much to print out, 0=silent, 1=cycle, 2=data*/ int *hypTest_L, /* number of hypothesis constraints */ double *optTheta, /*optimal theta obtained from previous EM result; if set, then we're doing SEM*/ /* storage */ //Theta under CAR: mu1,mu2,s1,s2,p12 //Theta under NCAR: mu_3, mu_1, mu_2, sig_3, sig_1, sig_2, r_13, r_23, r_12 double *pdTheta, /*EM result for Theta^(t+1) */ double *Suff, /*out put suffucient statistics (E(W_1i|Y_i), E(E_1i*W_1i|Y_i..) when conveges */ double *inSample, /* In Sample info */ double *DMmatrix, /* DM matrix for SEM*/ int *itersUsed, /* number of iterations used */ double *history /* history of param (transformed) as well as logliklihood*/ ){ int n_samp = *pin_samp; /* sample size */ int s_samp = *survey ? *sur_samp : 0; /* sample size of survey data */ int x1_samp = *x1 ? *sampx1 : 0; /* sample size for X=1 */ int x0_samp = *x0 ? 
*sampx0 : 0; /* sample size for X=0 */ //int t_samp=n_samp+s_samp+x1_samp+x0_samp; /* total sample size*/ int t_samp=n_samp+s_samp; /* total sample size, ignoring homog data*/ int n_dim=2; /* dimensions */ setParam setP; //set options setP.ncar=bit(*flag,0); setP.fixedRho=bit(*flag,1); setP.sem=bit(*flag,2) & (optTheta[2]!=-1.1); setP.ccar=0; setP.ccar_nvar=0; //hard-coded hypothesis test //hypTest is the number of constraints. hyptTest==0 when we're not checking a hypothesis setP.hypTest=(*hypTest_L); if (setP.hypTest>1) error("Unable to do hypothesis testing with more than one constraint"); if (setP.hypTest==1) { setP.hypTestCoeff=doubleMatrix(setP.ncar ? 3 : 2,setP.hypTest); setP.hypTestCoeff[0][0]=1; setP.hypTestCoeff[1][0]=-1; if (setP.ncar) setP.hypTestCoeff[2][0]=0; setP.hypTestResult=0; } setP.verbose=*verbosiosity; if (setP.verbose>=1) Rprintf("OPTIONS:: Ncar: %s; Fixed Rho: %s; SEM: %s\n",setP.ncar==1 ? "Yes" : "No", setP.fixedRho==1 ? "Yes" : "No",setP.sem==1 ? "Second run" : (bit(*flag,2)==1 ? "First run" : "No")); setP.calcLoglik=*calcLoglik; setP.convergence=*convergence; setP.t_samp=t_samp; setP.n_samp=n_samp; setP.s_samp=s_samp; setP.x1_samp=x1_samp; setP.x0_samp=x0_samp; int param_len=setP.ccar ? setP.ccar_nvar : (setP.ncar ? 9 : 5); setP.param_len=param_len; setP.pdTheta=doubleArray(param_len); setP.suffstat_len=(setP.ncar ? 9 : 5); setP.SigmaK=doubleMatrix(param_len,param_len); //CCAR setP.InvSigmaK=doubleMatrix(param_len,param_len); //CCAR /* model parameters */ //double **Sigma=doubleMatrix(n_dim,n_dim);/* inverse covariance matrix*/ //double **InvSigma=doubleMatrix(n_dim,n_dim);/* inverse covariance matrix*/ double *pdTheta_old=doubleArray(param_len); double *t_pdTheta=doubleArray(param_len); //transformed theta double *t_pdTheta_old=doubleArray(param_len); double Rmat_old[7][7]; double Rmat[7][7]; double history_full[*iteration_max+1][10]; /* misc variables */ int i, j,main_loop, start; /* used for various loops */ /* get random seed */ GetRNGstate(); //assign param Param* params=(Param*) R_alloc(t_samp,sizeof(Param)); for(i=0;i=1) { if ((main_loop - 1) % 15 == 0) printColumnHeader(main_loop,*iteration_max,&setP,0); Rprintf("cycle %d/%d:",main_loop,*iteration_max); for(i=0;i=0) Rprintf("% 5.3f",pdTheta[i]); else Rprintf(" % 5.2f",pdTheta[i]); } if (setP.calcLoglik==1 && main_loop>2) Rprintf(" Prev LL: %5.2f",Suff[setP.suffstat_len]); Rprintf("\n"); } //keep the old theta around for comaprison for(i=0;i=2) { Rprintf("theta and suff\n"); if (param_len>5) { Rprintf("%10g%10g%10g%10g%10g%10g%10g%10g%10g\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[5],pdTheta[6],pdTheta[7],pdTheta[8]); } else { Rprintf("%10g%10g%10g%10g%10g (%10g)\n",pdTheta[0],pdTheta[1],pdTheta[2],pdTheta[3],pdTheta[4],pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3])); } Rprintf("%10g%10g%10g%10g%10g\n",Suff[0],Suff[1],Suff[2],Suff[3],Suff[4]); Rprintf("Sig: %10g%10g%10g\n",setP.Sigma[0][0],setP.Sigma[1][1],setP.Sigma[0][1]); if (setP.ncar) Rprintf("Sig3: %10g%10g%10g%10g\n",setP.Sigma3[0][0],setP.Sigma3[1][1],setP.Sigma3[2][2]); //char x; //R_ReadConsole("hit enter\n",(char*)&x,4,0); } main_loop++; R_FlushConsole(); R_CheckUserInterrupt(); } /***End main loop ***/ //finish up: record results and loglik Param* param; Suff[setP.suffstat_len]=0.0; for(i=0;icaseP.W[j]; //setBounds(param); //setNormConst(param); } Suff[setP.suffstat_len]+=getLogLikelihood(param); } if (setP.verbose>=1) { printColumnHeader(main_loop,*iteration_max,&setP,1); Rprintf("Final Theta:"); for(i=0;i=0) Rprintf("% 
5.3f",pdTheta[i]); else Rprintf(" % 5.2f",pdTheta[i]); } if (setP.calcLoglik==1 && main_loop>2) { Rprintf(" Final LL: %5.2f",Suff[setP.suffstat_len]); history_full[main_loop-1][param_len]=Suff[setP.suffstat_len]; } Rprintf("\n"); } //set the DM matrix (only matters for SEM) if (setP.sem==1) { int DMlen=0; for(i=0; iparam_len; int i; if (!setP->ncar) { for(i=0;ivarParam[i]=1; } if (setP->fixedRho) setP->varParam[4]=0; } else { //constants double lx,mu3sq; pdTheta[0]=0; mu3sq=0; for(i=0;it_samp;i++) { lx=logit(params[i].caseP.X,"initpdTheta0"); pdTheta[0] += lx; mu3sq += lx*lx; } pdTheta[0] = pdTheta[0]/setP->t_samp; mu3sq = mu3sq/setP->t_samp; pdTheta[3] = mu3sq-pdTheta[0]*pdTheta[0]; //variance //fill from pdTheta_in pdTheta[1]=pdTheta_in[0]; pdTheta[2]=pdTheta_in[1]; pdTheta[4]=pdTheta_in[2]; pdTheta[5]=pdTheta_in[3]; pdTheta[6]=pdTheta_in[4]; pdTheta[7]=pdTheta_in[5]; pdTheta[8]=pdTheta_in[6]; for(i=0;ivarParam[i]=1; setP->varParam[0]=0;setP->varParam[3]=0; //if (setP->fixedRho) setP->varParam[8]=0; } int varlen=0; for(i=0; ivarParam[i]) varlen++; for(i=0; isemDone[i]=0; } /** * The E-step for parametric ecological inference * Takes in a Param array of length n_samp + t_samp + x0_samp + x1_samp * Suff should be an array with the same length as the number of params (+1) * On exit: suff holds the sufficient statistics and loglik as follows * CAR: (0) E[W1*] (1) E[W2*] (2) E[W1*^2] (3) E[W2*^2] (4) E[W1*W2*] (5) loglik * NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik **/ void ecoEStep(Param* params, double* suff) { int t_samp,n_samp,s_samp,x1_samp,x0_samp,i,j, verbose; // double loglik,testdens; double loglik; Param* param; setParam* setP; caseParam* caseP; setP=params[0].setP; verbose=setP->verbose; t_samp=setP->t_samp; n_samp=setP->n_samp; x1_samp=setP->x1_samp; x0_samp=setP->x0_samp; s_samp=setP->s_samp; double **Wstar=doubleMatrix(t_samp,5); /* pseudo data(transformed)*/ loglik=0; if (verbose>=3 && !setP->sem) Rprintf("E-step start\n"); for (i = 0; icaseP); if (caseP->Y>=.990 || caseP->Y<=.010) { //if Y is near the edge, then W1 and W2 are very constrained Wstar[i][0]=logit(caseP->Y,"Y maxmin W1"); Wstar[i][1]=logit(caseP->Y,"Y maxmin W2"); Wstar[i][2]=Wstar[i][0]*Wstar[i][0]; Wstar[i][3]=Wstar[i][0]*Wstar[i][1]; Wstar[i][4]=Wstar[i][1]*Wstar[i][1]; caseP->Wstar[0]=Wstar[i][0]; caseP->Wstar[1]=Wstar[i][1]; caseP->W[0]=caseP->Y; caseP->W[1]=caseP->Y; if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param); //Rprintf("Skipping %d, Y=%5g",i,caseP->Y); } else { setBounds(param); //I think you only have to do this once...check later /*if (verbose>=2 && setP->iter==12 && i==422) { Rprintf("Bounds: %5g %5g %5g %5g\n",caseP->Wbounds[0][0],caseP->Wbounds[0][1],caseP->Wbounds[1][0],caseP->Wbounds[1][1]); setP->weirdness=1; } else setP->weirdness=0;*/ setNormConst(param); for (j=0;j<5;j++) { caseP->suff=j; Wstar[i][j]=paramIntegration(&SuffExp,param); if (j<2) caseP->Wstar[j]=Wstar[i][j]; } caseP->suff=SS_W1; caseP->W[0]=paramIntegration(&SuffExp,param); caseP->suff=SS_W2; caseP->W[1]=paramIntegration(&SuffExp,param); caseP->suff=SS_Test; // testdens=paramIntegration(&SuffExp,param); if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param); //report error E1 if E[W1],E[W2] is not on the tomography line if (fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1]))>0.011) { Rprintf("E1 %d %5g %5g %5g %5g %5g %5g %5g %5g err:%5g\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], 
caseP->normcT,Wstar[i][0],Wstar[i][1],Wstar[i][2],fabs(caseP->W[0]-getW1FromW2(caseP->X, caseP->Y,caseP->W[1])));
        // char ch;
        // scanf("Hit enter to continue %c\n", &ch );
      }

      //report error E2 if Jensen's inequality doesn't hold
      if (Wstar[i][4]<Wstar[i][1]*Wstar[i][1])
        Rprintf("E2 %d %5g %5g %5g %5g %5g %5g %5g %5g\n", i, caseP->X, caseP->Y, caseP->normcT, caseP->mu[1],Wstar[i][0],Wstar[i][1],Wstar[i][2],Wstar[i][4]);

      //used for debugging if necessary
      if (verbose>=2 && !setP->sem && ((i<10 && verbose>=3) || (caseP->mu[1] < -1.7 && caseP->mu[0] > 1.4)))
        Rprintf("%d %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n", i, caseP->X, caseP->Y, caseP->mu[0], caseP->mu[1], param->setP->Sigma[0][1], caseP->normcT, caseP->W[0],caseP->W[1],Wstar[i][2]);
    }
  }

  /* Use the values given by the survey data */
  //Calculate loglik also
  for (i=n_samp; i<n_samp+s_samp; i++) {
    param=&(params[i]);
    caseP=&(param->caseP);
    Wstar[i][0]=caseP->Wstar[0];
    Wstar[i][1]=caseP->Wstar[1];
    Wstar[i][2]=Wstar[i][0]*Wstar[i][0];
    Wstar[i][3]=Wstar[i][0]*Wstar[i][1];
    Wstar[i][4]=Wstar[i][1]*Wstar[i][1];
    if (setP->calcLoglik==1 && setP->iter>1) loglik+=getLogLikelihood(param);
  }

  /* analytically compute E{W2_i|Y_i} given W1_i, mu and Sigma in x1 homogeneous areas */
  for (i=n_samp+s_samp; i<n_samp+s_samp+x1_samp; i++) {
    /*temp0=params[i].caseP.Wstar[0];
      temp1=params[i].caseP.mu[1]+setP->Sigma[0][1]/setP->Sigma[0][0]*(temp0-params[i].caseP.mu[0]);
      Wstar[i][0]=temp0;
      Wstar[i][1]=temp1;
      Wstar[i][2]=temp0*temp0;
      Wstar[i][3]=temp0*temp1;
      Wstar[i][4]=temp1*temp1;*/
  }

  /*analytically compute E{W1_i|Y_i} given W2_i, mu and Sigma in x0 homogeneous areas */
  for (i=n_samp+s_samp+x1_samp; i<t_samp; i++) {
    /*temp1=params[i].caseP.Wstar[1];
      temp0=params[i].caseP.mu[0]+setP->Sigma[0][1]/setP->Sigma[1][1]*(temp1-params[i].caseP.mu[1]);
      Wstar[i][0]=temp0;
      Wstar[i][1]=temp1;
      Wstar[i][2]=temp0*temp0;
      Wstar[i][3]=temp0*temp1;
      Wstar[i][4]=temp1*temp1;*/
  }

  /*Calculate sufficient statistics */
  for (j=0; j<setP->suffstat_len; j++) suff[j]=0;

  //CAR: (0) E[W1*] (1) E[W2*] (2) E[W1*^2] (3) E[W2*^2] (4) E[W1*W2*] (5) loglik
  //NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) X*W1, (7) X*W2, (8) W1*W2, (9) loglik
  /* compute sufficient statistics */
  for (i=0; i<t_samp; i++) {
    if (!setP->ncar) {
      suff[0] += Wstar[i][0];  /* sumE(W_i1|Y_i) */
      suff[1] += Wstar[i][1];  /* sumE(W_i2|Y_i) */
      suff[2] += Wstar[i][2];  /* sumE(W_i1^2|Y_i) */
      suff[3] += Wstar[i][4];  /* sumE(W_i2^2|Y_i) */
      suff[4] += Wstar[i][3];  /* sumE(W_i1*W_i2|Y_i) */
    }
    else if (setP->ncar) {
      double lx= logit(params[i].caseP.X,"mstep X");
      suff[0] += lx;
      suff[1] += Wstar[i][0];
      suff[2] += Wstar[i][1];
      suff[3] += lx*lx;
      suff[4] += Wstar[i][2];
      suff[5] += Wstar[i][4];
      suff[6] += params[i].caseP.Wstar[0]*lx;
      suff[7] += params[i].caseP.Wstar[1]*lx;
      suff[8] += Wstar[i][3];
    }
  }

  for(j=0; j<setP->suffstat_len; j++) suff[j]=suff[j]/t_samp;
  //Rprintf("%5g suff0,2,4 %5g %5g %5g\n",setP->pdTheta[6],suff[0],suff[2],suff[4]);
  //if(verbose>=1) Rprintf("Log likelihood %15g\n",loglik);
  suff[setP->suffstat_len]=loglik;

  FreeMatrix(Wstar,t_samp);
}

/**
 * CAR M-Step
 * inputs: Suff (sufficient statistics)
 *   CAR Suff: E[W1], E[W2], E[W1^2], E[W2^2], E[W1W2]
 * mutated (i.e., output): pdTheta, params
 */
void ecoMStep(double* Suff, double* pdTheta, Param* params) {

  int i;
  setParam* setP=params[0].setP;
  pdTheta[0]=Suff[0];  /*mu1*/
  pdTheta[1]=Suff[1];  /*mu2*/

  if (setP->hypTest>0) {
    MStepHypTest(params,pdTheta);
  }

  if (!setP->fixedRho) { //standard
    pdTheta[2]=Suff[2]-2*Suff[0]*pdTheta[0]+pdTheta[0]*pdTheta[0];  //sigma11
    pdTheta[3]=Suff[3]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1];  //sigma22
    pdTheta[4]=Suff[4]-Suff[0]*pdTheta[1]-Suff[1]*pdTheta[0]+pdTheta[0]*pdTheta[1]; //sigma12
    pdTheta[4]=pdTheta[4]/sqrt(pdTheta[2]*pdTheta[3]); /*rho*/
  }
  else { //fixed rho
    double Imat[2][2];
    Imat[0][0]=Suff[2]-2*pdTheta[0]*Suff[0]+pdTheta[0]*pdTheta[0];  //I_11
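    /* With rho held fixed, only the variances are free in the M-step: the
       second moments I_jk of W* about the updated means are assembled in
       these statements, and maximizing the expected complete-data
       log-likelihood over the variances alone gives
         sigma_kk = (I_kk - rho*I_12*sqrt(I_kk/I_jj)) / (1 - rho^2),
       which is exactly the update applied to pdTheta[2] and pdTheta[3]
       below. */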
Imat[1][1]=Suff[3]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1]; //I_22 Imat[0][1]=Suff[4]-Suff[0]*pdTheta[1]-Suff[1]*pdTheta[0]+pdTheta[0]*pdTheta[1]; //I_12 pdTheta[2]=(Imat[0][0]-pdTheta[4]*Imat[0][1]*pow(Imat[0][0]/Imat[1][1],0.5))/(1-pdTheta[4]*pdTheta[4]); //sigma11 pdTheta[3]=(Imat[1][1]-pdTheta[4]*Imat[0][1]*pow(Imat[1][1]/Imat[0][0],0.5))/(1-pdTheta[4]*pdTheta[4]); //sigma22 //sigma12 will be determined below by rho } //set Sigma setP->Sigma[0][0] = pdTheta[2]; setP->Sigma[1][1] = pdTheta[3]; setP->Sigma[0][1] = pdTheta[4]*sqrt(pdTheta[2]*pdTheta[3]); setP->Sigma[1][0] = setP->Sigma[0][1]; //if(setP->verbose>=3) Rprintf("Sigma mstep: %5g %5g %5g %5g\n",setP->Sigma[0][0],setP->Sigma[0][1],setP->Sigma[1][0],setP->Sigma[1][1]); dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"regular M-step"); /* assign each data point the new mu (same for all points) */ for(i=0;it_samp;i++) { params[i].caseP.mu[0]=pdTheta[0]; params[i].caseP.mu[1]=pdTheta[1]; } } /** * M-Step under NCAR * Input: Suff (sufficient statistics) * (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik * mutated (i.e., output): pdTheta, params */ void ecoMStepNCAR(double* Suff, double* pdTheta, Param* params) { setParam* setP=params[0].setP; //double Sigma[2][2]=setP->Sigma; //double[2][2] InvSigma=setP->InvSigma; //double[3][3] Sigma3=setP->Sigma3; /* covariance matrix*/ //double[3][3] InvSigma3=setP->Sigma3; /* inverse covariance matrix*/ //int ii,i,j,verbose,t_samp; int ii,i,j; int verbose=0; int t_samp=0; verbose=t_samp; t_samp=verbose; verbose=setP->verbose; t_samp=setP->t_samp; //set E[XW*] double XW1=Suff[6]; double XW2=Suff[7]; //for(i = 0;i<9; i++) Rprintf("%f5.2\n",pdTheta[i]); if (!setP->fixedRho) { //variable rho //pdTheta[0] is const pdTheta[1]=Suff[1]; /*mu1*/ pdTheta[2]=Suff[2]; /*mu2*/ //set variances and correlations //pdTheta[3] is const pdTheta[4]=Suff[4]-2*Suff[1]*pdTheta[1]+pdTheta[1]*pdTheta[1]; //s11 pdTheta[5]=Suff[5]-2*Suff[2]*pdTheta[2]+pdTheta[2]*pdTheta[2]; //s22 pdTheta[6]=(XW1 - pdTheta[0]*Suff[1])/sqrt((Suff[4] - Suff[1]*Suff[1])*pdTheta[3]); //rho_13 pdTheta[7]=(XW2 - pdTheta[0]*Suff[2])/sqrt((Suff[5] - Suff[2]*Suff[2])*pdTheta[3]); //rho_23 pdTheta[8]=Suff[8]-Suff[1]*pdTheta[2]-Suff[2]*pdTheta[1]+pdTheta[1]*pdTheta[2]; //sigma12 pdTheta[8]=pdTheta[8]/sqrt(pdTheta[4]*pdTheta[5]); //rho_12 //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12 //variances setP->Sigma3[0][0] = pdTheta[4]; setP->Sigma3[1][1] = pdTheta[5]; setP->Sigma3[2][2] = pdTheta[3]; //covariances setP->Sigma3[0][1] = pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]); setP->Sigma3[0][2] = pdTheta[6]*sqrt(pdTheta[4]*pdTheta[3]); setP->Sigma3[1][2] = pdTheta[7]*sqrt(pdTheta[5]*pdTheta[3]); //symmetry setP->Sigma3[1][0] = setP->Sigma3[0][1]; setP->Sigma3[2][0] = setP->Sigma3[0][2]; setP->Sigma3[2][1] = setP->Sigma3[1][2]; //if (verbose>=2) { //Rprintf("Sigma3: %5g %5g %5g %5g %5g\n",setP->Sigma3[0][0],setP->Sigma3[0][1],setP->Sigma3[1][1],setP->Sigma3[1][2],setP->Sigma3[2][2]); //} } else { //fixed rho //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3 ncarFixedRhoTransform(pdTheta); //need the fixed param (pdTheta[8]) to be the conditional correlation //CODE BLOCK D //compute beta based on previous sigma //beta is mu1,beta1,mu2,beta, which are pdTheta 1,2,6,7 double **InvSigma=doubleMatrix(2,2); double **Zmat=doubleMatrix(4,2); double 
**Zmat_t=doubleMatrix(2,4); double **tmp41=doubleMatrix(4,1); double **tmp42=doubleMatrix(4,2); double **tmp44=doubleMatrix(4,4); double **tmp21=doubleMatrix(2,1); double **denom=doubleMatrix(4,4); double **numer=doubleMatrix(4,1); for (i=0;i<4;i++) { for(j=0;j<4;j++) { if (j<2) { if (i<2) InvSigma[i][j]=setP->InvSigma[i][j]; Zmat[i][j]=0; Zmat_t[j][i]=0; } denom[i][j]=0; } numer[i][0]=0; } //Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]); for(ii=0;iit_samp;ii++) { double lx=logit(params[ii].caseP.X,"NCAR beta"); for(j=0;j<2;j++) { Zmat_t[j][j*2+1]=lx - pdTheta[0]; Zmat_t[j][j*2]=1; Zmat[j*2+1][j]=lx - pdTheta[0]; Zmat[j*2][j]=1; } matrixMul(Zmat,InvSigma,4,2,2,2,tmp42); matrixMul(tmp42,Zmat_t,4,2,2,4,tmp44); for (i=0;i<4;i++) for(j=0;j<4;j++) denom[i][j]+=tmp44[i][j]; //for (i=0;i<2;i++) tmp21[i][0]=(params[ii].caseP.Wstar[i] - pdTheta[i+1]); //Wtilde ?? for (i=0;i<2;i++) tmp21[i][0]=params[ii].caseP.Wstar[i]; //Wstar //matrixMul(Zmat,InvSigma,4,2,2,2,tmp42); //no need to repeat calculation matrixMul(tmp42,tmp21,4,2,2,1,tmp41); for (i=0;i<4;i++) numer[i][0]+=tmp41[i][0]; } dinv(denom,4,denom); matrixMul(denom,numer,4,4,4,1,numer); pdTheta[1]=numer[0][0]; //mu1 pdTheta[6]=numer[1][0]; //beta1 pdTheta[2]=numer[2][0]; //mu2 pdTheta[7]=numer[3][0]; //beta2 //pdTheta[8] is constant //Rprintf("Compare Suff1 %5g to pdT1 %5g \n",Suff[1],pdTheta[1]); //Rprintf("Compare Suff2 %5g to pdT2 %5g \n",Suff[2],pdTheta[2]); if (setP->hypTest>0) { MStepHypTest(params,pdTheta); } //CAR: (0) E[W1*] (1) E[W2*] (2) E[W1*^2] (3) E[W2*^2] (4) E[W1*W2*] (5) loglik //NCAR: (0) X, (1) W1, (2) W2, (3) X^2, (4) W1^2, (5) W2^2, (6) x*W1, (7) X*W2, (8) W1*W2, (9) loglik //0->1, 1->2, 2->4, 3->5, 4->8 //CODE BLOCK C //Compute sigma conditional on beta //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3 double Smat[2][2]; //the S matrix (divided by n) in the paper double Tmat[2][2]; //the T matrix (divided by n) in the paper double S1=Suff[1]; //S_1 = Sufficient stat of W1* - beta1 * (sum of [(X_i - \mu3)]) ; second term goes to zero double S2=Suff[2]; //S_2 = Sufficient stat of W2* Smat[0][0]=Suff[4] - 2*pdTheta[6]*(XW1 - pdTheta[0]*Suff[1]) + pdTheta[6]*pdTheta[6]*pdTheta[3]; //S_11 Smat[1][1]=Suff[5] - 2*pdTheta[7]*(XW2 - pdTheta[0]*Suff[2]) + pdTheta[7]*pdTheta[7]*pdTheta[3]; //S_22 Smat[0][1]=Suff[8] - pdTheta[6]*(XW2 - pdTheta[0]*Suff[2]) - pdTheta[7]*(XW1 - pdTheta[0]*Suff[1]) + pdTheta[6]*pdTheta[7]*pdTheta[3] ; //S_12 Tmat[0][0]=Smat[0][0] - S1*S1; Tmat[1][1]=Smat[1][1] - S2*S2; Tmat[0][1]=Smat[0][1] - S1*S2; pdTheta[4]=(Tmat[0][0]-pdTheta[8]*Tmat[0][1]*pow(Tmat[0][0]/Tmat[1][1],0.5))/(1-pdTheta[8]*pdTheta[8]); //sigma11 | 3 pdTheta[5]=(Tmat[1][1]-pdTheta[8]*Tmat[0][1]*pow(Tmat[1][1]/Tmat[0][0],0.5))/(1-pdTheta[8]*pdTheta[8]); //sigma22 | 3 //variances //CODE BLOCK B setP->Sigma3[0][0] = pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]; setP->Sigma3[1][1] = pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]; setP->Sigma3[2][2] = pdTheta[3]; //covariances setP->Sigma3[0][1] = (pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]) + pdTheta[6]*pdTheta[7]*pdTheta[3])/ (sqrt((pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3])*(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3])));//rho_12 unconditional setP->Sigma3[0][1] = setP->Sigma3[0][1]*sqrt(setP->Sigma3[0][0]*setP->Sigma3[1][1]); //sig_12 setP->Sigma3[0][2] = pdTheta[6]*sqrt((pdTheta[3])/(pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]))*sqrt(setP->Sigma3[0][0]*setP->Sigma3[2][2]); 
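/* ----------------------------------------------------------------
 * [Editor's sketch; not part of eco.] CODE BLOCK B above assembles
 * the unconditional 3x3 covariance from variances and correlations
 * via sig_jk = r_jk * sqrt(sig_j * sig_k), then mirrors the
 * off-diagonal entries for symmetry. A minimal standalone version of
 * that pattern (hypothetical helper name; assumes <math.h>):
 * ---------------------------------------------------------------- */
static void fillSigma3Sketch(double s1, double s2, double s3,
                             double r12, double r13, double r23,
                             double S[3][3]) {
  S[0][0] = s1;  S[1][1] = s2;  S[2][2] = s3;  /* variances   */
  S[0][1] = S[1][0] = r12 * sqrt(s1 * s2);     /* covariances */
  S[0][2] = S[2][0] = r13 * sqrt(s1 * s3);
  S[1][2] = S[2][1] = r23 * sqrt(s2 * s3);
}
/* [End editor's sketch.] */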
setP->Sigma3[1][2] = pdTheta[7]*sqrt((pdTheta[3])/(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]))*sqrt(setP->Sigma3[1][1]*setP->Sigma3[2][2]); //symmetry setP->Sigma3[1][0] = setP->Sigma3[0][1]; setP->Sigma3[2][0] = setP->Sigma3[0][2]; setP->Sigma3[2][1] = setP->Sigma3[1][2]; } dinv2D((double*)(&(setP->Sigma3[0][0])), 3, (double*)(&(setP->InvSigma3[0][0])),"NCAR M-step S3"); initNCAR(params,pdTheta); if (setP->fixedRho) ncarFixedRhoUnTransform(pdTheta); } /** * M-Step under CCAR * Input: params * mutated (i.e., output): pdTheta, params */ void ecoMStepCCAR(double* pdTheta, Param* params) { setParam* setP=params[0].setP; int k=setP->ccar_nvar; //int ii,i,j,verbose,t_samp; int ii,i,j; int verbose=0; int t_samp=0; verbose=t_samp; t_samp=verbose; verbose=setP->verbose; t_samp=setP->t_samp; double **InvSigma=doubleMatrix(2,2); double **Z_i=doubleMatrix(k,2); double **Z_i_t=doubleMatrix(2,k); double **tmpk1=doubleMatrix(k,1); double **tmpk2=doubleMatrix(k,2); double **tmpkk=doubleMatrix(k,k); double **tmp21=doubleMatrix(2,1); double **tmp21_b=doubleMatrix(2,1); double **tmp12=doubleMatrix(1,2); double **tmp22=doubleMatrix(2,2); double **denom=doubleMatrix(k,k); double **numer=doubleMatrix(k,1); //betas for (i=0;iInvSigma[i][j]; } denom[i][j]=0; } numer[i][0]=0; } //Rprintf("InvSigma %5g %5g %5g\n",InvSigma[0][0],InvSigma[1][1],InvSigma[0][1]); for(ii=0;iit_samp;ii++) { for (i=0;ihypTest>0) { MStepHypTest(params,pdTheta); } //conditional Sigma //start at 0 for(i=0; i<2;i++) for(j=0; j<2;j++) setP->Sigma[i][j] = 0; for(ii=0;iit_samp;ii++) { for (i=0;iSigma[i][j] += tmp22[i][j]; } dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"CCAR M-step S2"); //variances //CODE BLOCK B setP->Sigma3[0][0] = pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]; setP->Sigma3[1][1] = pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]; setP->Sigma3[2][2] = pdTheta[3]; //covariances setP->Sigma3[0][1] = (pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]) + pdTheta[6]*pdTheta[7]*pdTheta[3])/ (sqrt((pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3])*(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3])));//rho_12 unconditional setP->Sigma3[0][1] = setP->Sigma3[0][1]*sqrt(setP->Sigma3[0][0]*setP->Sigma3[1][1]); //sig_12 setP->Sigma3[0][2] = pdTheta[6]*sqrt((pdTheta[3])/(pdTheta[4] + pdTheta[6]*pdTheta[6]*pdTheta[3]))*sqrt(setP->Sigma3[0][0]*setP->Sigma3[2][2]); setP->Sigma3[1][2] = pdTheta[7]*sqrt((pdTheta[3])/(pdTheta[5] + pdTheta[7]*pdTheta[7]*pdTheta[3]))*sqrt(setP->Sigma3[1][1]*setP->Sigma3[2][2]); //symmetry setP->Sigma3[1][0] = setP->Sigma3[0][1]; setP->Sigma3[2][0] = setP->Sigma3[0][2]; setP->Sigma3[2][1] = setP->Sigma3[1][2]; dinv2D((double*)(&(setP->Sigma3[0][0])), 3, (double*)(&(setP->InvSigma3[0][0])),"NCAR M-step S3"); initNCAR(params,pdTheta); } /** * Exta M-Step for hypothesis testing * Input: params * Mutates pdTheta */ void MStepHypTest(Param* params, double* pdTheta) { setParam* setP=params[0].setP; double offset,denom; int dim,i,j,l,k; dim=setP->ncar ? 
3 : 2; l=setP->hypTest; double** Sigma=doubleMatrix(dim,dim); double** temp_LbyD=doubleMatrix(l,dim); double** temp_DbyL=doubleMatrix(dim,l); double** temp_LbyL=doubleMatrix(l,l); for(i=0;iSigma3[i][j]; } else { Sigma[i][j]=setP->Sigma[i][j]; } } //transpose double** hypTestCoeffT=doubleMatrix(l,dim); for(i=0;ihypTestCoeff[i][0]; //numerator for(k=0;k<2;k++) temp_DbyL[k][0]=0; for(i=0;it_samp;i++) { temp_DbyL[0][0]+=params[i].caseP.Wstar[0]; temp_DbyL[1][0]+=params[i].caseP.Wstar[1]; } matrixMul(hypTestCoeffT,temp_DbyL,l,dim,dim,l,temp_LbyL); temp_LbyL[0][0]=temp_LbyL[0][0]-(setP->t_samp*setP->hypTestResult); matrixMul(Sigma,setP->hypTestCoeff,dim,dim,dim,l,temp_DbyL); for(k=0;k<2;k++) temp_DbyL[k][0]*=temp_LbyL[0][0]; //denominator //matrixMul(hypTestCoeffT,InvSigma,l,dim,dim,dim,temp_LbyD); matrixMul(hypTestCoeffT,Sigma,l,dim,dim,dim,temp_LbyD); matrixMul(temp_LbyD,setP->hypTestCoeff,l,dim,dim,l,temp_LbyL); denom=setP->t_samp*temp_LbyL[0][0]; //offset theta for(k=0;k<2;k++) { offset=temp_DbyL[k][0]/denom; int kindex= (setP->ncar) ? (k+1) : k; pdTheta[kindex]=pdTheta[kindex]-offset; } } /** * NCAR initialize * note that for fixed rho, the input is the UNTRANSFORMED PARAMETERS * input: pdTheta * mutates: params */ void initNCAR(Param* params, double* pdTheta) { setParam* setP=params[0].setP; int i; if (!setP->fixedRho) { //variable rho //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12 setP->Sigma[0][0]= pdTheta[4]*(1 - pdTheta[6]*pdTheta[6]); setP->Sigma[1][1]= pdTheta[5]*(1 - pdTheta[7]*pdTheta[7]); setP->Sigma[0][1]= (pdTheta[8] - pdTheta[6]*pdTheta[7])/sqrt((1 - pdTheta[6]*pdTheta[6])*(1 - pdTheta[7]*pdTheta[7])); //correlation setP->Sigma[0][1]= setP->Sigma[0][1]*sqrt(setP->Sigma[0][0]*setP->Sigma[1][1]); //covar setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2"); //assign each data point the new mu (different for each point) for(i=0;it_samp;i++) { params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*sqrt(pdTheta[4]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]); params[i].caseP.mu[1]=pdTheta[2] + pdTheta[7]*sqrt(pdTheta[5]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu1")-pdTheta[0]); if(setP->verbose>=2 && !setP->sem && (i<3 || i==422)) //if(setP->verbose>=2 && i<3) Rprintf("mu primes for %d: %5g %5g (mu2: %5g p7: %5g p5: %5g X-T: %5g)\n",i,params[i].caseP.mu[0],params[i].caseP.mu[1],pdTheta[2],pdTheta[7],pdTheta[5],logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]); } } else { //fixed rho //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3 //CODE BLOCK A setP->Sigma[0][0]= pdTheta[4]; setP->Sigma[1][1]= pdTheta[5]; setP->Sigma[0][1]= pdTheta[8]*sqrt(pdTheta[4]*pdTheta[5]); //covar setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2"); for(i=0;it_samp;i++) { params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]); params[i].caseP.mu[1]=pdTheta[2] + pdTheta[7]*(logit(params[i].caseP.X,"initNCAR mu1")-pdTheta[0]); if(setP->verbose>=2 && !setP->sem && (i<3 || i==422)) //if(setP->verbose>=2 && i<3) Rprintf("mu primes for %d: %5g %5g (mu2: %5g p7: %5g p5: %5g X-T: %5g)\n",i,params[i].caseP.mu[0],params[i].caseP.mu[1],pdTheta[2],pdTheta[7],pdTheta[5],logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]); } } } /** * CCAR initialize * 
Note that fixed rho is currently unimplemented * input: pdTheta * mutates: params */ void initCCAR(Param* params, double* pdTheta) { setParam* setP=params[0].setP; int i; if (!setP->fixedRho) { //variable rho //reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12 setP->Sigma[0][0]= pdTheta[4]*(1 - pdTheta[6]*pdTheta[6]); setP->Sigma[1][1]= pdTheta[5]*(1 - pdTheta[7]*pdTheta[7]); setP->Sigma[0][1]= (pdTheta[8] - pdTheta[6]*pdTheta[7])/sqrt((1 - pdTheta[6]*pdTheta[6])*(1 - pdTheta[7]*pdTheta[7])); //correlation setP->Sigma[0][1]= setP->Sigma[0][1]*sqrt(setP->Sigma[0][0]*setP->Sigma[1][1]); //covar setP->Sigma[1][0]= setP->Sigma[0][1]; //symmetry dinv2D((double*)(&(setP->Sigma[0][0])), 2, (double*)(&(setP->InvSigma[0][0])),"NCAR M-step S2"); //assign each data point the new mu (different for each point) for(i=0;it_samp;i++) { params[i].caseP.mu[0]=pdTheta[1] + pdTheta[6]*sqrt(pdTheta[4]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]); params[i].caseP.mu[1]=pdTheta[2] + pdTheta[7]*sqrt(pdTheta[5]/pdTheta[3])*(logit(params[i].caseP.X,"initNCAR mu1")-pdTheta[0]); if(setP->verbose>=2 && !setP->sem && (i<3 || i==422)) //if(setP->verbose>=2 && i<3) Rprintf("mu primes for %d: %5g %5g (mu2: %5g p7: %5g p5: %5g X-T: %5g)\n",i,params[i].caseP.mu[0],params[i].caseP.mu[1],pdTheta[2],pdTheta[7],pdTheta[5],logit(params[i].caseP.X,"initNCAR mu0")-pdTheta[0]); } } else { //fixed rho } } /** * input: optTheta,pdTheta,params,Rmat * mutate/output: matrices Rmat and Rmat_old (dimensions of param_len x param_len) * optTheta is optimal theta * pdTheta is current theta * Rmat_old contains the input Rmat */ void ecoSEM(double* optTheta, double* pdTheta, Param* params, double Rmat_old[7][7], double Rmat[7][7]) { //assume we have optTheta, ie \hat{phi} //pdTheta is phi^{t+1} int i,j,verbose,len,param_len; setParam setP_sem=*(params[0].setP); param_len=setP_sem.param_len; double *SuffSem=doubleArray(setP_sem.suffstat_len+1); //sufficient stats double phiTI[param_len]; //phi^t_i double phiTp1I[param_len]; //phi^{t+1}_i double t_optTheta[param_len]; //transformed optimal double t_phiTI[param_len]; //transformed phi^t_i double t_phiTp1I[param_len]; //transformed phi^{t+1}_i Param* params_sem=(Param*) Calloc(params->setP->t_samp,Param); verbose=setP_sem.verbose; //determine length of R matrix len=0; for(j=0; j=2) Rprintf("Theta(%d):",(i+1)); int switch_index_ir=0; int switch_index_it=0; for(j=0;j=2) Rprintf(" %5g ", phiTI[j]); } //if (setP_sem.fixedRho) { // phiTI[len-1]=pdTheta[len-1]; // phiTp1I[len-1]=pdTheta[len-1]; // if (verbose>=2) Rprintf(" %5g ", phiTI[len-1]); //} if (verbose>=2) Rprintf("\n"); for(j=0;j=2) { Rprintf("Sigma3: %5g %5g %5g %5g %5g %5g; %5g %5g\n",setP_sem.Sigma3[0][0],setP_sem.Sigma3[0][1],setP_sem.Sigma3[1][1],setP_sem.Sigma3[0][2],setP_sem.Sigma3[1][2],setP_sem.Sigma3[2][2],*(&(setP_sem.Sigma3[0][0])+0),*(&(setP_sem.Sigma3[0][0])+8)); } dinv2D((double*)(&(setP_sem.Sigma3[0][0])), 3, (double*)(&(setP_sem.InvSigma3[0][0])),"SEM: NCAR Sig3 init"); if (verbose>=2) { Rprintf("Check 1"); } if (setP_sem.fixedRho) ncarFixedRhoTransform(phiTI); initNCAR(params_sem,phiTI); if (setP_sem.fixedRho) ncarFixedRhoUnTransform(phiTI); if (verbose>=2) { Rprintf("Check 2"); } } //if (verbose>=2) { // Rprintf("Sigma: %5g %5g %5g %5g\n",setP_sem.Sigma[0][0],setP_sem.Sigma[0][1],setP_sem.Sigma[1][0],setP_sem.Sigma[1][1]); //} ecoEStep(params_sem, SuffSem); if (!params[0].setP->ncar) ecoMStep(SuffSem,phiTp1I,params_sem); else 
ecoMStepNCAR(SuffSem,phiTp1I,params_sem); //step 3: create new R matrix row transformTheta(phiTp1I,t_phiTp1I,setP_sem.param_len,&setP_sem); transformTheta(optTheta,t_optTheta,setP_sem.param_len,&setP_sem); transformTheta(phiTI,t_phiTI,setP_sem.param_len,&setP_sem); /*if (verbose>=2) { Rprintf("T+1:"); for (j=0;jsemDone[i]=closeEnough((double*)Rmat[i],(double*)Rmat_old[i],len,sqrt(params[0].setP->convergence)); } else { //keep row the same for(j = 0; j=1) { for(i=0;isemDone[i]) ? " Done" : "Not done"); for(j=0;j= 1) ? .9999 : ((params[i].caseP.X <= 0) ? 0.0001 : params[i].caseP.X); //fix Y edge cases params[i].caseP.Y=(params[i].caseP.Y >= 1) ? .9999 : ((params[i].caseP.Y <= 0) ? 0.0001 : params[i].caseP.Y); } /*read the survey data */ itemp=0; surv_dim=n_dim + (setP->ncar ? 1 : 0); //if NCAR, the survey data will include X's for (j=0; j0) { Rprintf("WARNING: Homogenous data is ignored and not handled by the current version of eco."); } if (setP->verbose>=2) { Rprintf("Y X\n"); for(i=0;i<5;i++) Rprintf("%5d%14g%14g\n",i,params[i].caseP.Y,params[i].caseP.X); if (s_samp>0) { Rprintf("SURVEY data\nY X\n"); int s_max=fmin2(n_samp+x1_samp+x0_samp+s_samp,n_samp+x1_samp+x0_samp+5); for(i=n_samp+x1_samp+x0_samp; iparam_len; //trying to print nicely, but it throws an error //char temp[50]; int hlen; //if (!finalTheta) hlen=sprintf(temp, "cycle %d/%d:",main_loop,iteration_max); //Length of cycle text //else hlen=sprintf(temp, "Final Theta:"); //for (i=0;ifixedRho || finalTheta) Rprintf(" r_12"); } else { //NCAR if (finalTheta) { Rprintf(" mu_3 mu_1 mu_2 sig_3 sig_1 sig_2 r_13 r_23 r_12"); } else { Rprintf(" mu_1 mu_2 sig_1 sig_2 r_13 r_23 r_12"); } } Rprintf("\n"); } /** * Parameterizes the elements of theta * Input: pdTheta * Mutates: t_pdTheta */ void transformTheta(double* pdTheta, double* t_pdTheta, int len, setParam* setP) { if (len<=5) { t_pdTheta[0]=pdTheta[0]; t_pdTheta[1]=pdTheta[1]; t_pdTheta[2]=log(pdTheta[2]); t_pdTheta[3]=log(pdTheta[3]); t_pdTheta[4]=.5*(log(1+pdTheta[4])-log(1-pdTheta[4])); } else { t_pdTheta[0]=pdTheta[0]; t_pdTheta[1]=pdTheta[1]; t_pdTheta[2]=pdTheta[2]; t_pdTheta[3]=log(pdTheta[3]); t_pdTheta[4]=log(pdTheta[4]); t_pdTheta[5]=log(pdTheta[5]); t_pdTheta[6]=.5*(log(1+pdTheta[6])-log(1-pdTheta[6])); t_pdTheta[7]=.5*(log(1+pdTheta[7])-log(1-pdTheta[7])); t_pdTheta[8]=.5*(log(1+pdTheta[8])-log(1-pdTheta[8])); } } /** * Un-parameterizes the elements of theta * Input: t_pdTheta * Mutates: pdTheta */ void untransformTheta(double* t_pdTheta,double* pdTheta, int len, setParam* setP) { if (len<=5) { pdTheta[0]=t_pdTheta[0]; pdTheta[1]=t_pdTheta[1]; pdTheta[2]=exp(t_pdTheta[2]); pdTheta[3]=exp(t_pdTheta[3]); pdTheta[4]=(exp(2*t_pdTheta[4])-1)/(exp(2*t_pdTheta[4])+1); } else { pdTheta[0]=t_pdTheta[0]; pdTheta[1]=t_pdTheta[1]; pdTheta[2]=t_pdTheta[2]; pdTheta[3]=exp(t_pdTheta[3]); pdTheta[4]=exp(t_pdTheta[4]); pdTheta[5]=exp(t_pdTheta[5]); if (!setP->fixedRho) { pdTheta[6]=(exp(2*t_pdTheta[6])-1)/(exp(2*t_pdTheta[6])+1); pdTheta[7]=(exp(2*t_pdTheta[7])-1)/(exp(2*t_pdTheta[7])+1); } else { pdTheta[6]=t_pdTheta[6]; pdTheta[7]=t_pdTheta[7]; } pdTheta[8]=(exp(2*t_pdTheta[8])-1)/(exp(2*t_pdTheta[8])+1); } } /** * untransforms theta under ncar -- fixed rho * input reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3 * output reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12 * mutates: pdTheta **/ void ncarFixedRhoUnTransform(double* pdTheta) { double* tmp=doubleArray(9); 
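/* ----------------------------------------------------------------
 * [Editor's sketch; not part of eco.] transformTheta/untransformTheta
 * above map variances through log()/exp() and correlations through
 * Fisher's z: .5*(log(1+r)-log(1-r)) = atanh(r), inverted by
 * (exp(2z)-1)/(exp(2z)+1) = tanh(z), so the SEM rate matrix is built
 * on an unconstrained scale. The pair below (hypothetical names;
 * assumes <math.h>) is exactly that round trip:
 * ---------------------------------------------------------------- */
static double fisherZSketch(double r) {
  return 0.5 * (log(1.0 + r) - log(1.0 - r));        /* (-1,1) -> R */
}
static double fisherZInvSketch(double z) {
  return (exp(2.0 * z) - 1.0) / (exp(2.0 * z) + 1.0); /* R -> (-1,1) */
}
/* [End editor's sketch.] */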
int i; for (i=0;i<9;i++) tmp[i]=pdTheta[i]; pdTheta[0]=tmp[0]; pdTheta[1]=tmp[1]; pdTheta[2]=tmp[2]; pdTheta[3]=tmp[3]; pdTheta[4]=tmp[4] + tmp[6]*tmp[6]*tmp[3]; pdTheta[5]=tmp[5] + tmp[7]*tmp[7]*tmp[3]; pdTheta[6]=(tmp[6]*sqrt(tmp[3]))/(sqrt(pdTheta[4])); pdTheta[7]=(tmp[7]*sqrt(tmp[3]))/(sqrt(pdTheta[5])); pdTheta[8]=(tmp[8]*sqrt(tmp[4]*tmp[5]) + tmp[6]*tmp[7]*tmp[3])/(sqrt(pdTheta[4]*pdTheta[5])); Free(tmp); } /** * transforms theta under ncar -- fixed rho * input reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1, (5) sig_2, (6) r_13, (7) r_23, (8) r_12 * output reference: (0) mu_3, (1) mu_1, (2) mu_2, (3) sig_3, (4) sig_1 | 3, (5) sig_2 | 3, (6) beta1, (7) beta2, (8) r_12 | 3 * mutates: pdTheta **/ void ncarFixedRhoTransform(double* pdTheta) { double* tmp=doubleArray(9); int i; for (i=0;i<9;i++) tmp[i]=pdTheta[i]; pdTheta[0]=tmp[0]; pdTheta[1]=tmp[1]; pdTheta[2]=tmp[2]; pdTheta[3]=tmp[3]; pdTheta[4]=tmp[4] - tmp[6]*tmp[6]*tmp[4]; pdTheta[5]=tmp[5] - tmp[7]*tmp[7]*tmp[5]; pdTheta[6]=tmp[6]*sqrt(tmp[4]/tmp[3]); pdTheta[7]=tmp[7]*sqrt(tmp[5]/tmp[3]); pdTheta[8]=(tmp[8] - tmp[6]*tmp[7])/(sqrt((1 - tmp[6]*tmp[6])*(1 - tmp[7]*tmp[7]))); Free(tmp); } /** * Input transformed theta, loglikelihood, iteration * Mutates: history_full **/ void setHistory(double* t_pdTheta, double loglik, int iter,setParam* setP,double history_full[][10]) { int len=setP->param_len; int j; for(j=0;j0) history_full[iter-1][len]=loglik; } /** * Determines whether we have converged * Takes in the current and old (one step previous) array of theta values * maxerr is the maximum difference two corresponding values can have before the * function returns false */ int closeEnough(double* pdTheta, double* pdTheta_old, int len, double maxerr) { int j; for(j = 0; j=maxerr) return 0; return 1; } /** * Is the SEM process completely done. **/ int semDoneCheck(setParam* setP) { int varlen=0; int j; for(j=0; jparam_len;j++) if(setP->varParam[j]) varlen++; for(j=0;jsemDone[j]==0) return 0; return 1; } /** * Older function. No longer used. **/ void gridEStep(Param* params, int n_samp, int s_samp, int x1_samp, int x0_samp, double* suff, int verbose, double minW1, double maxW1) { int n_dim=2; int n_step=5000; /* The default size of grid step */ int ndraw=10000; int trapod=0; /* 1 if use trapozodial ~= in numer. 
int.*/ int *n_grid=intArray(n_samp); /* grid size */ double **W1g=doubleMatrix(n_samp, n_step); /* grids for W1 */ double **W2g=doubleMatrix(n_samp, n_step); /* grids for W2 */ double *vtemp=doubleArray(n_dim); int *mflag=intArray(n_step); double *prob_grid=doubleArray(n_step); double *prob_grid_cum=doubleArray(n_step); double **X=doubleMatrix(n_samp,n_dim); /* Y and covariates */ int itemp,i,j,k,t_samp; double dtemp,dtemp1,temp0,temp1; t_samp=n_samp+x1_samp+x0_samp+s_samp; double **W=doubleMatrix(t_samp,n_dim); /* W1 and W2 matrix */ double **Wstar=doubleMatrix(t_samp,5); /* pseudo data(transformed*/ for (i=0;iInvSigma), 2, 1) - log(W1g[i][j])-log(W2g[i][j])-log(1-W1g[i][j])-log(1-W2g[i][j]); prob_grid[j]=exp(prob_grid[j]); dtemp+=prob_grid[j]; prob_grid_cum[j]=dtemp; } for (j=0;j=1 && trapod==1) { if (prob_grid_cum[j]!=prob_grid_cum[(j-1)]) { dtemp1=((double)(1+k)/(ndraw+1)-prob_grid_cum[(j-1)])/(prob_grid_cum[j]-prob_grid_cum[(j-1)]); W[i][0]=dtemp1*(W1g[i][j]-W1g[i][(j-1)])+W1g[i][(j-1)]; W[i][1]=dtemp1*(W2g[i][j]-W2g[i][(j-1)])+W2g[i][(j-1)]; } else if (prob_grid_cum[j]==prob_grid_cum[(j-1)]) { W[i][0]=W1g[i][j]; W[i][1]=W2g[i][j]; } } temp0=log(W[i][0])-log(1-W[i][0]); temp1=log(W[i][1])-log(1-W[i][1]); Wstar[i][0]+=temp0; Wstar[i][1]+=temp1; Wstar[i][2]+=temp0*temp0; Wstar[i][3]+=temp0*temp1; Wstar[i][4]+=temp1*temp1; } } } // compute E_{W_i|Y_i} for n_samp for (i=0; i #include #include #include #include #include #include #include "vector.h" #include "rand.h" #include "subroutines.h" /* * Computes the dot product of two vectors */ double dotProduct(double* a, double* b, int size) { int i; double ans=0; for (i=0; i0) { Rprintf("The matrix being inverted is singular. Error code %d\n", errorM); } else { Rprintf("The matrix being inverted contained an illegal value. Error code %d.\n", errorM); } error("Exiting from dinv().\n"); } } else { if (errorM>0) { Rprintf("The matrix being inverted was not positive definite. Error code %d\n", errorM); } else { Rprintf("The matrix being inverted contained an illegal value. Error code %d.\n", errorM); } error("Exiting from dinv().\n"); } for (i = 0, j = 0; j < size; j++) { for (k = 0; k <= j; k++) { X_inv[j][k] = pdInv[i]; X_inv[k][j] = pdInv[i++]; } } Free(pdInv); } /* inverting a matrix, first tyring positive definite trick, and then symmetric * Uses special syntax since we don't know dimensions of array * Prevents memory errors for matrices created with double[][] */ void dinv2D(double* X, int size, double* X_inv,char* emsg) { int i,j, k, errorM, skip; double *pdInv = doubleArray(size*size); skip=0; for (i = 0, j = 0; j < size; j++) for (k = 0; k <= j; k++) //pdInv[i++] = X[k][j]; pdInv[i++] = *(X+k*size+j); //Rprintf("test: %5g %5g %d",pdInv[0],pdInv[(size == 3) ? 5 : 2],i); F77_CALL(dpptrf)("U", &size, pdInv, &errorM FCONE); if (!errorM) { F77_CALL(dpptri)("U", &size, pdInv, &errorM FCONE); if (errorM) { Rprintf(emsg); if (errorM>0) { Rprintf(": The matrix being inverted is singular. Error code %d\n", errorM); } else { Rprintf(": The matrix being inverted contained an illegal value. Error code %d.\n", errorM); } error("Exiting from dinv2D().\n"); } } else { Rprintf(emsg); if (errorM>0) { /* The matrix is not positive definite. * This error does occur with proper data, when the likelihood curve is flat, * usually with the combination of NCAR and SEM. At one point we tried * inverting the matrix via an alternative method that does not rely on * positive definiteness (see below), but that just led to further errors. 
* Instead, the program halts as gracefully as possible. */ //Inverting the matrix anyway: //Rprintf(": Warning, the matrix being inverted was not positive definite on minor order %d.\n", errorM); //dinv2D_sym(X,size,X_inv,emsg); //skip=1; Rprintf(": Error, the matrix being inverted was not positive definite on minor order %d.\n", errorM); error("The program cannot continue; try a different model or including supplemental data.\n"); } else { Rprintf(": The matrix being inverted contained an illegal value. Error code %d.\n", errorM); error("Exiting from dinv2D().\n"); } } if (skip==0) { for (i = 0, j = 0; j < size; j++) { for (k = 0; k <= j; k++) { *(X_inv+size*j+k) = pdInv[i]; *(X_inv+size*k+j) = pdInv[i++]; } } } Free(pdInv); } /* inverting a matrix, assumes symmtretric, but not pos def * Uses special syntax since we don't know dimensions of array * Prevents memory errors for matrices created with double[][] */ void dinv2D_sym(double* X, int size, double* X_inv,char* emsg) { int i,j, k, errorM, size2; size2=size*size; double *pdInv = doubleArray(size2); double *B= doubleArray(size2); int *factor_out = intArray(size); //init pdInv and B. B is identity for (i = 0, j = 0; j < size; j++) for (k = 0; k < size; k++) { if (j==k) B[i]=1; else B[i]=0; pdInv[i]=*(X+k*size+j); i++; } //for (i = 0, j = 0; j < size; j++) // for (k = 0; k <= j; k++) { // pdInv[i++] = *(X+k*size+j); // } double *work0 = doubleArray(size2); int test=-1; F77_CALL(dsysv)("U", &size, &size, pdInv, &size, factor_out, B, &size, work0, &test, &errorM FCONE); int lwork=(int)work0[0]; Free(work0); //Rprintf("work size %d\n",lwork); double *work = doubleArray(lwork); //Rprintf("In A: %5g %5g %5g %5g\n",pdInv[0],pdInv[1],pdInv[2],pdInv[3]); //Rprintf("In B: %5g %5g %5g %5g\n",B[0],B[1],B[2],B[3]); F77_CALL(dsysv)("U", &size, &size, pdInv, &size, factor_out, B, &size, work, &lwork, &errorM FCONE); Free(work); //Rprintf("Out1: %5g %5g %5g %5g %d\n",B[0],B[1],B[2],B[3],errorM); if (errorM) { Rprintf(emsg); if (errorM>0) { Rprintf(": The matrix being inverted is singular. Error code %d\n", errorM); } else { Rprintf(": The matrix being inverted contained an illegal value. Error code %d.\n", errorM); } error("Exiting from dinv2D_sym() (dsytrf).\n"); } for (i = 0, j = 0; j < size; j++) { for (k = 0; k < size; k++) { *(X_inv+size*j+k) = B[i++]; } } free(factor_out); Free(B); Free(pdInv); } /* Cholesky decomposition */ /* returns lower triangular matrix */ void dcholdc(double **X, int size, double **L) { int i, j, k, errorM; double *pdTemp = doubleArray(size*size); for (j = 0, i = 0; j < size; j++) for (k = 0; k <= j; k++) pdTemp[i++] = X[k][j]; F77_CALL(dpptrf)("U", &size, pdTemp, &errorM FCONE); if (errorM) { if (errorM>0) { Rprintf("The matrix being inverted was not positive definite. Error code %d\n", errorM); } else { Rprintf("The matrix being inverted contained an illegal value. Error code %d.\n", errorM); } error("Exiting from dcholdc().\n"); } for (j = 0, i = 0; j < size; j++) { for (k = 0; k < size; k++) { if(j0) { Rprintf("The matrix being inverted was not positive definite. Error code %d\n", errorM); } else { Rprintf("The matrix being inverted contained an illegal value. 
Error code %d.\n", errorM); } error("Exiting from dcholdc2D().\n"); } for (j = 0, i = 0; j < size; j++) { for (k = 0; k < size; k++) { if(j #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" /* Normal Parametric Model for 2xC (with C > 2) Tables */ void cBase2C( /*data input */ double *pdX, /* X: matrix */ double *Y, /* Y: vector */ double *pdWmin, /* lower bounds */ double *pdWmax, /* uppwer bounds */ int *pin_samp, /* sample size */ int *pin_col, /* number of columns */ /*MCMC draws */ int *reject, /* whether to use rejection sampling */ int *maxit, /* max number of iterations for rejection sampling */ int *n_gen, /* number of gibbs draws */ int *burn_in, /* number of draws to be burned in */ int *pinth, /* keep every nth draw */ int *verbose, /* 1 for output monitoring */ /* prior specification*/ int *pinu0, /* prior df parameter for InvWish */ double *pdtau0, /* prior scale parameter for Sigma */ double *mu0, /* prior mean for mu */ double *pdS0, /* prior scale for Sigma */ /* starting values */ double *mu, double *SigmaStart, /* storage */ int *parameter, /* 1 if save population parameter */ double *pdSmu, double *pdSSigma, double *pdSW ){ /* some integers */ int n_samp = *pin_samp; /* sample size */ int nth = *pinth; /* keep every pth draw */ int n_col = *pin_col; /* dimension */ /* prior parameters */ double tau0 = *pdtau0; /* prior scale for mu */ int nu0 = *pinu0; /* prior degrees of freedom */ double **S0 = doubleMatrix(n_col, n_col); /* prior scale for Sigma */ /* data */ double **X = doubleMatrix(n_samp, n_col); /* X */ double **W = doubleMatrix(n_samp, n_col); /* The W matrix */ double **Wstar = doubleMatrix(n_samp, n_col); /* logit(W) */ /* The lower and upper bounds of U = W*X/Y **/ double **minU = doubleMatrix(n_samp, n_col); double **maxU = doubleMatrix(n_samp, n_col); /* model parameters */ double **Sigma = doubleMatrix(n_col, n_col); /* The covariance matrix */ double **InvSigma = doubleMatrix(n_col, n_col); /* The inverse covariance matrix */ /* misc variables */ int i, j, k, main_loop; /* used for various loops */ int itemp; int itempM = 0; /* for mu */ int itempS = 0; /* for Sigma */ int itempW = 0; /* for W */ int itempC = 0; /* control nth draw */ int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1; double *param = doubleArray(n_col); /* Dirichlet parameters */ double *dvtemp = doubleArray(n_col); /* get random seed */ GetRNGstate(); /* read X */ itemp = 0; for (j = 0; j < n_col; j++) for (i = 0; i < n_samp; i++) X[i][j] = pdX[itemp++]; /* read initial values of Sigma */ itemp = 0; for (k = 0; k < n_col; k++) for (j = 0; j < n_col; j++) Sigma[j][k] = SigmaStart[itemp++]; dinv(Sigma, n_col, InvSigma); /* compute bounds on U */ itemp = 0; for (j = 0; j < n_col; j++) for (i = 0; i < n_samp; i++) minU[i][j] = fmax2(0, pdWmin[itemp++]*X[i][j]/Y[i]); itemp = 0; for (j = 0; j < n_col; j++) for (i = 0; i < n_samp; i++) maxU[i][j] = fmin2(1, pdWmax[itemp++]*X[i][j]/Y[i]); /* initial values for W */ for (j = 0; j < n_col; j++) param[j] = 1; for (i = 0; i < n_samp; i++) { k = 0; itemp = 1; while (itemp > 0) { /* rejection sampling */ rDirich(dvtemp, param, n_col); itemp = 0; k++; for (j = 0; j < n_col; j++) if (dvtemp[j] > maxU[i][j] || dvtemp[j] < minU[i][j]) itemp++; if (itemp == 0) for (j = 0; j < n_col; j++) W[i][j] = dvtemp[j]*Y[i]/X[i][j]; if (k > *maxit) { /* if rejection sampling fails, then use midpoits of bounds sequentially */ itemp = 0; dtemp = Y[i]; 
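/* ----------------------------------------------------------------
 * [Editor's sketch; not part of eco.] The initializer above draws W
 * from a flat Dirichlet restricted to [minU, maxU] by rejection,
 * falling back to bound midpoints after *maxit failures. A minimal
 * standalone version of the rejection loop (hypothetical name;
 * assumes <Rmath.h> and that GetRNGstate() has already been called;
 * returns the number of proposals used, or -1 so the caller can fall
 * back to the midpoint scheme):
 * ---------------------------------------------------------------- */
static int rDirichRejectSketch(double *u, const double *minU,
                               const double *maxU, int dim, int maxit) {
  int it, j, ok;
  for (it = 1; it <= maxit; it++) {
    double sum = 0.0;
    for (j = 0; j < dim; j++) { u[j] = rgamma(1.0, 1.0); sum += u[j]; }
    for (j = 0; j < dim; j++) u[j] /= sum; /* normalized Exp(1) draws
                                              form a flat Dirichlet   */
    ok = 1;
    for (j = 0; j < dim; j++)
      if (u[j] < minU[j] || u[j] > maxU[j]) ok = 0;
    if (ok) return it;
  }
  return -1;
}
/* [End editor's sketch.] */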
dtemp1 = 1; for (j = 0; j < n_col-1; j++) { W[i][j] = 0.5*(fmax2(0,(X[i][j]/dtemp1+dtemp-1)*dtemp1/X[i][j])+ fmin2(1,dtemp*dtemp1/X[i][j])); dtemp -= W[i][j]*X[i][j]/dtemp1; dtemp1 -= X[i][j]; } W[i][n_col-1] = dtemp; } } for (j = 0; j < n_col; j++) Wstar[i][j] = log(W[i][j])-log(1-W[i][j]); } /* read the prior */ itemp = 0; for(k = 0; k < n_col; k++) for(j = 0; j < n_col; j++) S0[j][k] = pdS0[itemp++]; /*** Gibbs sampler! ***/ if (*verbose) Rprintf("Starting Gibbs sampler...\n"); for(main_loop = 0; main_loop < *n_gen; main_loop++){ /** update W, Wstar given mu, Sigma **/ for (i = 0; i < n_samp; i++){ rMH2c(W[i], X[i], Y[i], minU[i], maxU[i], mu, InvSigma, n_col, *maxit, *reject); for (j = 0; j < n_col; j++) Wstar[i][j] = log(W[i][j])-log(1-W[i][j]); } /* update mu, Sigma given wstar using effective sample of Wstar */ NIWupdate(Wstar, mu, Sigma, InvSigma, mu0, tau0, nu0, S0, n_samp, n_col); /*store Gibbs draw after burn-in and every nth draws */ if (main_loop>=*burn_in){ itempC++; if (itempC==nth){ for (j = 0; j < n_col; j++) { pdSmu[itempM++]=mu[j]; for (k = 0; k < n_col; k++) if (j <=k) pdSSigma[itempS++]=Sigma[j][k]; } for(i = 0; i < n_samp; i++) for (j = 0; j < n_col; j++) pdSW[itempW++] = W[i][j]; itempC=0; } } if (*verbose) if (itempP == main_loop) { Rprintf("%3d percent done.\n", progress*10); itempP+=ftrunc((double) *n_gen/10); progress++; R_FlushConsole(); } R_CheckUserInterrupt(); } /* end of Gibbs sampler */ if(*verbose) Rprintf("100 percent done.\n"); /** write out the random seed **/ PutRNGstate(); /* Freeing the memory */ FreeMatrix(S0, n_col); FreeMatrix(X, n_samp); FreeMatrix(W, n_samp); FreeMatrix(Wstar, n_samp); FreeMatrix(minU, n_samp); FreeMatrix(maxU, n_samp); FreeMatrix(Sigma, n_col); FreeMatrix(InvSigma, n_col); Free(dvtemp); Free(param); } /* main */ eco/src/fintegrate.h0000644000176200001440000000201414330337373014113 0ustar liggesusers/****************************************************************** This file is a part of eco: R Package for Fitting Bayesian Models of Ecological Inference for 2x2 Tables by Kosuke Imai and Ying Lu Copyright: GPL version 2 or later. 
*******************************************************************/
/* NOTE: the system header name was stripped in this copy; restored from
   usage (integr_fn comes from R's Applic header). */
#include <R_ext/Applic.h>

void NormConstT(double *t, int n, void *param);
void SuffExp(double *t, int n, void *param);
double getLogLikelihood(Param* param);
void setNormConst(Param* param);
double getW2starFromW1star(double X, double Y, double W1, int* imposs);
double getW1starFromW2star(double X, double Y, double W2, int* imposs);
double getW1FromW2(double X, double Y, double W2);
double getW1starFromT(double t, Param* param, int* imposs);
double getW2starFromT(double t, Param* param, int* imposs);
double getW1starPrimeFromT(double t, Param* param);
double getW2starPrimeFromT(double t, Param* param);
double paramIntegration(integr_fn f, void *ex);
void setBounds(Param* param);
eco/src/gibbsBase.c0000644000176200001440000002123514330337373013645 0ustar liggesusers
/* NOTE: system header names were stripped in this copy; restored from usage */
#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <Rmath.h>
#include <R.h>
#include <R_ext/Utils.h>
#include "vector.h"
#include "subroutines.h"
#include "rand.h"
#include "bayes.h"
#include "sample.h"

/* Normal Parametric Model for 2x2 Tables */
void cBaseeco(
  /* data input */
  double *pdX,        /* data (X, Y) */
  int *pin_samp,      /* sample size */
  /* MCMC draws */
  int *n_gen,         /* number of Gibbs draws */
  int *burn_in,       /* number of draws to be burned in */
  int *pinth,         /* keep every nth draw */
  int *verbose,       /* 1 for output monitoring */
  /* prior specification */
  int *pinu0,         /* prior df parameter for InvWish */
  double *pdtau0,     /* prior scale parameter for Sigma */
  double *mu0,        /* prior mean for mu */
  double *pdS0,       /* prior scale for Sigma */
  double *mustart,    /* starting values for mu */
  double *Sigmastart, /* starting values for Sigma */
  /* incorporating survey data */
  int *survey,        /* 1 if survey data available (set of W_1, W_2), 0 if not */
  int *sur_samp,      /* sample size of survey data */
  double *sur_W,      /* set of known W_1, W_2 */
  /* incorporating homogeneous areas */
  int *x1,            /* 1 if X=1 type areas available: W_1 known, W_2 unknown */
  int *sampx1,        /* number of X=1 type areas */
  double *x1_W1,      /* values of W_1 for X=1 type areas */
  int *x0,            /* 1 if X=0 type areas available: W_2 known, W_1 unknown */
  int *sampx0,        /* number of X=0 type areas */
  double *x0_W2,      /* values of W_2 for X=0 type areas */
  /* bounds of W1 */
  double *minW1,
  double *maxW1,
  /* flags */
  int *parameter,     /* 1 if save population parameter */
  int *Grid,          /* 1 if Grid algorithm is used; 0 for Metropolis */
  /* storage for Gibbs draws of mu/Sigma */
  double *pdSMu0, double *pdSMu1,
  double *pdSSig00, double *pdSSig01, double *pdSSig11,
  /* storage for Gibbs draws of W */
  double *pdSW1, double *pdSW2
){
  /* some integers */
  int n_samp = *pin_samp;  /* sample size */
  int s_samp = *sur_samp;  /* sample size of survey data */
  int x1_samp = *sampx1;   /* sample size for X=1 */
  int x0_samp = *sampx0;   /* sample size for X=0 */
  int t_samp = n_samp+s_samp+x1_samp+x0_samp; /* total sample size */
  int nth = *pinth;
  int n_dim = 2;           /* dimension */
  int n_step = 1000;       /* 1 / the default size of a grid step */

  /* prior parameters */
  double tau0 = *pdtau0;   /* prior scale */
  int nu0 = *pinu0;        /* prior degrees of freedom */
  double **S0 = doubleMatrix(n_dim, n_dim); /* the prior S parameter for InvWish */

  /* data */
  double **X = doubleMatrix(n_samp, n_dim);       /* the Y and covariates */
  double **W = doubleMatrix(t_samp, n_dim);       /* the W1 and W2 matrix */
  double **Wstar = doubleMatrix(t_samp, n_dim);   /* logit transformed W */
  double **S_W = doubleMatrix(s_samp, n_dim);     /* the known W1 and W2 matrix */
  double **S_Wstar = doubleMatrix(s_samp, n_dim); /* logit transformed S_W */

  /* grids */
  double
**W1g = doubleMatrix(n_samp, n_step); /* grids for W1 */ double **W2g = doubleMatrix(n_samp, n_step); /* grids for W2 */ int *n_grid = intArray(n_samp); /* grid size */ /* model parameters */ double *mu = doubleArray(n_dim); /* The mean */ double **Sigma = doubleMatrix(n_dim, n_dim); /* The covariance matrix */ double **InvSigma = doubleMatrix(n_dim, n_dim); /* The inverse covariance matrix */ /* misc variables */ int i, j, k, main_loop; /* used for various loops */ int itemp, itempS, itempC, itempA; int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1; /* get random seed */ GetRNGstate(); /* read the priors */ itemp=0; for(k=0;k=*burn_in){ itempC++; if (itempC==nth){ pdSMu0[itempA]=mu[0]; pdSMu1[itempA]=mu[1]; pdSSig00[itempA]=Sigma[0][0]; pdSSig01[itempA]=Sigma[0][1]; pdSSig11[itempA]=Sigma[1][1]; itempA++; for(i=0; i<(n_samp+x1_samp+x0_samp); i++){ pdSW1[itempS]=W[i][0]; pdSW2[itempS]=W[i][1]; itempS++; } itempC=0; } } if (*verbose) if (itempP == main_loop) { Rprintf("%3d percent done.\n", progress*10); itempP+=ftrunc((double) *n_gen/10); progress++; R_FlushConsole(); } R_CheckUserInterrupt(); } /* end of Gibbs sampler */ if(*verbose) Rprintf("100 percent done.\n"); /** write out the random seed **/ PutRNGstate(); /* Freeing the memory */ FreeMatrix(X, n_samp); FreeMatrix(W, t_samp); FreeMatrix(Wstar, t_samp); FreeMatrix(S_W, s_samp); FreeMatrix(S_Wstar, s_samp); FreeMatrix(S0, n_dim); FreeMatrix(W1g, n_samp); FreeMatrix(W2g, n_samp); free(n_grid); Free(mu); FreeMatrix(Sigma,n_dim); FreeMatrix(InvSigma, n_dim); } /* main */ eco/src/preDP.c0000644000176200001440000000350514330337373012776 0ustar liggesusers#include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" /* Prediction for Nonparametric Model for 2x2 Tables */ void preDP( double *pdmu, double *pdSigma, int *pin_samp, int *pin_draw, int *pin_dim, int *verbose, /* 1 for output monitoring */ double *pdStore ){ /* some integers */ int n_samp = *pin_samp; /* sample size */ int n_draw = *pin_draw; /* sample size of survey data */ int n_dim = *pin_dim; /* dimension */ double *mu = doubleArray(n_dim); /* The mean */ double *Wstar = doubleArray(n_dim); double **Sigma = doubleMatrix(n_dim, n_dim); /* The covariance matrix */ /* misc variables */ int i, j, k, main_loop; /* used for various loops */ int itemp = 0; int itempM = 0; int itempS = 0; int progress = 1, itempP = ftrunc((double) n_draw/10); /* get random seed */ GetRNGstate(); for(main_loop=0; main_loop // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. 
*/ /* .C calls */ extern void cBase2C(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cBaseeco(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cBaseecoX(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cBaseecoZ(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cBaseRC(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cDPeco(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cDPecoX(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void cEMeco(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); extern void preBaseX(void *, void *, void *, void *, void *, void *, void *); extern void preDP(void *, void *, void *, void *, void *, void *, void *); extern void preDPX(void *, void *, void *, void *, void *, void *, void *, void *); static const R_CMethodDef CEntries[] = { {"cBase2C", (DL_FUNC) &cBase2C, 22}, {"cBaseeco", (DL_FUNC) &cBaseeco, 32}, {"cBaseecoX", (DL_FUNC) &cBaseecoX, 36}, {"cBaseecoZ", (DL_FUNC) &cBaseecoZ, 29}, {"cBaseRC", (DL_FUNC) &cBaseRC, 23}, {"cDPeco", (DL_FUNC) &cDPeco, 36}, {"cDPecoX", (DL_FUNC) &cDPecoX, 40}, {"cEMeco", (DL_FUNC) &cEMeco, 27}, {"preBaseX", (DL_FUNC) &preBaseX, 7}, {"preDP", (DL_FUNC) &preDP, 7}, {"preDPX", (DL_FUNC) &preDPX, 8}, {NULL, NULL, 0} }; void R_init_eco(DllInfo *dll) { R_registerRoutines(dll, CEntries, NULL, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } eco/src/vector.c0000644000176200001440000000511014330337373013260 0ustar liggesusers/****************************************************************** This file is a part of eco: R Package for Fitting Bayesian Models of Ecological Inference for 2x2 Tables by Kosuke Imai and Ying Lu Copyright: GPL version 2 or later. 
*******************************************************************/
/* NOTE: system header names were stripped in this copy; restored from usage */
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <R_ext/Utils.h>
#include <R.h>

int* intArray(int num) {
  int *iArray = (int *)malloc(num * sizeof(int));
  if (iArray)
    return iArray;
  else {
    error("Out of memory error in intArray\n");
    return NULL;
  }
}

int** intMatrix(int row, int col) {
  int i;
  int **iMatrix = (int **)malloc(row * sizeof(int *));
  if (iMatrix) {
    for (i = 0; i < row; i++) {
      iMatrix[i] = (int *)malloc(col * sizeof(int));
      if (!iMatrix[i])
        error("Out of memory error in intMatrix\n");
    }
    return iMatrix;
  }
  else {
    error("Out of memory error in intMatrix\n");
    return NULL;
  }
}

double* doubleArray(int num) {
  //double *dArray = (double *)malloc(num * sizeof(double));
  double *dArray = Calloc(num,double);
  if (dArray)
    return dArray;
  else {
    error("Out of memory error in doubleArray\n");
    return NULL;
  }
}

double** doubleMatrix(int row, int col) {
  int i;
  //double **dMatrix = (double **)malloc((size_t)(row * sizeof(double *)));
  double **dMatrix = Calloc(row,double*);
  if (dMatrix) {
    for (i = 0; i < row; i++) {
      dMatrix[i] = Calloc(col,double);
      if (!dMatrix[i]) {
        error("Out of memory error in doubleMatrix\n");
        return NULL;
      }
    }
    return dMatrix;
  }
  else {
    error("Out of memory error in doubleMatrix\n");
    return NULL;
  }
}

double*** doubleMatrix3D(int x, int y, int z) {
  int i;
  double ***dM3 = (double ***)malloc(x * sizeof(double **));
  if (dM3) {
    for (i = 0; i < x; i++)
      dM3[i] = doubleMatrix(y, z);
    return dM3;
  }
  else {
    error("Out of memory error in doubleMatrix3D\n");
    return NULL;
  }
}

long* longArray(int num) {
  long *lArray = (long *)malloc(num * sizeof(long));
  if (lArray)
    return lArray;
  else {
    error("Out of memory error in longArray\n");
    return NULL;
  }
}

void FreeMatrix(double **Matrix, int row) {
  int i;
  for (i = 0; i < row; i++)
    Free(Matrix[i]);
  Free(Matrix);
}

void FreeintMatrix(int **Matrix, int row) {
  int i;
  for (i = 0; i < row; i++)
    free(Matrix[i]);
  free(Matrix);
}

void Free3DMatrix(double ***Matrix, int index, int row) {
  int i;
  for (i = 0; i < index; i++)
    FreeMatrix(Matrix[i], row);
  free(Matrix);
}
eco/src/fintegrate.c0000644000176200001440000003033114330337373014111 0ustar liggesusers/******************************************************************
  This file is a part of eco: R Package for Fitting Bayesian Models
  of Ecological Inference for 2x2 Tables by Kosuke Imai, Ying Lu,
  and Aaron Strauss
  Copyright: GPL version 2 or later.
*******************************************************************/ #include #include #include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "sample.h" #include "bayes.h" #include "macros.h" #include "fintegrate.h" //#include /** * Bivariate normal distribution, with parameterization * see: http://mathworld.wolfram.com/BivariateNormalDistribution.html * see for param: http://www.math.uconn.edu/~binns/reviewII210.pdf */ void NormConstT(double *t, int n, void *param) { int ii; int dim=2; double *mu=doubleArray(dim); double **Sigma=doubleMatrix(dim,dim); double *W1,*W1p,*W2,*W2p; // double X, Y, rho; double rho; //double dtemp, inp, pfact; double dtemp, pfact; int imposs; W1 = doubleArray(n); W1p = doubleArray(n); W2 = doubleArray(n); W2p = doubleArray(n); Param *pp=(Param *)param; mu[0]= pp->caseP.mu[0]; mu[1]= pp->caseP.mu[1]; Sigma[0][0]=pp->setP->Sigma[0][0]; Sigma[1][1]=pp->setP->Sigma[1][1]; Sigma[0][1]=pp->setP->Sigma[0][1]; Sigma[1][0]=pp->setP->Sigma[1][0]; rho=Sigma[0][1]/sqrt(Sigma[0][0]*Sigma[1][1]); //Rprintf("TESTING: %4g %4g %4g %4g", pp->caseP.mu[0], pp->caseP.mu[1], pp->setP->Sigma[0][0],pp->setP->Sigma[0][1]); //X=pp->caseP.X; //Y=pp->caseP.Y; imposs=0; dtemp=1/(2*M_PI*sqrt(Sigma[0][0]*Sigma[1][1]*(1-rho*rho))); for (ii=0; iisetP->weirdness) // Rprintf("Normc... %d %d %5g -> %5g %5g => %5g with %5g imposs %d\n", ii, n, inp, W1[ii], W2[ii],t[ii],pfact,imposs); //char ch; //scanf(" %c", &ch ); } } Free(W1); Free(W1p); Free(W2); Free(W2p); Free(mu); FreeMatrix(Sigma,dim); } /** * Integrand for computing sufficient statistic * Which statistic to estimate depends on param->suff (see macros.h) */ void SuffExp(double *t, int n, void *param) { int ii,imposs,i,j; sufficient_stat suff; Param *pp=(Param *)param; int dim = (pp->setP->ncar==1) ? 
3 : 2; double *mu=doubleArray(dim); double **Sigma=doubleMatrix(dim,dim); double **InvSigma=doubleMatrix(dim,dim);/* inverse covariance matrix*/ //double Sigma[dim][dim]; //double InvSigma[dim][dim]; double *W1,*W1p,*W2,*W2p,*vtemp; // double inp,density,pfact,normc; double density,pfact,normc; vtemp=doubleArray(dim); W1 = doubleArray(n); W1p = doubleArray(n); W2 = doubleArray(n); W2p = doubleArray(n); mu[0]= pp->caseP.mu[0]; mu[1]= pp->caseP.mu[1]; for(i=0;isetP->Sigma3[i][j]; InvSigma[i][j]=pp->setP->InvSigma3[i][j]; } else { Sigma[i][j]=pp->setP->Sigma[i][j]; InvSigma[i][j]=pp->setP->InvSigma[i][j]; } } } normc=pp->caseP.normcT; suff=pp->caseP.suff; imposs=0; for (ii=0; iisetP->verbose>=2 && dim==3) Rprintf("InvSigma loglik: %5g %5g %5g %5g %5g %5g\n",InvSigma[0][0],InvSigma[0][1],InvSigma[1][0],InvSigma[1][1],InvSigma[1][2],InvSigma[2][2]); vtemp[2]=logit(pp->caseP.X,"log-likelihood"); mu[0]=pp->setP->pdTheta[1]; mu[1]=pp->setP->pdTheta[2]; mu[2]=pp->setP->pdTheta[0]; } t[ii]=dMVN(vtemp,mu,InvSigma,dim,0)*pfact; //t[ii]=dMVN3(vtemp,mu,(double*)(&(InvSigma[0][0])),dim,0)*pfact; } else if (suff!=SS_Test) Rprintf("Error Suff= %d",suff); } } Free(W1);Free(W1p);Free(W2);Free(W2p);Free(mu);Free(vtemp); FreeMatrix(Sigma,dim); FreeMatrix(InvSigma,dim); } /** * Returns the log likelihood of a particular case (i.e, record, datapoint) */ double getLogLikelihood(Param* param) { if (param->caseP.dataType==DPT_General && !(param->caseP.Y>=.990 || param->caseP.Y<=.010)) { //non-survey data: do integration to find likelihood param->caseP.suff=SS_Loglik; return log(paramIntegration(&SuffExp,(void*)param)); } else if (param->caseP.dataType==DPT_Homog_X1 || param->caseP.dataType==DPT_Homog_X0) { //Homogenenous data: just do normal likelihood on one dimension double lik,sigma2,val,mu; val = (param->caseP.dataType==DPT_Homog_X1) ? param->caseP.Wstar[0] : param->caseP.Wstar[1]; if (!param->setP->ncar) { mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[0] : param->setP->pdTheta[1]; sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[2] : param->setP->pdTheta[3]; } else { mu = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[1] : param->setP->pdTheta[2]; sigma2 = (param->caseP.dataType==DPT_Homog_X1) ? param->setP->pdTheta[4] : param->setP->pdTheta[5]; } lik=(1/(sqrt(2*M_PI*sigma2)))*exp(-(.5/sigma2)*(val - mu)*(val - mu)); return log(lik); // return 0; //fix later } else if (param->caseP.dataType==DPT_Survey || (param->caseP.Y>=.990 || param->caseP.Y<=.010)) { //Survey data (or v tight bounds): multi-variate normal int dim=param->setP->ncar ? 
3 : 2; double *mu=doubleArray(dim); double *vtemp=doubleArray(dim); double **InvSig=doubleMatrix(dim,dim);/* inverse covariance matrix*/ int i,j; for(i=0;isetP->InvSigma3[i][j]; } else { InvSig[i][j]=param->setP->InvSigma[i][j]; } } } double loglik; vtemp[0] = param->caseP.Wstar[0]; vtemp[1] = param->caseP.Wstar[1]; mu[0]= param->caseP.mu[0]; mu[1]= param->caseP.mu[1]; if (param->setP->ncar) { vtemp[2]=logit(param->caseP.X,"log-likelihood survey"); mu[0]=param->setP->pdTheta[1]; mu[1]=param->setP->pdTheta[2]; mu[2]=param->setP->pdTheta[0]; loglik=dMVN(vtemp,mu,InvSig,dim,1); } else { loglik=dMVN(vtemp,mu,InvSig,dim,1); } Free(mu); Free(vtemp); FreeMatrix(InvSig,dim); return loglik; } else { //Unknown type Rprintf("Error; unkown type: %d\n",param->caseP.dataType); return 0; } } /** ********** * Line integral helper function ********** */ /** * Returns W2star from W1star, given the following equalities * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi) * mutation: imposs is set to 1 if the equation cannot be satisfied */ double getW2starFromW1star(double X, double Y, double W1star, int* imposs) { double W1; if (W1star>30) W1=1; //prevent overflow or underflow else W1=1/(1+exp(-1*W1star)); double W2=Y/(1-X)-X*W1/(1-X); if(W2>=1 || W2<=0) *imposs=1; //impossible pair of values else W2=log(W2/(1-W2)); return W2; } /** * Returns W1star from W2star, given the following equalities * Y=XW1 + (1-X)W2 and the Wi-star=logit(Wi) * mutation: imposs is set to 1 if the equation cannot be satisfied */ double getW1starFromW2star(double X, double Y, double W2star, int* imposs) { double W2; if (W2star>30) W2=1; //prevent overflow or underflow else W2=1/(1+exp(-1*W2star)); double W1=(Y-(1-X)*W2)/X; if(W1>=1 || W1<=0) *imposs=1; //impossible pair of values else W1=log(W1/(1-W1)); return W1; } /** * Returns W1 from W2, X, and Y given * Y=XW1 + (1-X)W2 */ double getW1FromW2(double X, double Y, double W2) { return (Y-(1-X)*W2)/X; } /** * W1star(t) * W1(t)=(W1_ub - W1_lb)*t + W1_lb * mutates impossible to true if W1 is non-finite at t */ double getW1starFromT(double t, Param* param, int* imposs) { double W1=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0])*t + param->caseP.Wbounds[0][0]; if (W1==1 || W1==0) *imposs=1; else W1=log(W1/(1-W1)); return W1; } /** * W2star(t) * W2(t)=(W2_lb - W2_ub)*t + W2_lb */ double getW2starFromT(double t, Param* param, int* imposs) { double W2=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1])*t + param->caseP.Wbounds[1][1]; if (W2==1 || W2==0) *imposs=1; else W2=log(W2/(1-W2)); return W2; } /** * W1star'(t) * see paper for derivation: W1*(t) = (1/W1)*((w1_ub - w1_lb)/(1-W1) */ double getW1starPrimeFromT(double t, Param* param) { double m=(param->caseP.Wbounds[0][1] - param->caseP.Wbounds[0][0]); double W1=m*t + param->caseP.Wbounds[0][0]; W1=(1/W1)*(m/(1-W1)); return W1; } /** * W2star'(t) * see paper for derivation: W2*(t) = (1/W2)*((w2_lb - w2_ub)/(1-W2) */ double getW2starPrimeFromT(double t, Param* param) { double m=(param->caseP.Wbounds[1][0] - param->caseP.Wbounds[1][1]); double W2=m*t + param->caseP.Wbounds[1][1]; W2=(1/W2)*(m/(1-W2)); return W2; } /** * parameterized line integration * lower bound is t=0, upper bound is t=1 */ double paramIntegration(integr_fn f, void *ex) { double epsabs=pow(10,-11), epsrel=pow(10,-11); double result=9999, anserr=9999; int limit=100; int last, neval, ier; int lenw=5*limit; int *iwork=(int *) Calloc(limit, int); double *work=(double *)Calloc(lenw, double); double lb=0.00001; double ub=.99999; Rdqags(f, ex, &lb, &ub, &epsabs, 
&epsrel, &result, &anserr, &neval, &ier, &limit, &lenw, &last, iwork, work); Free(iwork); Free(work); if (ier==0) return result; else { Param* p = (Param*) ex; Rprintf("Integration error %d: Sf %d X %5g Y %5g [%5g,%5g] -> %5g +- %5g\n",ier,p->caseP.suff,p->caseP.X,p->caseP.Y,p->caseP.Wbounds[0][0],p->caseP.Wbounds[0][1],result,anserr); // char ch; // scanf("Hit enter to continue %c", &ch ); return result; } } /** * integrate normalizing constant and set it in param */ void setNormConst(Param* param) { param->caseP.normcT=paramIntegration(&NormConstT,(void*)param); } /** * Set the bounds on W1 and W2 in their parameter */ void setBounds(Param* param) { double X,Y,w1_lb,w1_ub,w2_lb,w2_ub; //int w1_inf,w2_inf; double tol0=0.0001; double tol1=0.9999; X=param->caseP.X; Y=param->caseP.Y; //find bounds for W1 w1_ub=(Y-(1-X)*0)/X; //W2=0 if (w1_ub>tol1) w1_ub=1; w1_lb=(Y-(1-X)*1)/X; //W2=1 if (w1_lbtol1) w2_ub=1; w2_lb=Y/(1-X)-X*1/(1-X); //W1=1 if (w2_lbcaseP.Wbounds[0][0]=w1_lb; param->caseP.Wbounds[0][1]=w1_ub; param->caseP.Wbounds[1][0]=w2_lb; param->caseP.Wbounds[1][1]=w2_ub; //param->W1_inf=w1_inf; //param->W2_inf=w2_inf; } eco/src/preBaseX.c0000644000176200001440000000427214330337373013477 0ustar liggesusers#include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" /* Conditional Prediction for Normal Parametric Model for 2x2 Tables */ void preBaseX( double *X, /* data X */ double *pdmu, double *pdSigma, int *pin_samp, int *pin_draw, int *verbose, /* 1 for output monitoring */ double *pdStore ){ /* some integers */ int n_samp = *pin_samp; /* sample size */ int n_draw = *pin_draw; /* sample size of survey data */ int n_dim = 2; double *mu = doubleArray(n_dim); /* The mean */ double *Wstar = doubleArray(n_dim); double **Sigma = doubleMatrix(n_dim, n_dim); /* The covariance matrix */ /* misc variables */ int i, j, main_loop; /* used for various loops */ int itemp=0; int itempM=0; int itempS=0; int progress = 1, itempP = ftrunc((double) n_draw/10); /* get random seed */ GetRNGstate(); for(main_loop=0; main_loop #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" /* Grid method samping from tomography line*/ void rGrid( double *Sample, /* W_i sampled from each tomography line */ double *W1gi, /* The grid lines of W1[i] */ double *W2gi, /* The grid lines of W2[i] */ int ni_grid, /* number of grids for observation i*/ double *mu, /* mean vector for normal */ double **InvSigma, /* Inverse covariance matrix for normal */ int n_dim) /* dimension of parameters */ { int j; double dtemp; double *vtemp=doubleArray(n_dim); double *prob_grid=doubleArray(ni_grid); /* density by grid */ double *prob_grid_cum=doubleArray(ni_grid); /* cumulative density by grid */ dtemp=0; for (j=0;j prob_grid_cum[j]) j++; Sample[0]=W1gi[j]; Sample[1]=W2gi[j]; Free(vtemp); Free(prob_grid); Free(prob_grid_cum); } /* preparation for Grid */ void GridPrep( double **W1g, /* grids holder for W1 */ double **W2g, /* grids holder for W2 */ double **X, /* data: [X Y] */ double *maxW1, /* upper bound for W1 */ double *minW1, /* lower bound for W1 */ int *n_grid, /* number of grids */ int n_samp, /* sample size */ int n_step /* step size */ ) { int i, j; double dtemp; double *resid = doubleArray(n_samp); for(i=0; i (2*dtemp)) { n_grid[i]=ftrunc((maxW1[i]-minW1[i])*n_step); resid[i]=(maxW1[i]-minW1[i])-n_grid[i]*dtemp; /*if (maxW1[i]-minW1[i]==1) resid[i]=dtemp/4; */ j=0; while (j 0) { 
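/* ----------------------------------------------------------------
 * [Editor's sketch; not part of eco.] setBounds above computes the
 * deterministic (Duncan-Davis) accounting bounds on (W1, W2) from
 * Y = X*W1 + (1-X)*W2 by plugging the extremes 0 and 1 in for the
 * other proportion, then clamping to [0,1] (the code above clamps
 * with tol0/tol1 instead of exact 0/1). A minimal standalone version
 * (hypothetical name; assumes 0 < X < 1):
 * ---------------------------------------------------------------- */
static void tomographyBoundsSketch(double X, double Y,
                                   double *w1lb, double *w1ub,
                                   double *w2lb, double *w2ub) {
  *w1ub = Y / X;                       /* W1 when W2 = 0 */
  *w1lb = (Y - (1.0 - X)) / X;         /* W1 when W2 = 1 */
  *w2ub = Y / (1.0 - X);               /* W2 when W1 = 0 */
  *w2lb = (Y - X) / (1.0 - X);         /* W2 when W1 = 1 */
  if (*w1ub > 1.0) *w1ub = 1.0;
  if (*w1lb < 0.0) *w1lb = 0.0;
  if (*w2ub > 1.0) *w2ub = 1.0;
  if (*w2lb < 0.0) *w2lb = 0.0;
}
/* [End editor's sketch.] */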
rDirich(vtemp, param, n_dim); exceed = 0; for (j = 0; j < n_dim; j++) if (vtemp[j] > maxU[j] || vtemp[j] < minU[j]) exceed++; i++; if (i > maxit) error("rMH2c: rejection algorithm failed because bounds are too tight.\n increase maxit or use gibbs sampler instead."); } } else { /* gibbs sampler */ for (j = 0; j < n_dim; j++) vtemp[j] = W[j]*X[j]/Y; for (i = 0; i < iter; i++) { dtemp = vtemp[n_dim-1]; for (j = 0; j < n_dim-1; j++) { dtemp += vtemp[j]; vtemp[j] = runif(fmax2(minU[j], dtemp-maxU[n_dim-1]), fmin2(maxU[j], dtemp-minU[n_dim-1])); dtemp -= vtemp[j]; } vtemp[n_dim-1] = dtemp; } } /* calcualte W and its logit transformation */ for (j = 0; j < n_dim; j++) { Sample[j] = vtemp[j]*Y/X[j]; vtemp[j] = log(Sample[j])-log(1-Sample[j]); vtemp1[j] = log(W[j])-log(1-W[j]); } /* acceptance ratio */ dens1 = dMVN(vtemp, mu, InvSigma, n_dim, 1); dens2 = dMVN(vtemp1, mu, InvSigma, n_dim, 1); for (j=0; j #include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "sample.h" #include "macros.h" #include "fintegrate.h" /* Multivariate Normal density */ double dMVN( double *Y, /* The data */ double *MEAN, /* The parameters */ double **SIG_INV, /* inverse of the covariance matrix */ int dim, /* dimension */ int give_log){ /* 1 if log_scale 0 otherwise */ int j,k; double value=0.0; for(j=0;j0) for(k=0;k0) for(k=0;kcaseP.mu[0]; MEAN[1]=param->caseP.mu[1]; SIGMA[0][0]=param->setP->Sigma[0][0]; SIGMA[1][1]=param->setP->Sigma[1][1]; SIGMA[0][1]=param->setP->Sigma[0][1]; SIGMA[1][0]=param->setP->Sigma[1][0]; rho=SIGMA[0][1]/sqrt(SIGMA[0][0]*SIGMA[1][1]); dtemp=1/(2*M_PI*sqrt(SIGMA[0][0]*SIGMA[1][1]*(1-rho*rho))); density=-1/(2*(1-rho*rho))* ((Wstar[0]-MEAN[0])*(Wstar[0]-MEAN[0])/SIGMA[0][0]+ +(Wstar[1]-MEAN[1])*(Wstar[1]-MEAN[1])/SIGMA[1][1] -2*rho*(Wstar[0]-MEAN[0])*(Wstar[1]-MEAN[1])/sqrt(SIGMA[0][0]*SIGMA[1][1])) +log(dtemp)-log(normc); if (give_log==0) density=exp(density); /*Rprintf("s11 %5g s22 %5g normc %5g dtemp %5g ldensity %5g\n", SIGMA[0][0],SIGMA[1][1],normc, dtemp, density); char ch; scanf(" %c", &ch );*/ Free(MEAN); FreeMatrix(SIGMA,dim); return density; } double invLogit(double x) { if (x>30) return 0; else return (1/(1+exp(-1*x))); } double logit(double x,char* emsg) { if (x>=1 || x<=0) { Rprintf(emsg); Rprintf(": %5g is out of logit range\n",x); } return log(x/(1-x)); } int bit(int t, int n) { t=t>>n; return (t % 2); } eco/src/preDPX.c0000644000176200001440000000431314330337373013124 0ustar liggesusers#include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" /* Conditional Prediction for Nonparametric Model for 2x2 Tables */ void preDPX( double *pdmu, double *pdSigma, double *X, int *pin_samp, int *pin_draw, int *pin_dim, int *verbose, /* 1 for output monitoring */ double *pdStore ){ /* some integers */ int n_samp = *pin_samp; /* sample size */ int n_draw = *pin_draw; /* sample size of survey data */ int n_dim = *pin_dim; /* dimension */ double *mu = doubleArray(n_dim); /* The mean */ double *Wstar = doubleArray(n_dim); double **Sigma = doubleMatrix(n_dim, n_dim); /* The covariance matrix */ /* misc variables */ int i, j, main_loop; /* used for various loops */ int itemp = 0; int itempM = 0; int itempS = 0; int progress = 1, itempP = ftrunc((double) n_draw/10); /* get random seed */ GetRNGstate(); for(main_loop=0; main_loop #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" 
#include "bayes.h" #include "sample.h" /* Normal Parametric Model for 2x2 Tables with Contextual Effects */ void cBaseecoX( /*data input */ double *pdX, /* data (X, Y) */ int *pin_samp, /* sample size */ /*MCMC draws */ int *n_gen, /* number of gibbs draws */ int *burn_in, /* number of draws to be burned in */ int *pinth, /* keep every nth draw */ int *verbose, /* 1 for output monitoring */ /* prior specification*/ int *pinu0, /* prior df parameter for InvWish */ double *pdtau0, /* prior scale parameter for Sigma under G0*/ double *mu0, /* prior mean for mu under G0 */ double *pdS0, /* prior scale for Sigma */ double *mustart, /* starting values for mu */ double *Sigmastart, /* starting values for Sigma */ /*incorporating survey data */ int *survey, /*1 if survey data available (set of W_1, W_2) 0 not*/ int *sur_samp, /*sample size of survey data*/ double *sur_W, /*set of known W_1, W_2 */ /* incorporating homeogenous areas */ int *x1, /* 1 if X=1 type areas available W_1 known, W_2 unknown */ int *sampx1, /* number X=1 type areas */ double *x1_W1, /* values of W_1 for X1 type areas */ int *x0, /* 1 if X=0 type areas available W_2 known, W_1 unknown */ int *sampx0, /* number X=0 type areas */ double *x0_W2, /* values of W_2 for X0 type areas */ /* bounds fo W1 */ double *minW1, double *maxW1, /* flags */ int *parameter, /* 1 if save population parameter */ int *Grid, /* 1 if Grid algorithm is used; 0 for Metropolis */ /* storage for Gibbs draws of mu/sigmat*/ double *pdSMu0, double *pdSMu1, double *pdSMu2, double *pdSSig00, double *pdSSig01, double *pdSSig02, double *pdSSig11, double *pdSSig12, double *pdSSig22, /* storage for Gibbs draws of W*/ double *pdSW1, double *pdSW2 ){ /* some integers */ int n_samp = *pin_samp; /* sample size */ int s_samp = *sur_samp; /* sample size of survey data */ int x1_samp = *sampx1; /* sample size for X=1 */ int x0_samp = *sampx0; /* sample size for X=0 */ int t_samp = n_samp+s_samp+x1_samp+x0_samp; /* total sample size */ int nth = *pinth; int n_dim = 2; /* dimension */ int n_step = 1000; /* 1/The default size of grid step */ /* prior parameters */ double tau0 = *pdtau0; int nu0 = *pinu0; double **S0 = doubleMatrix(n_dim+1,n_dim+1); /* The prior S parameter for InvWish */ /* data */ double **X = doubleMatrix(n_samp,n_dim); /* The Y and covariates */ double **W = doubleMatrix(t_samp,n_dim); /* The W1 and W2 matrix */ double **Wstar = doubleMatrix(t_samp,n_dim+1); /* logit transformed W and X */ double **S_W = doubleMatrix(s_samp, n_dim+1); /* known W1, W2, X */ double **S_Wstar = doubleMatrix(s_samp, n_dim+1); /* logit transformed S_W */ /* grids */ double **W1g = doubleMatrix(n_samp, n_step); double **W2g = doubleMatrix(n_samp, n_step); int *n_grid = intArray(n_samp); /* grid size */ /* ordinary model variables */ double *mu = doubleArray(n_dim+1); double **Sigma = doubleMatrix(n_dim+1,n_dim+1); double **InvSigma = doubleMatrix(n_dim+1,n_dim+1); /* conditional mean & variance for (W1, W2) given X */ double *mu_w = doubleArray(n_dim); double **Sigma_w = doubleMatrix(n_dim,n_dim); double **InvSigma_w = doubleMatrix(n_dim,n_dim); /* misc variables */ int i, j, k, main_loop; /* used for various loops */ int itemp, itempS, itempC, itempA; int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1; /* get random seed */ GetRNGstate(); /* priors */ itemp = 0; for(k=0; k<(n_dim+1); k++) for(j=0; j<(n_dim+1); j++) S0[j][k] = pdS0[itemp++]; /* read the data set */ itemp = 0; for (j = 0; j < n_dim; j++) for (i = 0; i < n_samp; i++) X[i][j] = 
pdX[itemp++]; /* Initialize W, Wstar for n_samp */ for (i=0; i< n_samp; i++) { if (X[i][1]!=0 && X[i][1]!=1) { W[i][0]=runif(minW1[i], maxW1[i]); W[i][1]=(X[i][1]-X[i][0]*W[i][0])/(1-X[i][0]); } if (X[i][1]==0) for (j=0; j=*burn_in){ itempC++; if (itempC==nth){ pdSMu0[itempA]=mu[0]; pdSMu1[itempA]=mu[1]; pdSMu2[itempA]=mu[2]; pdSSig00[itempA]=Sigma[0][0]; pdSSig01[itempA]=Sigma[0][1]; pdSSig02[itempA]=Sigma[0][2]; pdSSig11[itempA]=Sigma[1][1]; pdSSig12[itempA]=Sigma[1][2]; pdSSig22[itempA]=Sigma[2][2]; itempA++; for(i=0; i<(n_samp+x1_samp+x0_samp); i++){ pdSW1[itempS]=W[i][0]; pdSW2[itempS]=W[i][1]; itempS++; } itempC=0; } } /*end of stroage *burn_in*/ if (*verbose) if (itempP == main_loop) { Rprintf("%3d percent done.\n", progress*10); itempP+=ftrunc((double) *n_gen/10); progress++; R_FlushConsole(); } } /*end of MCMC for normal */ if(*verbose) Rprintf("100 percent done.\n"); /** write out the random seed **/ PutRNGstate(); /* Freeing the memory */ FreeMatrix(X, n_samp); FreeMatrix(W, t_samp); FreeMatrix(Wstar, t_samp); free(n_grid); FreeMatrix(S0, n_dim+1); FreeMatrix(W1g, n_samp); FreeMatrix(W2g, n_samp); FreeMatrix(S_W, s_samp); FreeMatrix(S_Wstar, s_samp); Free(mu); FreeMatrix(Sigma, n_dim+1); FreeMatrix(InvSigma, n_dim+1); Free(mu_w); FreeMatrix(Sigma_w, n_dim); FreeMatrix(InvSigma_w, n_dim); } /* main */ eco/src/subroutines.h0000644000176200001440000000144614330337373014355 0ustar liggesusers/****************************************************************** This file is a part of eco: R Package for Fitting Bayesian Models of Ecological Inference for 2x2 Tables by Kosuke Imai and Ying Lu Copyright: GPL version 2 or later. *******************************************************************/ void SWP( double **X, int k, int size); void dinv(double **X, int size, double **X_inv); void dinv2D(double *X, int size, double *X_inv,char* emsg); void dinv2D_sym(double *X, int size, double *X_inv,char* emsg); void dcholdc(double **X, int size, double **L); double ddet(double **X, int size, int give_log); double ddet2D(double **X, int size, int give_log); void dcholdc2D(double *X, int size, double *L); void matrixMul(double **A, double **B, int r1, int c1, int r2, int c2, double **C); eco/src/macros.h0000644000176200001440000000622014330337373013252 0ustar liggesusers# ifndef MACROS_H # define MACROS_H /****************/ /** structrues **/ /****************/ /* ENUMS * sufficient statistic to calculate: 0->W1*, 1->W2*, 2->(W1*)^2, 3->(W1*)(W2*), 4->(W2*)^2, 5->W1,6->W2 7->Log Lik,8->test * data point type: 0=general, 1= homogenous with (X==1), 2= homogenous with (X==0), 3=survey (W1 and W2 are known) */ enum e_sufficient_stats {SS_W1star, SS_W2star, SS_W1star2, SS_W1W2star, SS_W2star2, SS_W1, SS_W2, SS_Loglik, SS_Test}; typedef enum e_sufficient_stats sufficient_stat; enum e_datapoint_types {DPT_General,DPT_Homog_X1, DPT_Homog_X0, DPT_Survey}; typedef enum e_datapoint_types datapoint_type; /* parameters and observed data -- no longer used*/ struct Param_old{ double mu[2]; double Sigma[2][2]; double InvSigma[2][2]; double Sigma3[3][3]; double InvSigma3[3][3]; int NCAR; double data[2]; //collect the data double X; //X,Y here for ease of use double Y; double normcT; //normalized const on tomog line (integrating with parameterization) double W[2]; //if W is known, also handy place to store E[W1] when we calculate it each step double Wstar[2]; //place to store E[W1*] when we calculate it each step double W1_lb; //lower and upper bounds for W1 and W2 (not starred) double W1_ub; double W2_lb; double 
W2_ub; sufficient_stat suff; //the sufficient stat we're calculating: 0->W1, 1->W2,2->W1^2,3->W1W2,4->W2^2,7->Log Lik, 5/6,-1 ->test case }; typedef struct Param_old Param_old; /** * The structure that holds per-record infromation */ struct caseParam { double mu[2]; double data[2]; //collect the data double X; //X,Y here for ease of use double Y; double normcT; //normalized const on tomog line (integrating with parameterization) double W[2]; //if W is known, also handy place to store E[W1] when we calculate it each step double Wstar[2]; //place to store E[W1*] when we calculate it each step double Wbounds[2][2]; //[i][j] is {j:lower,upper}-bound of W{i+1} int suff; //the sufficient stat we're calculating: 0->W1, 1->W2,2->W1^2,3->W1W2,4->W2^2,7->Log Lik, 5/6,-1 ->test case datapoint_type dataType; double** Z_i; //CCAR: k x 2 }; typedef struct caseParam caseParam; /** * The structure that holds dataset infromation */ struct setParam { int n_samp, t_samp, s_samp,x1_samp,x0_samp,param_len,suffstat_len; //types of data sizes int iter, ncar, ccar, ccar_nvar, fixedRho, sem, hypTest, verbose, calcLoglik; //options int semDone[7]; //whether that row of the R matrix is done int varParam[9]; //whether the parameter is included in the R matrix double convergence; double Sigma[2][2]; double InvSigma[2][2]; double Sigma3[3][3]; double InvSigma3[3][3]; double** SigmaK; //for CCAR double** InvSigmaK; double** hypTestCoeff; double hypTestResult; double* pdTheta; }; typedef struct setParam setParam; struct Param { setParam* setP; //pointer to the singleton structure caseParam caseP; }; typedef struct Param Param; /***************************/ /** typedef functions **/ /***************************/ //typedef void integr_fn(double *x, int n, void *ex); //is already defined in Applic.h typedef double gsl_fn(double x, void *ex); # endif eco/src/gibbsDP.c0000644000176200001440000003367614330337373013312 0ustar liggesusers#include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" void cDPeco( /*data input */ double *pdX, /* data (X, Y) */ int *pin_samp, /* sample size */ /*MCMC draws */ int *n_gen, /* number of gibbs draws */ int *burn_in, /* number of draws to be burned in */ int *pinth, /* keep every nth draw */ int *verbose, /* 1 for output monitoring */ /* prior specification*/ int *pinu0, /* prior df parameter for InvWish */ double *pdtau0, /* prior scale parameter for Sigma under G0*/ double *mu0, /* prior mean for mu under G0 */ double *pdS0, /* prior scale for Sigma */ /* DP prior specification */ double *alpha0, /* precision parameter, can be fixed or updated*/ int *pinUpdate, /* 1 if alpha gets updated */ double *pda0, double *pdb0, /* prior for alpha if alpha updated*/ /*incorporating survey data */ int *survey, /* 1 if survey data available (set of W_1, W_2) */ /* 0 otherwise*/ int *sur_samp, /* sample size of survey data*/ double *sur_W, /* set of known W_1, W_2 */ /*incorporating homeogenous areas */ int *x1, /* 1 if X=1 type areas available W_1 known, W_2 unknown */ int *sampx1, /* number X=1 type areas */ double *x1_W1, /* values of W_1 for X1 type areas */ int *x0, /* 1 if X=0 type areas available W_2 known, W_1 unknown */ int *sampx0, /* number X=0 type areas */ double *x0_W2, /* values of W_2 for X0 type areas */ /* bounds of W1 */ double *minW1, double *maxW1, /* storage */ int *parameter, /* 1 if save population parameter */ int *Grid, /* 1 if Grid algorithm used; \ 0 if Metropolis algorithm 
used*/ /* storage for Gibbs draws of mu/sigmat*/ double *pdSMu0, double *pdSMu1, double *pdSSig00, double *pdSSig01, double *pdSSig11, /* storage for Gibbs draws of W*/ double *pdSW1, double *pdSW2, /* storage for Gibbs draws of alpha */ double *pdSa, /* storage for nstar at each Gibbs draw*/ int *pdSn ){ /*some integers */ int n_samp = *pin_samp; /* sample size */ int s_samp = *sur_samp; /* sample size of survey data */ int x1_samp = *sampx1; /* sample size for X=1 */ int x0_samp = *sampx0; /* sample size for X=0 */ int t_samp = n_samp+x1_samp+x0_samp+s_samp; /* total sample size */ int nth = *pinth; /* keep every nth draw */ int n_dim = 2; /* dimension */ int n_step=1000; /* The default size of grid step */ /*prior parameters */ double tau0 = *pdtau0; /* prior scale */ int nu0 = *pinu0; /* prior degree of freedom*/ double **S0 = doubleMatrix(n_dim,n_dim);/*The prior S parameter for InvWish*/ double alpha = *alpha0; /* precision parameter*/ double a0 = *pda0, b0 = *pdb0; /* hyperprior for alpha */ /* data */ double **X = doubleMatrix(n_samp,n_dim); /* The Y and covariates */ double **W = doubleMatrix(t_samp,n_dim); /* The W1 and W2 matrix */ double **Wstar = doubleMatrix(t_samp,n_dim); /* The pseudo data */ double **S_W = doubleMatrix(s_samp,n_dim); /* The known W1 and W2 matrix*/ double **S_Wstar = doubleMatrix(s_samp,n_dim); /* The logit transformed S_W*/ /* grids */ double **W1g = doubleMatrix(n_samp, n_step); /* grids for W1 */ double **W2g = doubleMatrix(n_samp, n_step); /* grids for W2 */ int *n_grid = intArray(n_samp); /* grids size */ /* Model parameters */ /* Dirichlet variables */ double **mu = doubleMatrix(t_samp,n_dim); /* mean matrix */ double ***Sigma = doubleMatrix3D(t_samp,n_dim,n_dim); /*covarince matrix*/ double ***InvSigma = doubleMatrix3D(t_samp,n_dim,n_dim); /* inv of Sigma*/ int nstar; /* # clusters with distict theta values */ int *C = intArray(t_samp); /* vector of cluster membership */ double *q = doubleArray(t_samp); /* Weights of posterior of Dirichlet */ double *qq = doubleArray(t_samp); /* cumulative weight vector of q */ double **S_bvt = doubleMatrix(n_dim,n_dim); /* S paramter for BVT in q0 */ /* variables defined in remixing step: cycle through all clusters */ double **Wstarmix = doubleMatrix(t_samp,n_dim); /*data matrix used */ double *mu_mix = doubleArray(n_dim); /*updated MEAN parameter */ double **Sigma_mix = doubleMatrix(n_dim,n_dim); /*updated VAR parameter */ double **InvSigma_mix = doubleMatrix(n_dim,n_dim); /* Inv of Sigma_mix */ int nj; /* record # of obs in each cluster */ int *sortC = intArray(t_samp); /* record (sorted)original obs id */ int *indexC = intArray(t_samp); /* record original obs id */ int *label = intArray(t_samp); /* store index values */ /* misc variables */ int i, j, k, l, main_loop; /* used for various loops */ int itemp; int itempA=0; /* counter for alpha */ int itempS=0; /* counter for storage */ int itempC=0; /* counter to control nth draw */ int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1; double *vtemp = doubleArray(n_dim); double **mtemp = doubleMatrix(n_dim,n_dim); double **mtemp1 = doubleMatrix(n_dim,n_dim); double **onedata = doubleMatrix(1, n_dim); /* get random seed */ GetRNGstate(); /* read priors under G0*/ itemp=0; for(k=0;k qq[j]) j++; /** Dirichlet update Sigma_i, mu_i|Sigma_i **/ /* j=i: posterior update given Wstar[i] */ if (j==i){ onedata[0][0] = Wstar[i][0]; onedata[0][1] = Wstar[i][1]; NIWupdate(onedata, mu[i], Sigma[i], InvSigma[i], mu0, tau0,nu0, S0, 1, n_dim); C[i]=nstar; 
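/* j==i is the "new table" case of the Chinese-restaurant-process draw: observation i opens its own cluster, so a fresh (mu_i, Sigma_i) is sampled from the Normal-InvWishart posterior given Wstar[i] alone, the observation is labeled with the current number of distinct clusters, and that count is incremented just below. */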
nstar++; } /* j=i': replace with i' obs */ else { /*1. mu_i=mu_j, Sigma_i=Sigma_j*/ /*2. update C[i]=C[j] */ for(k=0;k=*burn_in) { itempC++; if (itempC==nth){ if(*pinUpdate) { pdSa[itempA]=alpha; } pdSn[itempA]=nstar; itempA++; for(i=0; i<(n_samp+x1_samp+x0_samp); i++) { pdSMu0[itempS]=mu[i][0]; pdSMu1[itempS]=mu[i][1]; pdSSig00[itempS]=Sigma[i][0][0]; pdSSig01[itempS]=Sigma[i][0][1]; pdSSig11[itempS]=Sigma[i][1][1]; pdSW1[itempS]=W[i][0]; pdSW2[itempS]=W[i][1]; itempS++; } itempC=0; } } if (*verbose) if (itempP == main_loop) { Rprintf("%3d percent done.\n", progress*10); itempP+=ftrunc((double) *n_gen/10); progress++; R_FlushConsole(); } } /*end of MCMC for DP*/ if (*verbose) Rprintf("100 percent done.\n"); /** write out the random seed **/ PutRNGstate(); /* Freeing the memory */ FreeMatrix(S0, n_dim); FreeMatrix(X, n_samp); FreeMatrix(W, t_samp); FreeMatrix(Wstar, t_samp); FreeMatrix(S_W, s_samp); FreeMatrix(S_Wstar, s_samp); FreeMatrix(W1g, n_samp); FreeMatrix(W2g, n_samp); free(n_grid); FreeMatrix(mu, t_samp); Free3DMatrix(Sigma, t_samp,n_dim); Free3DMatrix(InvSigma, t_samp, n_dim); free(C); Free(q); Free(qq); FreeMatrix(S_bvt, n_dim); FreeMatrix(Wstarmix, t_samp); Free(mu_mix); FreeMatrix(Sigma_mix, n_dim); FreeMatrix(InvSigma_mix, n_dim); free(sortC); free(indexC); free(label); Free(vtemp); FreeMatrix(mtemp, n_dim); FreeMatrix(mtemp1, n_dim); FreeMatrix(onedata, 1); } /* main */ eco/src/vector.h0000644000176200001440000000130014330337373013262 0ustar liggesusers/****************************************************************** This file is a part of eco: R Package for Fitting Bayesian Models of Ecological Inference for 2x2 Tables by Kosuke Imai and Ying Lu Copyright: GPL version 2 or later. *******************************************************************/ #include #include int *intArray(int num); int **intMatrix(int row, int col); double *doubleArray(int num); double **doubleMatrix(int row, int col); double ***doubleMatrix3D(int x, int y, int z); long *longArray(int num); void FreeMatrix(double **Matrix, int row); void FreeintMatrix(int **Matrix, int row); void Free3DMatrix(double ***Matrix, int index, int row); eco/src/gibbsZBase.c0000644000176200001440000003141514330337373014000 0ustar liggesusers#include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "sample.h" void cBaseecoZ( /*data input */ double *pdX, /* data (X, Y) */ double *pdZ, /* covariates Z */ int *pinZp, /* dimension of Z if =1, =gibbsBase =2 and Z=X, gibbsXBase >2 or Z!=X, regression*/ int *pin_samp, /* sample size */ /*MCMC draws */ int *n_gen, /* number of gibbs draws */ int *burn_in, /* number of draws to be burned in */ int *pinth, /* keep every nth draw */ int *verbose, /* 1 for output monitoring */ /* prior specification for imputation, (beta, Sigma)~N-InvWish*/ /* prior for Sigma~InvWish(nu, S)*/ int *pinu0, /* prior df parameter for InvWish */ double *pdS0, /* prior scale for Sigma */ /* prior for beta~N(b0, Sigma*A0^-1) */ double *pdbeta0, /* prior mean for beta*/ double *pdA0, /* prior PRECISION=1/SCALE parameter for beta*/ /* staring values */ double *betastart, double *Sigmastart, /*incorporating survey data */ int *survey, /*1 if survey data available (set of W_1, W_2) 0 not*/ int *sur_samp, /*sample size of survey data*/ double *sur_W, /*set of known W_1, W_2 */ /*incorporating homeogenous areas */ int *x1, /* 1 if X=1 type areas available W_1 known, W_2 unknown */ int *sampx1, /* number X=1 type areas */ double *x1_W1, /* values 
of W_1 for X1 type areas */ int *x0, /* 1 if X=0 type areas available W_2 known, W_1 unknown */ int *sampx0, /* number X=0 type areas */ double *x0_W2, /* values of W_2 for X0 type areas */ /* bounds of W1 */ double *minW1, double *maxW1, /* storage */ int *parameter,/* 1 if save population parameter */ int *Grid, /* storage for Gibbs draws of beta and Sigam, packed */ double *pdSBeta, double *pdSSigma, /* storage for Gibbs draws of W*/ double *pdSW1, double *pdSW2 ){ int n_samp = *pin_samp; /* sample size */ int nth = *pinth; int s_samp = *sur_samp; /* sample size of survey data */ int x1_samp = *sampx1; int x0_samp = *sampx0; int t_samp = n_samp+s_samp+x1_samp+x0_samp; /* total sample size */ int n_dim = 2; /* The dimension of the ecological table */ int n_cov = *pinZp; /* The dimension of the covariates */ int n_step = 1000; /* priors */ double *beta0 = doubleArray(n_cov); /* prior mean of beta */ double **S0 = doubleMatrix(n_dim, n_dim); /* prior scale for Sigma */ double **A0 = doubleMatrix(n_cov, n_cov); /* prior precision for beta */ int nu0 = *pinu0; /* prior df for Sigma */ /* data */ double **X = doubleMatrix(n_samp, n_dim); /* The Y and X */ /*The known W1 and W2 matrix*/ double **S_W = doubleMatrix(s_samp, n_dim); double **S_Wstar=doubleMatrix(s_samp, n_dim); /* pseudo data Wstar */ double **W = doubleMatrix(t_samp, n_dim); double **Wstar = doubleMatrix(t_samp, n_dim); double *Wstar_bar = doubleArray(n_dim); /* The covariates and W */ double **Z = doubleMatrix(t_samp*n_dim+n_cov, n_cov+1); /* Z*cholesky factor of covaraince matrix*/ double **Zstar = doubleMatrix(t_samp*n_dim+n_cov, n_cov+1); /* grids */ double **W1g = doubleMatrix(n_samp, n_step); /* grids for W1 */ double **W2g = doubleMatrix(n_samp, n_step); /* grids for W2 */ int *n_grid = intArray(n_samp); /* grid size */ /* paramters for Wstar under Normal baseline model */ double *beta = doubleArray(n_cov); /* vector of regression coefficients */ double **mu = doubleMatrix(t_samp, n_dim); double **Sigma = doubleMatrix(n_dim, n_dim); double **InvSigma = doubleMatrix(n_dim, n_dim); /*posterior parameters for beta and Sigma*/ double *mbeta = doubleArray(n_cov); /* posterior mean of beta*/ double **Vbeta = doubleMatrix(n_cov,n_cov); /* posterior varaince of beta */ /* matrices used for sweep */ /* quantities used in sweep */ double **SS = doubleMatrix(n_cov+1, n_cov+1); /* the sum of square matrix */ double *epsilon = doubleArray(t_samp*n_dim); /* The error term */ double **R = doubleMatrix(n_dim, n_dim); /* ee' */ /* misc variables */ int i, j, k, l, main_loop; /* used for various loops */ int itemp; int itempA=0; /* counter for alpha */ int itempB=0; int itempC=0; /* counter to control nth draw */ int itempS=0; /* counter for storage */ int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1; double *vtemp = doubleArray(n_dim); double **mtemp = doubleMatrix(n_dim, n_dim); double **mtemp1 = doubleMatrix(n_dim, n_dim); double **mtemp2 = doubleMatrix(n_cov, n_cov); /* get random seed */ GetRNGstate(); /**read prior information*/ itemp=0; for (k=0; k=*burn_in){ itempC++; if (itempC==nth){ for (j=0; j #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" void cDPecoX( /*data input */ double *pdX, /* data (X, Y) */ int *pin_samp, /* sample size */ /*MCMC draws */ int *n_gen, /* number of gibbs draws */ int *burn_in, /* number of draws to be burned in */ int *pinth, /* keep every nth draw */ int *verbose, /* 1 for 
output monitoring */ /* prior specification*/ int *pinu0, /* prior df parameter for InvWish */ double *pdtau0, /* prior scale parameter for Sigma under G0*/ double *mu0, /* prior mean for mu under G0 (3x1) */ double *pdS0, /* prior scale for Sigma (3x3) */ double *alpha0, /* precision parameter, can be fixed or updated*/ int *pinUpdate, /* 1 if alpha gets updated */ double *pda0, double *pdb0, /* prior for alpha if alpha updated*/ /*incorporating survey data */ int *survey, /*1 if survey data available(set of W_1, W_2,X)*/ /*0 otherwise*/ int *sur_samp, /*sample size of survey data*/ double *sur_W, /*set of known W_1, W_2 */ /*incorporating homeogenous areas */ int *x1, /* 1 if X=1 type areas available W_1 known, W_2 unknown */ int *sampx1, /* number X=1 type areas */ double *x1_W1, /* values of W_1 for X1 type areas */ int *x0, /* 1 if X=0 type areas available W_2 known, W_1 unknown */ int *sampx0, /* number X=0 type areas */ double *x0_W2, /* values of W_2 for X0 type areas */ /* bounds fo W1 */ double *minW1, double *maxW1, /* flags */ int *parameter, /* 1 if save population parameter */ int *Grid, /* 1 if Grid algorithm is used; 0 for Metropolis */ /* storage for Gibbs draws of mu/sigmat*/ double *pdSMu0, double *pdSMu1, double *pdSMu2, double *pdSSig00, double *pdSSig01, double *pdSSig02, double *pdSSig11, double *pdSSig12, double *pdSSig22, /* storage for Gibbs draws of W*/ double *pdSW1, double *pdSW2, /* storage for Gibbs draws of alpha */ double *pdSa, /* storage for nstar at each Gibbs draw*/ int *pdSn ){ /*some integers */ int n_samp = *pin_samp; /* sample size */ int s_samp = *sur_samp; /* sample size of survey data */ int x1_samp = *sampx1; /* sample size for X=1 */ int x0_samp = *sampx0; /* sample size for X=0 */ int t_samp = n_samp+x1_samp+x0_samp+s_samp; /* total sample size */ int nth = *pinth; /* keep every nth draw */ int n_dim = 2; /* dimension */ int n_step=1000; /* The default size of grid step */ /*prior parameters */ double tau0 = *pdtau0; /* prior scale */ int nu0 = *pinu0; /* prior degree of freedom*/ double **S0 = doubleMatrix(n_dim+1,n_dim+1);/*The prior S parameter for InvWish*/ double alpha = *alpha0; /* precision parameter*/ double a0 = *pda0, b0 = *pdb0; /* hyperprior for alpha */ /* data */ double **X = doubleMatrix(n_samp,n_dim); /* The Y and covariates */ double **W = doubleMatrix(t_samp,n_dim); /* The W1 and W2 matrix */ double **Wstar = doubleMatrix(t_samp,(n_dim+1)); /* The pseudo data */ double **S_W = doubleMatrix(s_samp,n_dim+1); /* The known W1 and W2,X */ double **S_Wstar = doubleMatrix(s_samp,n_dim+1);/* The logit transformed S_W*/ /* grids */ double **W1g = doubleMatrix(n_samp, n_step); /* grids for W1 */ double **W2g = doubleMatrix(n_samp, n_step); /* grids for W2 */ int *n_grid = intArray(n_samp); /* grids size */ /* Model parameters */ /* Dirichlet variables */ double **mu = doubleMatrix(t_samp,(n_dim+1)); /* mean matrix */ double ***Sigma = doubleMatrix3D(t_samp,(n_dim+1),(n_dim+1)); /*covarince matrix*/ double ***InvSigma = doubleMatrix3D(t_samp,(n_dim+1),(n_dim+1)); /* inv of Sigma*/ /*conditional distribution parameter */ double **Sigma_w=doubleMatrix(n_dim,n_dim); double **InvSigma_w=doubleMatrix(n_dim,n_dim); double *mu_w=doubleArray(n_dim); int nstar; /* # clusters with distict theta values */ int *C = intArray(t_samp); /* vector of cluster membership */ double *q = doubleArray(t_samp); /* Weights of posterior of Dirichlet */ double *qq = doubleArray(t_samp); /* cumulative weight vector of q */ double **S_tvt = 
doubleMatrix((n_dim+1),(n_dim+1)); /* S paramter for BVT in q0 */ /* variables defined in remixing step: cycle through all clusters */ double **Wstarmix = doubleMatrix(t_samp,(n_dim+1)); /*data matrix used */ double *mu_mix = doubleArray((n_dim+1)); /*updated MEAN parameter */ double **Sigma_mix = doubleMatrix((n_dim+1),(n_dim+1)); /*updated VAR parameter */ double **InvSigma_mix = doubleMatrix((n_dim+1),(n_dim+1)); /* Inv of Sigma_mix */ int nj; /* record # of obs in each cluster */ int *sortC = intArray(t_samp); /* record (sorted)original obs id */ int *indexC = intArray(t_samp); /* record original obs id */ int *label = intArray(t_samp); /* store index values */ /* misc variables */ int i, j, k, l, main_loop; /* used for various loops */ int itemp; int itempA=0; /* counter for alpha */ int itempS=0; /* counter for storage */ int itempC=0; /* counter to control nth draw */ int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1, dtemp2; double *vtemp = doubleArray((n_dim+1)); double **mtemp = doubleMatrix((n_dim+1),(n_dim+1)); double **mtemp1 = doubleMatrix((n_dim+1),(n_dim+1)); double **onedata = doubleMatrix(1, (n_dim+1)); /* get random seed */ GetRNGstate(); /* read priors under G0*/ itemp=0; for(k=0;k<(n_dim+1);k++) for(j=0;j<(n_dim+1);j++) S0[j][k]=pdS0[itemp++]; /* read the data set */ itemp = 0; for (j = 0; j < n_dim; j++) for (i = 0; i < n_samp; i++) X[i][j] = pdX[itemp++]; /*Intialize W, Wsatr for n_samp */ for (i=0; i< n_samp; i++) { if (X[i][1]!=0 && X[i][1]!=1) { W[i][0]=runif(minW1[i], maxW1[i]); W[i][1]=(X[i][1]-X[i][0]*W[i][0])/(1-X[i][0]); } if (X[i][1]==0) for (j=0; j=n_samp && i<(n_samp+x1_samp)) { dtemp=mu_w[1]+Sigma_w[0][1]/Sigma_w[0][0]*(Wstar[i][0]-mu_w[0]); dtemp1=Sigma_w[1][1]*(1-Sigma_w[0][1]*Sigma_w[0][1]/(Sigma_w[0][0]*Sigma_w[1][1])); Wstar[i][1]=norm_rand()*sqrt(dtemp1)+dtemp; W[i][1]=exp(Wstar[i][1])/(1+exp(Wstar[i][1])); } /*update W1 given W2, mu_ord and Sigma_ord in x0 homeogeneous areas */ if (*x0==1 && i>=(n_samp+x1_samp) && i<(n_samp+x1_samp+x0_samp)) { dtemp=mu_w[0]+Sigma_w[0][1]/Sigma_w[1][1]*(Wstar[i][1]-mu_w[1]); dtemp1=Sigma_w[0][0]*(1-Sigma_w[0][1]*Sigma_w[0][1]/(Sigma_w[0][0]*Sigma_w[1][1])); Wstar[i][0]=norm_rand()*sqrt(dtemp1)+dtemp; W[i][0]=exp(Wstar[i][0])/(1+exp(Wstar[i][0])); } } /**updating mu, Sigma given Wstar uisng effective sample size t_samp**/ for (i=0; i qq[j]) j++; /** Dirichlet update Sigma_i, mu_i|Sigma_i **/ if (j==i){ onedata[0][0] = Wstar[i][0]; onedata[0][1] = Wstar[i][1]; onedata[0][2] = Wstar[i][2]; NIWupdate(onedata, mu[i], Sigma[i], InvSigma[i], mu0, tau0,nu0, S0, 1, n_dim+1); C[i]=nstar; nstar++; } else { /*1. mu_i=mu_j, Sigma_i=Sigma_j*/ /*2. 
update C[i]=C[j] */ for(k=0;k<=n_dim;k++) { mu[i][k]=mu[j][k]; for(l=0;l<=n_dim;l++) { Sigma[i][k][l]=Sigma[j][k][l]; InvSigma[i][k][l]=InvSigma[j][k][l]; } } C[i]=C[j]; } sortC[i]=C[i]; } /* end of i loop*/ /** remixing step using effective sample**/ for(i=0;i=*burn_in) { itempC++; if (itempC==nth){ if(*pinUpdate) { pdSa[itempA]=alpha; pdSn[itempA]=nstar; itempA++; } for(i=0; i<(n_samp+x1_samp+x0_samp); i++) { pdSMu0[itempS]=mu[i][0]; pdSMu1[itempS]=mu[i][1]; pdSMu2[itempS]=mu[i][2]; pdSSig00[itempS]=Sigma[i][0][0]; pdSSig01[itempS]=Sigma[i][0][1]; pdSSig02[itempS]=Sigma[i][0][2]; pdSSig11[itempS]=Sigma[i][1][1]; pdSSig12[itempS]=Sigma[i][1][2]; pdSSig22[itempS]=Sigma[i][2][2]; pdSW1[itempS]=W[i][0]; pdSW2[itempS]=W[i][1]; itempS++; } itempC=0; } } if (*verbose) if (itempP == main_loop) { Rprintf("%3d percent done.\n", progress*10); itempP+=ftrunc((double) *n_gen/10); progress++; R_FlushConsole(); } } /*end of MCMC for DP*/ if (*verbose) Rprintf("100 percent done.\n"); /** write out the random seed **/ PutRNGstate(); /* Freeing the memory */ FreeMatrix(S0, n_dim+1); FreeMatrix(X, n_samp); FreeMatrix(W, t_samp); FreeMatrix(Wstar, t_samp); FreeMatrix(S_W, s_samp); FreeMatrix(S_Wstar, s_samp); FreeMatrix(W1g, n_samp); FreeMatrix(W2g, n_samp); free(n_grid); FreeMatrix(mu, t_samp); Free3DMatrix(Sigma, t_samp,n_dim+1); Free3DMatrix(InvSigma, t_samp, n_dim+1); Free(mu_w); FreeMatrix(Sigma_w, n_dim); FreeMatrix(InvSigma_w, n_dim); free(C); Free(q); Free(qq); FreeMatrix(S_tvt, n_dim+1); FreeMatrix(Wstarmix, t_samp); Free(mu_mix); FreeMatrix(Sigma_mix, n_dim+1); FreeMatrix(InvSigma_mix, n_dim+1); free(sortC); free(indexC); free(label); Free(vtemp); FreeMatrix(mtemp, n_dim+1); FreeMatrix(mtemp1, n_dim+1); FreeMatrix(onedata, 1); } /* main */ eco/src/bayes.c0000644000176200001440000000410314330337373013062 0ustar liggesusers/****************************************************************** This file is a part of eco: R Package for Fitting Bayesian Models of Ecological Inference for 2x2 Tables by Kosuke Imai and Ying Lu Copyright: GPL version 2 or later. 
*******************************************************************/ #include #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" /** Normal-InvWishart updating Y|mu, Sigma ~ N(mu, Sigma) mu|Sigma ~ N(mu0, Sigma/tau0) Sigma ~ InvWish(nu0, S0^{-1}) **/ void NIWupdate( double **Y, /* data */ double *mu, /* mean */ double **Sigma, /* variance */ double **InvSigma, /* precision */ double *mu0, /* prior mean */ double tau0, /* prior scale */ int nu0, /* prior df */ double **S0, /* prior scale */ int n_samp, /* sample size */ int n_dim) /* dimension */ { int i,j,k; double *Ybar = doubleArray(n_dim); double *mun = doubleArray(n_dim); double **Sn = doubleMatrix(n_dim, n_dim); double **mtemp = doubleMatrix(n_dim, n_dim); /*read data */ for (j=0; j #include #include #include #include #include #include "vector.h" #include "subroutines.h" #include "rand.h" #include "bayes.h" #include "sample.h" /* Normal Parametric Model for RxC (with R >= 2, C >= 2) Tables */ void cBaseRC( /*data input */ double *pdX, /* X */ double *pdY, /* Y */ double *pdWmin, /* lower bounds */ double *pdWmax, /* uppwer bounds */ int *pin_samp, /* sample size */ int *pin_col, /* number of columns */ int *pin_row, /* number of rows */ /*MCMC draws */ int *reject, /* whether to use rejection sampling */ int *maxit, /* max number of iterations for rejection sampling */ int *n_gen, /* number of gibbs draws */ int *burn_in, /* number of draws to be burned in */ int *pinth, /* keep every nth draw */ int *verbose, /* 1 for output monitoring */ /* prior specification*/ int *pinu0, /* prior df parameter for InvWish */ double *pdtau0, /* prior scale parameter for Sigma */ double *mu0, /* prior mean for mu */ double *pdS0, /* prior scale for Sigma */ /* starting values */ double *pdMu, double *pdSigma, /* storage */ int *parameter, /* 1 if save population parameter */ double *pdSmu, double *pdSSigma, double *pdSW ){ /* some integers */ int n_samp = *pin_samp; /* sample size */ int nth = *pinth; /* keep every pth draw */ int n_col = *pin_col; /* number of columns */ int n_dim = *pin_row-1; /* number of rows - 1 */ /* prior parameters */ double tau0 = *pdtau0; /* prior scale */ int nu0 = *pinu0; /* prior degrees of freedom */ double **S0 = doubleMatrix(n_col, n_col); /* prior scale for InvWish */ /* data */ double **Y = doubleMatrix(n_samp, n_dim); /* Y */ double **X = doubleMatrix(n_samp, n_col); /* X */ double ***W = doubleMatrix3D(n_samp, n_dim, n_col); /* W */ double ***Wstar = doubleMatrix3D(n_col, n_samp, n_dim); /* logratio(W) */ double **Wsum = doubleMatrix(n_samp, n_col); /* sum_{r=1}^{R-1} W_{irc} */ double **SWstar = doubleMatrix(n_col, n_dim); /* The lower and upper bounds of U = W*X/Y **/ double ***minU = doubleMatrix3D(n_samp, n_dim, n_col); double *maxU = doubleArray(n_col); /* model parameters */ double **mu = doubleMatrix(n_col, n_dim); /* mean */ double ***Sigma = doubleMatrix3D(n_col, n_dim, n_dim); /* covariance */ double ***InvSigma = doubleMatrix3D(n_col, n_dim, n_dim); /* inverse */ /* misc variables */ int i, j, k, l, main_loop; /* used for various loops */ int itemp, counter; int itempM = 0; /* for mu */ int itempS = 0; /* for Sigma */ int itempW = 0; /* for W */ int itempC = 0; /* control nth draw */ int progress = 1, itempP = ftrunc((double) *n_gen/10); double dtemp, dtemp1; double *param = doubleArray(n_col); /* Dirichlet parameters */ double *dvtemp = doubleArray(n_col); double *dvtemp1 = doubleArray(n_col); /* get random seed */ GetRNGstate(); /* read X */ 
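/* pdX (and pdY below) arrive from R as flat vectors in column-major order; the loops unpack them one column at a time into the n_samp x n_col matrix X and the n_samp x n_dim matrix Y. */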
itemp = 0; for (k = 0; k < n_col; k++) for (i = 0; i < n_samp; i++) X[i][k] = pdX[itemp++]; /* read Y */ itemp = 0; for (j = 0; j < n_dim; j++) for (i = 0; i < n_samp; i++) Y[i][j] = pdY[itemp++]; /* compute bounds on U */ itemp = 0; for (k = 0; k < n_col; k++) for (j = 0; j < n_dim; j++) for (i = 0; i < n_samp; i++) minU[i][j][k] = fmax2(0, (X[i][k]+Y[i][j]-1)/Y[i][j]); /* initial values for mu and Sigma */ itemp = 0; for (k = 0; k < n_col; k++) for (j = 0; j < n_dim; j++) mu[k][j] = pdMu[itemp++]; itemp = 0; for (k = 0; k < n_col; k++) for (j = 0; j < n_dim; j++) for (i = 0; i < n_dim; i++) Sigma[k][j][i] = pdSigma[itemp++]; for (k = 0; k < n_col; k++) dinv(Sigma[k], n_dim, InvSigma[k]); /* initial values for W */ for (k = 0; k < n_col; k++) param[k] = 1.0; for (i = 0; i < n_samp; i++) { for (k = 0; k < n_col; k++) Wsum[i][k] = 0.0; for (j = 0; j < n_dim; j++) { counter = 0; itemp = 1; while (itemp > 0) { /* first try rejection sampling */ rDirich(dvtemp, param, n_col); itemp = 0; for (k = 0; k < n_col; k++) { if (dvtemp[k] < minU[i][j][k] || dvtemp[k] > fmin2(1, X[i][k]*(1-Wsum[i][k])/Y[i][j])) itemp++; } if (itemp < 1) for (k = 0; k < n_col; k++) { W[i][j][k] = dvtemp[k]*Y[i][j]/X[i][k]; Wsum[i][k] += W[i][j][k]; } counter++; if (counter > *maxit && itemp > 0) { /* if rejection sampling fails, then use midpoints of bounds */ itemp = 0; dtemp = Y[i][j]; dtemp1 = 1; for (k = 0; k < n_col-1; k++) { W[i][j][k] = 0.25*(fmax2(0,(X[i][k]/dtemp1+dtemp-1)*dtemp1/X[i][k])+ fmin2(1-Wsum[i][k],dtemp*dtemp1/X[i][k])); dtemp -= W[i][j][k]*X[i][k]/dtemp1; dtemp1 -= X[i][k]; Wsum[i][k] += W[i][j][k]; } W[i][j][n_col-1] = dtemp; Wsum[i][n_col-1] += dtemp; } R_CheckUserInterrupt(); } for (l = 0; l < n_dim; l++) for (k = 0; k < n_col; k++) Wstar[k][i][l] = log(W[i][l][k])-log(1-Wsum[i][k]); } } /* read the prior */ itemp = 0; for(k = 0; k < n_dim; k++) for(j = 0; j < n_dim; j++) S0[j][k] = pdS0[itemp++]; /*** Gibbs sampler! 
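Each iteration alternates two blocks: (i) for every table i and row j, W is updated with a Metropolis step whose proposal is a Dirichlet draw truncated to the bounds that the margins X and Y impose on U = W*X/Y, and (ii) for every column, (mu, Sigma) is updated with a conjugate Normal-InvWishart draw (NIWupdate) applied to the log-ratio transformed Wstar.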
***/ if (*verbose) Rprintf("Starting Gibbs sampler...\n"); for(main_loop = 0; main_loop < *n_gen; main_loop++){ /** update W, Wstar given mu, Sigma **/ for (i = 0; i < n_samp; i++) { /* sampling W through Metropolis Step for each row */ for (j = 0; j < n_dim; j++) { /* computing upper bounds for U */ for (k = 0; k < n_col; k++) { Wsum[i][k] -= W[i][j][k]; maxU[k] = fmin2(1, X[i][k]*(1-Wsum[i][k])/Y[i][j]); } /** MH step **/ /* Sample a candidate draw of W from truncated Dirichlet */ l = 0; itemp = 1; while (itemp > 0) { rDirich(dvtemp, param, n_col); itemp = 0; for (k = 0; k < n_col; k++) if (dvtemp[k] > maxU[k] || dvtemp[k] < minU[i][j][k]) itemp++; l++; if (l > *maxit) error("rejection algorithm failed because bounds are too tight.\n increase maxit or use gibbs sampler instead."); } /* get W and its log-ratio transformation */ for (k = 0; k < n_col; k++) { dvtemp[k] = dvtemp[k]*Y[i][j]/X[i][k]; dvtemp1[k] = Wsum[i][k]+dvtemp[k]; } for (k = 0; k < n_col; k++) for (l = 0; l < n_dim; l++) if (l == j) SWstar[k][l] = log(dvtemp[k])-log(1-dvtemp1[k]); else SWstar[k][l] = log(W[i][j][k])-log(1-dvtemp1[k]); /* computing acceptance ratio */ dtemp = 0; dtemp1 = 0; for (k= 0; k < n_col; k++) { dtemp += dMVN(SWstar[k], mu[k], InvSigma[k], n_dim, 1); dtemp1 += dMVN(Wstar[k][i], mu[k], InvSigma[k], n_dim, 1); dtemp -= log(dvtemp[k]); dtemp1 -= log(W[i][j][k]); } if (unif_rand() < fmin2(1, exp(dtemp-dtemp1))) for (k = 0; k < n_col; k++) W[i][j][k] = dvtemp[k]; /* updating Wsum and Wstar with new draws */ for (k = 0; k < n_col; k++) { Wsum[i][k] += W[i][j][k]; for (l = 0; l < n_dim; l++) Wstar[k][i][l] = log(W[i][l][k])-log(1-Wsum[i][k]); } } } /* update mu, Sigma given wstar using effective sample of Wstar */ for (k = 0; k < n_col; k++) NIWupdate(Wstar[k], mu[k], Sigma[k], InvSigma[k], mu0, tau0, nu0, S0, n_samp, n_dim); /*store Gibbs draw after burn-in and every nth draws */ if (main_loop >= *burn_in){ itempC++; if (itempC==nth){ for (k = 0; k < n_col; k++) for (j = 0; j < n_dim; j++) { pdSmu[itempM++]=mu[k][j]; for (i = 0; i < n_dim; i++) if (j <= i) pdSSigma[itempS++]=Sigma[k][j][i]; } for(i = 0; i < n_samp; i++) for (k = 0; k < n_col; k++) for (j = 0; j < n_dim; j++) pdSW[itempW++] = W[i][j][k]; itempC=0; } } if (*verbose) if (itempP == main_loop) { Rprintf("%3d percent done.\n", progress*10); itempP+=ftrunc((double) *n_gen/10); progress++; R_FlushConsole(); } R_CheckUserInterrupt(); } /* end of Gibbs sampler */ if (*verbose) Rprintf("100 percent done.\n"); /** write out the random seed **/ PutRNGstate(); /* Freeing the memory */ FreeMatrix(S0, n_col); FreeMatrix(X, n_samp); FreeMatrix(Y, n_samp); Free3DMatrix(W, n_samp, n_dim); Free3DMatrix(Wstar, n_col, n_samp); FreeMatrix(Wsum, n_samp); Free3DMatrix(minU, n_samp, n_dim); FreeMatrix(mu, n_col); Free3DMatrix(Sigma, n_col, n_dim); Free3DMatrix(InvSigma, n_col, n_dim); Free(param); Free(dvtemp); } /* main */ eco/R/0000755000176200001440000000000014330337373011227 5ustar liggesuserseco/R/emeco.R0000644000176200001440000004264614330337373012456 0ustar liggesusers### ### main function ### #' Fitting Parametric Models and Quantifying Missing Information for Ecological #' Inference in 2x2 Tables #' #' \code{ecoML} is used to fit parametric models for ecological inference in #' \eqn{2 \times 2} tables via Expectation Maximization (EM) algorithms. The #' data is specified in proportions. 
At its most basic setting, the algorithm #' assumes that the individual-level proportions (i.e., \eqn{W_1} and #' \eqn{W_2}) are distributed bivariate normally (after logit transformations). #' The function calculates point estimates of the parameters for models based #' on different assumptions. The standard errors of the point estimates are #' also computed via Supplemented EM algorithms. Moreover, \code{ecoML} #' quantifies the amount of missing information associated with each parameter #' and allows researchers to examine the impact of missing information on #' parameter estimation in ecological inference. The models and algorithms are #' described in Imai, Lu and Strauss (2008, 2011). #' #' When \code{SEM} is \code{TRUE}, \code{ecoML} computes the observed-data #' information matrix for the parameters of interest based on the Supplemented EM #' algorithm. The inverse of the observed-data information matrix can be used #' to estimate the variance-covariance matrix for the parameters estimated from #' EM algorithms. In addition, it also computes the expected complete-data #' information matrix. Based on these two measures, one can further calculate #' the fraction of missing information associated with each parameter. See #' Imai, Lu and Strauss (2006) for more details about the fraction of missing #' information. #' #' Moreover, when \code{hyptest=TRUE}, \code{ecoML} allows one to estimate the #' parametric model under the null hypothesis that \code{mu_1=mu_2}. One can #' then construct the likelihood ratio test to assess the hypothesis of equal #' means. The associated fraction of missing information for the test statistic #' can also be calculated. See Imai, Lu and Strauss (2006) for #' details. #' #' @param formula A symbolic description of the model to be fit, specifying the #' column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X} #' specifies \code{Y} as the column margin (e.g., turnout) and \code{X} (e.g., #' percent African-American) as the row margin. Details and specific examples #' are given below. #' @param data An optional data frame in which to interpret the variables in #' \code{formula}. The default is the environment in which \code{ecoML} is #' called. #' @param N An optional variable representing the size of the unit; e.g., the #' total number of voters. \code{N} needs to be a vector of the same length as #' \code{Y} and \code{X} or a scalar. #' @param supplement An optional matrix of supplemental data. The matrix has #' two columns, which contain additional individual-level data such as survey #' data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no #' additional individual-level data are included in the model. The default is #' \code{NULL}. #' @param fix.rho Logical. If \code{TRUE}, the correlation (when #' \code{context=TRUE}) or the partial correlation (when \code{context=FALSE}) #' between \eqn{W_1} and \eqn{W_2} is fixed through the estimation. For #' details, see Imai, Lu and Strauss (2006). The default is \code{FALSE}. #' @param context Logical. If \code{TRUE}, the contextual effect is also #' modeled. In this case, the row margin (i.e., X) and the individual-level #' rates (i.e., \eqn{W_1} and \eqn{W_2}) are assumed to be distributed #' tri-variate normally (after logit transformations). See Imai, Lu and Strauss #' (2006) for details. The default is \code{FALSE}. #' @param sem Logical. If \code{TRUE}, the standard errors of parameter #' estimates are estimated via the SEM algorithm, as well as the fraction of #' missing data.
The default is \code{TRUE}. #' @param theta.start A numeric vector that specifies the starting values for #' the mean, variance, and covariance. When \code{context = FALSE}, the #' elements of \code{theta.start} correspond to (\eqn{E(W_1)}, \eqn{E(W_2)}, #' \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{cor(W_1,W_2)}). When \code{context = #' TRUE}, the elements of \code{theta.start} correspond to (\eqn{E(W_1)}, #' \eqn{E(W_2)}, \eqn{var(W_1)}, \eqn{var(W_2)}, \eqn{corr(W_1, X)}, #' \eqn{corr(W_2, X)}, \eqn{corr(W_1,W_2)}). Moreover, when #' \code{fix.rho=TRUE}, \eqn{corr(W_1,W_2)} is set to be the correlation #' between \eqn{W_1} and \eqn{W_2} when \code{context = FALSE}, and the partial #' correlation between \eqn{W_1} and \eqn{W_2} given \eqn{X} when \code{context #' = FALSE}. The default is \code{c(0,0,1,1,0)}. #' @param epsilon A positive number that specifies the convergence criterion #' for EM algorithm. The square root of \code{epsilon} is the convergence #' criterion for SEM algorithm. The default is \code{10^(-6)}. #' @param maxit A positive integer specifies the maximum number of iterations #' before the convergence criterion is met. The default is \code{1000}. #' @param loglik Logical. If \code{TRUE}, the value of the log-likelihood #' function at each iteration of EM is saved. The default is \code{TRUE}. #' @param hyptest Logical. If \code{TRUE}, model is estimated under the null #' hypothesis that means of \eqn{W1} and \eqn{W2} are the same. The default is #' \code{FALSE}. #' @param verbose Logical. If \code{TRUE}, the progress of the EM and SEM #' algorithms is printed to the screen. The default is \code{FALSE}. #' @return An object of class \code{ecoML} containing the following elements: #' \item{call}{The matched call.} #' \item{X}{The row margin, \eqn{X}.} #' \item{Y}{The column margin, \eqn{Y}.} #' \item{N}{The size of each table, \eqn{N}.} #' \item{context}{The assumption under which model is estimated. If #' \code{context = FALSE}, CAR assumption is adopted and no contextual effect #' is modeled. If \code{context = TRUE}, NCAR assumption is adopted, and #' contextual effect is modeled.} \item{sem}{Whether SEM algorithm is used to #' estimate the standard errors and observed information matrix for the #' parameter estimates.} #' \item{fix.rho}{Whether the correlation or the partial #' correlation between \eqn{W_1} an \eqn{W_2} is fixed in the estimation.} #' \item{r12}{If \code{fix.rho = TRUE}, the value that \eqn{corr(W_1, W_2)} is #' fixed to.} #' \item{epsilon}{The precision criterion for EM convergence. #' \eqn{\sqrt{\epsilon}} is the precision criterion for SEM convergence.} #' \item{theta.sem}{The ML estimates of \eqn{E(W_1)},\eqn{E(W_2)}, #' \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. 
If \code{context = #' TRUE}, \eqn{E(X)},\eqn{cov(W_1,X)}, \eqn{cov(W_2,X)} are also reported.} #' \item{W}{In-sample estimation of \eqn{W_1} and \eqn{W_2}.} #' \item{suff.stat}{The sufficient statistics for \code{theta.em}.} #' \item{iters.em}{Number of EM iterations before convergence is achieved.} #' \item{iters.sem}{Number of SEM iterations before convergence is achieved.} #' \item{loglik}{The log-likelihood of the model when convergence is achieved.} #' \item{loglik.log.em}{A vector saving the value of the log-likelihood #' function at each iteration of the EM algorithm.} #' \item{mu.log.em}{A matrix saving the unweighted mean estimation of the #' logit-transformed individual-level proportions (i.e., \eqn{W_1} and \eqn{W_2}) #' at each iteration of the EM process.} \item{Sigma.log.em}{A matrix saving the #' log of the variance estimation of the logit-transformed individual-level #' proportions (i.e., \eqn{W_1} and \eqn{W_2}) at each iteration of EM process. #' Note, non-transformed variances are displayed on the screen (when #' \code{verbose = TRUE}).} #' \item{rho.fisher.em}{A matrix saving the fisher #' transformation of the estimation of the correlations between the #' logit-transformed individual-level proportions (i.e., \eqn{W_1} and #' \eqn{W_2}) at each iteration of EM process. Note, non-transformed #' correlations are displayed on the screen (when \code{verbose = TRUE}).} #' Moreover, when \code{sem=TRUE}, \code{ecoML} also output the following #' values: #' \item{DM}{The matrix characterizing the rates of convergence of the #' EM algorithms. Such information is also used to calculate the observed-data #' information matrix} #' \item{Icom}{The (expected) complete data information #' matrix estimated via SEM algorithm. When \code{context=FALSE, fix.rho=TRUE}, #' \code{Icom} is 4 by 4. When \code{context=FALSE, fix.rho=FALSE}, \code{Icom} #' is 5 by 5. When \code{context=TRUE}, \code{Icom} is 9 by 9.} #' \item{Iobs}{The observed information matrix. The dimension of \code{Iobs} #' is same as \code{Icom}.} #' \item{Imiss}{The difference between \code{Icom} and \code{Iobs}. #' The dimension of \code{Imiss} is same as \code{miss}.} #' \item{Vobs}{The (symmetrized) variance-covariance matrix of the ML parameter #' estimates. The dimension of \code{Vobs} is same as \code{Icom}.} #' \item{Iobs}{The (expected) complete-data variance-covariance matrix. The #' dimension of \code{Iobs} is same as \code{Icom}.} #' \item{Vobs.original}{The estimated variance-covariance matrix of the ML parameter #' estimates. The dimension of \code{Vobs} is same as \code{Icom}.} #' \item{Fmis}{The fraction of missing information associated with each parameter estimation. } #' \item{VFmis}{The proportion of increased variance associated with each #' parameter estimation due to observed data. } #' \item{Ieigen}{The largest eigen value of \code{Imiss}.} #' \item{Icom.trans}{The complete data information #' matrix for the fisher transformed parameters.} #' \item{Iobs.trans}{The observed data information matrix for the fisher transformed parameters.} #' \item{Fmis.trans}{The fractions of missing information associated with the #' fisher transformed parameters.} #' @seealso \code{eco}, \code{ecoNP}, \code{summary.ecoML} #' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R #' Package for Ecological Inference in 2x2 Tables} Journal of Statistical #' Software, Vol. 42, No. 5, pp. 1-23. #' #' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). 
\dQuote{Bayesian and #' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data #' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. #' @keywords models #' @examples #' #' #' ## load the census data #' data(census) #' #' ## NOTE: convergence has not been properly assessed for the following #' ## examples. See Imai, Lu and Strauss (2006) for more complete analyses. #' ## In the first example below, in the interest of time, only part of the #' ## data set is analyzed and the convergence requirement is less stringent #' ## than the default setting. #' #' ## In the second example, the program is arbitrarily halted 100 iterations #' ## into the simulation, before convergence. #' #' ## load the Robinson's census data #' data(census) #' #' ## fit the parametric model with the default model specifications #' \dontrun{res <- ecoML(Y ~ X, data = census[1:100,], N=census[1:100,3], #' epsilon=10^(-6), verbose = TRUE)} #' ## summarize the results #' \dontrun{summary(res)} #' #' ## fit the parametric model with some individual #' ## level data using the default prior specification #' surv <- 1:600 #' \dontrun{res1 <- ecoML(Y ~ X, context = TRUE, data = census[-surv,], #' supplement = census[surv,c(4:5,1)], maxit=100, verbose = TRUE)} #' ## summarize the results #' \dontrun{summary(res1)} #' #' @export ecoML ecoML <- function(formula, data = parent.frame(), N=NULL, supplement = NULL, theta.start = c(0,0,1,1,0), fix.rho = FALSE, context = FALSE, sem = TRUE, epsilon=10^(-6), maxit = 1000, loglik = TRUE, hyptest=FALSE, verbose= FALSE) { ## getting X and Y mf <- match.call() tt <- terms(formula) attr(tt, "intercept") <- 0 if (is.matrix(eval.parent(mf$data))) data <- as.data.frame(data) X <- model.matrix(tt, data) Y <- model.response(model.frame(tt, data=data)) #n.var: total number of parameters involved in the estimation #n.par: number of nonstatic paramters need to estimate through EM # also need SEM #ndim: dimension of the multivariate normal distribution ndim<-2 if (context) ndim<-3 n.var<-2*ndim+ ndim*(ndim-1)/2 n.par<-n.S<-n.var if (context) { n.par<-n.var-2 } r12<-NULL if (fix.rho) r12<-theta.start[n.par] if (!context & fix.rho) n.par<-n.par-1 flag<-as.integer(context)+2*as.integer(fix.rho)+2^2*as.integer(sem) ##checking data tmp <- checkdata(X, Y, supplement, ndim) bdd <- ecoBD(formula=formula, data=data) W1min <- bdd$Wmin[order(tmp$order.old)[1:nrow(tmp$d)],1,1] W1max <- bdd$Wmax[order(tmp$order.old)[1:nrow(tmp$d)],1,1] n <- tmp$n.samp+tmp$survey.samp+tmp$samp.X1+tmp$samp.X0 wcol<-ndim if (context) { wcol<-wcol-1 } inSample.length <- wcol*tmp$n.samp #if NCAR and the user did not provide a theta.start if (context && (length(theta.start)==5) ) theta.start<-c(0,0,1,1,0,0,0) ## Fitting the model via EM res <- .C("cEMeco", as.double(tmp$d), as.double(theta.start), as.integer(tmp$n.samp), as.integer(maxit), as.double(epsilon), as.integer(tmp$survey.yes), as.integer(tmp$survey.samp), as.double(tmp$survey.data), as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1), as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2), as.double(W1min), as.double(W1max), as.integer(flag),as.integer(verbose),as.integer(loglik),as.integer(hyptest), optTheta=rep(-1.1,n.var), pdTheta=double(n.var), S=double(n.S+1),inSample=double(inSample.length),DMmatrix=double(n.par*n.par), itersUsed=as.integer(0),history=double((maxit+1)*(n.var+1)), PACKAGE="eco") ##record results from EM theta.em<-res$pdTheta theta.fisher<-param.trans(theta.em, transformation="Fisher") 
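# param.trans(..., transformation="Fisher") appears to log the variance
# components (they feed the sigma.log slot below) and to apply Fisher's z to
# the correlations, z = atanh(r) = 0.5 * log((1 + r) / (1 - r)), which
# stabilizes their sampling variance. A minimal sketch of the z-transform,
# assuming r is a correlation in (-1, 1):
#   r <- 0.3
#   z <- 0.5 * log((1 + r) / (1 - r))  # identical to atanh(0.3)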
iters.em<-res$itersUsed mu.log.em <- matrix(rep(NA,iters.em*ndim),ncol=ndim) sigma.log.em <- matrix(rep(NA,iters.em*ndim),ncol=ndim) loglike.log.em <- as.double(rep(NA,iters.em)) nrho<-length(theta.em)-2*ndim rho.fisher.em <- matrix(rep(NA,iters.em*nrho),ncol=nrho) for(i in 1:iters.em) { mu.log.em[i,1:ndim]=res$history[(i-1)*(n.var+1)+(1:ndim)] sigma.log.em[i,1:ndim]=res$history[(i-1)*(n.var+1)+ndim+(1:ndim)] if (nrho!=0) rho.fisher.em[i, 1:nrho]=res$history[(i-1)*(n.var+1)+2*ndim+(1:nrho)] loglike.log.em[i]=res$history[(i-1)*(n.var+1)+2*ndim+nrho+1] } ## In sample prediction of W W <- matrix(rep(NA,inSample.length),ncol=wcol) for (i in 1:tmp$n.samp) for (j in 1:wcol) W[i,j]=res$inSample[(i-1)*2+j] ## SEM step iters.sem<-0 suff.stat<-res$S if (context) { suff.stat<-rep(0,(n.var+1)) suff.stat[1]<-mean(logit(c(X,supplement[,3]))) suff.stat[2:3]<-res$S[1:2] suff.stat[4]<-mean((logit(c(X, supplement[,3])))^2) suff.stat[5:6]<-res$S[3:4] suff.stat[7:8]<-res$S[6:7] suff.stat[9]<-res$S[5] suff.stat[10]<-res$S[8] } if (sem) { DM <- matrix(rep(NA,n.par*n.par),ncol=n.par) res <- .C("cEMeco", as.double(tmp$d), as.double(theta.start), as.integer(tmp$n.samp), as.integer(maxit), as.double(epsilon), as.integer(tmp$survey.yes), as.integer(tmp$survey.samp), as.double(tmp$survey.data), as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1), as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2), as.double(bdd$Wmin[,1,1]), as.double(bdd$Wmax[,1,1]), as.integer(flag),as.integer(verbose),as.integer(loglik),as.integer(hyptest), res$pdTheta, pdTheta=double(n.var), S=double(n.S+1), inSample=double(inSample.length),DMmatrix=double(n.par*n.par), itersUsed=as.integer(0),history=double((maxit+1)*(n.var+1)), PACKAGE="eco") iters.sem<-res$itersUsed for(i in 1:n.par) for(j in 1:n.par) DM[i,j]=res$DMmatrix[(i-1)*n.par+j] } if (!context) names(theta.em)<-c("u1","u2","s1","s2","r12") if (context) names(theta.em)<-c("ux","u1","u2","sx","s1","s2","r1x","r2x","r12") ## output res.out<-list(call = mf, Y = Y, X = X, N = N, fix.rho = fix.rho, context = context, sem=sem, epsilon=epsilon, theta.em=theta.em, r12=r12, sigma.log = theta.fisher[(ndim+1):(2*ndim)], suff.stat = suff.stat[1:n.S], loglik = res$S[n.S+1], iters.em = iters.em, iters.sem = iters.sem, mu.log.em = mu.log.em, sigma.log.em = sigma.log.em, rho.fisher.em = rho.fisher.em, loglike.log.em = loglike.log.em, W = W) if (sem) { res.out$DM<-DM #print(dim(data)) # n<-dim(data)[1] if (!is.null(supplement)) n<-n+dim(supplement)[1] #cat("n2=", n,"\n") res.info<- ecoINFO(theta.em=res.out$theta.em, suff.stat=res.out$suff.stat, DM=res.out$DM, context=context, fix.rho=fix.rho, sem=sem, r12=res.out$r12, n=n) res.out$DM<-res.info$DM res.out$Icom<-res.info$Icom res.out$Iobs<-res.info$Iobs res.out$Fmis<-res.info$Fmis res.out$Vobs.original<-res.info$Vobs.original res.out$Vobs<-res.info$Vobs res.out$Iobs<-res.info$Iobs res.out$VFmis<-res.info$VFmis res.out$Icom.trans<-res.info$Icom.trans res.out$Iobs.trans<-res.info$Iobs.trans res.out$Fmis.trans<-res.info$Fmis.trans res.out$Imiss<-res.info$Imiss res.out$Ieigen<-res.info$Ieigen res.out$Iobs<-res.info$Iobs } class(res.out) <- "ecoML" return(res.out) } eco/R/logit.R0000644000176200001440000000014414330337373012467 0ustar liggesuserslogit <- function(x) return(log(x)-log(1-x)) invlogit <- function(x) return(exp(x)/(1+exp(x))) eco/R/print.ecoBD.R0000644000176200001440000000147514330337373013470 0ustar liggesusers#' @export print.ecoBD <- function(x, digits = max(3, getOption("digits") -3), ...) 
{ cat("\nCall:\n", deparse(x$call), "\n\n", sep="") cat("Aggregate Lower Bounds (Proportions):\n") print.default(format(x$aggWmin, digits = digits), print.gap = 2, quote = FALSE) cat("\nAggregate Upper Bounds (Proportions):\n") print.default(format(x$aggWmax, digits = digits), print.gap = 2, quote = FALSE) if (!is.null(x$aggNmin)) { cat("\nAggregate Lower Bounds (Counts):\n") print.default(format(x$aggNmin, digits = digits), print.gap = 2, quote = FALSE) cat("\nAggregate Upper Bounds (Counts):\n") print.default(format(x$aggNmax, digits = digits), print.gap = 2, quote = FALSE) } cat("\n") invisible(x) } eco/R/summary.predict.eco.R0000644000176200001440000000134414330337373015247 0ustar liggesusers#' @export summary.predict.eco <- function(object, CI=c(2.5, 97.5), ...) { if (any(CI < 0) || any(CI > 100)) stop("Invalid input for CI") n.draws <- nrow(object) n.var <- ncol(object) table.names<-c("mean", "std.dev", paste(min(CI), "%", sep=" "), paste(max(CI), "%", sep=" ")) W.table <- matrix(NA, ncol=length(table.names), nrow=n.var) for (i in 1:n.var) W.table[i,] <- cbind(mean(object[,i]), sd(object[,i]), quantile(object[,i], min(CI)/100), quantile(object[,i], max(CI)/100)) colnames(W.table) <- table.names rownames(W.table) <- colnames(object) res <- list(W.table = W.table, n.draws = n.draws) class(res) <- "summary.predict.eco" return(res) } eco/R/checkdata.R0000644000176200001440000000261614330337373013266 0ustar liggesuserscheckdata <- function(X,Y, supplement, ndim) { # check and reorganize inputs if (any(X<0) || any(X>1) || any(Y<0) || any(Y>1)) stop("Values of X and Y have to be between 0 and 1.") ind <- 1:length(X) res <- list() res$X1type <- res$samp.X1 <- res$X1.W1 <- 0 res$X0type <- res$samp.X0 <- res$X0.W2 <- 0 ## X = 1 X1.ind <- ind[along=(X==1)] if (length(X[X!=1])1)) stop("survey data have to be between 0 and 1.") if(is.null(supplement)) res$survey.samp <- res$survey.data <- res$survey.yes <- 0 else if (dim(supplement)[2] != ndim) stop("when context=TRUE, use n by 3. Otherwise use n by 2 matrix for survey data") else { res$survey.samp <- length(supplement[,1]) res$survey.data <- as.matrix(supplement) res$survey.yes <- 1 } return(res) } eco/R/wallace.R0000644000176200001440000000226014330337373012762 0ustar liggesusers #' Black voting rates for Wallace for President, 1968 #' #' This data set contains, on a county level, the proportion of county #' residents who are Black and the proportion of presidential votes cast for #' Wallace. Demographic data is based on the 1960 census. Presidential returns #' are from ICPSR study 13. County data from 10 southern states (Alabama, #' Arkansas, Georgia, Florida, Louisiana, Mississippi, North Carolina, South #' Carolina, Tennessee, Texas) are included. (Virginia is excluded due to the #' difficulty of matching counties between the datasets.) This data is #' analyzed in Wallace and Segal (1973). #' #' #' @name wallace #' @docType data #' @format A data frame containing 3 variables and 1009 observations #' \tabular{lll}{ #' X \tab numeric \tab proportion of the population that is Black \cr #' Y \tab numeric \tab proportion presidential votes cast for Wallace \cr #' FIPS \tab numeric \tab the FIPS county code #' } #' @references Wasserman, Ira M. and David R. Segal (1973). ``Aggregation #' Effects in the Ecological Study of Presidential Voting.'' American Journal #' of Political Science. vol. 17, pp. 177-81. 
#' @keywords datasets NULL eco/R/predict.ecoNP.R0000644000176200001440000000720614330337373014014 0ustar liggesusers#' Out-of-Sample Posterior Prediction under the Nonparametric Bayesian Model #' for Ecological Inference in 2x2 Tables #' #' Obtains out-of-sample posterior predictions under the fitted nonparametric #' Bayesian model for ecological inference. \code{predict} method for class #' \code{ecoNP} and \code{ecoNPX}. #' #' The posterior predictive values are computed using the Monte Carlo sample #' stored in the \code{eco} or \code{ecoNP} output (or other sample if #' \code{newdraw} is specified). Given each Monte Carlo sample of the #' parameters, we sample the vector-valued latent variable from the appropriate #' multivariate Normal distribution. Then, we apply the inverse logit #' transformation to obtain the predictive values of proportions, \eqn{W}. The #' computation may be slow (especially for the nonparametric model) if a large #' Monte Carlo sample of the model parameters is used. In either case, setting #' \code{verbose = TRUE} may be helpful in monitoring the progress of the code. #' #' @aliases predict.ecoNP #' @param object An output object from \code{ecoNP}. #' @param newdraw An optional list containing two matrices (or three #' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu} #' and \eqn{\Sigma}. Those elements should be named as \code{mu} and #' \code{Sigma}, respectively. The default is the original MCMC draws stored in #' \code{object}. #' @param subset A scalar or numerical vector specifying the row number(s) of #' \code{mu} and \code{Sigma} in the output object from \code{eco}. If #' specified, the posterior draws of parameters for those rows are used for #' posterior prediction. The default is \code{NULL} where all the posterior #' draws are used. #' @param obs An integer or vector of integers specifying the observation #' number(s) whose posterior draws will be used for predictions. The default is #' \code{NULL} where all the observations in the data set are selected. #' @param verbose logical. If \code{TRUE}, helpful messages along with a #' progress report on the Monte Carlo sampling from the posterior predictive #' distributions are printed on the screen. The default is \code{FALSE}. #' @param ... further arguments passed to or from other methods. #' @return \code{predict.eco} yields a matrix of class \code{predict.eco} #' containing the Monte Carlo sample from the posterior predictive distribution #' of inner cells of ecological tables. \code{summary.predict.eco} will #' summarize the output, and \code{print.summary.predict.eco} will print the #' summary. 
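#' @examples
#' \dontrun{
#' ## a minimal sketch (not run); the data set and settings below are
#' ## purely illustrative
#' data(reg)
#' res <- ecoNP(Y ~ X, data = reg)
#' out <- predict(res, verbose = TRUE)
#' summary(out)
#' }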
#' @seealso \code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP} #' @keywords methods #' @export predict.ecoNP <- function(object, newdraw = NULL, subset = NULL, obs = NULL, verbose = FALSE, ...){ if (is.null(newdraw) && is.null(object$mu)) stop("Posterior draws of mu and Sigma must be supplied") else if (!is.null(newdraw)){ if (is.null(newdraw$mu) || is.null(newdraw$Sigma)) stop("Posterior draws of both mu and Sigma must be supplied.") object <- newdraw } n.draws <- dim(object$mu)[1] p <- dim(object$mu)[2] n <- dim(object$mu)[3] mu <- aperm(coef(object, subset = subset, obs = obs), c(2,3,1)) if (is.null(subset)) subset <- 1:n.draws if (is.null(obs)) obs <- 1:n Sigma <- aperm(object$Sigma[subset,,obs], c(2,3,1)) res <- .C("preDP", as.double(mu), as.double(Sigma), as.integer(n), as.integer(n.draws), as.integer(p), as.integer(verbose), pdStore = double(n.draws*p*n), PACKAGE="eco")$pdStore res <- matrix(res, ncol=p, nrow=n.draws*n, byrow=TRUE) colnames(res) <- c("W1", "W2") class(res) <- c("predict.eco", "matrix") return(res) } eco/R/predict.ecoX.R0000644000176200001440000001027614330337373013707 0ustar liggesusers#' Out-of-Sample Posterior Prediction under the Parametric Bayesian Model for #' Ecological Inference in 2x2 Tables #' #' Obtains out-of-sample posterior predictions under the fitted parametric #' Bayesian model for ecological inference. \code{predict} method for class #' \code{eco} and \code{ecoX}. #' #' The posterior predictive values are computed using the Monte Carlo sample #' stored in the \code{eco} output (or other sample if \code{newdraw} is #' specified). Given each Monte Carlo sample of the parameters, we sample the #' vector-valued latent variable from the appropriate multivariate Normal #' distribution. Then, we apply the inverse logit transformation to obtain the #' predictive values of proportions, \eqn{W}. The computation may be slow #' (especially for the nonparametric model) if a large Monte Carlo sample of #' the model parameters is used. In either case, setting \code{verbose = TRUE} #' may be helpful in monitoring the progress of the code. #' #' @aliases predict.ecoX #' @param object An output object from \code{eco} or \code{ecoNP}. #' @param newdraw An optional list containing two matrices (or three #' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu} #' and \eqn{\Sigma}. Those elements should be named as \code{mu} and #' \code{Sigma}, respectively. The default is the original MCMC draws stored in #' \code{object}. #' @param newdata An optional data frame containing a new data set for which #' posterior predictions will be made. The new data set must have the same #' variable names as those in the original data. #' @param subset A scalar or numerical vector specifying the row number(s) of #' \code{mu} and \code{Sigma} in the output object from \code{eco}. If #' specified, the posterior draws of parameters for those rows are used for #' posterior prediction. The default is \code{NULL} where all the posterior #' draws are used. #' @param cond logical. If \code{TRUE}, then the conditional prediction will be #' made for the parametric model with contextual effects. The default is #' \code{FALSE}. #' @param verbose logical. If \code{TRUE}, helpful messages along with a #' progress report on the Monte Carlo sampling from the posterior predictive #' distributions are printed on the screen. The default is \code{FALSE}. #' @param ... further arguments passed to or from other methods.
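#' @examples
#' \dontrun{
#' ## a minimal sketch (not run); the data set and settings are
#' ## illustrative only
#' data(census)
#' res <- eco(Y ~ X, context = TRUE, data = census)
#' out <- predict(res, verbose = TRUE)
#' summary(out)
#' ## conditional prediction given the observed X
#' out.c <- predict(res, cond = TRUE)
#' }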
#' @return \code{predict.eco} yields a matrix of class \code{predict.eco} #' containing the Monte Carlo sample from the posterior predictive distribution #' of inner cells of ecological tables. \code{summary.predict.eco} will #' summarize the output, and \code{print.summary.predict.eco} will print the #' summary. #' @seealso \code{eco}, \code{predict.ecoNP} #' @keywords methods #' @export predict.ecoX <- function(object, newdraw = NULL, subset = NULL, newdata = NULL, cond = FALSE, verbose = FALSE, ...){ if (is.null(newdraw) && is.null(object$mu)) stop("Posterior draws of mu and Sigma must be supplied") else if (!is.null(newdraw)){ if (is.null(newdraw$mu) || is.null(newdraw$Sigma)) stop("Posterior draws of both mu and Sigma must be supplied.") object <- newdraw } if (cond) { ## conditional prediction mu <- coef(object, subset = subset) n.draws <- nrow(mu) if (is.null(subset)) subset <- 1:n.draws Sigma <- object$Sigma[subset,] if (is.null(newdata)) X <- object$X else { mf <- match.call() if (is.matrix(eval.parent(mf$newdata))) newdata <- as.data.frame(newdata) tt <- terms(object) attr(tt, "intercept") <- 0 X <- model.matrix(tt, newdata) } n <- nrow(X) res <- .C("preBaseX", as.double(X), as.double(mu), as.double(t(Sigma)), as.integer(length(c(X))), as.integer(nrow(mu)), as.integer(verbose), pdStore = double(n.draws*n*2), PACKAGE="eco")$pdStore res <- array(res, c(2, n, n.draws), dimnames=list(c("W1", "W2"), rownames(X), 1:n.draws)) class(res) <- c("predict.ecoX", "array") } else { res <- predict.eco(object, newdraw = newdraw, subset = subset, newdata = newdata, verbose = verbose, ...) colnames(res) <- c("W1", "W2", "X") } return(res) } eco/R/housep88.R0000644000176200001440000000302214330337373013032 0ustar liggesusers #' Electoral Results for the House and Presidential Races in 1988 #' #' This data set contains, on a House district level, the percentage of the #' vote for the Democratic House candidate, the percentage of the vote for the #' Democratic presidential candidate (Dukakis), the number of voters who voted #' for a major party candidate in the presidential race, and the ratio of #' voters in the House race versus the number who cast a ballot for President. #' Eleven (11) uncontested races are not included. Dataset compiled and #' analyzed by Burden and Kimball (1998). Complete dataset and documentation #' available at ICPSR study number 1140. #' #' #' @name housep88 #' @docType data #' @format A data frame containing 5 variables and 424 observations #' \tabular{lll}{ X \tab numeric \tab proportion voting for the Democrat in the #' presidential race \cr Y \tab numeric \tab proportion voting for the Democrat #' in the House race \cr N \tab numeric \tab number of major party voters in #' the presidential contest \cr HPCT \tab numeric \tab House election turnout #' divided by presidential election turnout (set to 1 if House turnout exceeds #' presidential turnout) \cr DIST \tab numeric \tab 4-digit ICPSR state and #' district code: first 2 digits for the state code, last two digits for the #' district number (e.g., 2106=IL 6th) } #' @references Burden, Barry C. and David C. Kimball (1998). ``A New Approach #' To Ticket-Splitting.'' The American Political Science Review. vol. 92, no. #' 3, pp. 533-544.
#' @keywords datasets NULL eco/R/census.R0000644000176200001440000000245314330337373012656 0ustar liggesusers #' Black Illiteracy Rates in 1910 US Census #' #' This data set contains the proportion of the residents who are black, the #' proportion of those who can read, the total population as well as the actual #' black literacy rate and white literacy rate for 1040 counties in the US. The #' dataset was originally analyzed by Robinson (1950) at the state level. King #' (1997) recoded the 1910 census at county level. The data set only includes #' those who are older than 10 years of age. #' #' #' @name census #' @docType data #' @format A data frame containing 5 variables and 1040 observations #' \tabular{lll}{ X \tab numeric \tab the proportion of Black residents in each #' county\cr Y \tab numeric \tab the overall literacy rates in each county\cr N #' \tab numeric \tab the total number of residents in each county \cr W1 \tab #' numeric \tab the actual Black literacy rate \cr W2 \tab numeric \tab the #' actual White literacy rate } #' @references Robinson, W.S. (1950). ``Ecological Correlations and the #' Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15, #' pp.351-357. \cr \cr King, G. (1997). \dQuote{A Solution to the Ecological #' Inference Problem: Reconstructing Individual Behavior from Aggregate Data}. #' Princeton University Press, Princeton, NJ. #' @keywords datasets NULL eco/R/print.summary.eco.R0000644000176200001440000000444714330337373014760 0ustar liggesusers#' Print the Summary of the Results for the Bayesian Parametric Model for Ecological #' Inference in 2x2 Tables #' #' \code{summary} method for class \code{eco}. #' #' #' @aliases print.summary.eco #' @param x An object of class \code{summary.eco}. #' @param digits the number of significant digits to use when printing. #' @param ... further arguments passed to or from other methods. #' @return \code{summary.eco} yields an object of class \code{summary.eco} #' containing the following elements: #' \item{call}{The call from \code{eco}.} #' \item{n.obs}{The number of units.} #' \item{n.draws}{The number of Monte Carlo samples.} #' \item{agg.table}{Aggregate posterior estimates of the marginal #' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If #' \code{param = TRUE}, the following elements are also included: #' \item{param.table}{Posterior estimates of model parameters: population mean #' estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If #' \code{units = TRUE}, the following elements are also included: #' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.} #' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.} #' #' This object can be printed by \code{print.summary.eco} #' @seealso \code{eco}, \code{predict.eco} #' @keywords methods #' @export print.summary.eco <- function(x, digits=max(3, getOption("digits")-3), ...) { cat("\nCall: ") cat(paste(deparse(x$call), sep="\n", collapse="\n")) cat("\n") if (!is.null(x$param.table)) { cat("\nParameter Estimates:\n") print(x$param.table, digits=digits, na.print="NA",...) } cat("\n*** Insample Predictions ***\n") cat("\nUnweighted:\n") print(x$agg.table, digits=digits, na.print="NA",...) if (!is.null(x$agg.wtable)) { cat("\nWeighted:\n") print(x$agg.wtable, digits=digits, na.print="NA",...) 
} cat("\nNumber of Units:", x$n.obs) cat("\nNumber of Monte Carlo Draws:", x$n.draws) if (!is.null(x$W1.table)) { cat("\n\nUnit-level Estimates of W1:\n") print(x$W1.table, digits=digits, na.print="NA",...) cat("\n\nUnit-level Estimates of W2:\n") print(x$W2.table, digits=digits, na.print="NA",...) } cat("\n") invisible(x) } eco/R/print.eco.R0000644000176200001440000000112114330337373013246 0ustar liggesusers#' @export print.eco <- function(x, digits = max(3, getOption("digits") -3), ...){ cat("\nCall:\n", deparse(x$call), "\n\n", sep="") if (is.null(x$N)) N <- rep(1, nrow(x$X)) else N <- x$N W.mean <- cbind(mean(x$W[,1,] %*% (x$X*N/sum(x$X*N))), mean(x$W[,2,] %*% ((1-x$X)*N/sum((1-x$X)*N)))) colnames(W.mean) <- c("W1", "W2") rownames(W.mean) <- "posterior mean" cat("Aggregate In-sample Estimates:\n\n") print.default(format(W.mean, digits = digits), print.gap = 2, quote = FALSE) cat("\n") invisible(x) } eco/R/print.ecoML.R0000644000176200001440000000225614330337373013511 0ustar liggesusers#' @export print.ecoML <- function(x, digits = max(3, getOption("digits") -3), ...){ cat("\nCall:\n", deparse(x$call), "\n\n", sep="") n.col<-5 if (x$fix.rho) n.col<-4 n.row<-1 if (x$sem) n.row<-3 param.table<-matrix(NA, n.row, n.col) if (!x$context) param.table[1,]<-x$theta.em else if (x$context && !x$fix.rho) param.table[1,]<-x$theta.em[c(2,3,5,6,9)] else if (x$context && x$fix.rho) param.table[1,]<-x$theta.em[c(2,3,5,6)] if (n.row>1) { if (!x$context) { param.table[2,]<-sqrt(diag(x$Vobs)) param.table[3,]<-Fmis<-1-diag(x$Iobs)/diag(x$Icom) } else if (x$context && !x$fix.rho) { param.table[2,]<-sqrt(diag(x$Vobs))[c(2,3,5,6,9)] param.table[3,]<-Fmis<-(1-diag(x$Iobs)/diag(x$Icom))[c(2,3,5,6,9)] } else if (x$context && x$fix.rho) { param.table[2,]<-sqrt(diag(x$Vobs))[c(2,3,5,6)] param.table[3,]<-Fmis<-(1-diag(x$Iobs)/diag(x$Icom))[c(2,3,5,6)] } } cname<-c("mu1", "mu2", "sigma1", "sigma2", "rho") rname<-c("EM est.", "std. err.", "frac. missing") rownames(param.table)<-rname[1:n.row] colnames(param.table)<-cname[1:n.col] print(param.table) cat("\n") invisible(x) } eco/R/varcov.R0000644000176200001440000000404014330337373012650 0ustar liggesusers#' Calculate the variance or covariance of the object #' #' \code{varcov} returns the variance or covariance of the object. #' #' @param object An object #' @param ... The rest of the input parameters if any #' @keywords methods #' @export varcov <- function(object, ...) UseMethod("varcov") #' @export varcov.eco <- function(object, subset = NULL, ...) { if (is.null(subset)) subset <- 1:nrow(object$Sigma) else if (max(subset) > nrow(object$Sigma)) stop(paste("invalid input for `subset.' only", nrow(object$Sigma), "draws are stored.")) p <- ncol(object$mu) n <- length(subset) Sigma <- array(0, c(p, p, n)) cov <- object$Sigma[subset,] for (i in 1:n) { count <- 1 for (j in 1:p) { Sigma[j,j:p,i] <- cov[i,count:(count+p-j)] count <- count + p - j + 1 } diag(Sigma[,,i]) <- diag(Sigma[,,i]/2) Sigma[,,i] <- Sigma[,,i] + t(Sigma[,,i]) } if (n > 1) return(Sigma) else return(Sigma[,,1]) } #' @export varcov.ecoNP <- function(object, subset = NULL, obs = NULL, ...) { if (is.null(subset)) subset <- 1:nrow(object$Sigma) else if (max(subset) > nrow(object$Sigma)) stop(paste("invalid input for `subset.' only", nrow(object$Sigma), "draws are stored.")) if (is.null(obs)) obs <- 1:dim(object$Sigma)[3] else if (max(subset) > dim(object$Sigma)[3]) stop(paste("invalid input for `obs.' 
only", dim(object$Sigma)[3], "draws are stored.")) p <- ncol(object$mu) n <- length(subset) m <- length(obs) Sigma <- array(0, c(p, p, n, m)) cov <- object$Sigma[subset,,obs] for (k in 1:m) { for (i in 1:n) { count <- 1 for (j in 1:p) { Sigma[j,j:p,i,k] <- cov[i,count:(count+p-j),k] count <- count + p - j + 1 } diag(Sigma[,,i,k]) <- diag(Sigma[,,i,k]/2) Sigma[,,i,k] <- Sigma[,,i,k] + t(Sigma[,,i,k]) } } if (n > 1) if (m > 1) return(Sigma) else return(Sigma[,,,1]) else if (m > 1) return(Sigma[,,1,]) else return(Sigma[,,1,1]) } eco/R/coef.ecoNP.R0000644000176200001440000000073414330337373013275 0ustar liggesusers#' @export coef.ecoNP <- function(object, subset = NULL, obs = NULL, ...) { mu <- object$mu if (is.null(subset)) subset <- 1:nrow(mu) else if (max(subset) > nrow(mu)) stop(paste("invalid input for `subset.' only", nrow(mu), "draws are stored.")) if (is.null(obs)) obs <- 1:dim(object$mu)[3] else if (max(subset) > dim(object$mu)[3]) stop(paste("invalid input for `obs.' only", dim(object$mu)[3], "draws are stored.")) return(mu[subset,,obs]) } eco/R/ecoBD.R0000644000176200001440000002357314330337373012340 0ustar liggesusers#' Calculating the Bounds for Ecological Inference in RxC Tables #' #' \code{ecoBD} is used to calculate the bounds for missing internal cells of #' \eqn{R \times C} ecological table. The data can be entered either in the #' form of counts or proportions. #' #' The data may be entered either in the form of counts or proportions. If #' proportions are used, \code{formula} may omit the last row and/or column of #' tables, which can be calculated from the remaining margins. For example, #' \code{Y ~ X} specifies \code{Y} as the first column margin and \code{X} as #' the first row margin in \eqn{2 \times 2} tables. If counts are used, #' \code{formula} may omit the last row and/or column margin of the table only #' if \code{N} is supplied. In this example, the columns will be labeled as #' \code{X} and \code{not X}, and the rows will be labeled as \code{Y} and #' \code{not Y}. #' #' For larger tables, one can use \code{cbind()} and \code{+}. For example, #' \code{cbind(Y1, Y2, Y3) ~ X1 + X2 + X3 + X4)} specifies \eqn{3 \times 4} #' tables. #' #' An \eqn{R \times C} ecological table in the form of counts: \tabular{lcccc}{ #' \eqn{n_{i11}} \tab \eqn{n_{i12}} \tab \dots{} \tab \eqn{n_{i1C}} \tab #' \eqn{n_{i1.}} \cr \eqn{n_{i21}} \tab \eqn{n_{i22}} \tab \dots{} \tab #' \eqn{n_{i2C}} \tab \eqn{n_{i2.}} \cr \dots{} \tab \dots{} \tab \dots{} \tab #' \dots{} \tab \dots{}\cr \eqn{n_{iR1}} \tab \eqn{n_{iR2}} \tab \dots{} \tab #' \eqn{n_{iRC}} \tab \eqn{n_{iR.}} \cr \eqn{n_{i.1}} \tab \eqn{n_{i.2}} \tab #' \dots{} \tab \eqn{n_{i.C}} \tab \eqn{N_i} } where \eqn{n_{nr.}} and #' \eqn{n_{i.c}} represent the observed margins, \eqn{N_i} represents the size #' of the table, and \eqn{n_{irc}} are unknown variables. Note that for each #' \eqn{i}, the following deterministic relationships hold; \eqn{n_{ir.} = #' \sum_{c=1}^C n_{irc}} for \eqn{r=1,\dots,R}, and \eqn{n_{i.c}=\sum_{r=1}^R #' n_{irc}} for \eqn{c=1,\dots,C}. 
Then, each of the unknown inner cells can be #' bounded in the following manner, \deqn{\max(0, n_{ir.}+n_{i.c}-N_i) \le #' n_{irc} \le \min(n_{ir.}, n_{i.c}).} If the size of tables, \code{N}, is #' provided, the same bounds are also computed on the count scale and returned #' as \code{Nmin} and \code{Nmax}. #' #' An \eqn{R \times C} ecological table in the form of proportions: #' \tabular{lcccc}{ \eqn{W_{i11}} \tab \eqn{W_{i12}} \tab \dots{} \tab #' \eqn{W_{i1C}} \tab \eqn{Y_{i1}} \cr \eqn{W_{i21}} \tab \eqn{W_{i22}} \tab #' \dots{} \tab \eqn{W_{i2C}} \tab \eqn{Y_{i2}} \cr \dots{} \tab \dots{} \tab #' \dots{} \tab \dots{} \tab \dots{} \cr \eqn{W_{iR1}} \tab \eqn{W_{iR2}} \tab #' \dots{} \tab \eqn{W_{iRC}} \tab \eqn{Y_{iR}} \cr \eqn{X_{i1}} \tab #' \eqn{X_{i2}} \tab \dots{} \tab \eqn{X_{iC}} \tab } where \eqn{Y_{ir}} and #' \eqn{X_{ic}} represent the observed margins, and \eqn{W_{irc}} are unknown #' variables. Note that for each \eqn{i}, the following deterministic #' relationships hold; \eqn{Y_{ir} = \sum_{c=1}^C X_{ic} W_{irc}} for #' \eqn{r=1,\dots,R}, and \eqn{\sum_{r=1}^R W_{irc}=1} for \eqn{c=1,\dots,C}. #' Then, each of the inner cells of the table can be bounded in the following #' manner, \deqn{\max(0, (X_{ic} + Y_{ir}-1)/X_{ic}) \le W_{irc} \le \min(1, #' Y_{ir}/X_{ic}).}
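#'
#' As a quick illustration (the margins below are hypothetical, not from any
#' shipped data set), the proportion bounds for a single cell can be computed
#' directly from the two observed margins:
#' \preformatted{
#' x <- 0.8; y <- 0.9         # observed row and column margins for one unit
#' max(0, (x + y - 1)/x)      # lower bound on the cell proportion: 0.875
#' min(1, y/x)                # upper bound on the cell proportion: 1
#' }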
#' @keywords models #' @examples #' #' #' ## load the registration data #' data(reg) #' #' ## calculate the bounds #' res <- ecoBD(Y ~ X, N = N, data = reg) #' ## print the results #' print(res) #' #' @export ecoBD ecoBD <- function(formula, data = parent.frame(), N=NULL){ mf <- match.call() tt <- terms(formula) attr(tt, "intercept") <- 0 vnames <- attr(tt, "variables") vnamesR <- vnames[[2]] if (is.matrix(eval.parent(mf$data))) data <- as.data.frame(data) X <- model.matrix(tt, data) Y <- as.matrix(model.response(model.frame(tt, data = data))) N <- eval(mf$N, data) n.obs <- nrow(X) ## counts if (all(X>1) & all(Y>1)) { if (!is.null(N)) { if (!all(apply(X, 1, sum) == N)) X <- cbind(X, N-apply(X, 1, sum)) if (!all(apply(Y, 1, sum) == N)) Y <- cbind(Y, N-apply(Y, 1, sum)) if(any(X<0) || any(Y<0)) stop("Invalid inputs for X, Y, or/and N") } else { if (!all(apply(X, 1, sum) == apply(Y, 1, sum))) stop("X and Y do not sum to the same number. Input N.") N <- apply(X, 1, sum) } C <- ncol(X) R <- ncol(Y) Wmin <- Wmax <- Nmin <- Nmax <- array(NA, c(n.obs, R, C)) clab <- rlab <- NULL if (length(vnames) == 3) clab <- c(vnames[[3]], paste("not",vnames[[3]])) else { for (j in 1:C) { if ((j == C) & (length(vnames) < j+2)) clab <- c(clab, "other") else clab <- c(clab, vnames[[j+2]]) } } if (length(vnamesR) == 1) rlab <- c(vnamesR, paste("not",vnamesR)) else { for (i in 1:R) { if ((i == R) & (length(vnamesR) < i+1)) rlab <- c(rlab, "other") else rlab <- c(rlab, vnamesR[[i]]) } } for (i in 1:R) { for (j in 1:C) { Nmin[,i,j] <- apply(cbind(0, X[,j]+Y[,i]-N), 1, max) Nmax[,i,j] <- apply(cbind(Y[,i], X[,j]), 1, min) Wmin[,i,j] <- Nmin[,i,j]/X[,j] Wmax[,i,j] <- Nmax[,i,j]/X[,j] } } dimnames(Wmin) <- dimnames(Wmax) <- dimnames(Nmin) <- dimnames(Nmax) <- list(if (is.null(rownames(X))) 1:n.obs else rownames(X), rlab, clab) } else { ## proportions if (any(apply(X, 1, sum) > 1.000000001)) stop("invalid input for X") if (any(apply(X, 1, sum) < 0.9999999999)) X <- cbind(X, 1-X) if (any(apply(Y, 1, sum) > 1.0000000001)) stop("invalid input for Y") if (any(apply(Y, 1, sum) < 0.9999999999)) Y <- cbind(Y, 1-Y) C <- ncol(X) R <- ncol(Y) Wmin <- Wmax <- array(NA, c(n.obs, R, C)) clab <- rlab <- NULL if (length(vnames) == 3) clab <- c(vnames[[3]], paste("not",vnames[[3]])) else { for (j in 1:C) { if ((j == C) & (length(vnames) < j+2)) clab <- c(clab, "other") else clab <- c(clab, vnames[[j+2]]) } } if (length(vnamesR) == 1) rlab <- c(vnamesR, paste("not",vnamesR)) else { for (i in 1:R) { if ((i == R) & (length(vnamesR) < i+1)) rlab <- c(rlab, "other") else rlab <- c(rlab, vnamesR[[i]]) } } for (i in 1:R) { for (j in 1:C) { Wmin[,i,j] <- apply(cbind(0, (X[,j]+Y[,i]-1)/X[,j]), 1, max) Wmax[,i,j] <- apply(cbind(1, Y[,i]/X[,j]), 1, min) } } dimnames(Wmin) <- dimnames(Wmax) <- list(if (is.null(rownames(X))) 1:n.obs else rownames(X), rlab, clab) colnames(X) <- clab colnames(Y) <- rlab if (!is.null(N)) { Nmin <- Nmax <- array(NA, c(n.obs, R, C), dimnames = dimnames(Wmin)) for (i in 1:R) for (j in 1:C) { Nmin[,i,j] <- Wmin[,i,j]*X[,j]*N Nmax[,i,j] <- Wmax[,i,j]*X[,j]*N } } else Nmin <- Nmax <- NULL } ## aggregate bounds aggWmin <- aggWmax <- matrix(NA, R, C, dimnames = list(dimnames(Wmin)[[2]], dimnames(Wmin)[[3]])) if (is.null(N)) for (j in 1:C) { aggWmin[,j] <- apply(Wmin[,,j], 2, weighted.mean, X[,j]) aggWmax[,j] <- apply(Wmax[,,j], 2, weighted.mean, X[,j]) } else for (j in 1:C) { aggWmin[,j] <- apply(Wmin[,,j], 2, weighted.mean, X[,j]*N) aggWmax[,j] <- apply(Wmax[,,j], 2, weighted.mean, X[,j]*N) } if (!is.null(Nmin) & 
!is.null(Nmax)) { aggNmin <- aggNmax <- matrix(NA, R, C, dimnames = list(dimnames(Nmin)[[2]], dimnames(Nmin)[[3]])) for (j in 1:C) { aggNmin[,j] <- apply(Nmin[,,j], 2, sum) aggNmax[,j] <- apply(Nmax[,,j], 2, sum) } } else aggNmin <- aggNmax <- NULL ## output res <- list(call = mf, X = X, Y = Y, N = N, aggWmin = aggWmin, aggWmax = aggWmax, aggNmin = aggNmin, aggNmax = aggNmax, Wmin = Wmin, Wmax = Wmax, Nmin = Nmin, Nmax = Nmax) class(res) <- c("ecoBD", "eco") return(res) } eco/R/onAttach.R0000644000176200001440000000037214330337373013115 0ustar liggesusers".onAttach" <- function(lib, pkg) { mylib <- dirname(system.file(package = pkg)) title <- packageDescription(pkg)$Title ver <- packageDescription(pkg)$Version packageStartupMessage(paste(pkg, ": ", title, "\nVersion: ", ver, "\n", sep="")) } eco/R/ecoRC.R0000644000176200001440000000602514330337373012350 0ustar liggesusersecoRC <- function(formula, data = parent.frame(), mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10, mu.start = 0, Sigma.start = 1, reject = TRUE, maxit = 10e5, parameter = TRUE, n.draws = 5000, burnin = 0, thin = 0, verbose = FALSE){ ## checking inputs if (burnin >= n.draws) stop("n.draws should be larger than burnin") mf <- match.call() ## getting X, Y, and N tt <- terms(formula) attr(tt, "intercept") <- 0 if (is.matrix(eval.parent(mf$data))) data <- as.data.frame(data) X <- model.matrix(tt, data) n.samp <- nrow(X) C <- ncol(X) Y <- matrix(model.response(model.frame(tt, data = data)), nrow = n.samp) R <- ncol(Y) ## fitting the model n.store <- floor((n.draws-burnin)/(thin+1)) tmp <- ecoBD(formula, data=data) res.out <- list(call = mf, X = X, Y = Y, Wmin = tmp$Wmin, Wmax = tmp$Wmax) if (R == 1) { mu0 <- rep(mu0, C) S0 <- diag(S0, C) mu.start <- rep(mu.start, C) Sigma.start <- diag(Sigma.start, C) res <- .C("cBase2C", as.double(X), as.double(Y), as.double(tmp$Wmin[,1,]), as.double(tmp$Wmax[,1,]), as.integer(n.samp), as.integer(C), as.integer(reject), as.integer(maxit), as.integer(n.draws), as.integer(burnin), as.integer(thin+1), as.integer(verbose), as.integer(nu0), as.double(tau0), as.double(mu0), as.double(S0), as.double(mu.start), as.double(Sigma.start), as.integer(parameter), pdSmu = double(n.store*C), pdSSigma = double(n.store*C*(C+1)/2), pdSW = double(n.store*n.samp*C), PACKAGE="eco") res.out$mu <- matrix(res$pdSmu, n.store, C, byrow=TRUE) res.out$Sigma <- matrix(res$pdSSigma, n.store, C*(C+1)/2, byrow=TRUE) res.out$W <- array(res$pdSW, c(C, n.samp, n.store)) } else { mu0 <- rep(mu0, R-1) S0 <- diag(S0, R-1) mu.start <- matrix(rep(rep(mu.start, R-1), C), nrow = R-1, ncol = C, byrow = FALSE) Sigma.start <- array(rep(diag(Sigma.start, R-1), C), c(R-1, R-1, C)) res <- .C("cBaseRC", as.double(X), as.double(Y[,1:(R-1)]), as.double(tmp$Wmin[,1:(R-1),]), as.double(tmp$Wmax[,1:(R-1),]), as.integer(n.samp), as.integer(C), as.integer(R), as.integer(reject), as.integer(maxit), as.integer(n.draws), as.integer(burnin), as.integer(thin+1), as.integer(verbose), as.integer(nu0), as.double(tau0), as.double(mu0), as.double(S0), as.double(mu.start), as.double(Sigma.start), as.integer(parameter), pdSmu = double(n.store*C*(R-1)), pdSSigma = double(n.store*C*(R-1)*R/2), pdSW = double(n.store*n.samp*(R-1)*C), PACKAGE="eco") res.out$mu <- array(res$pdSmu, c(R-1, C, n.store)) res.out$Sigma <- array(res$pdSSigma, c(R*(R-1)/2, C, n.store)) res.out$W <- array(res$pdSW, c(R-1, C, n.samp, n.store)) } class(res.out) <- c("ecoRC", "eco") return(res.out) } eco/R/eco.R0000644000176200001440000003504014330337373012122 0ustar liggesusers#' Fitting the Parametric 
Bayesian Model of Ecological Inference in 2x2 Tables #' #' \code{eco} is used to fit the parametric Bayesian model (based on a #' Normal/Inverse-Wishart prior) for ecological inference in \eqn{2 \times 2} #' tables via Markov chain Monte Carlo. It gives the in-sample predictions as #' well as the estimates of the model parameters. The model and algorithm are #' described in Imai, Lu and Strauss (2008, 2011). #' #' An example of \eqn{2 \times 2} ecological table for racial voting is given #' below: \tabular{llccc}{ \tab \tab black voters \tab white voters \tab \cr #' \tab vote \tab \eqn{W_{1i}} \tab \eqn{W_{2i}} \tab \eqn{Y_i} \cr \tab not #' vote \tab \eqn{1-W_{1i}} \tab \eqn{1-W_{2i}} \tab \eqn{1-Y_i} \cr \tab \tab #' \eqn{X_i} \tab \eqn{1-X_i} \tab } where \eqn{Y_i} and \eqn{X_i} represent #' the observed margins, and \eqn{W_1} and \eqn{W_2} are unknown variables. In #' this example, \eqn{Y_i} is the turnout rate in the ith precinct, and \eqn{X_i} is #' the proportion of African Americans in the ith precinct. The unknowns #' \eqn{W_{1i}} and \eqn{W_{2i}} are the black and white turnout rates, respectively. #' All variables are proportions and hence bounded between 0 and 1. For each #' \eqn{i}, the following deterministic relationship holds, \eqn{Y_i=X_i #' W_{1i}+(1-X_i)W_{2i}}. #' #' @param formula A symbolic description of the model to be fit, specifying the #' column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X} #' specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as the #' row margin (e.g., percent African-American). Details and specific examples #' are given below. #' @param data An optional data frame in which to interpret the variables in #' \code{formula}. The default is the environment in which \code{eco} is #' called. #' @param N An optional variable representing the size of the unit; e.g., the #' total number of voters. \code{N} needs to be a vector of same length as #' \code{Y} and \code{X} or a scalar. #' @param supplement An optional matrix of supplemental data. The matrix has #' two columns, which contain additional individual-level data such as survey #' data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no #' additional individual-level data are included in the model. The default is #' \code{NULL}. #' @param context Logical. If \code{TRUE}, the contextual effect is also #' modeled, that is to assume the row margin \eqn{X} and the unknown \eqn{W_1} #' and \eqn{W_2} are correlated. See Imai, Lu and Strauss (2008, 2011) for #' details. The default is \code{FALSE}. #' @param mu0 A scalar or a numeric vector that specifies the prior mean for #' the mean parameter \eqn{\mu} for \eqn{(W_1,W_2)} (or for \eqn{(W_1, W_2, X)} #' if \code{context=TRUE}). When the input of \code{mu0} is a scalar, its value #' will be repeated to yield a vector of the length of \eqn{\mu}, otherwise, it #' needs to be a vector of same length as \eqn{\mu}. When \code{context=TRUE}, #' the length of \eqn{\mu} is 3, otherwise it is 2. The default is \code{0}. #' @param tau0 A positive integer representing the scale parameter of the #' Normal-Inverse Wishart prior for the mean and variance parameter \eqn{(\mu, #' \Sigma)}. The default is \code{2}. #' @param nu0 A positive integer representing the prior degrees of freedom of #' the Normal-Inverse Wishart prior for the mean and variance parameter #' \eqn{(\mu, \Sigma)}. The default is \code{4}. #' @param S0 A positive scalar or a positive definite matrix that specifies the #' prior scale matrix of the Normal-Inverse Wishart prior for the mean and #' variance parameter \eqn{(\mu, \Sigma)}. If it is a scalar, then the prior #' scale matrix will be a diagonal matrix with the same dimensions as #' \eqn{\Sigma} and the diagonal elements all take the value of \code{S0}, #' otherwise \code{S0} needs to have the same dimensions as \eqn{\Sigma}. When #' \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix, otherwise, #' it is \eqn{2 \times 2}. The default is \code{10}. #' @param mu.start A scalar or a numeric vector that specifies the starting #' values of the mean parameter \eqn{\mu}. If it is a scalar, then its value #' will be repeated to yield a vector of the length of \eqn{\mu}, otherwise, it #' needs to be a vector of same length as \eqn{\mu}. When #' \code{context=FALSE}, the length of \eqn{\mu} is 2, otherwise it is 3. The #' default is \code{0}. #' @param Sigma.start A scalar or a positive definite matrix that specifies the #' starting value of the variance matrix \eqn{\Sigma}. If it is a scalar, then #' the starting value will be a diagonal matrix with the same dimensions as #' \eqn{\Sigma} whose diagonal elements all take the value of \code{Sigma.start}; #' otherwise \code{Sigma.start} needs to have the same dimensions as \eqn{\Sigma}. When #' \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3} matrix, otherwise, #' it is \eqn{2 \times 2}. The default is \code{10}. #' @param parameter Logical. If \code{TRUE}, the Gibbs draws of the population #' parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the #' in-sample predictions of the missing internal cells, \eqn{W}. The default is #' \code{TRUE}. #' @param grid Logical. If \code{TRUE}, the grid method is used to sample #' \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is #' used where candidate draws are sampled from the uniform distribution on the #' tomography line for each unit. Note that the grid method is significantly #' slower than the Metropolis algorithm. The default is \code{FALSE}. #' @param n.draws A positive integer. The number of MCMC draws. The default is #' \code{5000}. #' @param burnin A positive integer. The burnin interval for the Markov chain; #' i.e. the number of initial draws that should not be stored. The default is #' \code{0}. #' @param thin A positive integer. The thinning interval for the Markov chain; #' i.e. the number of Gibbs draws between the recorded values that are skipped. #' The default is \code{0}. #' @param verbose Logical. If \code{TRUE}, the progress of the Gibbs sampler is #' printed to the screen. The default is \code{FALSE}. #' @return An object of class \code{eco} containing the following elements: #' \item{call}{The matched call.} #' \item{X}{The row margin, \eqn{X}.} #' \item{Y}{The column margin, \eqn{Y}.} #' \item{N}{The size of each table, \eqn{N}.} #' \item{burnin}{The number of initial burnin draws.} #' \item{thin}{The thinning interval.} #' \item{nu0}{The prior degrees of freedom.} #' \item{tau0}{The prior scale parameter.} #' \item{mu0}{The prior mean.} #' \item{S0}{The prior scale matrix.} #' \item{W}{A three dimensional array storing the posterior in-sample predictions of \eqn{W}.
#' The first dimension indexes the Monte Carlo draws, the second dimension indexes the #' columns of the table, and the third dimension represents the observations.} #' \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.} #' \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.} The #' following additional elements are included in the output when #' \code{parameter = TRUE}. #' \item{mu}{The posterior draws of the population mean parameter, \eqn{\mu}.} #' \item{Sigma}{The posterior draws of the population variance matrix, \eqn{\Sigma}.} #' @seealso \code{ecoML}, \code{ecoNP}, \code{predict.eco}, \code{summary.eco} #' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R #' Package for Ecological Inference in 2x2 Tables} Journal of Statistical #' Software, Vol. 42, No. 5, pp. 1-23. #' #' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and #' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data #' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. #' @keywords models #' #' @useDynLib eco, .registration = TRUE #' #' @importFrom MASS mvrnorm #' @importFrom utils packageDescription #' @importFrom stats as.formula coef model.frame model.matrix model.response predict quantile sd terms weighted.mean #' #' @examples #' #' #' ## load the registration data #' \dontrun{data(reg) #' #' ## NOTE: convergence has not been properly assessed for the following #' ## examples. See Imai, Lu and Strauss (2008, 2011) for more #' ## complete analyses. #' #' ## fit the parametric model with the default prior specification #' res <- eco(Y ~ X, data = reg, verbose = TRUE) #' ## summarize the results #' summary(res) #' #' ## obtain out-of-sample prediction #' out <- predict(res, verbose = TRUE) #' ## summarize the results #' summary(out) #' #' ## load the Robinson's census data #' data(census) #' #' ## fit the parametric model with contextual effects and N #' ## using the default prior specification #' res1 <- eco(Y ~ X, N = N, context = TRUE, data = census, verbose = TRUE) #' ## summarize the results #' summary(res1) #' #' ## obtain out-of-sample prediction #' out1 <- predict(res1, verbose = TRUE) #' ## summarize the results #' summary(out1) #' } #' #' @export eco eco <- function(formula, data = parent.frame(), N = NULL, supplement = NULL, context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10, mu.start = 0, Sigma.start = 10, parameter = TRUE, grid = FALSE, n.draws = 5000, burnin = 0, thin = 0, verbose = FALSE){ ## contextual effects if (context) ndim <- 3 else ndim <- 2 ## checking inputs if (burnin >= n.draws) stop("n.draws should be larger than burnin") if (length(mu0)==1) mu0 <- rep(mu0, ndim) else if (length(mu0)!=ndim) stop("invalid inputs for mu0") if (is.matrix(S0)) { if (any(dim(S0)!=ndim)) stop("invalid inputs for S0") } else S0 <- diag(S0, ndim) if (length(mu.start)==1) mu.start <- rep(mu.start, ndim) else if (length(mu.start)!=ndim) stop("invalid inputs for mu.start") if (is.matrix(Sigma.start)) { if (any(dim(Sigma.start)!=ndim)) stop("invalid inputs for Sigma.start") } else Sigma.start <- diag(Sigma.start, ndim) ## getting X, Y, and N mf <- match.call() tt <- terms(formula) attr(tt, "intercept") <- 0 if (is.matrix(eval.parent(mf$data))) data <- as.data.frame(data) X <- model.matrix(tt, data) Y <- model.response(model.frame(tt, data = data)) N <- eval(mf$N, data) # check data and modify inputs tmp <- checkdata(X,Y, supplement, ndim) bdd <- ecoBD(formula=formula, data=data) W1min <- 
bdd$Wmin[order(tmp$order.old)[1:nrow(tmp$d)],1,1] W1max <- bdd$Wmax[order(tmp$order.old)[1:nrow(tmp$d)],1,1] ## fitting the model n.store <- floor((n.draws-burnin)/(thin+1)) unit.par <- 1 unit.w <- tmp$n.samp+tmp$samp.X1+tmp$samp.X0 n.w <- n.store * unit.w if (context) res <- .C("cBaseecoX", as.double(tmp$d), as.integer(tmp$n.samp), as.integer(n.draws), as.integer(burnin), as.integer(thin+1), as.integer(verbose), as.integer(nu0), as.double(tau0), as.double(mu0), as.double(S0), as.double(mu.start), as.double(Sigma.start), as.integer(tmp$survey.yes), as.integer(tmp$survey.samp), as.double(tmp$survey.data), as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1), as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2), as.double(W1min), as.double(W1max), as.integer(parameter), as.integer(grid), pdSMu0 = double(n.store), pdSMu1 = double(n.store), pdSMu2 = double(n.store), pdSSig00=double(n.store), pdSSig01=double(n.store), pdSSig02=double(n.store), pdSSig11=double(n.store), pdSSig12=double(n.store), pdSSig22=double(n.store), pdSW1=double(n.w), pdSW2=double(n.w), PACKAGE="eco") else res <- .C("cBaseeco", as.double(tmp$d), as.integer(tmp$n.samp), as.integer(n.draws), as.integer(burnin), as.integer(thin+1), as.integer(verbose), as.integer(nu0), as.double(tau0), as.double(mu0), as.double(S0), as.double(mu.start), as.double(Sigma.start), as.integer(tmp$survey.yes), as.integer(tmp$survey.samp), as.double(tmp$survey.data), as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1), as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2), as.double(W1min), as.double(W1max), as.integer(parameter), as.integer(grid), pdSMu0=double(n.store), pdSMu1=double(n.store), pdSSig00=double(n.store), pdSSig01=double(n.store), pdSSig11=double(n.store), pdSW1=double(n.w), pdSW2=double(n.w), PACKAGE="eco") W1.post <- matrix(res$pdSW1, n.store, unit.w, byrow=TRUE)[,tmp$order.old] W2.post <- matrix(res$pdSW2, n.store, unit.w, byrow=TRUE)[,tmp$order.old] W <- array(rbind(W1.post, W2.post), c(n.store, 2, unit.w)) colnames(W) <- c("W1", "W2") res.out <- list(call = mf, X = X, Y = Y, N = N, W = W, Wmin=bdd$Wmin[,1,], Wmax = bdd$Wmax[,1,], burnin = burnin, thin = thin, nu0 = nu0, tau0 = tau0, mu0 = mu0, S0 = S0) if (parameter) if (context) { res.out$mu <- cbind(matrix(res$pdSMu0, n.store, unit.par, byrow=TRUE), matrix(res$pdSMu1, n.store, unit.par, byrow=TRUE), matrix(res$pdSMu2, n.store, unit.par, byrow=TRUE)) colnames(res.out$mu) <- c("mu1", "mu2", "mu3") res.out$Sigma <- cbind(matrix(res$pdSSig00, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig01, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig02, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig11, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig12, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig22, n.store, unit.par, byrow=TRUE)) colnames(res.out$Sigma) <- c("Sigma11", "Sigma12", "Sigma13", "Sigma22", "Sigma23", "Sigma33") } else { res.out$mu <- cbind(matrix(res$pdSMu0, n.store, unit.par, byrow=TRUE), matrix(res$pdSMu1, n.store, unit.par, byrow=TRUE)) colnames(res.out$mu) <- c("mu1", "mu2") res.out$Sigma <- cbind(matrix(res$pdSSig00, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig01, n.store, unit.par, byrow=TRUE), matrix(res$pdSSig11, n.store, unit.par, byrow=TRUE)) colnames(res.out$Sigma) <- c("Sigma11", "Sigma12", "Sigma22") } if (context) class(res.out) <- c("ecoX","eco") else class(res.out) <- c("eco") return(res.out) } eco/R/print.summary.ecoML.R0000644000176200001440000000671414330337373015210 0ustar
liggesusers## for simplicity, this summary function only reports parameters related to W_1 and W_2 #' Print the Summary of the Results for the Maximum Likelihood Parametric Model for #' Ecological Inference in 2x2 Tables #' #' \code{summary} method for class \code{ecoML}. #' #' #' @aliases print.summary.ecoML #' @param x An object of class \code{summary.ecoML}. #' @param digits the number of significant digits to use when printing. #' @param ... further arguments passed to or from other methods. #' @return \code{summary.ecoML} yields an object of class \code{summary.ecoML} #' containing the following elements: #' \item{call}{The call from \code{ecoML}.} #' \item{sem}{Whether the SEM algorithm was executed, as specified by the user #' upon calling \code{ecoML}.} #' \item{fix.rho}{Whether the correlation parameter was fixed or allowed to vary, #' as specified by the user upon calling \code{ecoML}.} #' \item{epsilon}{The convergence threshold specified by the #' user upon calling \code{ecoML}.} #' \item{n.obs}{The number of units.} #' \item{iters.em}{The number of iterations the EM algorithm cycled through before #' convergence or reaching the maximum number of iterations allowed.} #' \item{iters.sem}{The number of iterations the SEM algorithm cycled through #' before convergence or reaching the maximum number of iterations allowed.} #' \item{loglik}{The final observed log-likelihood.} #' \item{rho}{A matrix of \code{iters.em} rows specifying the correlation parameters #' at each iteration of the EM algorithm. The number of columns depends on how many #' correlation parameters exist in the model. Column order is the same as the order of the #' parameters in \code{param.table}.} #' \item{param.table}{Final estimates of the parameter values for the model. #' Excludes parameters fixed by the user upon calling \code{ecoML}. #' See \code{ecoML} documentation for order of parameters.} #' \item{agg.table}{Aggregate estimates of the marginal means of \eqn{W_1} and \eqn{W_2}.} #' \item{agg.wtable}{Aggregate estimates of the marginal means of \eqn{W_1} and \eqn{W_2} #' using \eqn{X} and \eqn{N} as weights.} If \code{units = TRUE}, the following elements #' are also included: #' \item{W.table}{Unit-level estimates for \eqn{W_1} and \eqn{W_2}.} #' #' This object can be printed by \code{print.summary.ecoML}. #' @seealso \code{ecoML} #' @keywords methods #' @export print.summary.ecoML <- function(x, digits=max(3, getOption("digits")-3), ...) { cat("\nCall: ", paste(deparse(x$call), sep="\n", collapse="\n")) cat("\n") if (!is.null(x$param.table)) { cat("\n*** Parameter Estimates ***\n") if (x$fix.rho) cat("\nOriginal Model Parameters (rho is fixed at ", x$rho, "):\n", sep="") else cat("\nOriginal Model Parameters:\n") print(x$param.table, digits=digits, na.print="NA",...) } cat("\n*** Insample Predictions ***\n") cat("\nUnweighted:\n") print(x$agg.table, digits=digits, na.print="NA",...) if (!is.null(x$agg.wtable)) { cat("\nWeighted:\n") print(x$agg.wtable, digits=digits, na.print="NA",...) } if (!is.null(x$W.table)) { cat("\n\nUnit-level Estimates of W:\n") print(x$W.table, digits=digits, na.print="NA",...)
} cat("\n\nLog-likelihood:", x$loglik) cat("\nNumber of Observations:", x$n.obs) cat("\nNumber of EM iterations:", x$iters.em) if (x$sem) cat("\nNumber of SEM iterations:", x$iters.sem) cat("\nConvergence threshold for EM:", x$epsilon) cat("\n\n") invisible(x) } eco/R/print.summary.predict.eco.R0000644000176200001440000000047514330337373016406 0ustar liggesusers#' @export print.summary.predict.eco <- function(x, digits=max(3, getOption("digits") -3), ...) { cat("\nOut-of-sample Prediction:\n") print(x$W.table, digits=digits, na.print="NA",...) cat("\nNumber of Monte Carlo Draws:", x$n.draws) cat("\n") invisible(x) } eco/R/forgnlit30.R0000644000176200001440000000215014330337373013337 0ustar liggesusers #' Foreign-born literacy in 1930 #' #' This data set contains, on a state level, the proportion of white residents #' ten years and older who are foreign born, and the proportion of those #' residents who are literate. Data come from the 1930 census and were first #' analyzed by Robinson (1950). #' #' #' @name forgnlit30 #' @docType data #' @format A data frame containing 5 variables and 48 observations #' \tabular{lll}{ X \tab numeric \tab proportion of the white population at #' least 10 years of age that is foreign born \cr Y \tab numeric \tab #' proportion of the white population at least 10 years of age that is #' illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white #' population at least 10 years of age that is illiterate \cr W2 \tab numeric #' \tab proportion of the native-born white population at least 10 years of age #' that is illiterate \cr ICPSR \tab numeric \tab the ICPSR state code } #' @references Robinson, W.S. (1950). ``Ecological Correlations and the #' Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15, #' pp.351-357. #' @keywords datasets NULL eco/R/Qfun.R0000644000176200001440000000307514330337373012270 0ustar liggesusers#' Fitting the Parametric Bayesian Model of Ecological Inference in 2x2 Tables #' #' \code{Qfun} returns the complete log-likelihood that is used to calculate #' the fraction of missing information. #' #' #' @param theta A vector that contains the MLE \eqn{E(W_1)},\eqn{E(W_2)}, #' \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. Typically it is the #' element \code{theta.em} of an object of class \code{ecoML}. #' @param suff.stat A vector of sufficient statistics of \eqn{E(W_1)}, #' \eqn{E(W_2)}, \eqn{var(W_1)},\eqn{var(W_2)}, and \eqn{cov(W_1,W_2)}. #' @param n A integer representing the sample size. #' @seealso \code{ecoML} #' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R #' Package for Ecological Inference in 2x2 Tables} Journal of Statistical #' Software, Vol. 42, No. 5, pp. 1-23. #' #' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and #' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data #' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69. 
#' @keywords models #' @export Qfun Qfun <- function(theta, suff.stat, n) { mu<-rep(0,2) Sigma<-matrix(0, 2,2) Suff1<-rep(0,2) Suff2<-matrix(0,2,2) mu <- theta[1:2] Sigma[1,1]<-theta[3] Sigma[2,2]<-theta[4] Sigma[1,2]<-Sigma[2,1]<-theta[5]*sqrt(Sigma[1,1]*Sigma[2,2]) Suff1 <- n*suff.stat[1:2] Suff2[1,1]<-n*suff.stat[3] Suff2[2,2]<-n*suff.stat[4] ## Suff2 is a symmetric moment matrix; fill both off-diagonal entries Suff2[1,2]<-Suff2[2,1]<-n*suff.stat[5] invSigma<-solve(Sigma) return(-0.5*n*log(det(Sigma))-0.5*sum(diag(invSigma%*%(Suff2-mu%*%t(Suff1)-Suff1%*%t(mu)+n*mu%*%t(mu))))) } eco/R/predict.ecoNPX.R0000644000176200001440000001035214330337373014140 0ustar liggesusers#' Out-of-Sample Posterior Prediction under the Nonparametric Bayesian Model #' for Ecological Inference in 2x2 Tables #' #' Obtains out-of-sample posterior predictions under the fitted nonparametric #' Bayesian model for ecological inference. \code{predict} method for class #' \code{ecoNP} and \code{ecoNPX}. #' #' The posterior predictive values are computed using the Monte Carlo sample #' stored in the \code{eco} or \code{ecoNP} output (or other sample if #' \code{newdraw} is specified). Given each Monte Carlo sample of the #' parameters, we sample the vector-valued latent variable from the appropriate #' multivariate Normal distribution. Then, we apply the inverse logit #' transformation to obtain the predictive values of proportions, \eqn{W}. The #' computation may be slow (especially for the nonparametric model) if a large #' Monte Carlo sample of the model parameters is used. In either case, setting #' \code{verbose = TRUE} may be helpful in monitoring the progress of the code. #' #' @aliases predict.ecoNPX #' @param object An output object from \code{ecoNP}. #' @param newdraw An optional list containing two matrices (or three #' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu} #' and \eqn{\Sigma}. Those elements should be named as \code{mu} and #' \code{Sigma}, respectively. The default is the original MCMC draws stored in #' \code{object}. #' @param subset A scalar or numerical vector specifying the row number(s) of #' \code{mu} and \code{Sigma} in the output object from \code{eco}. If #' specified, the posterior draws of parameters for those rows are used for #' posterior prediction. The default is \code{NULL} where all the posterior #' draws are used. #' @param obs An integer or vector of integers specifying the observation #' number(s) whose posterior draws will be used for predictions. The default is #' \code{NULL} where all the observations in the data set are selected. #' @param cond logical. If \code{TRUE}, then the conditional prediction will be #' made for the parametric model with contextual effects. The default is #' \code{FALSE}. #' @param verbose logical. If \code{TRUE}, helpful messages along with a #' progress report on the Monte Carlo sampling from the posterior predictive #' distributions are printed on the screen. The default is \code{FALSE}. #' @param ... further arguments passed to or from other methods. #' @return \code{predict.eco} yields a matrix of class \code{predict.eco} #' containing the Monte Carlo sample from the posterior predictive distribution #' of inner cells of ecological tables. \code{summary.predict.eco} will #' summarize the output, and \code{print.summary.predict.eco} will print the #' summary.
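#' @examples
#' \dontrun{
#' ## a minimal sketch (not run); it assumes that fitting the nonparametric
#' ## model with contextual effects yields an object of class ecoNPX, and the
#' ## data set and settings are illustrative only
#' data(census)
#' res <- ecoNP(Y ~ X, context = TRUE, data = census)
#' out <- predict(res, cond = TRUE, verbose = TRUE)
#' }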
#' @seealso \code{eco}, \code{ecoNP}, \code{summary.eco}, \code{summary.ecoNP} #' @keywords methods #' @export predict.ecoNPX <- function(object, newdraw = NULL, subset = NULL, obs = NULL, cond = FALSE, verbose = FALSE, ...){ if (is.null(newdraw) && is.null(object$mu)) stop("Posterior draws of mu and Sigma must be supplied") else if (!is.null(newdraw)){ if (is.null(newdraw$mu) || is.null(newdraw$Sigma)) stop("Posterior draws of both mu and Sigma must be supplied.") object <- newdraw } n.draws <- dim(object$mu)[1] n <- dim(object$mu)[3] mu <- aperm(coef(object, subset = subset, obs = obs), c(2,3,1)) if (is.null(subset)) subset <- 1:n.draws if (is.null(obs)) obs <- 1:n Sigma <- aperm(object$Sigma[subset,,obs], c(2,3,1)) if (cond) { # conditional prediction X <- object$X res <- .C("preDPX", as.double(mu), as.double(Sigma), as.double(X), as.integer(n), as.integer(n.draws), as.integer(2), as.integer(verbose), pdStore = double(n.draws*2*n), PACKAGE="eco")$pdStore res <- matrix(res, ncol=2, nrow=n.draws*n, byrow=TRUE) colnames(res) <- c("W1", "W2") } else { # unconditional prediction res <- .C("preDP", as.double(mu), as.double(Sigma), as.integer(n), as.integer(n.draws), as.integer(3), as.integer(verbose), pdStore = double(n.draws*3*n), PACKAGE="eco")$pdStore res <- matrix(res, ncol=3, nrow=n.draws*n, byrow=TRUE) colnames(res) <- c("W1", "W2", "X") } class(res) <- c("predict.eco", "matrix") return(res) } eco/R/summary.eco.R0000644000176200001440000001242514330337373013620 0ustar liggesusers#' Summarizing the Results for the Bayesian Parametric Model for Ecological #' Inference in 2x2 Tables #' #' \code{summary} method for class \code{eco}. #' #' #' @aliases summary.eco print.eco #' @param object An output object from \code{eco}. #' @param CI A vector of lower and upper bounds for the Bayesian credible #' intervals used to summarize the results. The default is the equal tail 95 #' percent credible interval. #' @param param Logical. If \code{TRUE}, the posterior estimates of the #' population parameters will be provided. The default value is \code{TRUE}. #' @param units Logical. If \code{TRUE}, the in-sample predictions for each #' unit or for a subset of units will be provided. The default value is #' \code{FALSE}. #' @param subset A numeric vector indicating the subset of the units whose #' in-sample predictions are to be provided when \code{units} is \code{TRUE}. The #' default value is \code{NULL} where the in-sample predictions for each unit #' will be provided. #' @param ... further arguments passed to or from other methods.
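#' @examples
#' \dontrun{
#' ## a minimal sketch (not run); the data set and settings are
#' ## illustrative only
#' data(reg)
#' res <- eco(Y ~ X, data = reg)
#' ## unit-level estimates for the first ten units only
#' summary(res, units = TRUE, subset = 1:10)
#' }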
#' @return \code{summary.eco} yields an object of class \code{summary.eco} #' containing the following elements: #' \item{call}{The call from \code{eco}.} #' \item{n.obs}{The number of units.} #' \item{n.draws}{The number of Monte Carlo samples.} #' \item{agg.table}{Aggregate posterior estimates of the marginal #' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If #' \code{param = TRUE}, the following elements are also included: #' \item{param.table}{Posterior estimates of model parameters: population mean #' estimates of \eqn{W_1} and \eqn{W_2} and their logit transformations.} If #' \code{units = TRUE}, the following elements are also included: #' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.} #' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.} #' #' This object can be printed by \code{print.summary.eco} #' @seealso \code{eco}, \code{predict.eco} #' @keywords methods #' @export summary.eco <- function(object, CI = c(2.5, 97.5), param = TRUE, units = FALSE, subset = NULL,...) { n.obs <- ncol(object$W[,1,]) n.draws <- nrow(object$W[,1,]) if (is.null(subset)) subset <- 1:n.obs else if (!is.numeric(subset)) stop("Subset should be a numeric vector.") else if (!all(subset %in% c(1:n.obs))) stop("Subset should be any numbers in 1:obs.") table.names<-c("mean", "std.dev", paste(min(CI), "%", sep=" "), paste(max(CI), "%", sep=" ")) agg.table <-agg.wtable <-NULL N<-rep(1, length(object$X)) W1.agg.mean <- as.vector(object$W[,1,]%*% (object$X*N/sum(object$X*N))) W2.agg.mean <- as.vector(object$W[,2,]%*% ((1-object$X)*N/sum((1-object$X)*N))) agg.table <- rbind(cbind(mean(W1.agg.mean), sd(W1.agg.mean), quantile(W1.agg.mean, min(CI)/100), quantile(W1.agg.mean, max(CI)/100)), cbind(mean(W2.agg.mean), sd(W2.agg.mean), quantile(W2.agg.mean, min(CI)/100), quantile(W2.agg.mean, max(CI)/100))) colnames(agg.table) <- table.names rownames(agg.table) <- c("W1", "W2") if (!is.null(object$N)) { N <- object$N W1.agg.wmean <- as.vector(object$W[,1,] %*% (object$X*N/sum(object$X*N))) W2.agg.wmean <- as.vector(object$W[,2,] %*% ((1-object$X)*N/sum((1-object$X)*N))) agg.wtable <- rbind(cbind(mean(W1.agg.wmean), sd(W1.agg.wmean), quantile(W1.agg.wmean, min(CI)/100), quantile(W1.agg.wmean, max(CI)/100)), cbind(mean(W2.agg.wmean), sd(W2.agg.wmean), quantile(W2.agg.wmean, min(CI)/100), quantile(W2.agg.wmean, max(CI)/100))) colnames(agg.wtable) <- table.names rownames(agg.wtable) <- c("W1", "W2") } if (units) { W1.table <- cbind(apply(object$W[,1,subset], 2, mean), apply(object$W[,1,subset], 2, sd), apply(object$W[,1,subset], 2, quantile, min(CI)/100), apply(object$W[,1,subset], 2, quantile, max(CI)/100)) W2.table <- cbind(apply(object$W[,2,subset], 2, mean), apply(object$W[,2,subset], 2, sd), apply(object$W[,2,subset], 2, quantile, min(CI)/100), apply(object$W[,2,subset], 2, quantile, max(CI)/100)) colnames(W2.table) <- colnames(W1.table) <- table.names rownames(W1.table) <- rownames(W2.table) <- row.names(object$X[subset]) } else W1.table <- W2.table <- NULL if (param) { if (is.null(object$mu) || is.null(object$Sigma)) stop("Parameters are missing values.") else { param <- cbind(object$mu, object$Sigma) param.table <- cbind(apply(param, 2, mean), apply(param, 2, sd), apply(param, 2, quantile, min(CI)/100), apply(param, 2, quantile, max(CI)/100)) colnames(param.table) <- table.names } } else param.table <- NULL ans <- list(call = object$call, W1.table = W1.table, W2.table = W2.table, agg.table = agg.table, agg.wtable=agg.wtable, param.table = param.table, n.draws = 
n.draws, n.obs = n.obs) class(ans) <-"summary.eco" return(ans) } eco/R/reg.R0000644000176200001440000000202414330337373012125 0ustar liggesusers #' Voter Registration in US Southern States #' #' This data set contains the racial composition, the registration rate, the #' number of eligible voters as well as the actual observed racial registration #' rates for every county in four US southern states: Florida, Louisiana, North #' Carolina, and South Carolina. #' #' #' @name reg #' @docType data #' @format A data frame containing 5 variables and 275 observations #' \tabular{lll}{ X \tab numeric \tab the fraction of Black voters \cr Y \tab #' numeric \tab the fraction of voters who registered themselves\cr N \tab #' numeric \tab the total number of voters in each county \cr W1 \tab numeric #' \tab the actual fraction of Black voters who registered themselves \cr W2 #' \tab numeric \tab the actual fraction of White voters who registered #' themselves } #' @references King, G. (1997). \dQuote{A Solution to the Ecological Inference #' Problem: Reconstructing Individual Behavior from Aggregate Data}. Princeton #' University Press, Princeton, NJ. #' @keywords datasets NULL eco/R/print.summary.ecoNP.R0000644000176200001440000000555214330337373015214 0ustar liggesusers#' Print the Summary of the Results for the Bayesian Nonparametric Model for Ecological #' Inference in 2x2 Tables #' #' \code{summary} method for class \code{ecoNP}. #' #' #' @aliases print.summary.ecoNP #' @param x An object of class \code{summary.ecoNP}. #' @param digits the number of significant digits to use when printing. #' @param ... further arguments passed to or from other methods. #' @return \code{summary.ecoNP} yields an object of class \code{summary.ecoNP} #' containing the following elements: #' \item{call}{The call from \code{ecoNP}.} #' \item{n.obs}{The number of units.} #' \item{n.draws}{The number of Monte Carlo samples.} #' \item{agg.table}{Aggregate posterior estimates of the marginal #' means of \eqn{W_1} and \eqn{W_2} using \eqn{X} and \eqn{N} as weights.} If #' \code{param = TRUE}, the following elements are also included: #' \item{param.table}{Posterior estimates of model parameters: population mean #' estimates of \eqn{W_1} and \eqn{W_2}. If \code{subset} is specified, only a #' subset of the population parameters are included.} If \code{unit = TRUE}, #' the following elements are also included: #' \item{W1.table}{Unit-level posterior estimates for \eqn{W_1}.} #' \item{W2.table}{Unit-level posterior estimates for \eqn{W_2}.} #' #' This object can be printed by \code{print.summary.ecoNP} #' @seealso \code{ecoNP}, \code{predict.eco} #' @keywords methods #' @export print.summary.ecoNP <- function(x, digits=max(3, getOption("digits")-3), ...) { cat("\nCall: ") cat(paste(deparse(x$call), sep="\n", collapse="\n")) cat("\n\nIn-sample Predictions:\n") cat("\nUnweighted:\n") print(x$agg.table, digits=digits, na.print="NA",...) if (!is.null(x$agg.wtable)) { cat("\nWeighted:\n") print(x$agg.wtable, digits=digits, na.print="NA",...) } cat("\nNumber of Units:", x$n.obs) cat("\nNumber of Monte Carlo Draws:", x$n.draws) if (!is.null(x$param.table)) { tt <- x$param.table cat("\nParameter Estimates of mu1:\n") print(tt$mu1.table, digits=digits, na.print="NA",...) cat("\nParameter Estimates of mu2:\n") print(tt$mu2.table, digits=digits, na.print="NA",...) cat("\nParameter Estimates of Sigma11:\n") print(tt$Sigma11.table, digits=digits, na.print="NA",...) 
    cat("\nParameter Estimates of Sigma12:\n")
    print(tt$Sigma12.table, digits=digits, na.print="NA",...)
    cat("\nParameter Estimates of Sigma22:\n")
    print(tt$Sigma22.table, digits=digits, na.print="NA",...)
  }
  if (!is.null(x$W1.table)) {
    cat("\n\nUnit-level Estimates of W1:\n")
    print(x$W1.table, digits=digits, na.print="NA",...)
    cat("\n\nUnit-level Estimates of W2:\n")
    print(x$W2.table, digits=digits, na.print="NA",...)
  }
  cat("\n")
  invisible(x)
}
eco/R/ecoNP.R0000644000176200001440000003626414330337373012367 0ustar liggesusers#' Fitting the Nonparametric Bayesian Models of Ecological Inference in 2x2
#' Tables
#'
#' \code{ecoNP} is used to fit the nonparametric Bayesian model (based on a
#' Dirichlet process prior) for ecological inference in \eqn{2 \times 2} tables
#' via Markov chain Monte Carlo. It gives the in-sample predictions as well as
#' out-of-sample predictions for population inference. The models and
#' algorithms are described in Imai, Lu and Strauss (2008, 2011).
#'
#'
#' @param formula A symbolic description of the model to be fit, specifying the
#' column and row margins of \eqn{2 \times 2} ecological tables. \code{Y ~ X}
#' specifies \code{Y} as the column margin (e.g., turnout) and \code{X} as the
#' row margin (e.g., percent African-American). Details and specific examples
#' are given below.
#' @param data An optional data frame in which to interpret the variables in
#' \code{formula}. The default is the environment in which \code{ecoNP} is
#' called.
#' @param N An optional variable representing the size of the unit; e.g., the
#' total number of voters. \code{N} needs to be a vector of the same length as
#' \code{Y} and \code{X} or a scalar.
#' @param supplement An optional matrix of supplemental data. The matrix has
#' two columns, which contain additional individual-level data such as survey
#' data for \eqn{W_1} and \eqn{W_2}, respectively. If \code{NULL}, no
#' additional individual-level data are included in the model. The default is
#' \code{NULL}.
#' @param context Logical. If \code{TRUE}, the contextual effect is also
#' modeled; that is, the row margin \eqn{X} and the unknown \eqn{W_1} and
#' \eqn{W_2} are assumed to be correlated. See Imai, Lu and Strauss (2008,
#' 2011) for details. The default is \code{FALSE}.
#' @param mu0 A scalar or a numeric vector that specifies the prior mean for
#' the mean parameter \eqn{\mu} of the base prior distribution \eqn{G_0} (see
#' Imai, Lu and Strauss (2008, 2011) for detailed descriptions of the Dirichlet
#' prior and the normal base prior distribution). If it is a scalar, then its
#' value will be repeated to yield a vector of the length of \eqn{\mu};
#' otherwise, it needs to be a vector of the same length as \eqn{\mu}. When
#' \code{context=TRUE}, the length of \eqn{\mu} is 3, otherwise it is 2. The
#' default is \code{0}.
#' @param tau0 A positive integer representing the scale parameter of the
#' Normal-Inverse Wishart prior for the mean and variance parameter
#' \eqn{(\mu_i, \Sigma_i)} of each observation. The default is \code{2}.
#' @param nu0 A positive integer representing the prior degrees of freedom of
#' the variance matrix \eqn{\Sigma_i}. The default is \code{4}.
#' @param S0 A positive scalar or a positive definite matrix that specifies the
#' prior scale matrix for the variance matrix \eqn{\Sigma_i}. If it is a
#' scalar, then the prior scale matrix will be a diagonal matrix with the same
#' dimensions as \eqn{\Sigma_i} and the diagonal elements all take the value
#' \code{S0}; otherwise \code{S0} needs to have the same dimensions as
#' \eqn{\Sigma_i}. When \code{context=TRUE}, \eqn{\Sigma} is a \eqn{3 \times 3}
#' matrix, otherwise, it is \eqn{2 \times 2}. The default is \code{10}.
#' @param alpha A positive scalar representing a user-specified fixed value of
#' the concentration parameter, \eqn{\alpha}. If \code{NULL}, \eqn{\alpha} will
#' be updated at each Gibbs draw, and its prior parameters \code{a0} and
#' \code{b0} need to be specified. The default is \code{NULL}.
#' @param a0 A positive scalar representing the shape parameter of the gamma
#' prior distribution for \eqn{\alpha}. The default is \code{1}.
#' @param b0 A positive scalar representing the scale parameter of the gamma
#' prior distribution for \eqn{\alpha}. The default is \code{0.1}.
#' @param parameter Logical. If \code{TRUE}, the Gibbs draws of the population
#' parameters, \eqn{\mu} and \eqn{\Sigma}, are returned in addition to the
#' in-sample predictions of the missing internal cells, \eqn{W}. The default is
#' \code{FALSE}. This needs to be set to \code{TRUE} if one wishes to make
#' population inferences through \code{predict.eco}. See an example below.
#' @param grid Logical. If \code{TRUE}, the grid method is used to sample
#' \eqn{W} in the Gibbs sampler. If \code{FALSE}, the Metropolis algorithm is
#' used where candidate draws are sampled from the uniform distribution on the
#' tomography line for each unit. Note that the grid method is significantly
#' slower than the Metropolis algorithm.
#' @param n.draws A positive integer. The number of MCMC draws. The default is
#' \code{5000}.
#' @param burnin A positive integer. The burnin interval for the Markov chain;
#' i.e., the number of initial draws that should not be stored. The default is
#' \code{0}.
#' @param thin A positive integer. The thinning interval for the Markov chain;
#' i.e., the number of Gibbs draws that are skipped between recorded values.
#' The default is \code{0}.
#' @param verbose Logical. If \code{TRUE}, the progress of the Gibbs sampler is
#' printed to the screen. The default is \code{FALSE}.
#' @return An object of class \code{ecoNP} containing the following elements:
#' \item{call}{The matched call.}
#' \item{X}{The row margin, \eqn{X}.}
#' \item{Y}{The column margin, \eqn{Y}.}
#' \item{burnin}{The number of initial burnin draws.}
#' \item{thin}{The thinning interval.}
#' \item{nu0}{The prior degrees of freedom.}
#' \item{tau0}{The prior scale parameter.}
#' \item{mu0}{The prior mean.}
#' \item{S0}{The prior scale matrix.}
#' \item{a0}{The prior shape parameter.}
#' \item{b0}{The prior scale parameter.}
#' \item{W}{A three dimensional array storing the posterior in-sample predictions
#' of \eqn{W}. The first dimension indexes the Monte Carlo draws, the second dimension
#' indexes the columns of the table, and the third dimension represents the observations.}
#' \item{Wmin}{A numeric matrix storing the lower bounds of \eqn{W}.}
#' \item{Wmax}{A numeric matrix storing the upper bounds of \eqn{W}.}
#' The following additional elements are included in the output when
#' \code{parameter = TRUE}.
#' \item{mu}{A three dimensional array storing the
#' posterior draws of the population mean parameter, \eqn{\mu}. The first
#' dimension indexes the Monte Carlo draws, the second dimension indexes the
#' columns of the table, and the third dimension represents the observations.}
#' \item{Sigma}{A three dimensional array storing the posterior draws of the
#' population variance matrix, \eqn{\Sigma}. The first dimension indexes the
#' Monte Carlo draws, the second dimension indexes the parameters, and the
#' third dimension represents the observations.}
#' \item{alpha}{The posterior draws of \eqn{\alpha}.}
#' \item{nstar}{The number of clusters at each Gibbs draw.}
#' @seealso \code{eco}, \code{ecoML}, \code{predict.eco}, \code{summary.ecoNP}
#' @references Imai, Kosuke, Ying Lu and Aaron Strauss. (2011). \dQuote{eco: R
#' Package for Ecological Inference in 2x2 Tables} Journal of Statistical
#' Software, Vol. 42, No. 5, pp. 1-23.
#'
#' Imai, Kosuke, Ying Lu and Aaron Strauss. (2008). \dQuote{Bayesian and
#' Likelihood Inference for 2 x 2 Ecological Tables: An Incomplete Data
#' Approach} Political Analysis, Vol. 16, No. 1 (Winter), pp. 41-69.
#' @keywords models
#' @examples
#'
#'
#' ## load the registration data
#' data(reg)
#'
#' ## NOTE: We set the number of MCMC draws to be a very small number in
#' ## the following examples; i.e., convergence has not been properly
#' ## assessed. See Imai, Lu and Strauss (2008, 2011) for more complete
#' ## examples.
#'
#' ## fit the nonparametric model to give in-sample predictions
#' ## store the parameters to make population inference later
#' \dontrun{res <- ecoNP(Y ~ X, data = reg, n.draws = 50, param = TRUE, verbose = TRUE)
#'
#' ## summarize the results
#' summary(res)
#'
#' ## obtain out-of-sample prediction
#' out <- predict(res, verbose = TRUE)
#'
#' ## summarize the results
#' summary(out)
#'
#' ## density plots of the out-of-sample predictions
#' par(mfrow=c(2,1))
#' plot(density(out[,1]), main = "W1")
#' plot(density(out[,2]), main = "W2")
#'
#'
#' ## load the Robinson's census data
#' data(census)
#'
#' ## fit the nonparametric model with contextual effects and N
#' ## using the default prior specification
#'
#' res1 <- ecoNP(Y ~ X, N = N, context = TRUE, param = TRUE, data = census,
#' n.draws = 25, verbose = TRUE)
#'
#' ## summarize the results
#' summary(res1)
#'
#' ## out-of-sample prediction
#' pres1 <- predict(res1)
#' summary(pres1)}
#'
#' @export ecoNP
ecoNP <- function(formula, data = parent.frame(), N = NULL, supplement = NULL,
                  context = FALSE, mu0 = 0, tau0 = 2, nu0 = 4, S0 = 10,
                  alpha = NULL, a0 = 1, b0 = 0.1, parameter = FALSE,
                  grid = FALSE, n.draws = 5000, burnin = 0, thin = 0,
                  verbose = FALSE){
  ## contextual effects
  if (context)
    ndim <- 3
  else
    ndim <- 2
  ## checking inputs
  if (burnin >= n.draws)
    stop("n.draws should be larger than burnin")
  if (length(mu0)==1)
    mu0 <- rep(mu0, ndim)
  else if (length(mu0)!=ndim)
    stop("invalid inputs for mu0")
  if (is.matrix(S0)) {
    if (any(dim(S0)!=ndim))
      stop("invalid inputs for S0")
  }
  else
    S0 <- diag(S0, ndim)
  mf <- match.call()
  ## getting X, Y and N
  tt <- terms(formula)
  attr(tt, "intercept") <- 0
  if (is.matrix(eval.parent(mf$data)))
    data <- as.data.frame(data)
  X <- model.matrix(tt, data)
  Y <- model.response(model.frame(tt, data = data))
  N <- eval(mf$N, data)
  ## alpha
  if (is.null(alpha)) {
    alpha.update <- TRUE
    alpha <- 0
  }
  else
    alpha.update <- FALSE
  ## checking the data and calculating the bounds
  tmp <- checkdata(X, Y, supplement, ndim)
  bdd <- ecoBD(formula, data=data)
  W1min <- bdd$Wmin[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
  W1max <- bdd$Wmax[order(tmp$order.old)[1:nrow(tmp$d)],1,1]
  ## fitting the model
  n.store <- 
floor((n.draws-burnin)/(thin+1)) unit.par <- unit.w <- tmp$n.samp+tmp$samp.X1+tmp$samp.X0 n.par <- n.store * unit.par n.w <- n.store * unit.w unit.a <- 1 if (context) res <- .C("cDPecoX", as.double(tmp$d), as.integer(tmp$n.samp), as.integer(n.draws), as.integer(burnin), as.integer(thin+1), as.integer(verbose), as.integer(nu0), as.double(tau0), as.double(mu0), as.double(S0), as.double(alpha), as.integer(alpha.update), as.double(a0), as.double(b0), as.integer(tmp$survey.yes), as.integer(tmp$survey.samp), as.double(tmp$survey.data), as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1), as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2), as.double(W1min), as.double(W1max), as.integer(parameter), as.integer(grid), pdSMu0=double(n.par), pdSMu1=double(n.par), pdSMu2=double(n.par), pdSSig00=double(n.par), pdSSig01=double(n.par), pdSSig02=double(n.par), pdSSig11=double(n.par), pdSSig12=double(n.par), pdSSig22=double(n.par), pdSW1=double(n.w), pdSW2=double(n.w), pdSa=double(n.store), pdSn=integer(n.store), PACKAGE="eco") else res <- .C("cDPeco", as.double(tmp$d), as.integer(tmp$n.samp), as.integer(n.draws), as.integer(burnin), as.integer(thin+1), as.integer(verbose), as.integer(nu0), as.double(tau0), as.double(mu0), as.double(S0), as.double(alpha), as.integer(alpha.update), as.double(a0), as.double(b0), as.integer(tmp$survey.yes), as.integer(tmp$survey.samp), as.double(tmp$survey.data), as.integer(tmp$X1type), as.integer(tmp$samp.X1), as.double(tmp$X1.W1), as.integer(tmp$X0type), as.integer(tmp$samp.X0), as.double(tmp$X0.W2), as.double(W1min), as.double(W1max), as.integer(parameter), as.integer(grid), pdSMu0=double(n.par), pdSMu1=double(n.par), pdSSig00=double(n.par), pdSSig01=double(n.par), pdSSig11=double(n.par), pdSW1=double(n.w), pdSW2=double(n.w), pdSa=double(n.store), pdSn=integer(n.store), PACKAGE="eco") ## output W1.post <- matrix(res$pdSW1, n.store, unit.w, byrow=TRUE)[,tmp$order.old] W2.post <- matrix(res$pdSW2, n.store, unit.w, byrow=TRUE)[,tmp$order.old] W <- array(rbind(W1.post, W2.post), c(n.store, 2, unit.w)) colnames(W) <- c("W1", "W2") res.out <- list(call = mf, X = X, Y = Y, N = N, W = W, Wmin = bdd$Wmin[,1,], Wmax = bdd$Wmax[,1,], burin = burnin, thin = thin, nu0 = nu0, tau0 = tau0, mu0 = mu0, a0 = a0, b0 = b0, S0 = S0) ## optional outputs if (parameter){ if (context) { mu1.post <- matrix(res$pdSMu0, n.store, unit.par, byrow=TRUE)[,tmp$order.old] mu2.post <- matrix(res$pdSMu1, n.store, unit.par, byrow=TRUE)[,tmp$order.old] mu3.post <- matrix(res$pdSMu2, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma11.post <- matrix(res$pdSSig00, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma12.post <- matrix(res$pdSSig01, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma13.post <- matrix(res$pdSSig02, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma23.post <- matrix(res$pdSSig12, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma22.post <- matrix(res$pdSSig11, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma33.post <- matrix(res$pdSSig22, n.store, unit.par, byrow=TRUE)[,tmp$order.old] res.out$mu <- array(rbind(mu1.post, mu2.post, mu3.post), dim=c(n.store, 3, unit.par), dimnames=list(1:n.store, c("mu1", "mu2", "mu3"), 1:unit.par)) res.out$Sigma <- array(rbind(Sigma11.post, Sigma12.post, Sigma13.post, Sigma22.post, Sigma23.post, Sigma33.post), dim=c(n.store, 6, unit.par), dimnames=list(1:n.store, c("Sigma11", "Sigma12", "Sigma13", "Sigma22", "Sigma23", "Sigma33"), 1:unit.par)) } else { mu1.post <- matrix(res$pdSMu0, n.store, unit.par, 
byrow=TRUE)[,tmp$order.old] mu2.post <- matrix(res$pdSMu1, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma11.post <- matrix(res$pdSSig00, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma12.post <- matrix(res$pdSSig01, n.store, unit.par, byrow=TRUE)[,tmp$order.old] Sigma22.post <- matrix(res$pdSSig11, n.store, unit.par, byrow=TRUE)[,tmp$order.old] res.out$mu <- array(rbind(mu1.post, mu2.post), dim=c(n.store, 2, unit.par), dimnames=list(1:n.store, c("mu1", "mu2"), 1:unit.par)) res.out$Sigma <- array(rbind(Sigma11.post, Sigma12.post, Sigma22.post), dim=c(n.store, 3, unit.par), dimnames=list(1:n.store, c("Sigma11", "Sigma12", "Sigma22"), 1:unit.par)) } if (alpha.update) res.out$alpha <- matrix(res$pdSa, n.store, unit.a, byrow=TRUE) else res.out$alpha <- alpha res.out$nstar <- matrix(res$pdSn, n.store, unit.a, byrow=TRUE) } if (context) class(res.out) <- c("ecoNPX", "ecoNP", "eco") else class(res.out) <- c("ecoNP", "eco") return(res.out) } eco/R/eminfo.R0000644000176200001440000004016414330337373012634 0ustar liggesusers##logit and invlogit transformation logit <- function(X) { Y<-log(X/(1-X)) Y } invlogit <-function(Y) { X<-exp(Y)/(1+exp(Y)) X } ####assuming theta.em ##2 d: mu1, mu2, sig1, sig2, r12 ##3 d: mu3, mu1, mu2, sig3, sig1, sig2, r13, r23, r12 param.pack<-function(theta.em, fix.rho=FALSE,r12=0, dim) { mu<-rep(0, dim) Sig<-matrix(0,dim, dim) mu<-theta.em[1:dim] for (i in 1:dim) Sig[i,i]<-theta.em[dim+i] if (!fix.rho) { Sig[1,2]<-Sig[2,1]<-theta.em[2*dim+1]*sqrt(Sig[1,1]*Sig[2,2]) if (dim==3) { Sig[1,3]<-Sig[3,1]<-theta.em[2*dim+2]*sqrt(Sig[1,1]*Sig[3,3]) Sig[2,3]<-Sig[3,2]<-theta.em[2*dim+3]*sqrt(Sig[2,2]*Sig[3,3]) } } if (fix.rho) { if (dim==2) Sig[1,2]<-Sig[2,1]<-r12*sqrt(Sig[1,1]*Sig[2,2]) if (dim==3) { Sig[1,2]<-Sig[2,1]<-theta.em[2*dim+1]*sqrt(Sig[1,1]*Sig[2,2]) Sig[1,3]<-Sig[3,1]<-theta.em[2*dim+2]*sqrt(Sig[1,1]*Sig[3,3]) Sig[2,3]<-Sig[3,2]<-r12*sqrt(Sig[2,2]*Sig[3,3]) } } return(list(mu=mu, Sigma=Sig)) } ## transformation of BVN parameter into ## Fisher scale or unit scale ## in 2 D, mu1, mu2, sigma1, sigma2, r12 ## in 3 D, mu3, mu1, mu2, sigma3, sigma1, sigma2, sigma31, sigma32, sigma12 param.trans <-function(X, transformation="Fisher") { p<-length(X) Y<-rep(0,p) if (transformation=="Fisher") { if (p<=5) { Y[1:2]<-X[1:2] Y[3:4]<-log(X[3:4]) if (p==5) Y[5]<-0.5*log((1+X[5])/(1-X[5])) } if (p>5) { Y[1:3]<-X[1:3] Y[4:6]<-log(X[4:6]) Y[7:8]<-0.5*log((1+X[7:8])/(1-X[7:8])) if (p==9) Y[9]<-0.5*log((1+X[9])/(1-X[9])) } } if (transformation=="unitscale") { if (p<=5) { Y[1:2] <- invlogit(X[1:2]) Y[3:4] <- X[3:4]*exp(2*X[1:2])/(1+exp(X[1:2]))^4 if (p==5) Y[5] <- X[5] } if (p>5) { Y[1:3]<-invlogit(X[1:3]) Y[4:6]<-X[4:6]*exp(2*X[4:6])/(1+exp(X[4:6]))^4 Y[7:8]<-X[7:8] if (p==9) Y[9]<-X[9] } } return(Y) } vec<-function(mat) { v<-as.vector(mat, mode="any") v } tr<-function(mat) { trace<-sum(diag(mat)) trace } ## I_{com} ## the gradient function for multivariate normal function #du.theta and dSig.theta are the first derivative of mu and Sigma #with respect to theta #du.theta[n.u, n.theta] #dSig.theta[n.u, n.u, n.theta] d1st.mvn<-function(mu,Sigma, fix.rho=FALSE) { #r12, r13,r23 are internal here, # r12 doesn't correspond to cor(w1, w2) in 3d case (intead, r12=>cor(W1,x) d<-length(mu) p<-d+d+d*(d-1)/2 u1<-mu[1] u2<-mu[2] s1<-Sigma[1,1] s2<-Sigma[2,2] r12<-Sigma[1,2]/sqrt(s1*s2) if (d==3) { u3<-mu[3] s3<-Sigma[3,3] r13<-Sigma[1,3]/sqrt(s1*s3) r23<-Sigma[2,3]/sqrt(s2*s3) } if (fix.rho) p<-p-1 du.theta<-matrix(0,d,p) for (j in 1:d) du.theta[j,j]<-1 dSig.theta<-array(0,c(d,d,p)) for (i in 
1:d) dSig.theta[i,i,d+i]<-1 dSig.theta[1,2,d+1]<-dSig.theta[2,1,d+1]<-1/2*s1^(-1/2)*s2^(1/2)*r12 dSig.theta[1,2,d+2]<-dSig.theta[2,1,d+2]<-1/2*s2^(-1/2)*s1^(1/2)*r12 if (d==3) { dSig.theta[1,3,d+1]<-dSig.theta[3,1,d+1]<-1/2*s1^(-1/2)*s3^(1/2)*r13 dSig.theta[1,3,d+3]<-dSig.theta[3,1,d+3]<-1/2*s3^(-1/2)*s1^(1/2)*r13 dSig.theta[2,3,d+2]<-dSig.theta[3,2,d+2]<-1/2*s2^(-1/2)*s3^(1/2)*r23 dSig.theta[2,3,d+3]<-dSig.theta[3,2,d+3]<-1/2*s3^(-1/2)*s2^(1/2)*r23 } if (!fix.rho) { dSig.theta[1,2,2*d+1]<-dSig.theta[2,1,2*d+1]<-sqrt(s1*s2) if (d==3) { dSig.theta[1,3,2*d+2]<-dSig.theta[3,1,2*d+2]<-sqrt(s1*s3) dSig.theta[2,3,2*d+3]<-dSig.theta[3,2,2*d+3]<-sqrt(s2*s3) } } if (fix.rho) { if (d==3) { dSig.theta[1,3,2*d+1]<-dSig.theta[3,1,2*d+1]<-sqrt(s1*s3) dSig.theta[2,3,2*d+2]<-dSig.theta[3,2,2*d+2]<-sqrt(s2*s3) } } return(list(du.theta=du.theta, dSig.theta=dSig.theta)) } d2nd.mvn<-function(mu,Sigma, fix.rho=FALSE) { #r12, r13,r23 are internal here, # r12 doesn't correspond to cor(w1, w2) in 3d case (intead, r12=>cor(W1,x) d<-length(mu) p<-d+d+d*(d-1)/2 u1<-mu[1] u2<-mu[2] s1<-Sigma[1,1] s2<-Sigma[2,2] r12<-Sigma[1,2]/sqrt(s1*s2) if (d==3) { u3<-mu[3] s3<-Sigma[3,3] r13<-Sigma[1,3]/sqrt(s1*s3) r23<-Sigma[2,3]/sqrt(s2*s3) } if (fix.rho) p<-p-1 ddu.theta<-array(0,c(d,p,p)) ddSig.theta<-array(0,c(d,d,p,p)) ddSig.theta[1,2,d+1,d+1]<-ddSig.theta[2,1,d+1,d+1]<- -1/4*s1^(-3/2)*s2^(1/2)*r12 ddSig.theta[1,2,d+1,d+2]<-ddSig.theta[2,1,d+1,d+2]<- 1/4*s1^(-1/2)*s2^(-1/2)*r12 ddSig.theta[1,2,d+2,d+2]<-ddSig.theta[2,1,d+2,d+2]<- -1/4*s1^(1/2)*s2^(-3/2)*r12 if (d==3) { ddSig.theta[1,3,d+1,d+1]<-ddSig.theta[3,1,d+1,d+1]<- -1/4*s1^(-3/2)*s3^(1/2)*r13 ddSig.theta[1,3,d+1,d+3]<-ddSig.theta[3,1,d+1,d+3]<- 1/4*s1^(-1/2)*s3^(-1/2)*r13 ddSig.theta[2,3,d+2,d+2]<-ddSig.theta[3,2,d+2,d+2]<- -1/4*s2^(-3/2)*s3^(1/2)*r23 ddSig.theta[2,3,d+2,d+3]<-ddSig.theta[3,2,d+2,d+3]<- 1/4*s2^(-1/2)*s3^(-1/2)*r23 ddSig.theta[1,3,d+3,d+3]<-ddSig.theta[3,1,d+3,d+3]<- -1/4*s1^(1/2)*s3^(-3/2)*r13 ddSig.theta[2,3,d+3,d+3]<-ddSig.theta[3,2,d+3,d+3]<- -1/4*s2^(1/2)*s3^(-3/2)*r23 } if (!fix.rho) { ddSig.theta[1,2,d+1,2*d+1]<-ddSig.theta[2,1,d+1,2*d+1]<- 1/2*s1^(-1/2)*s2^(1/2) ddSig.theta[1,2,d+2,2*d+1]<-ddSig.theta[2,1,d+2,2*d+1]<- 1/2*s1^(1/2)*s2^(-1/2) if (d==3) { ddSig.theta[1,3,d+1,2*d+2]<-ddSig.theta[3,1,d+1,2*d+2]<- 1/2*s1^(-1/2)*s3^(1/2) ddSig.theta[2,3,d+2,2*d+3]<-ddSig.theta[3,2,d+2,2*d+3]<- 1/2*s2^(-1/2)*s3^(1/2) ddSig.theta[1,3,d+3,2*d+2]<-ddSig.theta[3,1,d+3,2*d+2]<- 1/2*s1^(1/2)*s3^(-1/2) ddSig.theta[2,3,d+3,2*d+3]<-ddSig.theta[3,2,d+3,2*d+3]<- 1/2*s2^(1/2)*s3^(-1/2) } } if (fix.rho) { if (d==3) { ddSig.theta[1,2,d+1,2*d+1]<-ddSig.theta[2,1,d+1,2*d+1]<- 1/2*s1^(-1/2)*s3^(1/2) ddSig.theta[2,3,d+2,2*d+2]<-ddSig.theta[3,2,d+2,2*d+2]<- 1/2*s2^(-1/2)*s3^(1/2) ddSig.theta[1,3,d+3,2*d+1]<-ddSig.theta[3,1,d+3,2*d+1]<- 1/2*s1^(1/2)*s3^(-1/2) ddSig.theta[2,3,d+3,2*d+2]<-ddSig.theta[3,2,d+3,2*d+2]<- 1/2*s2^(1/2)*s3^(-1/2) } } for (i in 1:(p-1)) for (j in (i+1):p) { ddSig.theta[,,j,i]<-ddSig.theta[,,i,j] ddu.theta[,j,i]<-ddu.theta[,i,j] } return(list(ddu.theta=ddu.theta, ddSig.theta=ddSig.theta)) } ##assuming the order of sufficient statistics ## 2d, mean(W1), mean(W2), mean(W1^2) mean(W2^2), mean(W1W2) ## 3d, mean(X), mean(W1), mean(W2), mean(X^2),mean(W1^2) mean(W2^2), ## mean(XW1), mean(XW2), mean(W1W2) suff<-function(mu, suff.stat,n) { d<-length(mu) p<-d+d+d*(d-1)/2 u1<-mu[1] u2<-mu[2] if (d==3) u3<-mu[3] S1<-n*suff.stat[1] S2<-n*suff.stat[2] S11<-n*suff.stat[d+1] S22<-n*suff.stat[d+2] S12<-n*suff.stat[2*d+1] if (d==3) { S3<-n*suff.stat[d] 
S33<-n*suff.stat[2*d] S13<-n*suff.stat[2*d+2] S23<-n*suff.stat[2*d+3] } Vv<-rep(0,d) Vv[1]<-S1-n*u1 Vv[2]<-S2-n*u2 if (d==3) Vv[3]<-S3-n*u3 Ss<-matrix(0,d,d) Ss[1,1]<-S11-2*S1*u1+n*u1^2 Ss[2,2]<-S22-2*S2*u2+n*u2^2 Ss[1,2]<-Ss[2,1]<-S12-S1*u2-S2*u1+n*u1*u2 if (d==3) { Ss[3,3]<-S33-2*S3*u3+n*u3^2 Ss[1,3]<-Ss[3,1]<-S13-S1*u3-S3*u1+n*u1*u3 Ss[2,3]<-Ss[3,2]<-S23-S3*u2-S2*u3+n*u2*u3 } return(list(Ss=Ss, Vv=Vv)) } #du.theta and dSig.theta are the second derivative of mu and Sigma #with respect to theta #ddu.theta[n.u, n.theta, n.theta] #ddSig.theta[n.u, n.u, n.theta, n.theta] ##comput the gradient vector (expected first derivatives) for MVN ##not actually used here. Dcom.mvn<-function(mu, Sigma, suff.stat,n, fix.rho=FALSE) { d<-dim(Sigma)[1] p<-d*2+0.5*d*(d-1) if (fix.rho) { p<-p-1 } Dcom<-rep(0,p) invSigma<-solve(Sigma) temp<-suff(mu, suff.stat, n) Ss<-temp$Ss Vv<-temp$Vv temp<-d1st.mvn(mu=mu, Sigma=Sigma, fix.rho=fix.rho) du.theta<-temp$du.theta dSig.theta<-temp$dSig.theta for (i in 1:p) Dcom[i]<- -n/2*t(vec(invSigma))%*%vec(dSig.theta[,,i])+ 0.5*tr(invSigma%*%dSig.theta[,,i]%*%invSigma%*%Ss)+ t(du.theta[,i])%*%invSigma%*%Vv Dcom } #compute the information matrix of MVN # -1*second derivatives Icom.mvn<-function(mu, Sigma, suff.stat,n, fix.rho=FALSE) { d<-dim(Sigma)[1] p<-d*2+1/2*d*(d-1) if (fix.rho) { p<-p-1 } Icom<-matrix(0,p,p) invSigma<-solve(Sigma) temp<-suff(mu, suff.stat, n) Ss<-temp$Ss Vv<-temp$Vv temp<-d1st.mvn(mu, Sigma, fix.rho) du.theta<-temp$du.theta dSig.theta<-temp$dSig.theta temp<-d2nd.mvn(mu, Sigma, fix.rho) ddu.theta<-temp$ddu.theta ddSig.theta<-temp$ddSig.theta for (i in 1:p) { dinvSig.theta.i<- -invSigma%*%dSig.theta[,,i]%*%invSigma for (j in 1:i) { dinvSig.theta.j<- -invSigma%*%dSig.theta[,,j]%*%invSigma ddinvSig.theta.ij<- -dinvSig.theta.j%*%dSig.theta[,,i]%*%invSigma -invSigma%*%ddSig.theta[,,i,j]%*%invSigma-invSigma%*%dSig.theta[,,i]%*%dinvSig.theta.j a1<- -n/2*(t(vec(dinvSig.theta.j))%*%vec(dSig.theta[,,i]) + t(vec(invSigma))%*%vec(ddSig.theta[,,i,j])) a2<- t(du.theta[,j])%*%dinvSig.theta.i%*%Vv - 0.5*tr(ddinvSig.theta.ij%*%Ss) a3<- t(ddu.theta[,i,j])%*%invSigma%*%Vv + t(du.theta[,i])%*%dinvSig.theta.j%*%Vv - n*t(du.theta[,i])%*%invSigma%*%du.theta[,j] Icom[i,j]<-a1+a2+a3 if (i!=j) Icom[j,i]<-Icom[i,j] } } -Icom } ###compute the information matrix for various parameter transformation ### "Fisher" transformation (variance stablization?) 
### unit scale transformation: first order approximation of mean and var, rho ##express T1 and T2 in more general form Icom.transform<-function(Icom, Dvec, theta, transformation="Fisher", context, fix.rho) { if (!context) { mu<-theta[1:2] sigma<-theta[3:4] rho<-theta[5] } if (context) { mu<-theta[1:3] # x,w1,w2 sigma<-theta[4:6] #x, w1, w2 rho<-theta[7:9] #r_xw1, r_xw2, r_w1w2 } ##T1: d(theta)/d(f(theta)), theta is the MVN parameterization ##T2, d2(theta)/d(f(theta))(d(f(theta))') ### transformation=Fisher, Icom_normal==>Icom_fisher Imat<- -Icom n.par<-dim(Imat)[1] if (transformation=="Fisher") { if (!context) { T1<-c(1,1,sigma[1], sigma[2]) T2<-matrix(0, n.par^2, n.par) T2[(2*n.par+3), 3]<-sigma[1] T2[(3*n.par+4), 4]<-sigma[2] if (!fix.rho) { T1<-c(T1, (1-(rho[1]^2))) T2[(4*n.par+5),5]<- -2*rho[1]*(1-rho[1]^2) } T1<-diag(T1) } if (context) { T1<-c(1,1,1,sigma[1:3],(1-(rho[1:2]^2))) T2<-matrix(0, n.par^2, n.par) T2[(3*n.par+4), 4]<-sigma[1] T2[(4*n.par+5), 5]<-sigma[2] T2[(5*n.par+6), 6]<-sigma[3] T2[(6*n.par+7),7]<- -2*rho[1]*(1-rho[1]^2) T2[(7*n.par+8),8]<- -2*rho[2]*(1-rho[2]^2) if (!fix.rho) { T1<-c(T1, (1-(rho[3]^2))) T2[(8*n.par+9),9]<- -2*rho[3]*(1-rho[3]^2) } T1<-diag(T1) } } ### transformation=unitscale, Icom_normal==>Icom_unitscale if (transformation=="unitscale") { T1<-matrix(0,n.par,n.par) T1[1,1]<-exp(-mu[1])*(1+exp(mu[1]))^2 T1[1,3]<-1/(sigma[1]*2*exp(2*mu[1])*(1+exp(mu[1]))^(-4)*(1-2*(1+exp(mu[1]))^(-1))) T1[2,2]<-exp(-mu[2])*(1+exp(mu[2]))^2 T1[2,4]<-1/(sigma[2]*2*exp(2*mu[2])*(1+exp(mu[2]))^(-4)*(1-2*(1+exp(mu[2]))^(-1))) T1[3,3]<-2*sigma[1]^0.5*(1+exp(mu[1]))^4*exp(-2*mu[1]) T1[4,4]<-2*sigma[2]^0.5*(1+exp(mu[2]))^4*exp(-2*mu[2]) # T2<-matrix(0, n.par^2, n.par) # T2[1,1]<- # T2[(1*n.par+2), (1*n.par+2)]<- ##compute T1 and T2 } Icom.tran<-matrix(NA, n.par, n.par) Icom.tran<-T1%*%Imat%*%t(T1) temp1<-matrix(0,n.par,n.par) for (i in 1:n.par) for (j in 1:n.par) temp1[i,j]<- Dvec%*%T2[((i-1)*n.par+(1:n.par)),j] Icom.tran<-Icom.tran+temp1 return(-Icom.tran) } ecoINFO<-function(theta.em, suff.stat, DM, context=TRUE, fix.rho=FALSE, sem=TRUE, r12=0, n) { if (context) fix.rho<-FALSE ndim<-2 if (context) ndim<-3 n.var<-2*ndim+ ndim*(ndim-1)/2 n.par<-n.var if (context) { n.par<-n.var-2 } if (!context & fix.rho) n.par<-n.par-1 mu<-param.pack(theta.em, fix.rho=fix.rho, r12=r12, dim=ndim)$mu Sigma<-param.pack(theta.em, fix.rho=fix.rho, r12=r12, dim=ndim)$Sigma theta.fisher<-param.trans(theta.em) Icom<-Icom.mvn(mu=mu, Sigma=Sigma, fix.rho=fix.rho, suff.stat=suff.stat, n=n) Dvec<-Dcom.mvn(mu=mu, Sigma=Sigma, fix.rho=fix.rho, suff.stat=suff.stat, n=n) theta.icom<-theta.em if (fix.rho) theta.icom<-c(theta.em[-n.var], r12) Icom.fisher<-Icom.transform(Icom=Icom, Dvec=Dvec, theta=theta.icom, transformation="Fisher", context=context, fix.rho=fix.rho) Vcom.fisher <- solve(Icom.fisher) if (!context) { dV <- Vcom.fisher%*%DM%*%solve(diag(1,n.par)-DM) Vobs.fisher <- Vcom.fisher+dV } ###verify with the parameters. 
###repartition Icom if (context & !fix.rho) { index<-c(1,4,2,3,5,6,7,8,9) Itemp<-Icom.fisher[index,index] invItemp<-solve(Itemp) A1<-invItemp[1:2,1:2] A2<-invItemp[1:2,3:9] A3<-invItemp[3:9, 1:2] A4<-invItemp[3:9, 3:9] dV1<-(A4-t(A2)%*%solve(A1)%*%A2)%*%DM%*%solve(diag(rep(1,7))-DM) dV<-matrix(0,9,9) dV[3:9,3:9]<-dV1 Vobs.fisher<-invItemp+dV index2<-c(1,3,4,2,5,6,7,8,9) Vobs.fisher<-Vobs.fisher[index2,index2] } Iobs.fisher <- solve(Vobs.fisher) ##transform Iobs.fisher to Iobs via delta method ##V(theta)=d(fisher^(-1))V(bvn.trans(theta))d(fisher^(-1))' if (!context) { grad.invfisher <- c(1,1, exp(theta.fisher[3:4])) if (! fix.rho) grad.invfisher <- c(grad.invfisher,4*exp(2*theta.fisher[5])/(exp(2*theta.fisher[5])+1)^2) } if (context) { grad.invfisher <- c(1,1, 1, exp(theta.fisher[4:6])) grad.invfisher <- c(grad.invfisher,4*exp(2*theta.fisher[7:8])/(exp(2*theta.fisher[7:8])+1)^2) if (!fix.rho) grad.invfisher <- c(grad.invfisher,4*exp(2*theta.fisher[9])/(exp(2*theta.fisher[9])+1)^2) } Vobs<-diag(grad.invfisher)%*%Vobs.fisher%*%diag(grad.invfisher) Iobs<-solve(Vobs) ## obtain a symmetric Cov matrix Vobs.sym <- 0.5*(Vobs+t(Vobs)) ###unitscale transformation #theta.unit<-param.trans(theta.em, transformation="unitscale") #Icom.unit<-Icom.transform(Icom, Dvec,theta.em, transformation="unitscale") #Vobs.unit<-delta method if (!context) { names(mu)<-c("W1","W2") colnames(Sigma)<-rownames(Sigma)<-c("W1","W2") names(suff.stat)<-c("S1","S2","S11","S22","S12") if (!fix.rho) colnames(DM)<-rownames(DM)<-c("u1","u2","s1","s2","r12") if (fix.rho) colnames(DM)<-rownames(DM)<-c("u1","u2","s1","s2") } if (context) { names(mu)<-c("X","W1","W2") colnames(Sigma)<-rownames(Sigma)<-c("X","W1","W2") names(suff.stat)<-c("Sx","S1","S2","Sxx","S11","S22","Sx1","Sx2","S12") if (!fix.rho) { colnames(DM)<-rownames(DM)<-c("u1","u2","s1","s2","r1x","r2x","r12") colnames(Icom)<-rownames(Icom)<-c("ux","u1","u2","sx","s1","s2","r1x","r2x","r12") } if (fix.rho) { colnames(DM)<-rownames(DM)<-c("u1","u2","s1","s2","r1x","r2x") colnames(Icom)<-rownames(Icom)<-c("ux","u1","u2","sx","s1","s2","r1x","r2x") } } colnames(Iobs)<-colnames(Iobs.fisher)<-colnames(Icom.fisher)<-colnames(Vobs)<-colnames(Vobs.sym)<-colnames(Icom) rownames(Iobs)<-rownames(Iobs.fisher)<-rownames(Icom.fisher)<-rownames(Vobs)<-rownames(Vobs.sym)<-rownames(Icom) res.out<-list(mu=mu, Sigma=Sigma, suff.stat=suff.stat, context=context, fix.rho=fix.rho) res.out$DM<-DM res.out$Icom<-Icom res.out$Iobs<-Iobs res.out$Fmis<-1-diag(Iobs)/diag(Icom) res.out$Vcom<-Vcom<-solve(Icom) res.out$Vobs.original<-Vobs res.out$VFmis<-1-diag(Vcom)/diag(Vobs) res.out$Vobs<-Vobs.sym res.out$Icom.trans<-Icom.fisher res.out$Iobs.trans<-Iobs.fisher res.out$Fmis.trans<-1-diag(Iobs.fisher)/diag(Icom.fisher) res.out$Imiss<-res.out$Icom-res.out$Iobs res.out$Ieigen<-eigen(res.out$Imiss)[[1]][1] res.out } eco/R/ecoCV.R0000644000176200001440000000620014330337373012347 0ustar liggesusersecoX <- function(formula, Z, supplement = NULL, data = parent.frame(), nu0 = 4, S0 = 10, beta0 = 0, A0 = 100, grid = FALSE, parameter = FALSE, n.draws = 5000, burnin = 0, thin = 5, verbose = TRUE){ ## checking inputs if (burnin >= n.draws) stop("Error: n.draws should be larger than burnin") call <- match.call() ff <- as.formula(paste(call$Y, "~ -1 +", call$X)) if (is.matrix(eval.parent(call$data))) data <- as.data.frame(data) X <- model.matrix(ff, data) Y <- model.response(model.frame(ff, data=data)) ##survey data if (length(supplement) == 0) { survey.samp <- 0 survey.data <- 0 survey.yes<-0 } else { survey.samp <- 
length(supplement[,1]) survey.data <- as.matrix(supplement) survey.yes<-1 } ind<-c(1:length(X)) X1type<-0 X0type<-0 samp.X1<-0 samp.X0<-0 X1.W1<-0 X0.W2<-0 ##Xtype x=1 X1.ind<-ind[along=(X==1)] if (length(X[X!=1])1) { if (!object$context) { param.table[2,]<-sqrt(diag(object$Vobs)) param.table[3,]<-Fmis<-1-diag(object$Iobs)/diag(object$Icom) } else if (object$context && !object$fix.rho) { param.table[2,]<-sqrt(diag(object$Vobs)[c(2,3,5,6,7,8,9)]) param.table[3,]<-Fmis<-(1-diag(object$Iobs)/diag(object$Icom))[c(2,3,5,6,7,8,9)] } else if (object$context && object$fix.rho) { param.table[2,]<-sqrt(diag(object$Vobs)[c(2,3,5,6)]) param.table[3,]<-Fmis<-(1-diag(object$Iobs)/diag(object$Icom))[c(2,3,5,6)] } } rname<-c("ML est.", "std. err.", "frac. missing") rownames(param.table)<-rname[1:n.row] colnames(param.table)<-cname[1:n.col] n.obs <- nrow(object$W) if (is.null(subset)) subset <- 1:n.obs else if (!is.numeric(subset)) stop("Subset should be a numeric vector.") else if (!all(subset %in% c(1:n.obs))) stop("Subset should be any numbers in 1:obs.") table.names<-c("mean", "std.dev", paste(min(CI), "%", sep=" "), paste(max(CI), "%", sep=" ")) W1.mean <- mean(object$W[,1]) W2.mean <- mean(object$W[,2]) W1.sd <- sd(object$W[,1]) W2.sd <- sd(object$W[,2]) # W1.q1 <- W1.mean-1.96*W1.sd # W1.q2 <- W1.mean+1.96*W1.sd # W2.q1 <- W2.mean-1.96*W2.sd # W2.q2 <- W2.mean+1.96*W2.sd W1.q1 <- quantile(object$W[,1],min(CI)/100) W1.q2 <- quantile(object$W[,1],max(CI)/100) W2.q1 <- quantile(object$W[,2],min(CI)/100) W2.q2 <- quantile(object$W[,2],max(CI)/100) agg.table <- rbind(cbind(W1.mean, W1.sd, W1.q1, W1.q2), cbind(W2.mean, W2.sd, W2.q1, W2.q2)) colnames(agg.table) <- table.names rownames(agg.table) <- c("W1", "W2") # if (is.null(object$N)) # N <- rep(1, nrow(object$X)) # else agg.wtable<-NULL if (!is.null(object$N)) { N <- object$N } else { N <- rep(1:n.obs) } weighted.var <- function(x, w) { return(sum(w * (x - weighted.mean(x,w))^2)/((length(x)-1)*mean(w))) } W1.mean <- weighted.mean(object$W[,1], object$X*N) W2.mean <- weighted.mean(object$W[,2], (1-object$X)*N) W1.sd <- weighted.var(object$W[,1], object$X*N)^0.5 W2.sd <- weighted.var(object$W[,1], (1-object$X)*N)^0.5 W1.q1 <- W1.mean-1.96*W1.sd W1.q2 <- W1.mean+1.96*W1.sd W2.q1 <- W2.mean-1.96*W2.sd W2.q2 <- W2.mean+1.96*W2.sd # W1.q1 <- quantile(object$W[,1] * object$X*N/mean(object$X*N),min(CI)/100) # W1.q2 <- quantile(object$W[,1] * object$X*N/mean(object$X*N),max(CI)/100) # W2.q1 <- quantile(object$W[,2]*(1-object$X)*N/(mean((1-object$X)*N)),min(CI)/100) # W2.q2 <- quantile(object$W[,2]*(1-object$X)*N/(mean((1-object$X)*N)),max(CI)/100) agg.wtable <- rbind(cbind(W1.mean, W1.sd, W1.q1, W1.q2), cbind(W2.mean, W2.sd, W2.q1, W2.q2)) colnames(agg.wtable) <- table.names rownames(agg.wtable) <- c("W1", "W2") if (units) W.table <- object$W[subset,] else W.table <- NULL ans <- list(call = object$call, iters.sem = object$iters.sem, iters.em = object$iters.em, epsilon = object$epsilon, sem = object$sem, fix.rho = object$fix.rho, loglik = object$loglik, rho=object$rho, param.table = param.table, W.table = W.table, agg.wtable = agg.wtable, agg.table=agg.table, n.obs = n.obs) # if (object$fix.rho) # ans$rho<-object$rho class(ans) <-"summary.ecoML" return(ans) } eco/R/coef.eco.R0000644000176200001440000000041714330337373013035 0ustar liggesusers#' @export coef.eco <- function(object, subset = NULL, ...) { mu <- object$mu if (is.null(subset)) subset <- 1:nrow(mu) else if (max(subset) > nrow(mu)) stop(paste("invalid input for `subset.' 
only", nrow(mu), "draws are stored.")) return(mu[subset,]) } eco/R/predict.eco.R0000644000176200001440000000652514330337373013561 0ustar liggesusers#' Out-of-Sample Posterior Prediction under the Parametric Bayesian Model for #' Ecological Inference in 2x2 Tables #' #' Obtains out-of-sample posterior predictions under the fitted parametric #' Bayesian model for ecological inference. \code{predict} method for class #' \code{eco} and \code{ecoX}. #' #' The posterior predictive values are computed using the Monte Carlo sample #' stored in the \code{eco} output (or other sample if \code{newdraw} is #' specified). Given each Monte Carlo sample of the parameters, we sample the #' vector-valued latent variable from the appropriate multivariate Normal #' distribution. Then, we apply the inverse logit transformation to obtain the #' predictive values of proportions, \eqn{W}. The computation may be slow #' (especially for the nonparametric model) if a large Monte Carlo sample of #' the model parameters is used. In either case, setting \code{verbose = TRUE} #' may be helpful in monitoring the progress of the code. #' #' @aliases predict.eco #' @param object An output object from \code{eco} or \code{ecoNP}. #' @param newdraw An optional list containing two matrices (or three #' dimensional arrays for the nonparametric model) of MCMC draws of \eqn{\mu} #' and \eqn{\Sigma}. Those elements should be named as \code{mu} and #' \code{Sigma}, respectively. The default is the original MCMC draws stored in #' \code{object}. #' @param subset A scalar or numerical vector specifying the row number(s) of #' \code{mu} and \code{Sigma} in the output object from \code{eco}. If #' specified, the posterior draws of parameters for those rows are used for #' posterior prediction. The default is \code{NULL} where all the posterior #' draws are used. #' @param verbose logical. If \code{TRUE}, helpful messages along with a #' progress report on the Monte Carlo sampling from the posterior predictive #' distributions are printed on the screen. The default is \code{FALSE}. #' @param ... further arguments passed to or from other methods. #' @return \code{predict.eco} yields a matrix of class \code{predict.eco} #' containing the Monte Carlo sample from the posterior predictive distribution #' of inner cells of ecological tables. \code{summary.predict.eco} will #' summarize the output, and \code{print.summary.predict.eco} will print the #' summary. 
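#' @examples
#' ## A minimal sketch (not run), assuming a parametric fit from eco() with
#' ## parameter = TRUE so that the posterior draws of mu and Sigma are
#' ## available; the data set and the number of draws are illustrative only.
#' \dontrun{
#' data(reg)
#' res <- eco(Y ~ X, data = reg, parameter = TRUE, n.draws = 200)
#' ## posterior predictive draws of (W1, W2)
#' out <- predict(res, verbose = TRUE)
#' summary(out)
#' }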
#' @seealso \code{eco}, \code{predict.ecoNP} #' @keywords methods #' @export predict.eco <- function(object, newdraw = NULL, subset = NULL, verbose = FALSE, ...){ if (is.null(newdraw) && is.null(object$mu)) stop("Posterior draws of mu and Sigma must be supplied") else if (!is.null(newdraw)){ if (is.null(newdraw$mu) && is.null(newdraw$Sigma)) stop("Posterior draws of both mu and Sigma must be supplied.") object <- newdraw } mu <- coef(object, subset = subset) n.draws <- nrow(mu) p <- ncol(mu) Sigma <- varcov(object, subset = subset) Wstar <- matrix(NA, nrow=n.draws, ncol=p) tmp <- floor(n.draws/10) inc <- 1 for (i in 1:n.draws) { Wstar[i,] <- mvrnorm(1, mu = mu[i,], Sigma = Sigma[,,i]) if (i == inc*tmp & verbose) { cat("", inc*10, "percent done.\n") inc <- inc + 1 } } res <- apply(Wstar, 2, invlogit) if (ncol(res) == 2) colnames(res) <- c("W1", "W2") else # this is called from predict.ecoX colnames(res) <- c("W1", "W2", "X") class(res) <- c("predict.eco", "matrix") return(res) } eco/R/forgnlit30c.R0000644000176200001440000000240614330337373013506 0ustar liggesusers #' Foreign-born literacy in 1930, County Level #' #' This data set contains, on a county level, the proportion of white residents #' ten years and older who are foreign born, and the proportion of those #' residents who are literate. Data come from the 1930 census and were first #' analyzed by Robinson (1950). Counties with fewer than 100 foreign born #' residents are dropped. #' #' #' @name forgnlit30c #' @docType data #' @format A data frame containing 6 variables and 1976 observations #' \tabular{lll}{ X \tab numeric \tab proportion of the white population at #' least 10 years of age that is foreign born \cr Y \tab numeric \tab #' proportion of the white population at least 10 years of age that is #' illiterate \cr W1 \tab numeric \tab proportion of the foreign-born white #' population at least 10 years of age that is illiterate \cr W2 \tab numeric #' \tab proportion of the native-born white population at least 10 years of age #' that is illiterate \cr state \tab numeric \tab the ICPSR state code \cr #' county \tab numeric \tab the ICPSR (within state) county code } #' @references Robinson, W.S. (1950). ``Ecological Correlations and the #' Behavior of Individuals.'' \emph{American Sociological Review}, vol. 15, #' pp.351-357. 
#' @keywords datasets NULL eco/MD50000644000176200001440000001117014331034742011331 0ustar liggesusers083360a0656af18c115d1fc1f7a0a2de *ChangeLog 058d4cae2d4f9574a7575bd9802161ab *DESCRIPTION 3826eefd9729ffe38d147eded95845a5 *NAMESPACE b1ad9b04dac0acdebed1fb5bd7680863 *R/Qfun.R b74ced70c45cdb899135650f263f04fe *R/census.R 19b54ec2f8a821715be3881948ab4dfd *R/checkdata.R 8e1ce138cb5879b65ed644df37619f79 *R/coef.eco.R d476b8612bdc8dd9e2268b6cefea3744 *R/coef.ecoNP.R b579481175dae1a52dcfd269a6211926 *R/eco.R c7cc9e7d929fb6426b2ac41a2a4e4a3d *R/ecoBD.R ec74fb120755d764a8c07ef534831b50 *R/ecoCV.R a087e70c8861c238572365918da20f1c *R/ecoNP.R d571c7dcb6ffae9532bff0b392edd3a1 *R/ecoRC.R 2e1d25eb19ed622ed27062927a0ecea0 *R/emeco.R 6c644c7e6e990443dcf79b10722e8180 *R/eminfo.R 0ecadfff0304da67d092fea6ae6e7e9d *R/forgnlit30.R 6460331fb0043c2c77d012e4a0a89830 *R/forgnlit30c.R fedd2983b7afaea7298382918e90115d *R/housep88.R 8b136280b6d870259d087afe9fb8f5c6 *R/logit.R 4d6444d66d75f4bd7aa967fe00d7f0ec *R/onAttach.R 64f21f12d4291a9a88f3bfe18a878acc *R/predict.eco.R a90128611bde8f85bb78a2e0e27fc711 *R/predict.ecoNP.R f60a49a401e9f2671fdebc70b078a31b *R/predict.ecoNPX.R 226ad3f17ed9847664fe728fe0419786 *R/predict.ecoX.R 1546360abc91e1377c62bd1f3e719d88 *R/print.eco.R f5322abd81423ea74add302c931cd954 *R/print.ecoBD.R 97101a04cebc50f7aa508854651b90ee *R/print.ecoML.R 29a4b907ff859e57d265235209e76e52 *R/print.summary.eco.R fa19f3a5c0770517ff5e2b9254bb3ab0 *R/print.summary.ecoML.R 6287e5a9f163a2d5a1e712fad25b6adb *R/print.summary.ecoNP.R dbb5b3c48799e8dea714899267a08c1b *R/print.summary.predict.eco.R c0265cba5df4817d065bc0ea179351ac *R/reg.R 2e00636aa11007af43448c631d8a563e *R/summary.eco.R c600d5312cbe9c3aff11665fc84ff9e6 *R/summary.ecoML.R 7a190a824fb980d16862e1ce0fe9d370 *R/summary.ecoNP.R 5b5ca50b6e6fcb513c0319f594ad14c1 *R/summary.predict.eco.R 2920c955b5ba71697ab8007a239006ee *R/varcov.R 0d0a29d123b6cefc03ac008d319ee66f *R/wallace.R 4999fe2d5c10c20ef7953c519ea4b864 *data/census.txt.gz bab848bf01c09ff663551d74a091aacb *data/forgnlit30.txt.gz 72a68f958f3d09b6bfbc57ac2999c31c *data/forgnlit30c.txt.gz 7232655f0724246d816712ee30d5dea6 *data/housep88.txt.gz 997a61242cc887b3b0e7167b850dc5fd *data/reg.txt.gz 2329fbd925f12169a0b92cbb3bc8863d *data/wallace.txt.gz 04fd5db9f02234ffe0cd7d0cf4e854a5 *inst/CITATION 6fc8d22f45f7da21418d73144daa1e41 *man/Qfun.Rd e07f2b7374c7f1e0b77d5da7c52256ab *man/census.Rd f7d123e6fa8930f753bd17d0cb03154b *man/eco.Rd dd4edfa5468da775b29ac620ad626833 *man/ecoBD.Rd dba91eb6130bdf5f914d9a83ba9db67f *man/ecoML.Rd 3f505d14d82dababd1a2230ccf99c976 *man/ecoNP.Rd 54999c40326f34d218754a9ed8a6361c *man/forgnlit30.Rd 184c21ad3ee98fa39360f322ea754469 *man/forgnlit30c.Rd 4b841a7479914844017958286689bae5 *man/housep88.Rd 2ef1fdb01269b38d9240d73eee78f6ad *man/predict.eco.Rd 22909804d234d2e438fd180924119169 *man/predict.ecoNP.Rd 902aa4848cfe3fdcc3821964a3370a10 *man/predict.ecoNPX.Rd a852dce75d4e817713d256370a21bfed *man/predict.ecoX.Rd 254ef2f9bb07838919d098e5988b708f *man/print.summary.eco.Rd 4f8390034390d7dbc6d022ea715b6f72 *man/print.summary.ecoML.Rd 23bbb7d8f9855ca54c121abc7af55815 *man/print.summary.ecoNP.Rd 169d675de28c92b617271d1eeeb093b6 *man/reg.Rd 26de6a9949e7333264ffda3530cb64ba *man/summary.eco.Rd 62c032deb37c5b6ca84ba6201ffbe78d *man/summary.ecoML.Rd 96b81af30204a81be6b1dfefcea21035 *man/summary.ecoNP.Rd 997791412de9a6a9ebae5a00ca7b4076 *man/varcov.Rd daa4f4fbd64cbdb9c36cc74f71e2c79d *man/wallace.Rd f009e46fcf131d28ea4ead122961b7bd *src/Makevars c56622823e268dff9415da4b417ab0f6 *src/bayes.c 
de17d4ca6e1eadef448d31e5bb278be9 *src/bayes.h 792dbd57c7980dff78d72bebb124cb23 *src/fintegrate.c 6547650d65505a06b847b26c2f0dfcdf *src/fintegrate.h ede9a6737cbcb306b94076da79c5274a *src/gibbsBase.c 806109adb0b5757ad59597f4245bd9b5 *src/gibbsBase2C.c bf196aa4cca45244d5ccc1ffcf6178d5 *src/gibbsBaseRC.c 6c7c6f39b32019aa7e2f2fc84e659f85 *src/gibbsDP.c 7972f84d78125022612a64997e4747ce *src/gibbsEM.c 4762c8cba543b942a7ed97ad8cfed1c0 *src/gibbsXBase.c c3f1df0e568e6529fac22c8fe6ea6fb1 *src/gibbsXDP.c 12f2241c590f9970ec67138cab661dfc *src/gibbsZBase.c 160b0534a7e76de76c312da1fe44ad34 *src/init.c c24852e22728b2f506134dd8221e522f *src/macros.h 912c8dbd398979b9a815f94fd83e6707 *src/preBaseX.c e9b3516efd7c79bcfe753e73f59d7cf5 *src/preDP.c fd2cae0e54c12937ccb2ffb3cb908ca9 *src/preDPX.c e4438e8bd9ac017ae34ebc0e1eb3e3b8 *src/rand.c 9a7e8d0aaa99088d05813349cf590f07 *src/rand.h 28cc06590c870a0b1ab3713f2a951e02 *src/sample.c d32b15707595f439df5fbaabee5b85c3 *src/sample.h 26c41af1bc8b77c72bd4d47d0bb74486 *src/subroutines.c 80400b50a31ad1b68a2c5b594edf0bf8 *src/subroutines.h fb277bf399a12283c95c33f0b990ead8 *src/vector.c 9f7d40b1458a95830ba736c18ab76522 *src/vector.h d3082437c056ac6d8ed00e5a42e94863 *tests/testthat.R 45eaf263e79906011c0a591fbdde8807 *tests/testthat/test-all.R eco/inst/0000755000176200001440000000000014330337373012003 5ustar liggesuserseco/inst/CITATION0000644000176200001440000000137714330350054013137 0ustar liggesuserscitHeader("To cite eco in publications use:") citEntry(entry = "Article", title = "eco: R Package for Ecological Inference in 2 x 2 Tables", author = personList(as.person("Kosuke Imai"), as.person("Ying Lu"), as.person("Aaron Strauss")), journal = "Journal of Statistical Software", year = "2011", volume = "42", number = "5", pages = "1--23", url = "https://doi.org/10.18637/jss.v042.i05", textVersion = paste("Kosuke Imai, Ying Lu, and Aaron Strauss (2011).", "eco: R Package for Ecological Inference in 2 x 2 Tables.", "Journal of Statistical Software, Vol. 42, No. 5, pp. 1-23.", "https://doi.org/10.18637/jss.v042.i05") )